#!/usr/bin/perl # Time-stamp: "2008-05-31 22:20:34 ADT" # See http://www.interglacial.com/rss/ for info on RSS feeds require 5; use strict; use constant DEBUG => $ENV{'MAILTO'} ? 0 : 4; use XML::RSS::SimpleGen 12; my $url = 'http://www.mcsweeneys.net'; rss_new( $url, "McSweeney's", "Timothy McSweeney's Internet Tendency" ); rss_every_other_hour(); rss_webMaster('sburke@cpan.org'); rss_language("en-US"); rss_item_limit(30); rss_self_url( "http://interglacial.com/rss/mcsweeneys.rss" ); rss_generator_url( "http://interglacial.com/rss/mcsweeneys.pl" ); rss_livejournal( 'mcsweeneysrss' ); # Because it's outside the normal system with red dots, it seems: if(0) { get_url('http://www.mcsweeneys.net/links/bush/'); if( m/

Day (\d+):/i) { rss_item("http://www.mcsweeneys.net/links/bush/#$1", "Daily Reason to Dispatch Bush, #$1", ); DEBUG and print "Noting Bush #$1\n"; } else { print "Odd, no Daily Reason to Dispatch Bush!\n"; } } DEBUG and print "Polling the McSweeney's site...\n"; get_url('http://www.mcsweeneys.net/'); m/(.+)/s or m/(.+)/s or die "$0: Can't find end-of-text marker in:\n{{\n$_\n}}\n"; $_ = $1; my $stories; m{(.+)(.+)}s or die "$0: Can't find 5-stories marker in:\n$_\n"; $stories = $1; $_ = $2; my(@items); DEBUG and print "\nToday's content (reddot):\n"; while( m{^[^\cm\cj]*?]+)\">([^\cm\cj]*?)}gsm ) { push @items, ["$url$1", lc($2)]; DEBUG and print " $url$1\n = \L$2\n"; } DEBUG and print "\nNewish content (blackdot):\n"; while( m{^[^\cm\cj]*?]+)\">([^\cm\cj]*?)}gsm ) { push @items, ["$url$1", lc($2)]; DEBUG and print " $url$1\n = \L$2\n"; } DEBUG and print "\n\nScanning the most recent stories:\n"; while( $stories =~ m{^]+)\">(.*?)(.*?)$}gsm ) { push @items, ["$url$1", "«$2» $3"]; DEBUG and print " $url$1\n = «$2» $3\n"; } @items or die "$0: No items!?"; DEBUG and print "\n\n"; my %seen; foreach my $i (@items) { next if $seen{$i->[0]}; ++$seen{$i->[0]}; DEBUG > 1 and print "Item @$i\n"; if($i->[0] =~ m{/$}s or $i->[0] =~ m!/books/future\.html\z!s ) { scan_page_for_title($i); } else { DEBUG and print "Not bothering to scan @$i\n"; } rss_item(@$i); DEBUG and print "\n"; } DEBUG and print "Saving...\n"; rss_save("mcsweeneys.rss", 10); DEBUG and print "Done.\n"; exit; #--------------------------------------------------------------------------- sub scan_page_for_title { my $i = $_[0]; DEBUG and print "* I will try looking at $$i[0] 's title element...\n"; get_url($i->[0]); if( m{(.+)}smi ) { $_ = $1 } s{}{}gs; s{.+}{}smi; my $title; if( m{(.*?)}si ) { $title = $1; $title =~ s/^\s*(?:Timothy )?McSweeney\'s.*?:\s*//s; $title =~ s/\s+$//s; my $orig_title = $title; $title =~ s/\W//g; $title = 'events' if $title eq 'McSweeneysRelatedEventsandVariousBookTourDates'; $title = substr($title,0,35) if length($title) > 35; $title =~ s/^(An)?OpenLetters?to(to|the|my)?//si; next if $title eq 'ReasonstoDispatchBush'; $title = '' if $title eq 'Letters' or $title eq 'Lists' or $title eq 'Sestinas' or $title eq 'DispatchesfromRoyKeseyAnAmericanGuy' or $title eq 'DispatchesfromaPublicLibrarian' or $title eq 'FavoriteSongs' or $title eq 'McSweeneysRecommends' # or $title eq 'FutureMcSweeneysBooks' or $title eq 'ExpertHelpforYourFantasyBaseballFranchise' or $title eq 'InterviewswithPeopleWhoHaveInterest' ; if($title) { DEBUG and print " Title: $title\n"; if( $title eq 'events' ) { $title = ''; } else { #push @$i, $orig_title; splice @$i, 1, 0, $orig_title; # Demote the link-text to being the description } } else { DEBUG and print " No recoverable HTML title from [", m{(.*?)}si ? $1 : "!?!?!?", "]\n"; } } if( !$title and m{

([A-Z][a-z]+) (\d\d+), (20\d\d)

} ) { $title = "$3-$2-$1"; $i->[1] .= " ($title)"; DEBUG and print " Fallback title type 1: $title\n"; } if( !$title and m{>Date: +[A-Z][a-z][a-z], +(\d\d?) +([A-Z][a-z][a-z]) +(20\d\d)\b} ) { $title = "$3-$2-$1"; $i->[1] .= " ($title)"; DEBUG and print " Fallback title type 2: $title\n"; } if( !$title and m{>\s*By\s+[^\n\r]*?\((\d\d?)\/(\d\d?)\/([01]\d)\)}s ) { $title = "20$3-$1-$2"; $i->[1] .= " ($title)"; DEBUG and print " Fallback title type 3: $title\n"; } if( !$title and m{:]+)">}i ) { $title = $1; $title =~ s/\.html?$//s; $i->[1] .= " {$title}"; $title =~ s/\W+//g; DEBUG and print " Fallback title type 4: $title\n"; } if( !$title and m! ^([^\n]{1,160}?)

\s+Submitted !msix ) { $title = $1; if($title =~ m/\s+href\s*=/i ) { DEBUG and print " Blocking type-5 fallback title: $title\n"; $title = ""; } else { $title =~ s{<.*?>}{}g; $i->[1] .= " {$title}"; $title =~ s/\W//g; DEBUG and print " Fallback title type 5: $title\n"; } } #if( !$title and m{

(.{1,160}?)(?:\n|

)}si ) { if( !$title and m{

(.{1,160}?)}si ) { $title = $1; if($title =~ m/\s+href\s*=/i ) { DEBUG and print " Blocking type-6 fallback title: $title\n"; $title = ""; } else { $title =~ s{<.*?>}{}g; $i->[1] .= " {$title}"; $title =~ s/\W//g; DEBUG and print " Fallback title type 6: $title\n"; } } if($title) { $title = lc $title if $title eq uc $title; $i->[0] .= "#$title"; DEBUG and print " Setting url to $$i[0]\n"; } else { DEBUG and print " NO TITLE OR DATE!\n"; } return; } #--------------------------------------------------------------------------- __END__