#!/usr/bin/perl # Time-stamp: "2008-05-31 22:20:34 ADT" # See http://www.interglacial.com/rss/ for info on RSS feeds require 5; use strict; use constant DEBUG => $ENV{'MAILTO'} ? 0 : 4; use XML::RSS::SimpleGen 12; my $url = 'http://www.mcsweeneys.net'; rss_new( $url, "McSweeney's", "Timothy McSweeney's Internet Tendency" ); rss_every_other_hour(); rss_webMaster('sburke@cpan.org'); rss_language("en-US"); rss_item_limit(30); rss_self_url( "http://interglacial.com/rss/mcsweeneys.rss" ); rss_generator_url( "http://interglacial.com/rss/mcsweeneys.pl" ); rss_livejournal( 'mcsweeneysrss' ); # Because it's outside the normal system with red dots, it seems: if(0) { get_url('http://www.mcsweeneys.net/links/bush/'); if( m/
Day (\d+):/i) {
rss_item("http://www.mcsweeneys.net/links/bush/#$1",
"Daily Reason to Dispatch Bush, #$1",
);
DEBUG and print "Noting Bush #$1\n";
} else {
print "Odd, no Daily Reason to Dispatch Bush!\n";
}
}
DEBUG and print "Polling the McSweeney's site...\n";
get_url('http://www.mcsweeneys.net/');
m/(.+)/s
or m/(.+)/s
or die "$0: Can't find end-of-text marker in:\n{{\n$_\n}}\n";
$_ = $1;
my $stories;
m{(.+)(.+)}s
or die "$0: Can't find 5-stories marker in:\n$_\n";
$stories = $1;
$_ = $2;
my(@items);
DEBUG and print "\nToday's content (reddot):\n";
while(
m{^
[^\cm\cj]*?]+)\">([^\cm\cj]*?)}gsm
) {
push @items, ["$url$1", lc($2)];
DEBUG and print " $url$1\n = \L$2\n";
}
DEBUG and print "\nNewish content (blackdot):\n";
while(
m{^
[^\cm\cj]*?]+)\">([^\cm\cj]*?)}gsm
) {
push @items, ["$url$1", lc($2)];
DEBUG and print " $url$1\n = \L$2\n";
}
DEBUG and print "\n\nScanning the most recent stories:\n";
while( $stories =~
m{^
]+)\">(.*?)(.*?)$}gsm
) {
push @items, ["$url$1", "«$2» $3"];
DEBUG and print " $url$1\n = «$2» $3\n";
}
@items or die "$0: No items!?";
DEBUG and print "\n\n";
my %seen;
foreach my $i (@items) {
next if $seen{$i->[0]};
++$seen{$i->[0]};
DEBUG > 1 and print "Item @$i\n";
if($i->[0] =~ m{/$}s or $i->[0] =~ m!/books/future\.html\z!s ) {
scan_page_for_title($i);
} else {
DEBUG and print "Not bothering to scan @$i\n";
}
rss_item(@$i);
DEBUG and print "\n";
}
DEBUG and print "Saving...\n";
rss_save("mcsweeneys.rss", 10);
DEBUG and print "Done.\n";
exit;
#---------------------------------------------------------------------------
sub scan_page_for_title {
my $i = $_[0];
DEBUG and print "* I will try looking at $$i[0] 's title element...\n";
get_url($i->[0]);
if( m{(.+)}smi ) { $_ = $1 }
s{}{}gs;
s{
.+}{}smi;
my $title;
if( m{
([A-Z][a-z]+) (\d\d+), (20\d\d)
} ) { $title = "$3-$2-$1"; $i->[1] .= " ($title)"; DEBUG and print " Fallback title type 1: $title\n"; } if( !$title and m{>Date: +[A-Z][a-z][a-z], +(\d\d?) +([A-Z][a-z][a-z]) +(20\d\d)\b} ) { $title = "$3-$2-$1"; $i->[1] .= " ($title)"; DEBUG and print " Fallback title type 2: $title\n"; } if( !$title and m{>\s*By\s+[^\n\r]*?\((\d\d?)\/(\d\d?)\/([01]\d)\)}s ) { $title = "20$3-$1-$2"; $i->[1] .= " ($title)"; DEBUG and print " Fallback title type 3: $title\n"; } if( !$title and m{:]+)">}i ) { $title = $1; $title =~ s/\.html?$//s; $i->[1] .= " {$title}"; $title =~ s/\W+//g; DEBUG and print " Fallback title type 4: $title\n"; } if( !$title and m! ^([^\n]{1,160}?)
\s+Submitted !msix ) { $title = $1; if($title =~ m/\s+href\s*=/i ) { DEBUG and print " Blocking type-5 fallback title: $title\n"; $title = ""; } else { $title =~ s{<.*?>}{}g; $i->[1] .= " {$title}"; $title =~ s/\W//g; DEBUG and print " Fallback title type 5: $title\n"; } } #if( !$title and m{
(.{1,160}?)(?:\n|
)}si ) { if( !$title and m{(.{1,160}?)}si ) { $title = $1; if($title =~ m/\s+href\s*=/i ) { DEBUG and print " Blocking type-6 fallback title: $title\n"; $title = ""; } else { $title =~ s{<.*?>}{}g; $i->[1] .= " {$title}"; $title =~ s/\W//g; DEBUG and print " Fallback title type 6: $title\n"; } } if($title) { $title = lc $title if $title eq uc $title; $i->[0] .= "#$title"; DEBUG and print " Setting url to $$i[0]\n"; } else { DEBUG and print " NO TITLE OR DATE!\n"; } return; } #--------------------------------------------------------------------------- __END__