#!/usr/bin/perl # Time-stamp: "2024-11-14 05:15:26 MST" # # See http://www.interglacial.com/rss/ for info on RSS feeds #---------------------------------------------------------------------------- # # Sean M. Burke sburke@pobox.com 2008-05-26 # This has been one of the trickiest feed-generators I've ever written, # because the content isn't just coming out of some CMS. # #---------------------------------------------------------------------------- BEGIN { binmode($_, ":utf8") foreach (*STDIN, *STDOUT, *STDERR); } use strict; use warnings; $|=1; use constant DEBUG => $ENV{'MAILTO'} ? 0 : 10; my $A = "\n Aborting"; use XML::RSS::SimpleGen 12; use constant AM_TESTING => (($ENV{'HOST'}||'') =~ /gleiv/) ? 1 : 0; my $nombre = 'cake'; use Encode qw(decode); rss_new( (my $base = "https://cakemusic.com/news/"), "CAKE", "News and ideas from the band CAKE", ); rss_thrice_daily(); rss_webmaster('sburke@pobox.com'); rss_self_url( "http://interglacial.com/rss/$nombre.rss" ); rss_generator_url( "http://interglacial.com/rss/$nombre.pl" ); rss_livejournal( 'cake_news' ); rss_item_limit(20); run_all(); exit; #====================================================================== sub cake_exceptionals { if($^T < 1467888519) { # Thu Jul 7 10:48:39 2016 GMT rss_item( "http://interglacial.com/rss/cake.rss", 'The Cake RSS is back!', 'The Cake RSS was dead. For a *very* long time. Now it\'s back!' ); } rss_item( "https://www.facebook.com/cakemusic/", "CAKE on Facebook", ); return; } #====================================================================== sub run_all { my $start = time(); cake_exceptionals(); cake_main(); #cake_weekly(); cake_you(); cake_tour(); #cake_band(); cake_news(); rss_item_count or die "$0: No items?$A"; rss_save("$nombre.rss", 30); if(AM_TESTING and -e "$nombre.rss") { use File::Copy; copy "$nombre.rss", "$nombre.rss~$^T"; } DEBUG and print "All done with everything. Runtime: ", time() - $start, "s\n"; return; } #====================================================================== sub cake_tour { my $url = #'http://cakemusic.com/tour.html' 'http://cakemusic.com/tour/' ; DEBUG > 20 and print "\n\nScanning $url ...\n"; get_url($url); maybe_save('tour.html', $_); DEBUG > 20 and print "Got from $url ", length($_) , " bytes\n{{\n$_\n}}\n\n"; $_ = w3m( $_ ); s<\s+>< >g; DEBUG > 5 and print "\nw3m'd: {{ $_ }}\n\n"; my $checksum = checksum( $_ ); DEBUG > 5 and print "$url => checksum $checksum\n"; rss_item("$url#X$checksum", "Update to Cake Tour info page"); return; } #====================================================================== sub cake_band { my $url = 'https://cakemusic.com/Band/band.html'; DEBUG and print "\n\nScanning $url ...\n"; get_url($url); maybe_save('band.html', $_); $_ = w3m( $_ ); s<\s+>< >g; #print "{{ $_ }}\n"; my $checksum = checksum( $_ ); DEBUG > 5 and print "$url => checksum $checksum\n"; rss_item("$url#X$checksum", "Update to Cake Road Journal"); return; } #====================================================================== sub cake_main { # $_ = "

guh !"; #$_ = strip_nontexty($_); #print; #exit; my $url = 'https://cakemusic.com/' ; DEBUG and print "\n\nScanning $url ...\n"; get_url($url); maybe_save('main.html', $_); DEBUG > 15 and print "From $url: \n<<\n$_\n>>\n\n"; my $count = 0; $_ = strip_nontexty($_); $_ = w3m($_); $_ = decode('UTF-8', $_); #, Encode::FB_CROAK); DEBUG > 14 and print "Gotten <<\n$_\n>>\n"; $_ = join " ", split /\s+/, $_; $_ =~ s<^ ><>; $_ =~ s< $><>; $_ =~ s{\bmute audio\b}{ }; DEBUG > 1 and print "From main page, Cake thought: {{\n$_\n}}\n"; if( m/\w/ and length($_) > 5 ) { my $tag = join '', m{(\w+)}g; $tag = substr($tag,0,10); DEBUG and print "Tag: $tag < $url\n"; ++$count; rss_item("$url#$tag", "CAKE Thought", $_); } die "!!!!!\n\tNothing in $url ?!$A" unless $count; return; } #====================================================================== sub cake_you { my $url = #'http://cakemusic.com/you.html' 'http://www.cakemusic.com/advice/' ; DEBUG and print "\n\nScanning $url ...\n"; get_url($url); maybe_save('you.html', $_); my $count = 0; DEBUG > 20 and print "In $url, got: {{$_}}\n\n"; s{.*? \b (Dear \s+ CAKE .+)} {$1}ismx or die "No 'Dear Cake' in $url {{\n$_\n}}\n\n"; DEBUG > 20 and print "\n\nReduced to {{\n$_\n}}\n\n"; $_ = w3m($_); DEBUG > 14 and print "Gotten <<\n$_\n>>\n"; $_ = join " ", split /\s+/, $_; $_ =~ s<^ ><>; $_ =~ s< $><>; $_ =~ s{^\s*,\s*}{ }s; DEBUG > 15 and print "{{\n$_\n}}\n"; if( m/\w/ and length($_) > 5 ) { my $tag = join '', m{(\w+)}g; $tag =~ s<^DearCAKE><>s; $tag = substr($tag,0,16); DEBUG and print "Tag: $tag < $url\n"; ++$count; if(length($_) > 90) { $_ = substr($_, 0,90); s{\s+\z}{}s or s{\s+\S+\z}{}s; # don't end in mid-word $_ .= "..."; } rss_item("$url#$tag", "CAKE Advice", $_); } die "Nothing in $url ?!$A" unless $count; return; } #====================================================================== sub cake_weekly { my $url = 'https://cakemusic.com/weekly_files/weekly.html'; DEBUG and print "\n\nScanning $url ...\n"; get_url($url); maybe_save('weekly.html', $_); my $count = 0; s{^.+]+>}{}smi; $_ = strip_nontexty($_); $_ = w3m($_); DEBUG > 13 and print "Gotten <<\n$_\n>>\n"; $_ = join " ", split /\s+/, $_; $_ =~ s<^ ><>; $_ =~ s< $><>; # Kill the: # HOME | NEWS | TOUR | MUSIC | STORE | BAND | WEEKLY | ADVICE | POLL $_ =~ s{ ^ \s* [A-Z]+ ( \s+ \| \s+ [A-Z]+ )+ }{}sgmx; $_ =~ s{^ \s* [A-Z]+ [ ]? -- [ ]? }{}sx; # like "TRUCK -- " DEBUG > 15 and print "{{\n$_\n}}\n"; if( m/\w/ and length($_) > 5 ) { my $tag = join '', m{(\w+)}g; $tag = substr($tag,0,20); DEBUG and print "Tag: $tag < $url\n"; ++$count; if(length($_) > 90) { $_ = substr($_, 0,90); s{\s+\z}{}s or s{\s+\S+\z}{}s; # don't end in mid-word $_ .= "..."; } rss_item("$url#$tag", "CAKE Weekly/Poll", $_); } die "Nothing in $url ?!$A" unless $count; return; } #====================================================================== sub cake_news { my $url = 'https://www.cakemusic.com/news/'; DEBUG and print "\n\nScanning $url ...\n"; get_url($url); #maybe_save('news.html', $_); my $count = 0; #s{^.+]+>}{}smi; #$_ = strip_nontexty($_); #s{

}{ // }gi; #$_ = w3m($_); DEBUG > 19 and print "Gotten <<\n$_\n>>\n"; use Encode qw(decode); $_ = decode('UTF-8', $_); #, Encode::FB_CROAK); #

Newsflash for December 8, 2015

Emotionless guy on slingshot. while( m{ (Newsflash \s+ for \s+ [^<>]+\D\d\d\d\d)
\s*
- ]+)\s*\" (?: \s+ target="_blank" )? >(.*?)\s*