#!/usr/bin/perl
# Time-stamp: "2024-11-14 05:15:26 MST"
#
# See http://www.interglacial.com/rss/ for info on RSS feeds
#----------------------------------------------------------------------------
#
# Sean M. Burke sburke@pobox.com 2008-05-26
# This has been one of the trickiest feed-generators I've ever written,
# because the content isn't just coming out of some CMS.
#
#----------------------------------------------------------------------------

BEGIN { binmode($_, ":utf8") foreach (*STDIN, *STDOUT, *STDERR); }

use strict; use warnings;
$|=1;
use constant DEBUG => $ENV{'MAILTO'} ? 0 : 10;
my $A = "\n Aborting";
use XML::RSS::SimpleGen 12;

use constant AM_TESTING => (($ENV{'HOST'}||'') =~ /gleiv/) ? 1 : 0;

my $nombre = 'cake';

use Encode qw(decode);

rss_new(
  (my $base = "https://cakemusic.com/news/"),
  "CAKE",
  "News and ideas from the band CAKE",
);
rss_thrice_daily();
rss_webmaster('sburke@pobox.com');
rss_self_url(      "http://interglacial.com/rss/$nombre.rss" );
rss_generator_url( "http://interglacial.com/rss/$nombre.pl" );
rss_livejournal( 'cake_news' );
rss_item_limit(20);

run_all();
exit;

#======================================================================

sub cake_exceptionals {
  
  if($^T < 1467888519) { # Thu Jul  7 10:48:39 2016 GMT

    rss_item(
     "http://interglacial.com/rss/cake.rss",
     'The Cake RSS is back!',
     'The Cake RSS was dead. For a *very* long time.  Now it\'s back!'
    );

  }

  rss_item(
   "https://www.facebook.com/cakemusic/",
   "CAKE on Facebook",
  );

  return;
}


#======================================================================

sub run_all {
  my $start = time();

  cake_exceptionals();

  cake_main();

  #cake_weekly();
  cake_you();

  cake_tour();
  #cake_band();



  cake_news();

  rss_item_count or die "$0: No items?$A";
  rss_save("$nombre.rss", 30);


  if(AM_TESTING and -e "$nombre.rss") {
    use File::Copy;
    copy "$nombre.rss", "$nombre.rss~$^T";
  }


  DEBUG and print "All done with everything.  Runtime: ", time() - $start, "s\n";
  return;
}

#======================================================================

sub cake_tour {
  my $url = 
     #'http://cakemusic.com/tour.html'
     'http://cakemusic.com/tour/'
  ;
  DEBUG > 20 and print "\n\nScanning $url ...\n";
  get_url($url);
  maybe_save('tour.html', $_);

  DEBUG > 20 and print "Got from $url ", length($_) , " bytes\n{{\n$_\n}}\n\n";

  $_ = w3m( $_ );
  s<\s+>< >g;
  DEBUG > 5 and print "\nw3m'd: {{ $_ }}\n\n";

  my $checksum = checksum( $_ );

  DEBUG > 5 and print "$url => checksum $checksum\n";
  rss_item("$url#X$checksum", "Update to Cake Tour info page");

  return;
}

#======================================================================

sub cake_band {
  my $url = 'https://cakemusic.com/Band/band.html';
  DEBUG and print "\n\nScanning $url ...\n";
  get_url($url);
  maybe_save('band.html', $_);

  $_ = w3m( $_ );
  s<\s+>< >g;

  #print "{{ $_ }}\n";

  my $checksum = checksum( $_ );

  DEBUG > 5 and print "$url => checksum $checksum\n";
  rss_item("$url#X$checksum", "Update to Cake Road Journal");

  return;
}

#======================================================================

sub cake_main {

  #  $_ = " <embed>guh</embed> !";
  #$_ = strip_nontexty($_);
  #print;
  #exit;

  my $url =
    'https://cakemusic.com/'
  ;
  DEBUG and print "\n\nScanning $url ...\n";
  get_url($url);
  maybe_save('main.html', $_);


  DEBUG > 15 and print "From $url: \n<<\n$_\n>>\n\n";
  my $count = 0;



  $_ = strip_nontexty($_);
  $_ = w3m($_);

  $_ = decode('UTF-8', $_); #,     Encode::FB_CROAK);


  DEBUG > 14 and print "Gotten <<\n$_\n>>\n";
  $_ = join " ", split /\s+/, $_;
  $_ =~ s<^ ><>;
  $_ =~ s< $><>;
  $_ =~ s{\bmute audio\b}{ };
  DEBUG > 1 and print "From main page, Cake thought: {{\n$_\n}}\n";


  if( m/\w/ and length($_) > 5 ) {
    my $tag = join '', m{(\w+)}g;
    $tag = substr($tag,0,10);
    DEBUG and print "Tag: $tag < $url\n";
    ++$count;

    rss_item("$url#$tag", "CAKE Thought", $_);
  }

  die "!!!!!\n\tNothing in $url ?!$A" unless $count;
  return;
}

#======================================================================

sub cake_you {
  my $url = 
    #'http://cakemusic.com/you.html'
    'http://www.cakemusic.com/advice/'
  ;
  DEBUG and print "\n\nScanning $url ...\n";
  get_url($url);
  maybe_save('you.html', $_);
  my $count = 0;

  DEBUG > 20 and print "In $url, got: {{$_}}\n\n";

  s{.*? \b (Dear \s+ CAKE .+)}
   {$1}ismx
    or die "No 'Dear Cake' in $url {{\n$_\n}}\n\n";

  DEBUG > 20 and print "\n\nReduced to {{\n$_\n}}\n\n";

  $_ = w3m($_);
  DEBUG > 14 and print "Gotten <<\n$_\n>>\n";
  $_ = join " ", split /\s+/, $_;
  $_ =~ s<^ ><>;
  $_ =~ s< $><>;
  $_ =~ s{^\s*,\s*}{ }s;
  DEBUG > 15 and print "{{\n$_\n}}\n";
  if( m/\w/ and length($_) > 5 ) {
    my $tag = join '', m{(\w+)}g;
    $tag =~ s<^DearCAKE><>s;
    $tag = substr($tag,0,16);
    DEBUG and print "Tag: $tag < $url\n";
    ++$count;
    if(length($_) >     90) {
      $_ = substr($_, 0,90);
      s{\s+\z}{}s or s{\s+\S+\z}{}s;  # don't end in mid-word
      $_ .= "...";
    }
    rss_item("$url#$tag", "CAKE Advice", $_);
  }

  die "Nothing in $url ?!$A" unless $count;
  return;
}

#======================================================================


sub cake_weekly {
  my $url = 'https://cakemusic.com/weekly_files/weekly.html';
  DEBUG and print "\n\nScanning $url ...\n";
  get_url($url);
  maybe_save('weekly.html', $_);
  my $count = 0;

  s{^.+<body\b[^<>]+>}{}smi;
  $_ = strip_nontexty($_);

  $_ = w3m($_);


  DEBUG > 13 and print "Gotten <<\n$_\n>>\n";

  $_ = join " ", split /\s+/, $_;
  $_ =~ s<^ ><>;
  $_ =~ s< $><>;


  # Kill the:
  #   HOME | NEWS | TOUR | MUSIC | STORE | BAND | WEEKLY | ADVICE | POLL

  $_ =~ s{
   ^
    \s*
    [A-Z]+
   
   (
    \s+ \| \s+
    [A-Z]+
   )+

  }{}sgmx;

  $_ =~ s{^ \s* [A-Z]+ [ ]? -- [ ]? }{}sx; # like "TRUCK -- "

  DEBUG > 15 and print "{{\n$_\n}}\n";

  if( m/\w/ and length($_) > 5 ) {
    my $tag = join '', m{(\w+)}g;
    $tag = substr($tag,0,20);
    DEBUG and print "Tag: $tag < $url\n";
    ++$count;
    if(length($_) >     90) {
      $_ = substr($_, 0,90);
      s{\s+\z}{}s or s{\s+\S+\z}{}s;  # don't end in mid-word
      $_ .= "...";
    }
    rss_item("$url#$tag", "CAKE Weekly/Poll", $_);
  }


  die "Nothing in $url ?!$A" unless $count;
  return;
}

#======================================================================



sub cake_news {
  my $url = 'https://www.cakemusic.com/news/';
  DEBUG and print "\n\nScanning $url ...\n";
  get_url($url);
  #maybe_save('news.html', $_);
  my $count = 0;

  #s{^.+<body\b[^<>]+>}{}smi;
  #$_ = strip_nontexty($_);

  #s{<li>}{ // }gi;

  #$_ = w3m($_);

  DEBUG > 19 and print "Gotten <<\n$_\n>>\n";

  use Encode qw(decode);
  $_ = decode('UTF-8', $_); #,     Encode::FB_CROAK);


#          <p class="newsheader">Newsflash for December 8, 2015</p>
#  <ul><li><a href="http://dangerousminds.net/comments/emotionless_guy_on_slingshot_theme_park_ride_set_to_simon_garfunkels_the_so
#"> Emotionless guy on slingshot.


  while( 
    m{<p\s+class="newsheader">
           (Newsflash \s+ for \s+ [^<>]+\D\d\d\d\d)</p>
      \s*
      <ul>\s*
      <li><a \s+ 
      href=\" \s*  (https?://[^\"<> ]+)\s*\"
      (?: \s+ target="_blank" )?
         >(.*?)</a>\s*</li>\s*
   }gxsm
  ) {

    my($pretitle, $url, $title) = (map $_//"", $1,$2,$3);
    DEBUG > 9 and print "  Got: $pretitle | $url | $title\n";
    rss_item( $url, "$pretitle: $title");
    ++$count;
  }

  die "Nothing in $url ?!$A" unless $count;
  DEBUG and print "$count items in $url\n\n";
  return;
}

#======================================================================


#======================================================================
#
# Util functions under here
#

sub checksum {
  return unpack("%32C*", join '', @_);
}

sub w3m {
  # return the result of passing the source string thru w3m

  my $source = $_[0] || die "No source?!";

  $source =~ s/&#145;/'/g;
  $source =~ s/&#146;/'/g;
  $source =~ s/&#147;/"/g;
  $source =~ s/&#148;/"/g;

  use IPC::Open2;
  my($From, $To);
  DEBUG and print "  Passing to w3m\n";
  #my $code = join "\n", 'scale=10', '',  <STDIN>, '';
  my $pid = open2($From, $To, 'w3m', '-T', 'text/html' );
           # ^^ on fail, that dies.
  DEBUG > 5 and print "  Opened w3m is process $pid\n";
  print $To $source or die "Couldn't print to w3m: $!$A";
  close $To or die "Couldn't close channel to w3m: $!$A";
  DEBUG > 5 and print "  Closing w3m channel\n";

  my $out = join "\n", <$From>;
  DEBUG > 5 and print "  Back from w3m\n";

  close $From or die "Couldn't close channel from w3m: $!$A";
  waitpid $pid, 0; # make some effort to let it die
  DEBUG > 5 and print "  Done with waitpid\n";
  return $out;
}


sub nix_spans {
  my($x, $gi) = @_;

  $x =~ s{
   < ($gi) \b [^<>]* >
   .*?
   < / \1 \s* >
  }{}xsmi;

  return $x;
}

sub strip_nontexty {
  my $x = $_[0];

  $x =~
    s{
      </?
      (?: img | body | p | table | tr | td | br )
      \b
      [^<>]+
      >
    }{ }igx;

  $x = nix_spans($x, 'embed');

  DEBUG > 20 and print "\n\nFILTERED: {{\n$x\n}}\n\n";

  return $x;
}

#======================================================================

sub maybe_save {
  my( $filename, $content ) = @_;
  return unless AM_TESTING;

  my $out_fs = "$filename~$^T";
  open my $OUT, ">", $out_fs or die "Can't write-open $out_fs: $!";
  binmode($OUT);
  print $OUT $content;
  close($OUT);

  return;
}

#======================================================================

__END__
