#!/home/sburke/bin/perl # Time-stamp: "2005-01-06 14:34:37 AST" # Folks, this is not my best code ever. But it works. -- sburke@cpan.org require 5; use constant DEBUG => $ENV{'MAILTO'} ? 0 : 1; use warnings; use strict; use URI; use HTML::Entities; use Time::Local; use LWP::Simple (); # BookTV Schedule scraper/converter. # sburke@cpan.org my $master_url = 'http://www.booktv.org/schedule/'; my @Times; # {'url' => thaturl}, ... my %Url2id; # url => { ... } my %ID2Time; my $id_counter = '0'; my $xml = 'book_tv_schedule.xml'; my $xml_out_temp = 'book_tv_schedule.xml.tmp'; my $temp_sched = 'schedule.html'; my $Year_Earlier = 1900 + (localtime($^T - 900_000))[5]; my $Year_Later = 1900 + (localtime($^T + 900_000))[5]; init_times_tables(); learn_schedule( parse_schedule() ); write_schedule(); DEBUG and print "\nDone at ", scalar(localtime), "\n\n"; exit; #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ my(%dow_before, @months, %month_before, @hour_before, @dows, @tzs); my($dow_re, $month_re, %month2num); sub init_times_tables { @dows = qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday); @dow_before{@dows} = ($dows[6], @dows[0..5]); # 0 1 .. 23 @hour_before = [23, 0 .. 22]; @months = qw{January February March April May June July August September October November December}; @month_before{@months} = ($months[11], @months[0..10]); @month2num{@months} = (0..11); $dow_re = re_join(@dows); $month_re = re_join(@months); @tzs = qw( Eastern Central Mountain Pacific Alaska Hawaii); return; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub learn_schedule { @Times = @_; # so be it learnt! # Set up the IDs... foreach my $t (@Times) { $t->{'id'} = ++$id_counter } # Set up the Url2id list... foreach my $t (@Times) { push @{ $Url2id{ $t->{'url'} }{'xids'} } => $t->{'id'}; $Url2id{ $t->{'url'} }{'data'}{'desc'} ||= $t->{'desc'}; $Url2id{ $t->{'url'} }{'data'}{'short_title'} ||= $t->{'short_title'}; } foreach my $t (@Times) { my $id = $t->{'id'}; $ID2Time{$id} = $t; my @others = grep $_ ne $id, @{ $Url2id{ $t->{'url'} }{'xids'} }; $t->{'other_showings'} = join ' ', @others; # TODO: } return; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub icsprint { my @lines = @_; foreach my $line (@lines) { $line =~ s{([,;\\])}{\\$1}g; $line =~ s/^\s+//s; $line =~ s/\s$//s; $line =~ s{\n}{\\n}g; #$line =~ s/(.{70})(?=.)/$1\cm\cj\t/mg; #print ICS $line, "\cm\cj"; #$line =~ s/(.{70})(?=.)/$1\n\t/mg; print ICS $line, "\n"; } return; } #--------------------------------------------------------------------------- sub write_schedule { foreach my $tz (@tzs) { my $html_out = "bt$tz\_long.html"; my $html_out_temp = "bt$tz\_long.html.tmp"; my $rss_out = "bt$tz.rss"; my $rss_out_temp = "bt$tz.rss.tmp"; my $ics_out = "bt$tz.ics"; my $ics_out_temp = "bt$tz.ics.tmp"; open(HTML, ">$html_out_temp") or die "Can't write-open $html_out_temp -- $!"; open(RSS, ">$rss_out_temp") or die "Can't write-open $rss_out_temp -- $!"; open(ICS, ">$ics_out_temp") or die "Can't write-open $ics_out_temp -- $!"; print HTML qq[
[End]