#!/usr/local/bin/perl
# Time-stamp: "2005-08-19 01:36:26 ADT" sburke@cpan.org
# desc{ scans HTML for images lacking alt attributes }
# This program looks for files that look like HTML, and
# does a cheap (non)parse on them to look for IMG tags
# that are missing any of ALT, HEIGHT, WIDTH, or SRC.
# This is NOT a real parse, it's just a cheap hack.
no locale;
use strict;
use File::Find;
use warnings;

# Upper bound, in bytes, on the size of any file that will be read and
# scanned; larger files are reported and skipped.
my $max_length = 40000;

# Refuse to run without at least one starting file or directory, and say so
# on STDERR with a non-zero exit status instead of quitting silently.
die "Tell me what files/dirs to recurse.\n" unless @ARGV;
# Collect every plain file under the given starting points whose name ends
# in an HTML-ish extension: .htm/.html, .tmp/.tmpl, or .inc/.incl.
my @file_list;
print "Looking...\n";
find(
    sub {
        # The alternatives are grouped so that both the leading dot and the
        # end-of-name anchor apply to every extension, not just the last.
        return unless /\.(?:html?|tmpl?|incl?)\z/i;

        # File::Find has chdir'd into the directory being examined, so the
        # file test must use the bare name in $_.  $File::Find::name is
        # relative to the starting point, not to the current directory.
        return unless -f $_;

        push @file_list, $File::Find::name;
    },
    @ARGV
);
# Nothing matched: list the starting points that were searched, then stop.
if ( !@file_list ) {
    print "No HTML files to scan under:\n";
    print " $_\n" for @ARGV;
    print "\n\n";
    exit;
}

# Announce how many files are queued, with a correctly pluralized noun.
my $file_count = scalar @file_list;
my $file_noun  = $file_count == 1 ? 'file' : 'files';
print "About to scan $file_count $file_noun.\n";
# Scan each collected file for IMG tags that lack any of the attributes
# below, and report the byte offset just past each such tag together with
# the tag text.  This is a pattern match over the raw text, not an HTML
# parse, so it remains a heuristic.
my @required_attributes = qw(ALT HEIGHT WIDTH SRC);

FILE:
foreach my $file (@file_list) {

    # -s yields undef if the file has vanished since it was listed; in that
    # case fall through and let open() report the problem.
    my $size = -s $file;
    if ( defined $size and $size > $max_length ) {
        print "$file is over the $max_length-byte limit. Skipping\n";
        next FILE;
    }

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle cannot leak between iterations.
    my $fh;
    unless ( open $fh, '<', $file ) {
        print "Can't open $file : $!\n";
        next FILE;
    }
    print "Scanning: $file\n";

    my $html       = '';
    my $bytes_read = read $fh, $html, $max_length;
    my $read_error = $!;    # saved now because close() may overwrite $!
    close $fh;
    unless ( defined $bytes_read ) {
        print "Can't read $file : $read_error\n";
        next FILE;
    }

    # Each match captures everything between "<" and ">" of one IMG tag.
    while ( $html =~ m/<(IMG\s[^>]+)>/ig ) {
        my $tag     = $1;            # copied before any other match resets $1
        my $tag_end = pos($html);    # offset just past the closing ">"

        # An attribute counts as present only when its name follows
        # whitespace and is followed by "=", so a value such as
        # src="salt.gif" is not mistaken for an ALT attribute.
        my $missing = grep { $tag !~ /\s$_\s*=/i } @required_attributes;
        next unless $missing;

        # Flatten CR, LF and tab to spaces so a tag that spans several
        # lines is reported on a single line.
        $tag =~ tr/\cm\cj\t/ /;
        print " at byte $tag_end: <$tag>\n";
    }
}
# Report the completion time, then end with a success status.
my $finished_at = localtime;
print "Done at $finished_at.\n\n";
exit 0;
__END__