#!/usr/local/bin/perl
# Time-stamp: "2005-08-19 01:36:26 ADT" sburke@cpan.org
# desc{ scans HTML for images lacking alt attributes }

# This program looks for files that look like HTML, and
# does a cheap (non)parse on them to look for IMG tags
# that are missing any of ALT, HEIGHT, WIDTH, or SRC.
# This is NOT a real parse, it's just a cheap hack.
#
# Usage: pass one or more files/directories on the command line;
# directories are searched recursively.

no locale;
use strict;
use warnings;
use File::Find;

my $max_length = 40000;  # don't read files larger than this.

# Attributes that every IMG tag is expected to carry.
my @required_attributes = qw(ALT HEIGHT WIDTH SRC);

die "Tell me what files/dirs to recurse.\n" unless @ARGV;

my @file_list;
print "Looking...\n";
find(
    sub {
        # The alternation is grouped so that the end-of-name anchor applies
        # to every suffix, not just the last one.  File::Find chdir()s into
        # each directory by default, so the file test must use $_ (the bare
        # name); $File::Find::name may be relative to the starting directory.
        push( @file_list, $File::Find::name )
          if /(?:html?|tmpl?|incl?)$/i && -f $_;
    },
    @ARGV
);

unless (@file_list) {
    print "No HTML files to scan under:\n", map( " $_\n", @ARGV ), "\n\n";
    exit;
}

print "About to scan ", scalar(@file_list), ' ',
  @file_list == 1 ? 'file' : 'files', ".\n";

FILE:
foreach my $file (@file_list) {
    # -s yields a false, non-numeric value for empty or vanished files;
    # treat that as size 0 so the comparison stays numeric.
    if ( ( -s $file || 0 ) > $max_length ) {
        print "$file is over the $max_length-byte limit. Skipping\n";
        next FILE;
    }

    my $fh;
    unless ( open( $fh, '<', $file ) ) {
        print "Can't open $file : $!\n";
        next FILE;
    }
    print "Scanning: $file\n";

    my $in = '';
    my $got = read( $fh, $in, $max_length );
    my $read_error = $!;    # save before close() can overwrite it
    close($fh);
    unless ( defined $got ) {
        print "Can't read $file : $read_error\n";
        next FILE;
    }

    # print "Length: ", length($in), "\n";
    while ( $in =~ m/<img\b([^>]*)>/ig ) {
        my $contents = $1;
        my $end      = pos($in);    # offset just past the tag's closing ">"

        # An attribute counts as present only when written as name=...,
        # and not as the tail of a longer name or of a value such as
        # src="salt.gif".
        my @missing =
          grep { $contents !~ /(?<![\w-])$_\s*=/i } @required_attributes;
        next unless @missing;

        # Flatten CR, LF and tab so the report stays on one line.
        $contents =~ tr<\cm\cj\t>< >;
        print " at byte ", $end, ": <IMG$contents>\n";
    }
}

print "Done at ", scalar(localtime), ".\n\n";
exit;

__END__