#!/usr/bin/perl
# Time-stamp: "2005-08-19 01:46:07 ADT"    sburke@cpan.org
#
# desc{    scan all man pages for a given string    }

# deroffing logic cribbed from Nathan Scott Thompson's "deroff" from the PPT.
#  (http://language.perl.com/ppt/)

# You may use this according to the GNU Public License: see http://www.gnu.org
# Documentation at bottom.

use strict;
use Getopt::Std;
use File::Find;

my %o;
if(!getopts('BLECc', \%o) or !@ARGV) {
  my $self = $0;
  $self = $1 if $self =~ m<([^/]+)$>s;
  print "Usage:\n$self [options] words...\n", <<'EOTHING';
    -B   don't abbreviate filename
    -C   print count of matching lines per file
    -c   same as -C, but suppresses 0-match lines
    -L   show only filenames with matching content, no lines.
    -E   interpret first argument as a RE
EOTHING
  exit 2;
}

$o{'C'} = 1 if $o{'c'};

#my $conf = '/etc/man.conf';
#open(CONF, "</etc/man.conf") or die "Can't open $conf: $1";
my @paths =
  grep { length $_ and -e $_ and -d _ and -r _}
    split ':',
      $ENV{'MANPATH'} || die "No MANPATH in env?"
;
# TODO: read /etc/man.conf if no MANPATH?

my $HC = '\%';     # default hyphenation characters
#print "Paths: @paths\n";

my $count;
my $RE;
if($o{'E'}) {
  die "No pattern?\n" unless @ARGV;
  die "Too many args?\n" if @ARGV > 1;
  $RE = $ARGV[0];
  eval {
    'foo' =~ m/$RE/os;   # test the RE
  };
  die $@ if $@;
} else {
  $RE = join '|',
    map quotemeta($_),
      sort {length $b <=> length $a or $a cmp $b}
        grep length($_),
          @ARGV
  ;
}
$RE = '^' unless length $RE;
#print "RE: /$RE/\n";


find(
  sub {
    if(not(m/\.(?:dat|idx|gif|jpg|jpeg|tif|tiff|png|pdf)(?:\.|$)/is) and -f $_) {
      
      # return if it's a scary filename?
      
      local $_ = $_;
      #print "$File::Find::name\n";
      scan_file($File::Find::name, $_);
      # exit if ++$count > 10;
    }
  },
  @paths
);
exit;

my(%included);

sub scan_file {
  my($filename, $base) = @_;
  
  if($base =~ s<\.(Z|gz)$><>s) {
    open(IN, "/usr/bin/gunzip -c $filename|") or die
     "Can't open gunzip -c $filename: $!\n";
  } else {
    open(IN, $filename ) or die "Can't open $filename: $!\n";
  }
  my $match = 0;
  my $is_man = $base =~ m/\.\d+$/s;
  $base = $filename if $o{'B'};
  while ( <IN> ) {
    if($is_man) {
      next if ( /^[.']\s*TS/ .. /^[.']\s*TE/ );       # skip tbl constructs
      next if ( /^[.']\s*EQ/ .. /^[.']\s*EN/ );       # skip eqn constructs
      
      # Handle .nx by by closing the current file.
      # Handle both .nx and .so by opening the indicated file
      # only if it hasn't been read before.
      
      if ( /^[.']\s*(so|nx)\s+(\S+)/ ) {
        last;
        # we'll get to that file eventually
      }
      
      m/^[.']hc\s+(\S)/s and $HC = $1; # save optional hyphenation character
      s/^[.']\s*[A-Z]\w*\s*//s;        # strip macro name, save arguments
      next if m/^[.']/s;                # ditch all other control requests
      
      s/\\".*//s;                      # strip comments
      s/\\\((f[ifl])/$1/g;            # replace fi, ff, fl ligatures
      s/\\\(F([il])/ff$1/g;           # replace ffi, ffl ligatures
      s/\\0/ /g;                      # replace \0 with space
      s/\\\((hy|mi|em)/-/g;           # replace \(hy, \(mi, \(em with dash
      s/\\\(../ /g;                   # replace all others with space
      
      s/\\[*fgns][+-]?\(..//g;        # remove \f(xx etc.
      s/\\[*fgn][+-]?.//g;            # remove \fx etc.
      s/\\s[+-]?\d+//g;               # remove \sN
      s/\\[bCDhHlLNoSvwxX]'[^']*'//g; # remove those with quoted arguments
      s/\\[e'`|^&%acdprtu{}]//g;      # remove one character escapes
      s/\\[\$kz].//g;                 # remove \$x, \kx, \zx
      s/\\$//;                        # remove line continuation
      
      s/\\(.)/$1/g;                   # save all other escaped characters
      s/$HC//og;                      # remove optional hyphenation
    }
    
    if(m/$RE/os) {
      if($o{'L'}) {
        print $filename, "\n";
        last;
      } elsif($o{'C'}) {
        ++$match;
        #print "$base:$.:<$match>:$_";
      } else {
        print "$base:$.:$_";
      }
    }
  }
  if($o{'C'}) { # c or C
    print "$filename:$match\n" if $match || !$o{'c'};
  }
  close(IN);
}

__END__
