#!/usr/local/bin/perl
# desc{ Convert Common Log Format files to tab-separated-values format }
#
# clf2tsv -- Convert Common Log Format files to tab-separated-values format
#  Actually uses some Combined Log Format extensions
#
# Usage: clf2tsv [logfile ...]
#   Reads the named files (or standard input when none are given) and
#   prints one 19-column, tab-separated row per recognized log line.
#   Lines that do not match the expected format are silently skipped.
#
# Time-stamp: "2005-08-19 01:16:00 ADT"
# sburke@cpan.org

use strict;
use warnings;

# Three-letter English month abbreviation => zero-padded month number.
my %month_number_of;
@month_number_of{ qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec) }
    = map { sprintf '%02d', $_ } 1 .. 12;

# One log line.  Every capture group becomes one output column, in order.
my $CLF_LINE = qr{
  ^
  ([-._a-zA-Z0-9]+)            # %h: IP address                 Col 1
  \x20
  (\S+)                        # %l: identd username            Col 2
  \x20
  (\S+)                        # %u: HTTPAuth username          Col 3
  \x20
  \[                           # %t: timestamp
    (                          #   whole date                   Col 4
      (\d\d)                   #   day-of-month                 Col 5
      /
      (Jan|Feb|Mar|Apr|May|Jun
      |Jul|Aug|Sep|Oct|Nov|Dec)  # monthname                    Col 6
      /
      ([12]\d\d\d)             #   year                         Col 7
    )
    :
    (                          #   time-of-day                  Col 8
      (\d\d):(\d\d):(\d\d)     #   hrs, mns, secs               Col 9,10,11
    )
    \x20
    ([-+]\d\d\d\d)             #   TZoffset                     Col 12
  \]
  \x20
  "                            # %r -- request line
    ([A-Za-z]+)                #   request method               Col 13
    \x20
    (\S+)                      #   path + query-string          Col 14
    \s+
    (\S+)                      #   protocol                     Col 15
  "
  \x20
  (\d\d\d)                     # %>s: status code               Col 16
  \x20
  ([-0-9]+)                    # %b: bytecount of return        Col 17
  (?:                          # Combined Log Format extensions:
    \x20
    ("[^\n\r"]*")              #   %{Referer}i                  Col 18
    \x20
    ("[^\n\r"]*")              #   %{User-agent}i               Col 19
  )?
  [\n\r]*
  \z
}xs;

# clf_line_to_fields($line)
#   Split one log line into its 19 column values.
#   Returns the list of columns, or an empty list when the line does not
#   match $CLF_LINE.  Column 6 (the month name) is replaced by its two-digit
#   number; column 4 keeps the date exactly as logged.  Columns 18 and 19
#   are empty strings for lines without the Referer/User-agent extension,
#   and keep their surrounding double quotes when present.
sub clf_line_to_fields {
    my ($line) = @_;

    my @fields = $line =~ $CLF_LINE;
    return unless @fields;

    $fields[5] = $month_number_of{ $fields[5] };

    for my $field (@fields) {
        # A list-context match yields undef for the optional groups that
        # did not participate; emit those as empty columns.
        $field = '' unless defined $field;

        # A literal tab inside a value would break the column layout.
        $field =~ tr/\t/ /;
    }

    return @fields;
}

# main()
#   Filter loop: convert every input line and print the recognized ones.
sub main {
    while (my $line = <>) {
        my @fields = clf_line_to_fields($line);
        next unless @fields;
        print join("\t", @fields), "\n";
    }
    return;
}

# Run the filter only when executed as a script, so the file can also be
# loaded with require/do without consuming input.
main() unless caller;

1;

__END__