#!/usr/bin/perl # The Missing Textutils, Ondrej Bojar, obo@cuni.cz # http://www.cuni.cz/~obo/textutils # # 'contexts' searches the stdin for the given RE and lists all contexts of it # sorted by frequence. # # $Id: contexts,v 1.2 2010-11-02 16:07:41 bojar Exp $ use Getopt::Long; $beflen = 5; $aftlen = 5; $befre = 0; $aftre = 0; $usage = 0; $cont = 0; $contre = 0; GetOptions("before=i" => \$beflen, "after=i" => \$aftlen, "cont=i" => \$cont, "help" => \$usage, "befre=s" => \$befre, "aftre=s" => \$aftre, "contre=s"=>\$contre); $re = shift; if (!$re || $usage) { print STDERR "Usage: contexts RE searches the stdin for the given RE and counts all contexts of it by frequence Options: --before=X ... use before context of X chars (default:5) --after=X ... use after context of X chars (default:5) --cont=X ... use both before and after context of X chars --contre=RE ... use both before and after context matching the RE --befre=RE ... use before context matching the RE --aftre=RE ... use after context matching the RE (the RE-contexts override the length contexts) (the both-contexts override the single contexts) "; exit 1; } $beflen = $cont if $cont; $aftlen = $cont if $cont; $befre = ".{$beflen}" if !$befre; $aftre = ".{$aftlen}" if !$aftre; $befre = $aftre = $contre if $contre; $re =~ s/'/\\'/g; $fullre = "($befre)($re)($aftre)"; print STDERR "Full re: $fullre\n"; $print = 1; while (<>) { while (/$fullre/o) { $found = $1.$2.$3; $a = length($1); $b = length($2); $c = length($3); $a{$found} = $1; $b{$found} = $2; #print STDERR "$found ---> $b{$found}\n"; $c{$found} = $3; s/$fullre//; $seen{$found} ++; $maxa = $a if $a>$maxa; $maxb = $b if $b>$maxb; $maxc = $c if $c>$maxc; } } for my $found (sort {$seen{$b}<=>$seen{$a}} keys %seen) { print "$seen{$found}\t"; print " "x($maxa - length($a{$found})); print "$a{$found}"; $gap = $maxb - length($b{$found}); $gapa = int($gap / 2); $gapb = $gap - $gapa; print (" "x$gapa); print $b{$found}; print (" "x$gapb); print $c{$found}; print "\n"; }