#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'contexts'  searches the stdin for the given RE and lists all contexts of it
# sorted by frequence.
#
# $Id: contexts,v 1.2 2010-11-02 16:07:41 bojar Exp $

use Getopt::Long;

$beflen = 5;
$aftlen = 5;
$befre = 0;
$aftre = 0;
$usage = 0;
$cont = 0;
$contre = 0;
GetOptions("before=i" => \$beflen, "after=i" => \$aftlen, "cont=i" => \$cont,
"help" => \$usage,
"befre=s" => \$befre, "aftre=s" => \$aftre, "contre=s"=>\$contre);

$re = shift;

if (!$re || $usage) {
  print STDERR "Usage: contexts RE
  searches the stdin for the given RE and counts all contexts of it by frequence
Options:
--before=X   ... use before context of X chars (default:5)
--after=X    ... use after context of X chars (default:5)
--cont=X     ... use both before and after context of X chars
--contre=RE  ... use both before and after context matching the RE
--befre=RE   ... use before context matching the RE
--aftre=RE   ... use after context matching the RE
                 (the RE-contexts override the length contexts)
		 (the both-contexts override the single contexts)
";
  exit 1;
}

$beflen = $cont if $cont;
$aftlen = $cont if $cont;
$befre = ".{$beflen}" if !$befre;
$aftre = ".{$aftlen}" if !$aftre;
$befre = $aftre = $contre if $contre;

$re =~ s/'/\\'/g;
$fullre = "($befre)($re)($aftre)";
print STDERR "Full re: $fullre\n";
$print = 1;
while (<>) {
  while (/$fullre/o) {
    $found = $1.$2.$3;
    $a = length($1);
    $b = length($2);
    $c = length($3);
    $a{$found} = $1;
    $b{$found} = $2;
    #print STDERR "$found ---> $b{$found}\n";
    $c{$found} = $3;
    s/$fullre//;
    $seen{$found} ++;
    $maxa = $a if $a>$maxa;
    $maxb = $b if $b>$maxb;
    $maxc = $c if $c>$maxc;
  }
}

for my $found (sort {$seen{$b}<=>$seen{$a}} keys %seen) {
  print "$seen{$found}\t";
  print " "x($maxa - length($a{$found}));
  print "$a{$found}";
  $gap = $maxb - length($b{$found});
  $gapa = int($gap / 2);
  $gapb = $gap - $gapa;
  print (" "x$gapa);
  print $b{$found};
  print (" "x$gapb);
  print $c{$found};
  print "\n";
}
