#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# Given a key-value pairs prints out all the pairs where for the given key
# the value was always ('immer', in German) the same.
#
# Output: key\tvalue\tcount
#
# $Id: immer,v 1.2 2005/10/10 07:33:49 bojar Exp $
#

use Getopt::Long;

sub usage {
  print STDERR "immer  <stdin >stdout
Options:
  --srccol=X   ... use the given col. instead of the default col 1
  --desccol=X  ... use the given col. instead of the default col 2
  --trim       ... ignore whitespace at start/end
  --minobs=X   ... do not print, if a pair occurred less than X times
";
  exit 1;
}

$trim = 0;
$srccol = 1;
$destcol = 2;
$minobs = 1;
$usage = 0;
GetOptions("help" => \$usage,
           "minobs=i" => \$minobs,
           "trim" => \$trim,
           "srccol=i" => \$srccol, "destcol=i" => \$destcol,
           );
usage() if $usage;
$srccol --; $destcol --;

while (<>) {
  chomp;
  @line = split /\t/;
  $line[$srccol] =~ s/^ *| *$//g if $trim;
  $line[$destcol] =~ s/^ *| *$//g if $trim;
  next if $notimmer{$line[$srccol]};
  $obs{$line[$srccol]}++;
  if (defined $seen{$line[$srccol]}) {
    if ($seen{$line[$srccol]} ne $line[$destcol]) {
      $notimmer{$line[$srccol]} = 1;
    }
  } else {
    $seen{$line[$srccol]} = $line[$destcol];
  }
}

foreach my $k (keys %seen) {
  next if $notimmer{$k};
  next if $obs{$k} < $minobs;
  print "$k\t$seen{$k}\t$obs{$k}\n";
}



