#!/usr/bin/perl # The Missing Textutils, Ondrej Bojar, obo@cuni.cz # http://www.cuni.cz/~obo/textutils # # Given a key-value pairs prints out all the pairs where for the given key # the value was always ('immer', in German) the same. # # Output: key\tvalue\tcount # # $Id: immer,v 1.2 2005/10/10 07:33:49 bojar Exp $ # use Getopt::Long; sub usage { print STDERR "immer stdout Options: --srccol=X ... use the given col. instead of the default col 1 --desccol=X ... use the given col. instead of the default col 2 --trim ... ignore whitespace at start/end --minobs=X ... do not print, if a pair occurred less than X times "; exit 1; } $trim = 0; $srccol = 1; $destcol = 2; $minobs = 1; $usage = 0; GetOptions("help" => \$usage, "minobs=i" => \$minobs, "trim" => \$trim, "srccol=i" => \$srccol, "destcol=i" => \$destcol, ); usage() if $usage; $srccol --; $destcol --; while (<>) { chomp; @line = split /\t/; $line[$srccol] =~ s/^ *| *$//g if $trim; $line[$destcol] =~ s/^ *| *$//g if $trim; next if $notimmer{$line[$srccol]}; $obs{$line[$srccol]}++; if (defined $seen{$line[$srccol]}) { if ($seen{$line[$srccol]} ne $line[$destcol]) { $notimmer{$line[$srccol]} = 1; } } else { $seen{$line[$srccol]} = $line[$destcol]; } } foreach my $k (keys %seen) { next if $notimmer{$k}; next if $obs{$k} < $minobs; print "$k\t$seen{$k}\t$obs{$k}\n"; }