#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'colgrep' is just like grep (but using Perl regular expressions, not grep
# ones), but it checks only specified tab-delimited columns of the file.
#
# $Id: colgrep,v 1.6 2006/08/18 15:09:26 bojar Exp $
#

use Getopt::Long;
use strict;

# Command-line state.
#   $inverse ... --inverse: print the lines that do NOT match
#   $extrare ... --re=RE: the regular expression given as an option
#   $skip    ... --skip=N: number of leading lines copied through unfiltered
#   $help    ... --help: show usage
my $inverse = 0;
# Deliberately left undefined: a default of 0 is indistinguishable from an
# explicit --re=0 and made the positional-regex fallback below unreachable.
my $extrare;
my $skip = 0;
my $help = 0;
GetOptions(
  "inverse" => \$inverse,
  "help" => \$help,
  "skip=i" => \$skip,
  're=s'=>\$extrare,
) or exit 1;

# First positional argument: comma-separated list of 1-based column numbers.
my $cols=shift;

# The regular expression comes from --re if that was given (and non-empty);
# otherwise it is the next positional argument.  "0" is a valid regex, so
# test definedness/length rather than truth.
my $re;
if (defined $extrare && length $extrare) {
  $re = $extrare;
} else {
  $re=shift;
}

# Without a column list and a non-empty regex there is nothing to do.
if ($help || !$cols || !(defined $re && length $re)) {
  print STDERR "usage: colgrep 1,2,3 --re=\"re_to_lookup\"
Options:
  --inverse   ... print lines where the RE was not found
  --skip=N    ... skip first N lines
";
  exit 1;
}

# Turn the comma-separated list of 1-based column numbers into a set of
# 0-based field indices (the keys of %cols).  Duplicates collapse naturally.
my @cols = split /,/, $cols;
if (!@cols) {
  print STDERR "colgrep: no columns given in '$cols'\n";
  exit 1;
}
my %cols;
foreach my $col (@cols) {
  # Tolerate blanks around an item ("1, 2"), but insist on a positive integer:
  # anything else would numify to 0 or less and, as index -1 and below,
  # silently address fields counted from the END of the line.
  (my $num = $col) =~ s/^\s+|\s+$//g;
  if ($num !~ /^\d+$/ || $num < 1) {
    print STDERR "colgrep: bad column '$col' in '$cols' (expected positive integers, e.g. 1,2,3)\n";
    exit 1;
  }
  $cols{$num-1} = 1;
}

# Diagnostic goes to STDERR so that it never mixes with the filtered output.
print STDERR "Searching cols $cols for $re.\n";

# Copy the first $skip lines (e.g. a header) to the output without filtering.
while ($skip > 0) {
  my $header_line = <>;
  # Input ended inside the skipped part: there is nothing left to filter.
  # Stop here, because once <> has reported end-of-file a further read from
  # it starts over on STDIN and would block waiting for unrelated input.
  exit 0 unless defined $header_line;
  print $header_line;
  $skip --;
}


# Main filter: for every remaining input line, join the selected columns with
# tabs and test the regular expression against that text only; the complete
# original line is printed on a hit (or on a miss with --inverse).

# Selected 0-based field indices in ascending NUMERIC order.  A plain sort
# compares strings and would put index 10 before index 9.  Computed once, as
# the set does not change while reading.
my @wanted = sort { $a <=> $b } keys %cols;

# Compile the expression once; a malformed regex is reported here, before any
# input is consumed.
my $matcher = qr/$re/;

while (my $row = <>) {
  # Match against a copy without the line terminator, so that the newline does
  # not become part of the last column (it would make \s or \n always match
  # there and \z never match).  $row itself is printed unchanged.
  my $text = $row;
  chomp $text;
  # Limit -1 keeps trailing empty fields.
  my @line = split /\t/, $text, -1;
  # Columns missing from a short line count as empty strings.
  my $pattern = join "\t", map { defined $line[$_] ? $line[$_] : "" } @wanted;
  my $hit = ($pattern =~ $matcher);
  print $row if ($hit && !$inverse) || (!$hit && $inverse);
}

