#!/usr/bin/perl # The Missing Textutils, Ondrej Bojar, obo@cuni.cz # http://www.cuni.cz/~obo/textutils # # 'pickre' is extremely useful to collect specific information from every line. # The $1 output of a given regexp is prepended (tab-delimited) at the beginning # of every line. # # $Id: pickre,v 1.6 2009-04-02 11:58:45 bojar Exp $ # use strict; use Getopt::Long; my $re = 0; my $collect = 0; my $uniq = 0; my $pick = 0; my $delim = " "; my $cut = 0; my $col = undef; GetOptions( "re=s"=>\$re, "delim=s"=>\$delim, "collect"=>\$collect, "uniq"=>\$uniq, "pick"=>\$pick, "cut"=>\$cut, "col=i"=>\$col, ); $collect = 1 if $uniq; usage() if !$re; sub usage { print STDERR "pickre --re=what_to_pick Searches for a token on every line and precedes the line with an extra column containing the token (if found). Options: --collect ... delimit with space all collected tokens --uniq ... ignore number of occurrences and order of found tokens (implies collect) --pick ... print only lines where something was indeed found --delim=' ' ... join multiple tokens of output with this delimiter --cut ... don't append the original line (used to be called just-output --col=N ... pick only from column N (numbered from 1) "; exit 1; } $re = "($re)" if $re !~ /[^\\]\(|^\(/; while (<>) { my $line = $_; chomp $line; my @toks = (); my $done = 0; my $haystack; if (defined $col) { my @cols = split /\t/, $line; $haystack = $cols[$col-1]; } else { $haystack = $line; } while (($#toks < 0 || $collect) && !$done) { if ($haystack =~ /$re/o) { push @toks, $1; $haystack =~ s/$re//o; } else { $done = 1; } } my @usetoks; if ($uniq) { my %tmp = map {($_,1)} @toks; @usetoks = sort keys %tmp; } else { @usetoks = @toks; } my $info = join($delim, @usetoks); if (!$pick || scalar @usetoks) { print "$info"; if ($cut) { print "\n"; } else { print "\t$line\n"; } } }