#!/usr/bin/perl # The Missing Textutils, Ondrej Bojar, obo@cuni.cz # http://www.cuni.cz/~obo/textutils # # 'map' processes stdin to stdout, changing specified columns or expressions # according to a mapping given in a file or on the command line. # # $Id: map,v 1.15 2010-07-25 20:14:44 bojar Exp $ # use strict; use Getopt::Long; sub usage { print STDERR "map mapping_file stdout Options: --srccol=X ... source of the mapping, a column number of the mapping file ... default is the first column --tgtcol=X ... destination of the mapping, a column number of the m. file ... default is the second column (deprecated name: --destcol=X) --mapcols=X,Y... columns of stdin to be altered, default: all --pattern=RE ... map all occurrences of the given pattern ... default is to map exactly the given col --trim ... strip whitespace from data before mapping --default=S ... use this value, if the mapping doesn't define anything --quiet ... no warnings to stderr --restrict ... suppress the while input line, if something was not mapped! --map=PERLARRAY ... instead of mapping_file, one can specify the mapping as perl array, such as: '\"green\"=>1,\"red\"=>2' Limitations: Pattern can never contain . Mapping file is all read. "; exit 1; } my $default = undef; my $restrict = 0; my $mapcols = 0; my $srccol = 1; my $destcol = 2; my $pattern = 0; my $usage = 0; my $quiet = 0; my $trim = 0; my $skip = 0; my $must_use_all_mappings = 0; my $debug = 0; my $mapdata = 0; GetOptions("help" => \$usage, "mapcols=s" => \$mapcols, "trim" => \$trim, "srccol=i" => \$srccol, "tgtcol|destcol=i" => \$destcol, "map=s" => \$mapdata, "default=s" => \$default, "skip=i" => \$skip, "restrict" => \$restrict, "must-use-all-mappings" => \$must_use_all_mappings, "pattern=s" => \$pattern, "quiet" => \$quiet, ) or exit 1; usage() if $usage; $srccol --; $destcol --; my @mapcols = map { $_ -1 } (split /[, ]+/, $mapcols) if $mapcols; my %is_mapped; my %mapping; if (!$mapdata) { my $mapfile = shift; usage() if !$mapfile; open MF, "$mapfile" or die ("map: Can't open mapping file: $mapfile"); my $nl=0; while () { $nl++; chomp; my @line = split /\t/; $line[$srccol] =~ s/^ *| *$//g if $trim; $line[$destcol] =~ s/^ *| *$//g if $trim; if ($is_mapped{$line[$srccol]} && $mapping{$line[$srccol]} ne $line[$destcol]) { print STDERR "$mapfile:$nl:Ambigous mapping for >$line[$srccol]<\n" if !$quiet; } else { $is_mapped{$line[$srccol]} = 1; $mapping{$line[$srccol]} = $line[$destcol]; } } close MF; } else { eval '%mapping = ('.$mapdata.')'; foreach my $k (keys %mapping) { $is_mapped{$k} = 1; } } my $nl = 0; while ($skip > 0) { my $l = <>; print $l; $skip--; $nl++; } my $line_mapped; my %mapping_used; while (<>) { $nl++; $line_mapped = 1; chomp; my @line = split /\t/; if ($mapcols) { for my $col (@mapcols) { $line[$col] =~ s/^ *| *$//g if $trim; $line[$col] = do_map($line[$col]); } } else { for my $col (0 .. $#line) { $line[$col] =~ s/^ *| *$//g if $trim; $line[$col] = do_map($line[$col]); } } print join("\t", @line)."\n" if !$restrict || $line_mapped; } if ($must_use_all_mappings) { my $err = 0; foreach my $s (sort keys %mapping) { next if $mapping_used{$s}; print STDERR "Unused mapping: $s\t$mapping{$s}\n"; $err++; } exit 1 if $err; } sub perform_map { my $s = shift; print "MAPPING: $s ..> $mapping{$s}\n" if $debug; if ($is_mapped{$s}) { $mapping_used{$s}++; return $mapping{$s}; } $line_mapped = 0; if (!$quiet) { print STDERR "$nl:W:No mapping for >$s<" .(defined $default ? ", using the default >$default<" : "") ."\n"; } return $default if defined $default; return $s; } sub do_map { my $s = shift; if ($pattern) { if ($pattern =~ /(^|[^\\])\(/) { # pattern contains selector $s =~ s/$pattern/perform_map $1/ge; } else { # no selector in pattern, select the whole result $s =~ s/($pattern)/perform_map $1/ge; } return $s; } else { return perform_map($s); } }