#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'update_cols' updates stdin and produces stdout according to the given update
# file.
# All files are expected to be tab-delimited.
#
# Overview:
#   1. The update file is read completely into memory and indexed by the
#      tab-joined values of its key columns (--keys).
#   2. Every line of the main input (files named after the update file on the
#      command line, or stdin) is looked up by the same key columns.  On a
#      match, the columns of the update line are written into the main-input
#      columns named by --paste; otherwise the line is passed through as is.
#
# Notes:
#   * --trim is applied to the fields of the update file only; fields of the
#     main input are compared and printed exactly as read.
#   * A duplicate key in the update file is an error (exit status 1).
#
# $Id: update_cols,v 1.5 2005/10/10 07:33:50 bojar Exp $
#

use strict;
use warnings;
use Getopt::Long;

# Print the command-line help to STDERR and terminate with exit status 1.
sub usage {
    print STDERR <<'END_USAGE';
update_cols update_file < stdin > stdout
Options:
  --paste=   ... comma delimited list of indices of cols in the pasted (update)
                 file, with respect to the columns in the main file
                 Use 0 to ignore the column.
  --keys=    ... comma delimited set of col indices from pasted that serve as
                 keys to decide whether the update is to be done on the current
                 line
  --trim     ... strip whitespace from data before mapping
  --verbose  ... report used keys, replacements and unused update lines
                 to stderr
  --help     ... print this message
Limitations:
  Update file is read to the memory.
Example:
  --keys=1 --paste=0,3,4
  If the value of the column 1 in main input is equal to a value in the
  column 1 of a line in the update file, then the line of the update file is
  used as follows: the first column is not used, the second column is used to
  replace the column 3 and the third column is used to replace the column 4
  of the input line.
END_USAGE
    exit 1;
}

# Convert a comma/space delimited list of 1-based column numbers into a list
# of 0-based indices.
#   $option ... option name, used only in the error message
#   $spec   ... the list as given on the command line
#   $min    ... smallest number accepted (0 where "0 = ignore" is allowed)
# A value of 0 becomes -1, which the paste loop treats as "skip this column".
# Anything that is not a plain non-negative integer >= $min ends in usage().
sub parse_column_list {
    my ($option, $spec, $min) = @_;
    my @indices;
    foreach my $col (split /[, ]+/, $spec) {
        if ($col !~ /^\d+$/ || $col < $min) {
            print STDERR "update_cols: bad column '$col' in --$option=$spec\n";
            usage();
        }
        push @indices, $col - 1;
    }
    return @indices;
}

# Build the lookup key of a line: the values of the key columns joined by
# tabs.  Columns missing from a short line count as empty strings.
sub key_of {
    my ($fields, $keycols) = @_;
    return join("\t",
        map { defined $fields->[$_] ? $fields->[$_] : '' } @{$keycols});
}

my $pastecols = 0;
my $keycols   = 0;
my $usage     = 0;
my $trim      = 0;
my $verbose   = 0;

GetOptions(
    "help"    => \$usage,
    "trim"    => \$trim,
    "verbose" => \$verbose,
    "paste=s" => \$pastecols,
    "keys=s"  => \$keycols,
) or usage();
usage() if $usage;

my $mapfile = shift;
usage() if !$mapfile || !$pastecols || !$keycols;

# 0-based target column in the main file for each update-file column
# (-1 = ignore that update-file column).
my @pastecols = parse_column_list("paste", $pastecols, 0);
# 0-based key columns, used identically in the update file and the main input.
my @keycols   = parse_column_list("keys", $keycols, 1);
usage() if !@pastecols || !@keycols;

# --- Pass 1: load the update file into %mapping (key => array ref of fields).
# Three-argument open so that the file name is never interpreted as a mode.
open my $mf, '<', $mapfile
    or die "update_cols: Can't open update file: $mapfile: $!\n";

my %mapping;
my $err = 0;
my $nl  = 0;
while (my $row = <$mf>) {
    $nl++;
    chomp $row;
    # Limit -1 keeps trailing empty fields instead of silently dropping them.
    my @fields = split /\t/, $row, -1;
    if ($trim) {
        s/^ *| *$//g foreach @fields;
    }
    my $key = key_of(\@fields, \@keycols);
    if (exists $mapping{$key}) {
        # Keep going so that all duplicates are reported before we give up.
        print STDERR "$mapfile:$nl:Key '$key' already defined.\n";
        $err = 1;
    }
    else {
        $mapping{$key} = \@fields;
    }
}
close $mf;
exit 1 if $err;

# --- Pass 2: filter the main input.
my %used;
while (my $row = <>) {
    chomp $row;
    my @line = split /\t/, $row, -1;
    my $key = key_of(\@line, \@keycols);
    print STDERR "Using key: $key\n" if $verbose;

    my $update = $mapping{$key};
    if (defined $update) {
        print STDERR "  update: @{$update}\n" if $verbose;
        foreach my $i (0 .. $#pastecols) {
            my $target = $pastecols[$i];
            next if $target == -1;
            # An update line shorter than the paste list supplies empty values.
            my $value = defined $update->[$i] ? $update->[$i] : '';
            if ($verbose) {
                my $old = defined $line[$target] ? $line[$target] : '';
                print STDERR "Replacing $target ($old) with $value\n";
            }
            # Assigning past the end of @line extends it; the gap is printed
            # as empty columns below.
            $line[$target] = $value;
        }
        $used{$key}++;
    }
    print join("\t", map { defined $_ ? $_ : '' } @line) . "\n";
}

# Diagnostic only: list update lines whose key never occurred in the input.
if ($verbose) {
    foreach my $k (sort keys %mapping) {
        next if $used{$k};
        print STDERR "Unused update line: " . join("\t", @{ $mapping{$k} }) . "\n";
    }
}

exit 0;