#!/usr/bin/perl # The Missing Textutils, Ondrej Bojar, obo@cuni.cz # http://www.cuni.cz/~obo/textutils # # 'unlcat' reverses what lcat does. It reads (the first) column of the file # and appends all lines to a file with the same name as specified in the # column. # # $Id: unlcat,v 1.3 2010-10-27 15:03:14 bojar Exp $ # use strict; use Getopt::Long; use File::Path; use File::Basename; binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8"); binmode(STDERR, ":utf8"); my $col = 1; my $changename = ""; # either appended, or * replaced with the colvalue my $usage = 0; GetOptions( "help" => \$usage, "col=i" => \$col, "changename=s" => \$changename, ); if ($usage) { print STDERR "unlcat < input Produces a separate file for all values seen in column 1. Options: --col=N ... use column N instead of 1 --changename=suffix | prefix*suffix ... modify the column value before creating the file "; exit 1; } $col--; my $data; while (<>) { chomp; my @line = split /\t/, $_; my $key = splice(@line, $col, 1); push @{$data->{$key}}, [@line]; } foreach my $key (sort keys %$data) { my $fname = $key.$changename; if ($changename =~ /\*/) { $fname = $changename; $fname =~ s/\*/$key/g; } my $outh = my_save($fname); foreach my $line (@{$data->{$key}}) { print $outh join("\t", @$line)."\n"; } close $outh; } sub my_save { my $f = shift; if ($f eq "-") { binmode(STDOUT, ":utf8"); return *STDOUT; } my $opn; my $hdl; # file might not recognize some files! if ($f =~ /\.gz$/) { $opn = "| gzip -c > '$f'"; } elsif ($f =~ /\.bz2$/) { $opn = "| bzip2 > '$f'"; } else { $opn = ">$f"; } mkpath( dirname($f) ); open $hdl, $opn or die "Can't write to '$opn': $!"; binmode $hdl, ":utf8"; return $hdl; }