#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'unlcat' reverses what lcat does. It reads (the first) column of the file
# and appends all lines to a file with the same name as specified in the
# column.
#
# $Id: unlcat,v 1.3 2010-10-27 15:03:14 bojar Exp $
#

use strict;
use Getopt::Long;
use File::Path;
use File::Basename;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $col = 1;
my $changename = ""; # either appended, or * replaced with the colvalue
my $usage = 0;

GetOptions(
  "help" => \$usage,
  "col=i" => \$col,
  "changename=s" => \$changename,
);

if ($usage) {
  print STDERR "unlcat < input 
Produces a separate file for all values seen in column 1.
Options:
  --col=N   ... use column N instead of 1
  --changename=suffix | prefix*suffix   ... modify the column value before
                                            creating the file
";
  exit 1;
}

$col--;

my $data;

while (<>) {
  chomp;
  my @line = split /\t/, $_;
  my $key = splice(@line, $col, 1);
  push @{$data->{$key}}, [@line];
}

foreach my $key (sort keys %$data) {
  my $fname = $key.$changename;
  if ($changename =~ /\*/) {
    $fname = $changename;
    $fname =~ s/\*/$key/g;
  }
  my $outh = my_save($fname);
  foreach my $line (@{$data->{$key}}) {
    print $outh join("\t", @$line)."\n";
  }
  close $outh;
}

sub my_save {
  my $f = shift;
  if ($f eq "-") {
    binmode(STDOUT, ":utf8");
    return *STDOUT;
  }

  my $opn;
  my $hdl;
  # file might not recognize some files!
  if ($f =~ /\.gz$/) {
    $opn = "| gzip -c > '$f'";
  } elsif ($f =~ /\.bz2$/) {
    $opn = "| bzip2 > '$f'";
  } else {
    $opn = ">$f";
  }
  mkpath( dirname($f) );
  open $hdl, $opn or die "Can't write to '$opn': $!";
  binmode $hdl, ":utf8";
  return $hdl;
}
