#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# Given a file of blocks (chunks of text delimited by a blank line or a
# specific delimiter), 'blockwise' runs an arbitrary command on each of the
# blocks separately.

# 'blockwise' can waste a lot of time if there are a lot of blocks in the
# input and/or the command takes a long time to execute!
#
# $Id: blockwise,v 1.6 2006/09/08 16:45:03 bojar Exp $

use strict;
use Getopt::Long;
use FileHandle;
use IPC::Open2;

# Turn on autoflush for the currently selected output handle (STDOUT at this
# point), so everything printed is written out immediately instead of being
# held in a buffer.
$| = 1;

# Print the command-line help to STDERR and terminate the script.
# Takes no arguments and never returns; the exit status is always 1, also
# when the help was requested explicitly with --help.
sub usage {
  # The --dots description states the real intervals used by the main loop:
  # one dot per 1000 input lines and the running line count per 10000.
  print STDERR <<'END_USAGE';
blockwise COMMAND <stdin >stdout
  --help         ... show this message
  --dots         ... show a progress dot every 1000 input lines
                     (and the line count every 10000 lines)
  --delim=NEW_DELIMITER
  --auto-prefix  ... use the first column of the first line in each block
                     as a prefix for the output of the block
  --deprefix ... the command will not see the first column
END_USAGE
  exit 1;
}

# ---- Command-line options ---------------------------------------------
my $usage = 0;       # --help: print the usage text and exit
my $delim = "";      # --delim: a line equal to this (after chomp) ends a block;
                     # the default empty string means "blank line"
my $autoprefix = 0;  # --auto-prefix: prepend the block's first column to output
my $deprefix = 0;    # --deprefix: strip the first column before running COMMAND
my $dots = 0;        # --dots: report progress on STDERR
GetOptions("help" => \$usage, "delim=s" => \$delim,
  "auto-prefix" => \$autoprefix,
  "deprefix" => \$deprefix,
  "dots" => \$dots
) or exit 1;

# The first remaining argument is the command to run on every block; any
# further arguments are left in @ARGV and are read as input files by <>.
my $cmd = shift;
usage() if $usage || !defined $cmd;

# ---- Main loop: collect lines into blocks, run COMMAND on each ---------
my $data = "";       # text of the block collected so far (newlines kept)
my $prefix = undef;  # first column of the block's first line (--auto-prefix)
my $nr=0;            # number of input lines read so far
while (<>) {
  $nr++;
  if ($dots) {
    print STDERR "." if $nr % 1000 == 0;
    print STDERR "($nr)" if $nr % 10000 == 0;
  }
  my $line = $_;     # unchomped copy: this is what gets passed to COMMAND
  chomp;
  if ($_ eq $delim) {
    # End of block: process what was collected (empty blocks are skipped)
    # and echo the delimiter so the output keeps the block structure.
    runcmd($cmd, $data, $prefix) if $data ne "";
    $data = "";
    $prefix = undef;
    print "$delim\n";
  } else {
    # Take the prefix from the chomped $_ rather than from $line: [^\t]*
    # also matches "\n", so for a first line without any tab the prefix
    # would otherwise swallow the line terminator and break every output line.
    $prefix = $1 if !defined $prefix && $autoprefix && /^([^\t]*)/;
    $line =~ s/^([^\t]*)\t// if $deprefix;
    $data .= $line;
  }
}
# The last block need not be followed by a delimiter.
runcmd($cmd, $data, $prefix) if $data ne "";

# Run a command on one block of text and copy its output to the currently
# selected output handle.
#
#   $cmd    ... command line handed to open2() (as with exec, a single
#               string may be interpreted by the shell)
#   $data   ... text fed to the command's standard input
#   $prefix ... if defined, "$prefix\t" is printed in front of every line
#               of the command's output
#
# Dies if open2() cannot start the command or if fork() fails.  The exit
# status of the command is reaped but not inspected.
sub runcmd {
  my $cmd = shift;
  my $data = shift;
  my $prefix = shift;

  # Load POSIX before forking so that a failure shows up in the parent.
  require POSIX;

  my ($reader, $writer);
  my $pid = open2($reader, $writer, $cmd);

  # Feed the command from a separate process.  Writing all of $data here
  # before reading anything would deadlock as soon as the command's output
  # fills the pipe while we are still blocked writing its input.  It also
  # confines SIGPIPE (command exits without reading everything) to the
  # feeder instead of killing the whole script.
  my $feeder = fork();
  die "blockwise: cannot fork: $!\n" unless defined $feeder;
  if ($feeder == 0) {
    close $reader;
    print {$writer} $data;
    close $writer;
    # _exit skips END blocks and buffer flushing, so nothing inherited from
    # the parent gets written a second time.
    POSIX::_exit(0);
  }
  # Only the feeder may hold the write end, otherwise the command would
  # never see end-of-file on its input.
  close $writer;

  # A lexical loop variable keeps the caller's $_ intact.
  while (my $out = <$reader>) {
    print "$prefix\t" if defined $prefix;
    print $out;
  }
  close $reader;
  waitpid $feeder, 0;
  waitpid $pid, 0;
}
