#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# Given a file of blocks (chunks of text delimited by a blank line or a
# specific delimiter), 'blockwise' runs an arbitrary command on each of the
# blocks separately.
# 'blockwise' can waste a lot of time if there are a lot of blocks in the
# input and/or the command takes long to execute!
#
# $Id: blockwise,v 1.6 2006/09/08 16:45:03 bojar Exp $

use strict;
use warnings;
use Getopt::Long;
use FileHandle;
use IPC::Open2;

# Unbuffered STDOUT, so our own output is flushed before each child is
# spawned and interleaves correctly with the prefixed command output.
$| = 1;

# Print a short help text to STDERR and terminate with exit status 1.
sub usage {
  print STDERR "blockwise COMMAND <stdin >stdout
  --dots ... show progress dots every loaded line
  --delim=NEW_DELIMITER
  --auto-prefix ... use the first column of the first line in each block as
                    a prefix for the output of the block
  --deprefix ... the command will not see the first column
";
  exit 1;
}

my $usage = 0;
my $delim = "";      # a line equal to this (after chomp) ends a block
my $autoprefix = 0;
my $deprefix = 0;
my $dots = 0;
GetOptions("help" => \$usage,
           "delim=s" => \$delim,
           "auto-prefix" => \$autoprefix,
           "deprefix" => \$deprefix,
           "dots" => \$dots
          ) or exit 1;

# The first non-option argument is the command; any remaining arguments are
# left in @ARGV and therefore read as input files by <> below.
my $cmd = shift;

usage() if $usage || !defined $cmd;

my $data = "";       # text of the block collected so far (newlines kept)
my $prefix = undef;  # first column of the block's first line (--auto-prefix)
my $nr = 0;          # number of input lines read, for --dots

while (my $line = <>) {
  $nr++;
  if ($dots) {
    print STDERR "." if $nr % 1000 == 0;
    print STDERR "($nr)" if $nr % 10000 == 0;
  }

  # Compare against the delimiter without the trailing newline, but keep
  # the newline on $line, which is what gets passed to the command.
  my $bare = $line;
  chomp $bare;

  if ($bare eq $delim) {
    # End of block: process what was collected and echo the delimiter so
    # that the block structure is preserved in the output.
    runcmd($cmd, $data, $prefix) if $data ne "";
    $data = "";
    $prefix = undef;
    print "$delim\n";
  } else {
    # Remember the first column of the first line of the block only.
    $prefix = $1
      if !defined $prefix && $autoprefix && $line =~ /^([^\t]*)/;
    # Hide the first column (up to and including the first tab) from the
    # command; lines without a tab are passed through unchanged.
    $line =~ s/^([^\t]*)\t// if $deprefix;
    $data .= $line;
  }
}
# The last block need not be followed by a delimiter.
runcmd($cmd, $data, $prefix) if $data ne "";

# runcmd($cmd, $data, $prefix)
#
# Runs $cmd via IPC::Open2, feeds it $data on its standard input and copies
# its standard output to our STDOUT. If $prefix is defined, every output
# line is preceded by "$prefix\t". Waits for the command to finish; its
# exit status is not inspected.
sub runcmd {
  my $cmd = shift;
  my $data = shift;
  my $prefix = shift;

  # Lexical handles instead of the globals *Reader/*Writer.
  my $pid = open2(my $reader, my $writer, $cmd);

  # NOTE(review): the whole block is written before any output is read.
  # The IPC::Open2 documentation warns that this pattern can deadlock if
  # the command produces a lot of output before consuming all its input.
  print {$writer} $data;
  close $writer;   # signals EOF to the command

  # Read the command's output line by line. (An empty while () condition
  # would be always true and never read from the command.)
  while (my $out = <$reader>) {
    print "$prefix\t" if defined $prefix;
    print $out;
  }
  close $reader;
  waitpid $pid, 0;  # reap the child to avoid leaving zombies
}