#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# Given a file of blocks (delimited by blank-lines), 'deblockize' returns a
# tab-delimited file. Each output line corresponds to one input block.
#
# See also 'blockize' and consider deblockize | grep | blockize.
#
# $Id: deblockize,v 1.4 2010-11-02 17:06:04 bojar Exp $

use Getopt::Long;
use strict;

# Print the command-line synopsis to STDERR and terminate the program with
# exit status 1.
sub usage {
  my $synopsis = <<"END_USAGE";
deblockize <stdin >stdout
  --delim=''  ... a line matching this delimits blocks in input
  --field-delimiter='\\t'  ... must not appear in input
END_USAGE
  print STDERR $synopsis;
  exit 1;
}

# Command-line settings and their defaults:
#   $delim        ... an input line equal to this string separates blocks
#   $field_delim  ... string placed between the joined lines of one block
my ($show_help, $delim, $field_delim) = (0, "", "\t");

# Stop with exit status 1 when the command line cannot be parsed.
unless (GetOptions(
  "help" => \$show_help,
  "delim=s" => \$delim,
  "field-delimiter=s" => \$field_delim,
)) {
  exit 1;
}

# --help: show the synopsis (usage() does not return).
if ($show_help) {
  usage();
}

# Main filter loop: join the lines of every input block with $field_delim and
# print one output line per block.
#   $start ... true while nothing of the current block has been printed yet
#   $nl    ... number of the current input line (used in warnings)
#   $err   ... number of input lines that contained $field_delim
my $start = 1;
my $nl = 0;
my $err = 0;
while (<>) {
  $nl++;
  chomp;
  if (/\Q$field_delim/) {
    # The field delimiter inside the data makes the output ambiguous.
    # Only the first 20 occurrences are reported, but all are counted.
    print STDERR "$nl:Warning, --field-delimiter='$field_delim' seen in input!\n"
      if $err < 20;
    $err++;
  }
  if ($_ eq $delim) {
    # A delimiter line closes the current output line.  Several delimiter
    # lines in a row therefore produce empty output lines (empty blocks).
    print "\n";
    $start = 1;
  } else {
    # Separate this line from the previous line of the same block.
    print $field_delim if !$start;
    print $_;
    $start = 0;
  }
}

# The input may end without a closing delimiter line; terminate the last
# output line so that every block yields a complete, newline-ended record.
print "\n" if !$start;

# Fail (non-zero exit status) if the field delimiter occurred in the input.
die "There were $err errors in total." if $err;
