#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'tdf_to_xml' converts stdin to stdout, wrapping tab-delimited data with
# HTML/XML tags:
# <table><tr num="rowindex"><td num="colindex">...
#
# $Id: tdf_to_xml,v 1.7 2013-09-21 12:46:51 bojar Exp $
#


use strict;
use Getopt::Long;

# Print the command-line help to STDERR and terminate with exit status 1.
# Takes no arguments and never returns.  Called both for --help and when
# GetOptions rejects the command line.
sub usage {
  # Single-quoted heredoc: the text is emitted verbatim, no interpolation.
  print STDERR <<'END_USAGE';
tdf_to_xml < tab-delimited > table.xml
Options:
  --help  ... print this message
  --border=i  ... border width (html format only)
  --noescape  ... do not escape <>&
  --tr-per-line  ... print each row on a single output line
  --format=html|docbook  ... tags are named table/tr/td or tbody/row/entry
END_USAGE
  exit 1;
}

# Option defaults; GetOptions overwrites them from the command line.
my $want_help = 0;
my ($noesc, $border, $tr_per_line) = (0, 0, 0);
my $format = "html";

# Show the usage text (which exits) when the command line cannot be parsed.
usage() unless GetOptions(
  "format=s"     => \$format,
  "border=i"     => \$border,
  "noescape"     => \$noesc,
  "tr-per-line!" => \$tr_per_line,
  "help"         => \$want_help,
);
# --help was given explicitly.
usage() if $want_help;

# Element names for each supported output format, in the order
# (table, row, cell), followed by a flag telling whether the table element
# is printed with a border attribute.
my %tags_for = (
  html    => [ "table", "tr",  "td",    1 ],
  docbook => [ "tbody", "row", "entry", 0 ],
);
die "Unrecognized format $format" unless exists $tags_for{$format};
my ($table, $tr, $td, $supports_border) = @{ $tags_for{$format} };

# Emit the table: one <$tr> element per input line and one <$td> element per
# tab-separated field, each numbered from 1 through its 'num' attribute.
if ($supports_border) {
  print "<$table border='$border'>\n";
} else {
  print "<$table>\n";
}
my $nr = 0;  # 1-based index of the current input line (row)
while (my $line = <>) {
  $nr++;
  chomp $line;
  # With --tr-per-line the newlines inside a row are suppressed, so the
  # whole row ends up on a single output line.
  my $eol = $tr_per_line ? "" : "\n";
  print "  <$tr num='$nr'>$eol";
  my $nd = 0;  # 1-based index of the current field (column)
  # The negative LIMIT keeps trailing empty fields; without it split drops
  # them and a row ending in empty columns comes out with too few cells.
  foreach my $d (split /\t/, $line, -1) {
    $nd++;
    if (!$noesc) {
      # '&' must be escaped first so the entities added below stay intact.
      $d =~ s/&/&amp;/g;
      $d =~ s/</&lt;/g;
      $d =~ s/>/&gt;/g;
    }
    print "    <$td num='$nd'>$d</$td>$eol";
  }
  print "  </$tr>\n";
}
print "</$table>\n";
