#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'tdf_to_xml' converts stdin to stdout, wrapping tab-delimited data with
# HTML/XML tags:
#   <table><tr num="rowindex"><td num="colindex">...
#
# Input is read line by line from the files named on the command line, or
# from stdin when none are given.  Every input line becomes one row and every
# tab-separated field becomes one cell; rows and cells are numbered from 1.
#
# $Id: tdf_to_xml,v 1.6 2007/02/06 02:01:10 bojar Exp $
#

use strict;
use warnings;
use Getopt::Long;

# Print the command-line help to STDERR and terminate with exit status 1.
sub usage {
    print STDERR <<'END_USAGE';
tdf_to_xml < tab-delimited > table.xml
Options:
  --border=i  ... border width
  --noescape  ... do not escape <>&
  --format=html|docbook  ... tags are name table/tr/td or tbody/row/entry
END_USAGE
    exit 1;
}

# Return a copy of $text with the three markup-significant characters
# replaced by their entities.  '&' has to be handled first, otherwise the
# ampersands introduced by the other two replacements would be escaped again.
sub escape_markup {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

my $usage  = 0;
my $noesc  = 0;
my $border = 0;
my $format = "html";
GetOptions(
    "help"     => \$usage,
    "border=i" => \$border,
    "noescape" => \$noesc,
    "format=s" => \$format,
) or usage();
usage() if $usage;

# Element names for the selected output flavour, plus whether the outermost
# element accepts a border attribute.
my ($table, $tr, $td, $supports_border);
if ($format eq "html") {
    ($table, $tr, $td, $supports_border) = ("table", "tr", "td", 1);
}
elsif ($format eq "docbook") {
    ($table, $tr, $td, $supports_border) = ("tbody", "row", "entry", 0);
}
else {
    # Trailing newline: this is a user error, not a bug in the script, so the
    # file/line suffix that die would otherwise append is just noise.
    die "Unrecognized format $format\n";
}

if ($supports_border) {
    print "<$table border='$border'>\n";
}
else {
    print "<$table>\n";
}

my $nr = 0;    # 1-based row counter
while (my $line = <>) {
    $nr++;
    chomp $line;
    print " <$tr num='$nr'>\n";

    my $nd = 0;    # 1-based column counter within the current row
    # LIMIT of -1 keeps trailing empty fields, so a row ending in tabs still
    # yields the same number of cells as its neighbours.
    foreach my $d (split /\t/, $line, -1) {
        $nd++;
        $d = escape_markup($d) unless $noesc;
        print " <$td num='$nd'>$d</$td>\n";
    }

    print " </$tr>\n";
}
print "</$table>\n";