#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'picktags' is a lazy hack for lazy programmers. It extracts values from
# specified SGML tags, without actually checking anything about the SGML.
#
# You'd probably find LT XML tools much more versatile:
#   http://www.ltg.ed.ac.uk/software/xml/index.html
#
# $Id: picktags,v 1.5 2008/12/12 10:39:46 bojar Exp $
#

use strict;
use Getopt::Long;

# Command-line switches; both are off unless given:
#   --help  show the usage message and quit
#   --pick  print only lines where something was found
my ($help, $pick) = (0, 0);
unless (GetOptions('help' => \$help, 'pick' => \$pick)) {
  # Option parsing failed (e.g. an unknown switch was given).
  exit 1;
}

# Everything left on the command line after option parsing is a tag pattern.
# The arguments are taken wholesale instead of looping on the truth of each
# shifted value: a tag such as "0" (or an empty string) is false in Perl and
# would end such a loop early, leaving the remaining arguments in @ARGV where
# the later <> would try to open them as input files.
my @tags = @ARGV;
@ARGV = ();  # nothing may remain for <>, so the input comes from STDIN
my $tagcnt = scalar @tags;

# Show the usage when asked for it or when there is nothing to extract.
if ($help || !$tagcnt) {
  print STDERR "usage: picktags \"MMt.*?\" ...\n
outputs tab-separated file of *first* values of matching tags.
Tags assumed *non*pair.
Beware using greedy *! It would eat up also the end of the tag.
";
  exit 1;
}

# Compile each tag pattern once, up front, instead of re-interpolating it for
# every tag on every input line.  The tag text is deliberately used as a regex
# fragment (see the usage message), so a malformed pattern now aborts before
# any input is read.
# Note: a capturing group inside a tag pattern becomes $1 and is therefore
# reported in place of the text that follows the tag.
my @tag_patterns = map { qr/<$_>([^<]*)/ } @tags;

# Emit one tab-separated row per input line, one column per requested tag.
while (my $line = <>) {
  chomp $line;
  my ($hits, @values) = pick_first_values($line, \@tag_patterns);
  # With --pick, lines where none of the tags matched are skipped.
  next if $pick && !$hits;
  print join("\t", @values)."\n";
}

# pick_first_values($line, \@patterns)
#   $line      one input line (without its trailing newline)
#   \@patterns reference to a list of compiled regexes; $1 of each one is
#              taken as the value
# Returns a list: the number of patterns that matched, followed by one value
# per pattern in the given order.  The value is what $1 captured at the first
# (leftmost) match, or an empty string when the pattern did not match.
sub pick_first_values {
  my ($line, $patterns) = @_;
  my $hits = 0;
  my @values = ();
  foreach my $pattern (@$patterns) {
    if ($line =~ $pattern) {
      $hits++;
      # $1 may be undefined if it belongs to an optional group of the tag.
      push @values, defined $1 ? $1 : "";
    } else {
      push @values, "";
    }
  }
  return ($hits, @values);
}
