#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'picktags' is a lazy hack for lazy programmers. It extracts values from
# specified SGML tags, without actually checking anything about the SGML.
#
# You'd probably find LT XML tools much more versatile:
# http://www.ltg.ed.ac.uk/software/xml/index.html
#
# $Id: picktags,v 1.5 2008/12/12 10:39:46 bojar Exp $
#

use strict;
use warnings;
use Getopt::Long;

# Return the usage message printed for --help or when no tag is given.
sub usage {
    return <<'END_USAGE';
usage: picktags [--pick] "MMt.*?" ...
outputs tab-separated file of *first* values of matching tags.
Tags assumed *non*pair.
Beware using greedy *! It would eat up also the end of the tag.
options:
  --pick   print only lines where something was found
  --help   print this message
END_USAGE
}

# Compile every tag pattern given on the command line into a regex that
# matches "<TAG>" and captures the text following it up to the next "<".
# Each tag is interpolated as a regular expression (not as a literal string).
# Compiling once, up front, avoids recompiling for every input line and
# reports a malformed pattern before any input is consumed.
#
# Params:  list of tag patterns (strings).
# Returns: list of compiled regexes, in the same order.
# Dies:    if a tag pattern is not a valid regular expression.
sub compile_tag_patterns {
    my @tags = @_;
    my @compiled;
    foreach my $tag (@tags) {
        my $re = eval { qr/<${tag}>([^<]*)/ };
        die "picktags: invalid tag pattern '$tag': $@" if !defined $re;
        push @compiled, $re;
    }
    return @compiled;
}

# Extract the first value of every tag from a single line.
#
# Params:  $line     - the input line (without its trailing newline),
#          @patterns - regexes as produced by compile_tag_patterns().
# Returns: a list ($found, @values): $found is 1 if at least one pattern
#          matched and 0 otherwise; @values holds one entry per pattern,
#          the empty string standing in for a tag that did not match.
sub pick_values {
    my ($line, @patterns) = @_;
    my @values;
    my $found = 0;
    foreach my $re (@patterns) {
        if ($line =~ $re) {
            # NOTE: $1 is the value only as long as the tag pattern itself
            # contains no capturing parentheses; use (?:...) for grouping.
            push @values, $1;
            $found = 1;
        }
        else {
            push @values, "";
        }
    }
    return ($found, @values);
}

# Parse the command line, then turn every line of standard input into one
# tab-separated output line holding the value found for each tag.
sub main {
    my $help = 0;
    my $pick = 0;
    GetOptions(
        "help" => \$help,
        "pick" => \$pick,    # print only lines where something was found
    ) or exit 1;

    # Every remaining argument is a tag pattern. Take them all at once: a
    # "while (my $tag = shift)" loop would stop at a false argument such as
    # "0" and leave the remaining tags behind to be opened as input files.
    my @tags = @ARGV;
    @ARGV = ();

    if ($help || !@tags) {
        print STDERR usage();
        exit 1;
    }

    my @patterns = compile_tag_patterns(@tags);

    # All arguments were consumed as tags, so input always comes from STDIN.
    while (my $line = <STDIN>) {
        chomp $line;
        my ($found, @values) = pick_values($line, @patterns);
        print join("\t", @values) . "\n" if !$pick || $found;
    }
    return;
}

# Run only when executed as a script, so the helpers can be loaded and tested.
main() unless caller;

1;