#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'parfiles' builds a table of filepathnames aligned by a common substring in
# the filename. The first argument is a regexp scanning for the identifier.
# All the following arguments are understood as 'globs', i.e. wildcard
# expressions each denoting a set of files. The wildcard are expanded, all
# pathnames are scanned for the identifier and all the files are aligned.
#
# The output contains all the identifiers in the first column and the following
# columns are devoted to the files selected by the respective arguments.
#
# $Id: parfiles,v 1.3 2006/09/08 10:15:59 bojar Exp $
#

use strict;
use Getopt::Long;
use File::Glob ':glob';

sub usage {
  print STDERR "parfiles regexp glob-or-filelist-1 glob-or-fileslist-2 ...
Options:
--matching ... ignore files that do not have all the corresponding files
";
  exit 1;
}

my $usage = 0;
my $just_matching = 0;
GetOptions("help" => \$usage,
  "matching" => \$just_matching,
) or exit(1);

my $re = shift;
usage() if $usage || !defined $re;

my @globs = @ARGV;
# eval globs or load filelist
my @files = map { glob_or_load($_) } @globs;

sub glob_or_load {
  my $glob_or_filename = shift;

  if ($glob_or_filename =~ /[\*\[\]]/) {
    # print STDERR "Globbing: $glob_or_filename\n";
    return [bsd_glob($glob_or_filename)];
  } else {
    # print STDERR "Loading: $glob_or_filename\n";
    open FL, $glob_or_filename or die "Can't read $glob_or_filename";
    my @out = ();
    while (<FL>) {
      chomp;
      push @out, $_;
    }
    return [@out];
  }
}

my %by_id;
for(my $col=0; $col<scalar @files; $col++) {
  foreach my $f (@{$files[$col]}) {
    if ($f =~ /$re/o) {
      my $id = $1;
      if (defined $by_id{$id}->[$col]) {
        print STDERR "$f: ID '$id' already occupied by $by_id{$id}->[$col], ignoring the file.\n";
      } else {
        $by_id{$id}->[$col] = $f;
      }
    } else {
      print STDERR "$f: No ID found, ignoring the file.\n";
    }
  }
}

foreach my $id (sort keys %by_id) {
  my @filenames = map {$by_id{$id}->[$_]} (0..$#files);
  next if $just_matching && 0<scalar map {defined $_?():(1)} @filenames;
  print join("\t", ($id, @filenames))."\n";
}
