#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'split_even' reads stdin saving the lines to N output files so that each of
# the files will contain about the same number of lines.
#
# Lines are dealt out round-robin: input line k goes to output file
# number ((k-1) mod N) + 1.
#
# $Id: split_even,v 1.6 2008/10/29 14:58:35 bojar Exp $
#

use Getopt::Long;
use strict;
use warnings;

my $usage = 0;
my $numwidth = undef;
my $gzip = 0;
my $internal_gzip = 0;
my $subdirs = 0;
my $suffix = "";
GetOptions(
  "usage" => \$usage,
  "numwidth=i" => \$numwidth,
  "gzip" => \$gzip,
  "internal-gzip" => \$internal_gzip,
  "subdirs=i" => \$subdirs,
  "suffix=s" => \$suffix,
) or exit 1;

my $parts = shift;
my $prefix = shift;

if ($usage || !defined $parts || !defined $prefix) {
  print STDERR "split_even NUM PREFIX < lines
Options:
  --numwidth=2 ... override how many zeros to use in filename
  --gzip ... gzip output files using separate gzipping processes
  --internal-gzip ... gzip output files within main perl executable
                      (much slower on multiprocessor machines, maybe always
                      much slower)
  --subdirs=N ... generate the files not into the current directory but in N
                  subdirs called PREFIX000 .. PREFIX(N-1)
  --suffix=STRING ... append this suffix to all created files
";
  exit 1;
}

# NUM must be a positive integer: it is used as a loop bound, as a modulus
# and to derive the zero-padding width.
if ($parts !~ /\A[0-9]+\z/ || $parts == 0) {
  print STDERR "split_even: NUM must be a positive integer, got '$parts'\n";
  exit 1;
}
$parts += 0;    # normalize e.g. "010" to 10

if ($subdirs < 0) {
  print STDERR "split_even: --subdirs must not be negative, got '$subdirs'\n";
  exit 1;
}

if ($internal_gzip) {
  require IO::Compress::Gzip;
}

# Number of decimal digits, taken from the string length.  The former
# int(log(N)/log(10))+1 is off by one for some powers of ten because of
# floating-point rounding (e.g. N=1000 gave 3 instead of 4).
$numwidth = length($parts) if !defined $numwidth;
my $dirnumwidth;
$dirnumwidth = length($subdirs) if $subdirs;

# Open a pipe to an external "gzip -c" whose stdout is redirected to $path.
# No shell is involved, so $path may contain arbitrary characters.
# Returns the write end of the pipe; dies if the fork fails.
sub open_gzip_pipe {
  my ($path) = @_;
  my $pid = open(my $pipe, '|-');
  die "Failed to gzip to $path: cannot fork: $!" if !defined $pid;
  if ($pid == 0) {
    # Child: stdin is already the read end of the pipe.
    open STDOUT, '>', $path or die "Failed to gzip to $path: $!";
    exec { 'gzip' } 'gzip', '-c' or die "Failed to run gzip for $path: $!";
  }
  return $pipe;
}

my @outfiles = ();   # output handles, index 0 .. $parts-1
my @outnames = ();   # matching file names, used in error messages
for my $n (1 .. $parts) {
  my $outfn = $prefix.sprintf("%0${numwidth}d", $n).$suffix;
  if ($subdirs) {
    # File number $n lands in subdirectory number ($n mod $subdirs).
    my $dirname = $prefix.sprintf("%0${dirnumwidth}d", $n % $subdirs).$suffix;
    if (! -d $dirname) {
      mkdir $dirname or die "Can't create subdir $dirname: $!";
    }
    $outfn = $dirname."/".$outfn;
  }

  my $fh;
  if ($internal_gzip) {
    $outfn .= ".gz";
    # The module is loaded at run time, so its error variable is seen only
    # once at compile time; silence the resulting "used only once" warning.
    no warnings 'once';
    $fh = IO::Compress::Gzip->new($outfn)
      or die "IO::Compress::Gzip failed to gzip to $outfn: $IO::Compress::Gzip::GzipError\n";
  }
  elsif ($gzip) {
    $outfn .= ".gz";
    $fh = open_gzip_pipe($outfn);
  }
  else {
    open $fh, '>', $outfn or die "Failed to write to $outfn: $!";
  }
  push @outfiles, $fh;
  push @outnames, $outfn;
  print STDERR "Output to $outfn\n";
}

# Deal the input lines out to the output files in turn.
my $next = 0;
while (my $line = <>) {
  print { $outfiles[$next] } $line
    or die "Failed to write to $outnames[$next]: $!";
  $next = ($next + 1) % $parts;
}

# Close every output and report failures: buffered write errors and a failed
# gzip child only become visible here.
my $failed = 0;
for my $idx (0 .. $#outfiles) {
  next if close $outfiles[$idx];
  my $reason = $! ? "$!"
             : $? ? "child exit status ".($? >> 8)
             :      "unknown error";
  print STDERR "Failed to close $outnames[$idx]: $reason\n";
  $failed++;
}
exit 1 if $failed;