#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'sample_nth' reads stdin (loads the whole input into memory!) and produces
# FILECOUNT files, each with up to N lines taken at even intervals from the
# input. The remaining lines are printed to stdout.
# Use '%i' in OUTFILENAME as the placeholder for the zero-based file index.
#
# $Id: sample_nth,v 1.2 2007/05/18 08:25:29 bojar Exp $
#

use strict;
use warnings;
use Getopt::Long;
use IO::File;

my $usage = 0;
my $filecount = 1;
my $debug = 0;

# An unparsable command line is treated like --help instead of being ignored.
GetOptions(
  "help" => \$usage,
  "debug" => \$debug,
  "files=i" => \$filecount,
) or $usage = 1;

my $lines = shift;
my $outname = shift;

if ($usage || !defined $lines || !defined $filecount || !defined $outname) {
  print STDERR <<'END_USAGE';
sample_nth N OUTFILENAME < input > remaining_lines
...reads stdin (loads whole in!) and produces FILECOUNT files
each with N lines taking evenly selected lines from the input.
The remaining lines are printed to stdout.
Use '%i' in outname as the placeholder for filecount
Options:
  --files=FILECOUNT  ... default: 1
END_USAGE
  exit 1;
}

# N is used as a divisor below, so it has to be a positive integer.
if ($lines !~ /^\d+$/ || $lines < 1) {
  print STDERR "sample_nth: N must be a positive integer, got '$lines'\n";
  exit 1;
}

# A negative count would index the counters array from its end.
if ($filecount < 0) {
  print STDERR "sample_nth: FILECOUNT must not be negative, got '$filecount'\n";
  exit 1;
}

# Open one output file per requested sample. The file name is produced by
# using OUTFILENAME as a sprintf format with the zero-based file index as its
# only argument; the write mode is passed separately so the name itself is
# never interpreted as an open mode.
my @outnames;
my @streams;
for my $index (0 .. $filecount - 1) {
  my $ofn = do {
    no warnings;    # a name without '%i' leaves the index argument unused
    sprintf($outname, $index);
  };
  my $fh = IO::File->new($ofn, "w")
    or die "Can't write $ofn: $!";
  push @outnames, $ofn;
  push @streams, $fh;
}

# load whole data
my @data = <>;

# Distance between two consecutive lines going to the same file. The '%'
# operator works on integers, so truncate explicitly; when fewer than N lines
# are available the step is clamped to 1 to avoid a modulus by zero.
my $nth = int(@data / $lines);
$nth = 1 if $nth < 1;

# $saved[0 .. FILECOUNT-1] count the lines written to each file,
# $saved[FILECOUNT] counts the remaining lines sent to stdout.
my @saved = (0) x ($filecount + 1);

for my $i (0 .. $#data) {
  # my $rem = roundremainder($i, $nth);
  my $rem = $i % $nth;
  print STDERR "$i % $nth: $rem" if $debug;
  if ($rem < $filecount && $saved[$rem] < $lines) {
    # Position within the current stride selects the target file; a file
    # that already holds N lines passes further candidates on to stdout.
    print STDERR ", fileid $rem\n" if $debug;
    print { $streams[$rem] } $data[$i]
      or die "Can't write to $outnames[$rem]: $!";
    $saved[$rem]++;
  } else {
    print STDERR "\n" if $debug;
    $saved[$filecount]++;
    print $data[$i];
  }
}

# Buffered write errors only show up on close, so check it.
for my $index (0 .. $#streams) {
  $streams[$index]->close
    or die "Can't close $outnames[$index]: $!";
}

# Report the number of lines per file; the last row is the stdout remainder.
for my $i (0 .. $#saved) {
  print STDERR "File $i\t$saved[$i]\n";
}

# Remainder of $i divided by $div, rounded to the nearest integer and
# returned as a string. Currently unused (see the commented-out call above).
sub roundremainder {
  my $i = shift;
  my $div = shift;
  return sprintf("%.0f", $i-$div*int($i/$div));
}