#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# Given a command and an input file, finds the subsection of the file where
# the command fails.
# Strategy:
# 1. check if the whole file works => nothing to search for
# 2. bisect and find the shortest 'head' of the file that has the problem
# (3. bisect and find the largest 'skip' of the given head that has the problem)
#
# $Id: bisect,v 1.1 2012-05-17 09:48:26 bojar Exp $

use strict;
use Getopt::Long;
use FileHandle;
use IPC::Open2;

$| = 1;

# Print the one-line synopsis to STDERR and terminate with exit status 1.
# Never returns.
sub usage {
  my $synopsis = "bisect COMMAND FILE\n";
  print STDERR $synopsis;
  exit 1;
}

# Command line: bisect [--help] COMMAND FILE
my $usage = 0;
GetOptions("help" => \$usage,
) or exit 1;
my $cmd = shift;
my $infile = shift;
usage() if $usage || !defined $cmd || ! defined $infile;

# The input is re-opened and re-read from the start for every probe, which a
# stream cannot offer: after the counting pass below STDIN would be exhausted
# and every probe would feed the command empty input.
if ($infile eq "-") {
  print STDERR "Cannot bisect standard input, please give a real file.\n";
  exit 1;
}

# Count the lines of the input; this is the upper bound of the search.
my $hdl = my_open($infile);
my $nr = 0;
$nr++ while (<$hdl>);
close($hdl);

if ($nr < 2) {
  print STDERR "Input file too short, nothing to do.\n";
  exit 0;
}

# Step 1: if the command succeeds on the whole file, there is nothing to find.
my $ok = check(0, $nr, $infile, $cmd);
if ($ok) {
  print STDERR "The whole file seems ok, nothing to do.\n";
  exit 0;
}

# Step 2: bisect for the shortest failing prefix of the file.
my $head = bisect_head(0, $nr, $infile, $cmd);
print STDERR "head -n$head $infile  # will show the problem\n";

# Step 3 (not implemented): narrow the failing head down from its start.
#my $skip = bisect_skip(0, $head, $infile, $cmd);


# Bisect for a failing prefix length of $infile.
#
# Arguments: LOW, HIGH, INFILE, CMD
#   LOW    - lower bound of the search interval (number of lines)
#   HIGH   - upper bound of the search interval (number of lines)
#   INFILE - input file, handed on to check()
#   CMD    - command, handed on to check()
#
# Every step probes the first MID lines (MID halfway between the bounds) with
# check(0, MID, ...): if the command succeeds, the lower bound moves up to
# MID, otherwise the upper bound moves down to MID.  The search ends as soon
# as the midpoint coincides with one of the bounds and the upper bound is
# returned (or the common value if both bounds are equal).
# The current interval is traced to STDERR at every step.
sub bisect_head {
  my ($low, $high, $infile, $cmd) = @_;

  while (1) {
    print STDERR "bisect $low $high\n";

    return $low if $low == $high;

    my $mid = $low + int(($high - $low) / 2);
    # the interval cannot be split any further
    return $high if $mid == $low || $mid == $high;

    if (check(0, $mid, $infile, $cmd)) {
      # the first $mid lines are fine, the problem lies further down
      $low = $mid;
    } else {
      # the first $mid lines already fail, look for a shorter head
      $high = $mid;
    }
  }
}

# Run $cmd on a section of $infile and report whether it succeeded.
#
# Arguments: SKIP, HEAD, INFILE, CMD
#   SKIP   - number of leading lines that are not passed to the command
#   HEAD   - number of the last line that is passed to the command
#   INFILE - file to read, opened via my_open()
#   CMD    - shell command; it receives lines SKIP+1 .. HEAD on its standard
#            input, its stdout and stderr are discarded
#
# Returns 1 if the command exited with status 0, and 0 otherwise.
# Dies if the command cannot be launched.
sub check {
  my $skip = shift;
  my $head = shift; # first apply the head, then apply the skip!
  my $infile = shift;
  my $cmd = shift;

  my $h = my_open($infile);

  # A command that stops reading early (typically the very failure we are
  # looking for) would kill this script with SIGPIPE on the next write.
  # Catch the signal with a real handler rather than 'IGNORE': an ignored
  # signal would be inherited by the command and could alter its behaviour.
  my $pipe_broken = 0;
  local $SIG{PIPE} = sub { $pipe_broken = 1 };

  open my $cmd_fh, "|-", "$cmd > /dev/null 2>/dev/null"
    or die "Can't launch $cmd";
  # my_open() decodes the input with :utf8; mirror the layer on the way out
  # so that the command receives the same bytes that are in the file.
  binmode $cmd_fh, ":utf8";

  my $nr = 0;
  while (my $line = <$h>) {
    $nr++;
    last if $nr > $head;
    next if $nr <= $skip;
    # once the command stopped listening there is no point in feeding it
    print {$cmd_fh} $line or last;
    last if $pipe_broken;
  }
  close $h;

  # Closing a piped handle waits for the command and stores its exit status
  # in $?.  This has to come after closing $h, which may itself be a pipe
  # and would overwrite $?.
  close $cmd_fh;
  return $? == 0 ? 1 : 0;
}

# Open $f for reading and return the filehandle, with the :utf8 layer applied.
#
#   "-"                              => STDIN
#   *.gz  or gzip data per file(1)   => decompressed through zcat
#   *.bz2 or bzip2 data per file(1)  => decompressed through bzcat
#   anything else                    => read directly
#
# The file name is never interpreted by perl's 2-argument open or by a shell,
# so names containing quotes, whitespace or characters such as '>' and '|'
# are opened literally.
# Dies if the file does not exist or cannot be opened.
sub my_open {
  my $f = shift;
  if ($f eq "-") {
    binmode(STDIN, ":utf8");
    return *STDIN;
  }

  die "Not found: $f" if ! -e $f;

  # Ask file(1) about the content.  If it cannot be run or prints nothing,
  # only the file name extension decides below.
  my $ft = "";
  if (open my $probe, "-|", "file", $f) {
    local $/;
    my $answer = <$probe>;
    $ft = $answer if defined $answer;
    close $probe;
  }

  my $hdl;
  my $opened;
  # file might not recognize some files!
  if ($f =~ /\.gz$/ || $ft =~ /gzip compressed data/) {
    $opened = open($hdl, "-|", "zcat", $f);
  } elsif ($f =~ /\.bz2$/ || $ft =~ /bzip2 compressed data/) {
    $opened = open($hdl, "-|", "bzcat", $f);
  } else {
    $opened = open($hdl, "<", $f);
  }
  die "Can't open '$f': $!" if !$opened;

  binmode $hdl, ":utf8";
  return $hdl;
}
