#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
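# 'weakly_correlated [--abs] [--skip=N] Xcol Ycol OutCol < input > output'
# replaces OutCol in the given tab-separated data with a numeric value
# expressing the distance of the point x,y (from Xcol and Ycol) from the
# diagonal. The diagonal is the diagonal of the smallest rectangle containing
# all the data. Column numbers are 1-based.
#
# Options:
#   --abs      output the absolute value of the distance
#   --skip=N   copy the first N lines of the input to the output unchanged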
#
# $Id: weakly_correlated,v 1.2 2006/04/14 10:48:50 bojar Exp $

use strict;
use Getopt::Long;

# Command-line options.
my $abs = 0;  # --abs: take absolute value only
my $skip = 0; # --skip=N: copy the first N input lines through unchanged
my $usage = "usage: weakly_correlated Xcol Ycol OutCol < input > output";
# GetOptions returns false (after warning) on an unknown or malformed
# option; stop instead of silently processing with the wrong settings.
GetOptions(
  "abs"=>\$abs,
  "skip=i"=>\$skip,
) or die $usage;
my $xcol = shift;
my $ycol = shift;
my $outcol = shift;
# Columns are given 1-based, so each must be a positive integer. Anything
# else would turn into a zero or negative array index below and silently
# address the wrong column.
foreach my $col ($xcol, $ycol, $outcol) {
  die $usage
    if !defined $col || $col !~ /^[0-9]+$/ || $col < 1;
}

# Pass the header lines through untouched.
while ($skip>0) {
  my $header = <>;
  # The input ended inside the header: there are no data rows left, and
  # calling <> again after end of input would start reading STDIN afresh.
  exit 0 if !defined $header;
  print $header;
  $skip--;
}

# Convert the 1-based column numbers to 0-based array indices.
$xcol --;
$ycol --;
$outcol --;

# Slurp every remaining row, keeping the split fields for the output pass
# and tracking the bounding box (smallest and largest x and y) of the points.
my ($minx, $miny, $maxx, $maxy);
my @data;
while (my $row = <>) {
  chomp $row;
  my @fields = split /\t/, $row;
  my ($px, $py) = @fields[$xcol, $ycol];

  # Grow the bounding box so that it covers this point.
  if (!defined $minx or $px < $minx) { $minx = $px; }
  if (!defined $miny or $py < $miny) { $miny = $py; }
  if (!defined $maxx or $px > $maxx) { $maxx = $px; }
  if (!defined $maxy or $py > $maxy) { $maxy = $py; }

  # @fields is a fresh array on every iteration, so storing a reference
  # to it keeps this row's values.
  push @data, \@fields;
}

# Extent of the bounding box; its diagonal runs from ($minx, $miny) to
# ($maxx, $maxy). With no data rows there is nothing to measure.
my ($width, $height) = @data ? ($maxx - $minx, $maxy - $miny) : (0, 0);

# Second pass: store each point's distance from the diagonal in OutCol and
# print the row.
foreach my $row (@data) {
  my $dist = _diagonal_distance(
    $row->[$xcol], $row->[$ycol], $minx, $miny, $width, $height);
  $dist = abs($dist) if $abs;
  $row->[$outcol] = $dist;
  print join("\t", @$row)."\n";
}

# Signed perpendicular distance of the point ($px, $py) from the line that
# starts at ($x0, $y0) and has the direction ($width, $height).
#
# The result is positive for points above the line and negative for points
# below it (for $width > 0). It is computed as the cross product of the
# direction and the vector from ($x0, $y0) to the point, divided by the
# length of the direction. Unlike the slope/intercept form this needs no
# division by $width, so a vertical diagonal (all x equal) is handled too.
# A zero-length diagonal (all points identical) yields 0.
sub _diagonal_distance {
  my ($px, $py, $x0, $y0, $width, $height) = @_;
  my $length = sqrt($width*$width + $height*$height);
  return 0 if $length == 0;
  return (($py - $y0)*$width - ($px - $x0)*$height) / $length;
}
