#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'weakly_correlated Xcol Ycol OutCol < input > output' replaces OutCol in
# the given data with a numeric value expressing the distance of the point
# x,y (from Xcol and Ycol) from the diagonal. The diagonal is the diagonal
# of the smallest rectangle containing all the data, i.e. the line running
# from (min x, min y) to (max x, max y).
#
# Input is tab-separated and column numbers are 1-based.
#
# Options:
#   --abs      output the absolute value of the distance (by default the
#              distance is signed: positive above the diagonal, negative
#              below it)
#   --skip=N   copy the first N input lines through unchanged (e.g. headers)
#
# $Id: weakly_correlated,v 1.2 2006/04/14 10:48:50 bojar Exp $

use strict;
use warnings;
use Getopt::Long;

my $usage = "usage: weakly_correlated Xcol Ycol OutCol < input > output";

my $abs = 0;  # take absolute value only
my $skip = 0; # number of leading lines passed through untouched
GetOptions(
    "abs"    => \$abs,
    "skip=i" => \$skip,
) or die $usage;

my $xcol = shift;
my $ycol = shift;
my $outcol = shift;

die $usage if !$xcol || !$ycol || !$outcol;

# Pass the requested number of leading lines straight through; stop early
# if the input is shorter than that instead of printing undef.
while ($skip > 0) {
    my $header = <>;
    last if !defined $header;
    print $header;
    $skip--;
}

# Convert the 1-based column numbers to 0-based array indices.
$xcol--;
$ycol--;
$outcol--;

# First pass: remember every row and track the bounding rectangle of the
# (x, y) points.
my ($minx, $miny, $maxx, $maxy);
my @data;
while (my $line = <>) {
    chomp $line;
    # Limit -1 keeps trailing empty columns so they survive to the output.
    my @fields = split /\t/, $line, -1;
    my $x = $fields[$xcol];
    my $y = $fields[$ycol];
    $minx = $x if !defined $minx || $minx > $x;
    $miny = $y if !defined $miny || $miny > $y;
    $maxx = $x if !defined $maxx || $maxx < $x;
    $maxy = $y if !defined $maxy || $maxy < $y;
    push @data, \@fields;
}

# Second pass: emit every row with OutCol replaced by the signed
# perpendicular distance from the diagonal.
if (@data) {
    my $width    = $maxx - $minx;
    my $height   = $maxy - $miny;
    my $diag_len = sqrt($width * $width + $height * $height);

    foreach my $row (@data) {
        # Cross-product form of the point-to-line distance for the line
        # through (minx, miny) with direction (width, height). Unlike the
        # slope/intercept form it needs no division by the X range, so a
        # constant X column is handled too. When the rectangle collapses
        # to a single point every row coincides with it: distance 0.
        my $dist = 0;
        if ($diag_len > 0) {
            $dist = ( $width  * ($row->[$ycol] - $miny)
                    - $height * ($row->[$xcol] - $minx) ) / $diag_len;
        }
        $dist = abs($dist) if $abs;
        $row->[$outcol] = $dist;
        # Columns created by writing past the end of a short row are undef;
        # print them as empty fields.
        print join("\t", map { defined $_ ? $_ : "" } @$row)."\n";
    }
}