#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'tpr_tnr' reads a column in the input, calculating true positive rate
# and true negative rate at various cut-offs. The column is expected to
# contain 1 in case of a positive example and 0 in case of negative example.
# If the lines are in perfect order, all positives come before all negatives.
# Use this script on files sorted non-perfectly to examine the accuracy of
# the sorting compared to the golden positive-negative dichotomy.
#
# Usage: tpr_tnr [--help] [--skip=N] [--step=N] column < infile > outfile
#
#   column    1-based index of the tab-separated column holding the gold label
#   --skip=N  copy the first N input lines to the output untouched; they are
#             not counted as data
#   --step=N  emit a statistics report before every N-th data line
#             (default 30)
#
# Every data line is echoed to the output. Before data line i (0-based, i a
# multiple of the step) two extra lines are printed: the raw confusion counts
# and the derived TPR/TNR/Prec/Rec percentages, treating the i lines above
# the cut-off as predicted positive and all remaining lines as predicted
# negative.
#
# $Id: tpr_tnr,v 1.1 2006/04/09 22:40:16 bojar Exp $

use strict;
use warnings;
use Getopt::Long;

my $usage_text =
  "usage: tpr_tnr [--help] [--skip=N] [--step=N] column < infile > outfile\n";

my $usage = 0;
my $skip = 0;  # copy this many leading lines through before reading data
my $step = 30; # produce stats every step lines

# A mistyped option should stop the run instead of silently using defaults.
GetOptions(
  "help"   => \$usage,
  "skip=i" => \$skip,
  "step=i" => \$step,
) or die $usage_text;

my $col = shift;
# The column must be a positive integer; anything else would be decremented
# to a negative index below and silently select a column from the end.
die $usage_text
  if $usage || !defined $col || $col !~ /^\d+$/ || $col < 1;

# $step is used as a modulus below; zero would abort mid-output.
die "tpr_tnr: --step must not be zero\n" if $step == 0;

# Pass the leading lines through verbatim. Only a positive count is honoured:
# a negative one would never reach zero and would swallow the whole input.
if ($skip > 0) {
  while (my $leading = <>) {
    print $leading;
    last if --$skip == 0;
  }
}

$col--; # switch to a 0-based array index

my @data  = (); # one [ gold label, original line ] pair per data line
my %total = (); # gold label => number of data lines carrying that label
while (my $line = <>) {
  my $row = $line;
  chomp $row;
  my @cols = split /\t/, $row;
  # A line with too few columns gets the empty label, which keeps the
  # resulting count key ("p0, g") as it was without reading undef.
  my $g = defined $cols[$col] ? $cols[$col] : '';
  push @data, [ $g, $line ];
  $total{$g}++;
}

# Gold label => number of lines with that label above the current cut-off.
# Keeping this as a running count avoids rescanning all of @data at every
# report while producing the same numbers.
my %above = ();
for my $i (0..$#data) {
  my ($g, $line) = @{$data[$i]};
  print_stats(\%total, \%above) if $i % $step == 0;
  print $line;
  $above{$g}++; # line $i is predicted positive for every later cut-off
}

# Print the two report lines for one cut-off.
#   $total - hashref: gold label => count over all data lines
#   $above - hashref: gold label => count over the lines above the cut-off
# Lines above the cut-off are predicted positive (p1), the rest negative (p0).
sub print_stats {
  my ($total, $above) = @_;

  # Rebuild the "p<pred>, g<gold>" table; only non-zero cells are listed.
  my %stat = ();
  foreach my $g (keys %$total) {
    my $predicted_pos = $above->{$g} || 0;
    my $predicted_neg = $total->{$g} - $predicted_pos;
    $stat{"p1, g$g"} = $predicted_pos if $predicted_pos > 0;
    $stat{"p0, g$g"} = $predicted_neg if $predicted_neg > 0;
  }

  # Sorted so the report is identical from run to run; plain hash order
  # varies between invocations.
  foreach my $k (sort keys %stat) {
    print "$k: $stat{$k} ";
  }
  print "\n";

  # Cells that never occurred are absent from %stat; count them as zero.
  my $tp = $stat{"p1, g1"} || 0;
  my $fn = $stat{"p0, g1"} || 0;
  my $fp = $stat{"p1, g0"} || 0;
  my $tn = $stat{"p0, g0"} || 0;

  my $p = $fn + $tp;     # all gold positives
  my $n = $tn + $fp;     # all gold negatives
  my $predp = $fp + $tp; # everything predicted positive

  # Each rate is omitted when its denominator is zero.
  printf "TPR: %.2f ", $tp/$p*100 if $p>0;
  printf "TNR: %.2f ", $tn/$n*100 if $n>0;
  printf "Prec: %.2f ", $tp/$predp*100 if $predp>0;
  printf "Rec: %.2f ", $tp/$p*100 if $p>0;
  print "\n";
}