#!/usr/bin/perl # The Missing Textutils, Ondrej Bojar, obo@cuni.cz # http://www.cuni.cz/~obo/textutils # # 'avg' prints the input and adds one more line: the average and standard # deviation of the numbers in each column. # # $Id: avg,v 1.3 2010-11-04 14:17:57 bojar Exp $ # use strict; use Getopt::Long; my $stddev = 1; my $prec = 2; GetOptions( "stddev!" => \$stddev, "prec=i" => \$prec, ) or exit 1; my @sums; my @cnts; my @data; my $nl=0; my $unmatched = 0; my $maxi = undef; while (<>) { print; chomp; $nl++; my @line = split /\t/; for(my $i=0; $i <= $#line; $i++) { if ($line[$i] =~ /^-?([0-9]+|[0-9]*\.[0-9]+)/) { if ($stddev) { $data[$i] = [] if $cnts[$i] == 0; push @{$data[$i]}, $line[$i]; } $sums[$i] += $line[$i]; $cnts[$i] ++; } } $unmatched ++ if defined $maxi && $maxi != $#line; $maxi = $#line if !defined $maxi || $maxi < $#line; } my @stddevs = (); if ($stddev) { for(my $i=0; $i<=$maxi; $i++) { next if $cnts[$i] == 0; # no numbers in this column my $sqsum = 0; my $avg = $sums[$i]/$cnts[$i]; foreach my $num (@{$data[$i]}) { $sqsum += ($num - $avg)**2; } $stddevs[$i] = sqrt($sqsum/$cnts[$i]); } } my @avgs = map { $cnts[$_] > 0 ? sprintf("%.${prec}f", $sums[$_]/$cnts[$_]) . ( $stddev ? sprintf("±%.${prec}f", $stddevs[$_]) : "" ) : "-" } (0..$maxi); print join("\t", @avgs)."\n"; die "Some lines were shorter!" if $unmatched;