#!/usr/bin/perl
# The Missing Textutils, Ondrej Bojar, obo@cuni.cz
# http://www.cuni.cz/~obo/textutils
#
# 'avg' prints the input and adds one more line: the average and standard
# deviation of the numbers in each column.
#
# $Id: avg,v 1.5 2011-04-27 07:03:04 bojar Exp $
#

use strict;
use Getopt::Long;

my $stddev = 1;
my $prec = 2;

GetOptions(
  "stddev!" => \$stddev,
  "prec=i" => \$prec,
) or exit 1;

my @sums;
my @cnts;
my @data;
my $nl=0;
my $unmatched = 0;
my $maxi = undef;

while (<>) {
  print;
  chomp;
  $nl++;
  my @line = split /\t/;
  for(my $i=0; $i <= $#line; $i++) {
    if ($line[$i] =~ /^\s*-?([0-9]+|[0-9]*\.[0-9]+)/) {
      if ($stddev) {
        $data[$i] = [] if $cnts[$i] == 0;
        push @{$data[$i]}, $line[$i];
      }
      $sums[$i] += $line[$i];
      $cnts[$i] ++;
    }
  }
  $unmatched ++ if defined $maxi && $maxi != $#line;
  $maxi = $#line if !defined $maxi || $maxi < $#line;
}

my @stddevs = ();
if ($stddev) {
  for(my $i=0; $i<=$maxi; $i++) {
    next if $cnts[$i] == 0; # no numbers in this column
    my $sqsum = 0;
    my $avg = $sums[$i]/$cnts[$i];
    foreach my $num (@{$data[$i]}) {
      $sqsum += ($num - $avg)**2;
    }
    $stddevs[$i] = sqrt($sqsum/$cnts[$i]);
  }
}
my @avgs = map {
             $cnts[$_] > 0
             ? sprintf("%.${prec}f", $sums[$_]/$cnts[$_])
               . ( $stddev
                 ? sprintf("±%.${prec}f", $stddevs[$_]) : ""
                 )
             : "-"
           } (0..$maxi);
print join("\t", @avgs)."\n";

die "Some lines were shorter!" if $unmatched;
