#!/bin/env perl

=pod

=head1 NAME

resample - resample a data track

=head1 SYNOPSIS

  cat track.txt | resample -bin 1e6 {-avg|-sum|-min|-max}
  resample -file track.txt -bin 1e6 {-avg|-sum|-min|-max}

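=head1 DESCRIPTION

Reads whitespace-delimited records of the form C<chr start end value> from
STDIN (or from the file given with C<-file>), where C<value> may be a
comma-separated list of values. Each record is assigned to a bin of width
C<-bin> according to the midpoint of its interval. For every bin one line
C<chr bin_start bin_end value> is printed, in which each value column has
been aggregated over the records of that bin with the selected function
(C<-avg>, C<-sum>, C<-min> or C<-max>).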

=head1 HISTORY

=over

=item * 6 Dec 2010

First version.

=back 

=head1 BUGS

=head1 AUTHOR

=head1 CONTACT

=cut

use strict;
use warnings FATAL=>"all";

use Carp;
use Config::General;
use Cwd qw(getcwd abs_path);
use File::Basename;
use FindBin;
use Getopt::Long;
use Pod::Usage;
use Time::HiRes qw(gettimeofday tv_interval);
use lib "$FindBin::RealBin";
use lib "$FindBin::RealBin/../lib";
use lib "$FindBin::RealBin/lib";

our (%OPT,%CONF,$conf);
our $VERSION = 0.01;

# common and custom module imports below

#use Data::Dumper;
#use IO::File;
#use List::Util;
#use List::MoreUtils;
use Math::VecStat qw(sum min max average);
#use Set::IntSpan;
#use Statistics::Descriptive;
#use Storable;

# read and parse configuration file
_parse_config();

# to see what was passed in on the command line (%OPT)
# or the configuration tree %CONF created from .conf files and %OPT
# uncomment next line
#printdumper(\%OPT,\%CONF);

# Choose the aggregation function once, before any input is consumed, so
# that a missing -avg/-sum/-min/-max is reported immediately rather than
# after the whole track has been read. When several flags are given the
# first one in this order wins: avg, sum, min, max.
my %aggregator_for = (
    avg => \&average,
    sum => \&sum,
    min => \&min,
    max => \&max,
);
my ($method) = grep { $CONF{$_} } qw(avg sum min max);
die "Must specify how to bin data using one of: -avg, -sum, -max, -min"
    unless defined $method;
my $aggregate = $aggregator_for{$method};

my $fh = get_fh();

# %data maps chromosome -> bin index -> list of records; each record is the
# array ref of comma-separated values found on one input line.
my %data;
while (my $line = <$fh>) {
    chomp $line;
    # blank lines carry no record; skipping them avoids an "uninitialized
    # value" abort under "use warnings FATAL"
    next unless $line =~ /\S/;
    my ($chr, $start, $end, $value) = split " ", $line;
    die "Malformed input at line $.: expected 'chr start end value', got [$line]"
        unless defined $value;
    # a record belongs to the bin that contains the midpoint of its interval
    my $bin = int( ( ($start + $end) / 2 ) / $CONF{bin} );
    push @{ $data{$chr}{$bin} }, [ split(",", $value) ];
}

for my $chr (sort keys %data) {
    for my $bin (sort { $a <=> $b } keys %{ $data{$chr} }) {
        # transpose the records of this bin: $valuelists[$i] collects the
        # i-th value of every record that has one
        my @valuelists;
        for my $values (@{ $data{$chr}{$bin} }) {
            for my $i (0 .. $#{$values}) {
                push @{ $valuelists[$i] }, $values->[$i];
            }
        }
        # aggregate each value column separately
        my @binnedvalues;
        for my $valuelist (@valuelists) {
            # Keep the scalar assignment: Math::VecStat documents min/max as
            # returning (value, index) in list context -- TODO confirm
            # against the installed version before inlining this call.
            my $binnedvalue = $aggregate->(@$valuelist);
            push @binnedvalues, $binnedvalue;
        }
        printinfo($chr,
                  $bin * $CONF{bin},
                  ($bin + 1) * $CONF{bin} - 1,
                  join(",", @binnedvalues));
    }
}

# Return the filehandle the track is read from: the file named by
# $CONF{file} when that is set to a true value, otherwise STDIN.
#
# Dies with the file name and the system error when the file cannot be
# opened. The file is opened with a plain read-only three-argument open, so
# characters such as '|' or '>' in the name are never treated as an open
# mode, and no IO::File import is required (this file does not load it).
sub get_fh {
  my $fh;
  if ( my $file = $CONF{file} ) {
      open($fh, '<', $file)
          or die "Cannot open input file [$file] for reading: $!";
  }
  else {
      $fh = \*STDIN;
  }
  return $fh;
}

# Check the merged configuration in %CONF before any data is processed.
#
# Dies when no bin size was given, or when the bin size is not a positive
# number: the bin size is used as a divisor and as the interval width, so a
# negative value would produce bins whose start lies after their end.
sub validateconfiguration {
    use Scalar::Util qw(looks_like_number);
    my $bin = $CONF{bin};
    die "Must specify bin size using -bin" unless $bin;
    die "Bin size given with -bin must be a positive number, got [$bin]"
        unless !ref($bin) && looks_like_number($bin) && $bin > 0;
}

################################################################
#
# *** DO NOT EDIT BELOW THIS LINE ***
#
################################################################

# Parse the command line into %OPT, load a configuration file (if any) into
# %CONF, copy the command-line options over it and validate the result.
#
# Exits through pod2usage() for -help and -man, and with the usage message
# and exit status 2 when the command line cannot be parsed. When the debug
# level equals 3, dumps %OPT and %CONF and exits.
sub _parse_config {
  my $dump_debug_level = 3;
  # GetOptions returns false after reporting unknown or malformed options;
  # stop here instead of silently running with a partial option set.
  GetOptions(\%OPT,
             "file=s",
             "bin=f",
             "sum","avg","min","max",
             "configfile=s",
             "help",
             "man",
             "debug:i") or pod2usage(2);
  pod2usage() if $OPT{help};
  pod2usage(-verbose=>2) if $OPT{man};
  loadconfiguration($OPT{configfile});
  populateconfiguration(); # copy command line options to config hash
  validateconfiguration();
  if(defined $CONF{debug} && $CONF{debug} == $dump_debug_level) {
    $Data::Dumper::Indent    = 2;
    $Data::Dumper::Quotekeys = 0;
    $Data::Dumper::Terse     = 0;
    $Data::Dumper::Sortkeys  = 1;
    $Data::Dumper::Varname = "OPT";
    printdumper(\%OPT);
    $Data::Dumper::Varname = "CONF";
    printdumper(\%CONF);
    exit;
  }
}

# Overlay every command-line option from %OPT onto %CONF (command-line
# values replace same-named configuration values), then expand any
# __expression__ markers in the merged tree.
sub populateconfiguration {
  # hash slice: keys and values of an unmodified hash come back in the
  # same order, so each key is paired with its own value
  @CONF{ keys %OPT } = values %OPT;
  repopulateconfiguration(\%CONF);
}

# Walk a configuration tree in place and expand every __EXPR__ marker found
# in a scalar hash value by replacing it with the result of evaluating EXPR
# as Perl code.
#
#   $root - hash ref to process; anything else is ignored
#
# Nested hashes are processed recursively, as are hash refs stored in
# arrays. Plain scalars stored directly in an array are left untouched,
# because the recursive call returns at once for non-hash arguments.
#
# Dies, naming the key and the expression, when an expression fails to
# evaluate or yields an undefined value.
sub repopulateconfiguration {
  my $root     = shift;
  return unless ref($root) eq "HASH";
  for my $key (keys %$root) {
    my $value = $root->{$key};
    if(ref($value) eq "HASH") {
      repopulateconfiguration($value);
    } elsif (ref($value) eq "ARRAY") {
      for my $item (@$value) {
        repopulateconfiguration($item);
      }
    } elsif(defined $value) {
      # the substitution below modifies $value, which resets the /g match
      # position, so each iteration searches again from the start
      while($value =~ /__([^_].+?)__/g) {
        my $source = "__" . $1 . "__";
        # NOTE(review): string eval of configuration text runs arbitrary
        # Perl; only trusted configuration files must be loaded. The
        # evaluated code can see the lexicals of this sub, so their names
        # are kept as they were.
        my $target = eval $1;
        if($@ || !defined $target) {
          my $reason = $@ ? $@ : "expression returned an undefined value";
          die "Cannot evaluate $source in configuration value [$key]: $reason";
        }
        $value =~ s/\Q$source\E/$target/g;
      }
      $root->{$key} = $value;
    }
  }
}

################################################################
#
#

# Locate and load a configuration file into %CONF.
#
#   $file - path passed with -configfile, or undef to search for
#           "<scriptname>.conf" in the current directory, the script
#           directory (each also under etc/ and ../etc/) and, as
#           ".<scriptname>.conf", in the user's home directory
#
# Dies with a stack trace when an explicitly named file does not exist or
# cannot be read. When no file is found at all, %CONF is left untouched.
# On success $OPT{configfile} records the file that was used.
sub loadconfiguration {
  my $file = shift;
  if(defined $file) {
    if(-e $file && -r _) {
      # provided configuration file exists and can be read
      $file = abs_path($file);
    } else {
      confess "The configuration file [$file] passed with -configfile does not exist or cannot be read.";
    }
  } else {
    # otherwise, try to automatically find a configuration file
    my ($scriptname) = fileparse($0);
    my $cwd     = getcwd();
    my $bindir  = $FindBin::RealBin;
    my $userdir = $ENV{HOME};
    my @candidate_files;
    # getcwd() and $ENV{HOME} may be undefined; interpolating an undefined
    # value would abort the script under "use warnings FATAL", so such
    # locations are simply left out of the search
    if(defined $cwd) {
      push @candidate_files,
        "$cwd/$scriptname.conf",
        "$cwd/etc/$scriptname.conf",
        "$cwd/../etc/$scriptname.conf";
    }
    push @candidate_files,
      "$bindir/$scriptname.conf",
      "$bindir/etc/$scriptname.conf",
      "$bindir/../etc/$scriptname.conf";
    if(defined $userdir) {
      push @candidate_files, "$userdir/.$scriptname.conf";
    }
    my @additional_files = (

                           );
    for my $candidate_file (@additional_files,@candidate_files) {
      #printinfo("configsearch",$candidate_file);
      if(-e $candidate_file && -r _) {
        $file = $candidate_file;
        #printinfo("configfound",$candidate_file);
        last;
      }
    }
  }
  if(defined $file) {
    $OPT{configfile} = $file;
    $conf = Config::General->new(
                                 -ConfigFile=>$file,
                                 -IncludeRelative=>1,
                                 -ExtendedAccess=>1,
                                 -AllowMultiOptions=>"yes",
                                 -LowerCaseNames=>1,
                                 -AutoTrue=>1
                                );
    %CONF = $conf->getall;
  }
}

# Print a debug message when the configured debug level is high enough.
#
#   $level - minimum value of $CONF{debug} at which the message is shown
#   @msg   - message parts, printed after a "debug[LEVEL]" tag
#
# Nothing is printed when $CONF{debug} is undefined.
sub printdebug {
  my ($level,@msg) = @_;
  my $enabled = defined $CONF{debug} && $CONF{debug} >= $level;
  printinfo(sprintf("%s[%d]","debug",$level),@msg) if $enabled;
}

# Print all arguments on one line of STDOUT, separated by single spaces
# and terminated by a newline.
sub printinfo {
  my $line = join(" ",@_);
  print $line,"\n";
}

# Print a Data::Dumper rendering of every argument to STDOUT.
sub printdumper {
  # loaded here because the file-level import of Data::Dumper is commented out
  use Data::Dumper;
  my @rendered = Data::Dumper->Dump([@_]);
  print @rendered;
}

