#!/opt/bin/perl -w
#######  #######  #######  #######  
## myUtil.pm
## Version 1.0 prefinal release
#######  #######  #######  #######  
use strict;
package Gramene::Util::myStats;
use Gramene::Util::myUtil;
my $myUtil= Gramene::Util::myUtil->new();

##### ##### ##### ##### ##### ##### ##### 
##### new constructor.
##### ##### ##### ##### ##### ##### #####
sub new{	
 my $name = shift;
 my $class = ref($name) || $name;
 my $this = {};
 bless $this,$class;
 $this->initprob();
 return $this;
}

##### ##### ##### ##### ##### ##### ##### 
####  ave
##### ##### ##### ##### ##### ##### #####
sub ave {
  my $this = shift;
  my $P_arr=shift;
  my $sum=0;
  my $num=@{$P_arr};
  for(my $i=0;$i<$num;$i++){$sum+=$P_arr->[$i];}
  $sum/=$num;
  return $sum;
} 
##### ##### ##### ##### ##### ##### ##### 
####  avesqr
##### ##### ##### ##### ##### ##### #####
sub avesqr {
  my $this = shift;
  my $P_arr=shift;
  my $sum=0;
  my $num=@{$P_arr};
  for(my $i=0;$i<$num;$i++){$sum+=($P_arr->[$i]*$P_arr->[$i]);}
  $sum/=$num;
  return $sum;
} 
##### ##### ##### ##### ##### ##### ##### 
####  sig_est (From data)
##### returns the mean, its estimated sigma, and the data's estimated sigma
##### ##### ##### ##### ##### ##### #####
sub sig_est {
  my $this = shift;
  my $P_arr=shift;
  my $num=@{$P_arr};my $sqrsum=0;my $sum=0;
  for(my $i=0;$i<$num;$i++){
    $sqrsum+=($P_arr->[$i]*$P_arr->[$i]);
    $sum+=$P_arr->[$i];
  }
  my ($mean,$sig2_src,$sig_mean);
  $mean=$sum/$num;
  #  print "num=$num\n";
  $sig2_src = ($sqrsum-$sum*$sum/$num);
  #  print "sig2_src=$sig2_src\n";
  $sig2_src /= ($num-1);
  #  print "sig2_src=$sig2_src\n";
  my $sig_src = sqrt($sig2_src);
  #  print "sig_src=$sig_src\n";
  $sig_mean = sqrt($sig2_src/$num);
  # print "sig_mean=$sig_mean\n";
  return ($mean,$sig_mean,$sig_src,$num);
} 
##### ##### ##### ##### ##### ##### ##### 
####  mann_whitney2
##### ##### ##### ##### ##### ##### #####
sub mann_whitney2{
  my $this = shift;
  my $P_arr1=shift;
  my $P_arr2=shift;
  $this->mann_whitney($P_arr1,$P_arr2);
} 
##### ##### ##### ##### ##### ##### ##### 
####  mann_whitney
##### ##### ##### ##### ##### ##### #####
sub mann_whitney{
  my $this = shift;
  my $P_arr1=shift;
  my $P_arr2=shift;

  my $num1=@{$P_arr1};
  my $num2=@{$P_arr2};

  if($num1<5 || $num2<5) 
    {die "you cannot use this mann_whitney with such low dimensions\n";} 

  my @all = (@{$P_arr1},@{$P_arr2});
  my $num = @all;if($num != ($num1+$num2)){die "something fishy\n";} 
  my @ranks; $this->ranks(\@all,\@ranks);
  
  my $rnksm1=0; my $rnksm2=0;
  for(my $i=0;$i<$num1;$i++) {$rnksm1+=$ranks[$i];} 
  for(my $i=$num1;$i<$num;$i++){$rnksm2+=$ranks[$i];} 
  my $rnksm=$rnksm1+$rnksm2;
#  print "Ranksums=$rnksm,$rnksm1,$rnksm2\n";

  my $exprnksm1=$num1*($num+1)/2;
  my $exprnksm2=$num2*($num+1)/2;
  my $std=sqrt($num1*$num2*($num+1)/12);

  my $z;
  if($rnksm1<$exprnksm1){$z = ($rnksm1-$exprnksm1+0.5)/$std;}
  else {$z = ($rnksm1-$exprnksm1-0.5)/$std;} 
  my $prob=$this->get_zprob($z);
  return ($z,$prob);
}

##### ##### ##### ##### ##### ##### ##### 
####  spearmanrank
##### ##### ##### ##### ##### ##### #####
sub spearman_rank{
  my $this = shift;
  my $P_arr1=shift;
  my $P_arr2=shift;
  
  my $num1=@{$P_arr1};
  my $num2=@{$P_arr2};

  if($num1!=$num2){ die "gross error\n";}
  my (@ranks1,@ranks2);
  $this->ranks($P_arr1,\@ranks1);
  $this->ranks($P_arr2,\@ranks2);

  my $rs=0;
  for(my $i=0;$i<$num1;$i++){
    $rs += ($ranks1[$i]-$ranks2[$i])*($ranks1[$i]-$ranks2[$i]);
  }
  $rs = 1-6*$rs/($num1*($num1-1)*($num1+1));
  my $t=$rs*sqrt($num1-1);
  my $df = $num1-1;
  my $prob=$this->get_tprob($df,$t);
  #  print "rs=$rs,df=$df,t=$t,$prob\n";
  return ($t,$prob);
} 
################################################################3
## takes in array as ref, and a ref to a rank array, 
## fills up rank array for corresponding guys in ref
################################################################3
sub ranks{
  my $this=shift;
  my $P_all=shift;
  my $P_ranks=shift;
  
  my @all;
  my @ranks;

  ## clean up the ranks guy
  @{$P_ranks}=();
  my $num = @{$P_all};

  for(my $i=0;$i<$num;$i++){ 
    push(@all,[$P_all->[$i],$i]);
  } 
  @all = sort{$a->[0]<=>$b->[0]} @all;

  for(my $j=0;$j<$num;$j++)
    {
      my $k=$j;
      while($all[$j]->[0] eq  $all[$k]->[0]){
	$k++;
	if($k>=$num){ last;}## to prevent addition of element to P_all
      }
      my $rank = ($k*($k+1)/2-$j*($j+1)/2);
      $rank/= ($k-$j);
#      print "j=$j,k=$k,rank=$rank\n";
      for(my $ii=$j;$ii<$k;$ii++){push(@ranks,$rank);} 
      $j=($k-1);
    } 

  for(my $j=0;$j<$num;$j++){
    $P_ranks->[$all[$j]->[1]]=$ranks[$j];
  } 
  return;
} 
#########################################################################
#### initprob
#########################################################################
my %tprob;
my @df;
my @z;
my %gprob;
sub initprob{
  open IN,"/home/ravi/Util/statsData/tprob.txt" or die "tprob.txt";
  my $line=<IN>;
  while(my $line=<IN>){
    chomp($line); 
    my @tmp=split(/\s+/,$line);
    if($tmp[0] =~ /inf/){$tmp[0]=1000000;} 
    push(@{$tprob{$tmp[0]}},[$tmp[1],$tmp[2]]);
  }
  close IN;
  @df = keys(%tprob);
  @df = sort{$a<=>$b} @df;
#  $myUtil->prettyPrint(\%tprob);

  open IN,"/home/ravi/Util/statsData/gprob.txt" or die "gprob.txt";
  my $line=<IN>;
  while(my $line=<IN>){
    chomp($line); 
    my @tmp=split(/\s+/,$line);
    $gprob{$tmp[0]}=$tmp[1];
  }
  close IN;
  @z = keys(%gprob);
  @z = sort{$a<=>$b} @z;
#  $myUtil->prettyPrint(\%gprob);

#  $myUtil->prettyPrint(\@z);

#  $myUtil->prettyPrint(\@df);
  
}
#### #### #### #### #### #### #### #### #### 
#### get_zprob
#### #### #### #### #### #### #### #### #### 
sub get_zprob{
  my $this=shift;
  my $z=shift;
  $z = abs($z);
  my $i;
  for($i=0;$i<(@z-1);$i++){ 
    if($z >=$z[$i] && $z<$z[$i+1]){ 
      last;
    } 
  } 
  if($i==(@z-1)){ 
    return 0;
  } else {
    if(abs($z[$i+1]-$z[$i])<1e-4){
      return $gprob{$z[$i]};
    }
    my $rat=($z-$z[$i])/($z[$i+1]-$z[$i]);
    return ($rat*$gprob{$z[$i+1]}+(1-$rat)*$gprob{$z[$i]});
  } 
}
#### #### #### #### #### #### #### #### #### 
#### get_tprob
#### #### #### #### #### #### #### #### #### 
sub get_tprob{
  # my %tprob;
  # my @df;
  my $this=shift;
  my $df=shift;
  my $t=shift;
  $t = abs($t);
  my $i;
  for($i=0;$i<@df;$i++)
    {if($df>=$df[$i] && $df<$df[$i+1]){last;}} 
  if($df==$df[$i]){ 
    ## we have the degree of freedom we need.
    my @tvals1=@{$tprob{$df[$i]}};
    my $j1;

    for($j1=0;$j1<(@tvals1-1);$j1++)
      {if($t >=$tvals1[$j1]->[0] && $t<$tvals1[$j1+1]->[0]){last;}}
    if($t >$tvals1[-1]->[0]){return 0;} ## too large a value anyhow
    else { 
      my $rat = ($t-$tvals1[$j1]->[0])/
	($tvals1[$j1+1]->[0]-$tvals1[$j1]->[0]);
      return $rat*$tvals1[$j1+1]->[1]+ (1-$rat)*$tvals1[$j1]->[1];
    }
  } 
  ## have to interpolate degrees of freedom
  my $res1;
  my @tvals1=@{$tprob{$df[$i]}};
  my $j1;
  if($t<$tvals1[0]->[0]){return 1;} ## too small a value anyhow
  for($j1=0;$j1<(@tvals1-1);$j1++)
    {if($t >=$tvals1[$j1]->[0] && $t<$tvals1[$j1+1]->[0]){last;}}
  if($t >$tvals1[-1]->[0]){return 0;} ## too large a value anyhow
  else { 
    my $rat = ($t-$tvals1[$j1]->[0])/($tvals1[$j1+1]->[0]-$tvals1[$j1]->[0]);
    $res1= $rat*$tvals1[$j1+1]->[1]+ (1-$rat)*$tvals1[$j1]->[1];
  }


  my $res2;
  my @tvals2=@{$tprob{$df[$i+1]}};
  my $j2;
  if($t <$tvals2[0]->[0]){return 1;} ## too small a value anyhow
  for($j2=0;$j2<(@tvals2-1);$j2++)
    {if($t >=$tvals2[$j2]->[0] && $t<$tvals2[$j2+1]->[0]){last;}}
  if($t >$tvals2[-1]->[0]){return 0;} ## too large a value anyhow
  else { 
    my $rat = ($t-$tvals2[$j2]->[0])/($tvals2[$j2+1]->[0]-$tvals2[$j2]->[0]);
    $res2= $rat*$tvals2[$j2+1]->[1]+ (1-$rat)*$tvals2[$j2]->[1];
  }
  my $rat2 = ($df-$df[$i])/($df[$i+1]-$df[$i]);
  return $res2*$rat2+$res1*(1-$rat2);
}
#### #### #### #### #### #### #### #### #### 
#### get_tval given prob, finds the t value for given df
#### #### #### #### #### #### #### #### #### 
sub get_tval{
  # my %tprob;
  # my @df;
  my $this=shift;
  my $df=shift;
  my $prob=shift;
  my $i;
  for($i=0;$i<@df;$i++)
    {if($df >=$df[$i] && $df<$df[$i+1]){last;}} 
  if($df==$df[$i]){ 
    ## we have the degree of freedom we need.
    my @tvals1=@{$tprob{$df[$i]}};
    my $j1;
    if($prob >$tvals1[0]->[1]){return 0;} ## too large a value anyhow
    for($j1=0;$j1<(@tvals1-1);$j1++)
      {if($prob <=$tvals1[$j1]->[1] && $prob>$tvals1[$j1+1]->[1]){last;}}
    if($prob <$tvals1[-1]->[1]){return 10000;} ## too large a value anyhow
    else { 
      my $rat = ($prob-$tvals1[$j1+1]->[1])/
	($tvals1[$j1]->[1]-$tvals1[$j1+1]->[1]);
      return $rat*$tvals1[$j1]->[0]+ (1-$rat)*$tvals1[$j1+1]->[0];
    }
  } 
  ## have to interpolate degrees of freedom
  my $res1;
  my @tvals1=@{$tprob{$df[$i]}};
  my $j1;
  if($prob>$tvals1[0]->[1]){return 0;} ## too large a prob anyhow
  for($j1=0;$j1<(@tvals1-1);$j1++)
    {if($prob<=$tvals1[$j1]->[1] && $prob>$tvals1[$j1+1]->[1]){last;}}
  if($prob <$tvals1[-1]->[1]){return 10000;} ## too small a prob value anyhow
  else { 
    my $rat=($prob-$tvals1[$j1+1]->[1])/
      ($tvals1[$j1]->[1]-$tvals1[$j1+1]->[1]);
    $res1= $rat*$tvals1[$j1]->[0]+ (1-$rat)*$tvals1[$j1+1]->[0];
  }
  my $res2;
  my @tvals2=@{$tprob{$df[$i+1]}};
  my $j2;
  if($prob>$tvals2[0]->[1]){return 0;} ## too large a prob anyhow
  for($j2=0;$j2<(@tvals2-1);$j2++)
    {if($prob<=$tvals2[$j2]->[1] && $prob>$tvals2[$j2+1]->[1]){last;}}
  if($prob<$tvals2[-1]->[1]){return 10000;} ## too small a prob anyhow
  else { 
    my $rat=
      ($prob-$tvals2[$j2+1]->[1])/($tvals2[$j2]->[1]-$tvals2[$j2+1]->[1]);
    $res2= $rat*$tvals2[$j2]->[0]+ (1-$rat)*$tvals2[$j2+1]->[0];
  }
  my $rat2 = ($df-$df[$i])/($df[$i+1]-$df[$i]);
  return $res2*$rat2+$res1*(1-$rat2);
}

1;
=head1 NAME

SNP::myStats.pm - set of utilities for my use.

=head1 SYNOPSIS

    use SNP::myUtil;
    $mUtil =  SNP::myUtil->new();
    $mUtil->prettyPrint($PH_tmp,$body,$prefix);

=head1 REQUIRES

B<Needs the following to be installed> I<Time::CTime, Time::ParseDate,

=head1 EXPORTS

nothing

=head1 METHODS AND USAGE

There are several methods in this package for making life easier.
They do not fit into any other package, so appear here.

=over 6

=item SNP::myUtil->new()

No special inputs required

=item $myUtil->prettyPrint($PH_tmp,\$body,[$prefix]);

I<$PH_tmp> is a reference to the structure to be printed. 
I<$body> is where the pretty print string is placed, print
it from your program, I<$prefix> if used, could be something
like "\t" or "***" to beautify output.

=item  $myUtil->iniReader()

iniReader reads *.ini files in the working directory and returns a 
reference to a Hash which contains the key/value pairs. Nice thing 

=item  $myUtil->isNumber($number)

isNumber returns 1  if $number is  a string that can be 
interpreted as a number by PERL, else returns 0. 

=back

=head1 AUTHOR

Ravi Sachidanandam, CSHL. ravi@cshl.org

=cut


