#!/opt/bin/perl -w
#######  #######  #######  #######  
## myProt.pm -- package Gramene::Util::myProt
## Version 1.0 prefinal release
#######  #######  #######  #######  
use strict;
package Gramene::Util::myProt;
#use Gramene::Util::myUtil;my $myUtil=Gramene::Util::myUtil->new();
my %aa;
my %codon;

##### ##### ##### ##### ##### ##### #####
##### new constructor.
##### ##### ##### ##### ##### ##### #####
sub new{
  # Constructor.  Works both as a class method (Gramene::Util::myProt->new)
  # and as an instance method ($obj->new), in which case the new object is
  # blessed into the class of the invocant.
  # The object itself is an empty hash; initcodon() is called to fill the
  # file-level %aa / %codon lookup tables.
  # Returns the blessed object.
  my ($proto) = @_;
  my $class = ref($proto) || $proto;
  my $this = bless {}, $class;
  $this->initcodon();
  return $this;
}

##### ##### ##### ##### ##### ##### #####
##### getCodon -- returns the amino-acid record (hash ref) for a residue code
##### ##### ##### ##### ##### ##### #####
sub getCodon{
  # Look up an amino-acid entry in the file-level %aa table.
  #   $residue : key into %aa (e.g. "A", "a", "END", "NNN" as filled in
  #              by initcodon).
  # Returns whatever %aa holds for that key -- the hash ref describing the
  # amino acid (name, short, codon, bias, ...) -- or undef for an unknown key.
  my ($this, $residue) = @_;
  return $aa{$residue};
}

##### ##### ##### ##### ##### ##### #####
##### getAmino
##### ##### ##### ##### ##### ##### #####
sub getAmino{
  # Translate a single codon through the file-level %codon table.
  #   $triplet : codon string; ASCII lower-case letters are folded to
  #              upper case before the lookup, so "atg" and "ATG" agree.
  # Returns the value stored in %codon for that codon (the amino-acid key
  # registered by initcodon), or undef when the codon is not in the table.
  my ($this, $triplet) = @_;
  (my $key = $triplet) =~ tr/a-z/A-Z/;
  return $codon{$key};
}
##### ##### ##### ##### ##### ##### #####
##### getProts
#####   returns 3 frame translations.
##### ##### ##### ##### ##### ##### #####
sub getProts{
  # Translate a nucleotide sequence in the three forward reading frames.
  #   $P_mrna : reference to the sequence string (not modified).
  # Returns a list ($P_prot, $errCode, $errMessage):
  #   $P_prot     : array ref holding three strings, one per frame offset
  #                 0, 1 and 2.  Codons that getAmino maps to "END" are
  #                 written as "#", codons mapped to "NNN" as "*".
  #   $errCode    : 1 when every codon was translated, 0 otherwise.
  #   $errMessage : one "amino undef,codon=..." line per codon that could
  #                 not be translated (also echoed to STDERR).
  # A codon without a translation contributes nothing to the frame string.
  my $this = shift;
  my $P_mrna = shift;
  # An undefined sequence is treated as empty instead of triggering
  # "uninitialized value" warnings in the loop bounds.
  my $len = defined($$P_mrna) ? length($$P_mrna) : 0;
  my ($errCode,$errMessage)=(1,"");
  my @prot;
  for my $frame (0..2){
    my $prot="";
    for(my $i=$frame;$i<=($len-3);$i+=3){
      my $codon = substr($$P_mrna,$i,3);
      my $amino = $this->getAmino($codon);
      if (!defined($amino)){
        print STDERR "amino undef,codon=$codon\n";
        $errMessage.="amino undef,codon=$codon\n";
        $errCode=0;
        # Nothing to append; skipping here avoids comparing and
        # concatenating an undefined value (warnings under -w).
        next;
      }
      if($amino eq "END"){ $amino = "#";}
      elsif($amino eq "NNN"){ $amino = "*";}
      $prot.=$amino;
    }
    push(@prot,$prot);
  }
  return(\@prot,$errCode,$errMessage);
}
##### ##### ##### ##### ##### ##### #####
##### getAllProts
#####   returns 6 frame translations.
##### ##### ##### ##### ##### ##### #####
sub getAllProts{
  # Translate a nucleotide sequence in all six reading frames.
  #   $P_mrna : reference to the sequence string.  The referenced string is
  #             left untouched; the reverse complement is built on a copy.
  # Returns a list ($P_prot, $errCode, $errMessage):
  #   $P_prot     : array ref with six strings -- forward frames 0,1,2
  #                 followed by reverse-complement frames 0,1,2.
  #   $errCode    : 1 when every codon was translated, 0 otherwise.
  #   $errMessage : concatenated messages for untranslatable codons,
  #                 forward strand first.
  # The per-frame translation (including "#" for stop and "*" for unknown
  # codons) is delegated to getProts.
  my $this = shift;
  my $P_mrna = shift;

  my ($P_fwd,$fwdCode,$fwdMessage) = $this->getProts($P_mrna);

  # Reverse-complement a private copy so the caller's sequence is not
  # modified through the reference.
  my $revcomp = scalar reverse(defined($$P_mrna) ? $$P_mrna : "");
  # getAmino accepts lower-case codons, so lower-case bases must be
  # complemented too; other characters (e.g. N) are left as they are.
  $revcomp =~ tr/ACGTacgt/TGCAtgca/;

  my ($P_rev,$revCode,$revMessage) = $this->getProts(\$revcomp);

  my @prot = (@$P_fwd,@$P_rev);
  my $errCode = ($fwdCode && $revCode) ? 1 : 0;
  my $errMessage = $fwdMessage.$revMessage;
  return(\@prot,$errCode,$errMessage);
}

##########    ##########    ##########    ##########    ##########
##########    initcodon
##########    ##########    ##########    ##########    ##########
sub initcodon{
  # Fill the file-level lookup tables shared by every instance:
  #   %aa    : amino-acid key => record (hash ref) with
  #              name   - full name
  #              short  - three-letter name
  #              codon  - array ref of codons for this entry
  #              bias   - array ref of per-codon usage fractions
  #              prob   - cumulative form of bias, last element forced to 1
  #              chem   - residue formula (absent for END and NNN)
  #              idealM / aveM - mass values as given in the table below
  #            Keys are the one-letter codes plus "END" (stop codons) and
  #            "NNN" (codons containing N); every one-letter code also gets
  #            a lower-case alias pointing at the same record.
  #   %codon : codon (upper- and lower-case spelling) => amino-acid key.
  # Takes no arguments besides the invocant and returns nothing.
  my $this = shift;

  # The tables are shared and this method runs on every new(); build them
  # only once.  Rebuilding used to iterate over the lower-case aliases as
  # well, which could leave lower-case keys in %codon.
  return if %codon;

  %aa = (
    A => { name=>"Alanine",       short=>"Ala", chem=>"C3H5NO",
           idealM=>71.0371,  aveM=>71.080,
           codon=>[qw(GCT GCC GCA GCG)],
           bias=>[0.28,0.40,0.22,0.10] },
    R => { name=>"Arginine",      short=>"Arg", chem=>"C6H12N4O",
           idealM=>156.1007, aveM=>156.1901,
           codon=>[qw(CGT CGC CGA CGG AGA AGG)],
           bias=>[0.09,0.19,0.10,0.19,0.21,0.22] },
    # Masses were entered as 144.x; the asparagine residue is 114.x.
    N => { name=>"Asparagine",    short=>"Asn", chem=>"C4H6N2O2",
           idealM=>114.0429, aveM=>114.1054,
           codon=>[qw(AAT AAC)],
           bias=>[0.44,0.56] },
    # Formula was C3H5NO3; the aspartic acid residue has four carbons.
    D => { name=>"Aspartic Acid", short=>"Asp", chem=>"C4H5NO3",
           idealM=>115.0269, aveM=>115.0900,
           codon=>[qw(GAT GAC)],
           bias=>[0.44,0.56] },
    C => { name=>"Cysteine",      short=>"Cys", chem=>"C3H5NOS",
           idealM=>103.0092, aveM=>103.1444,
           codon=>[qw(TGT TGC)],
           bias=>[0.42,0.58] },
    E => { name=>"Glutamic acid", short=>"Glu", chem=>"C5H7NO3",
           idealM=>129.0426, aveM=>129.1173,
           codon=>[qw(GAA GAG)],
           bias=>[0.41,0.59] },
    # Formula and masses were copies of the glutamic acid values.
    Q => { name=>"Glutamine",     short=>"Gln", chem=>"C5H8N2O2",
           idealM=>128.0586, aveM=>128.1307,
           codon=>[qw(CAA CAG)],
           bias=>[0.27,0.73] },
    G => { name=>"Glycine",       short=>"Gly", chem=>"C2H3NO",
           idealM=>57.0215,  aveM=>57.0527,
           codon=>[qw(GGT GGC GGA GGG)],
           bias=>[0.18,0.33,0.26,0.23] },
    H => { name=>"Histidine",     short=>"His", chem=>"C6H7N3O",
           idealM=>137.0589, aveM=>137.1435,
           codon=>[qw(CAT CAC)],
           bias=>[0.41,0.59] },
    I => { name=>"Isoleucine",    short=>"Ile", chem=>"C6H11NO",
           idealM=>113.0841, aveM=>113.1620,
           codon=>[qw(ATT ATC ATA)],
           bias=>[0.35,0.52,0.14] },
    L => { name=>"Leucine",       short=>"Leu", chem=>"C6H11NO",
           idealM=>113.0841, aveM=>113.1620,
           codon=>[qw(TTA TTG CTT CTC CTA CTG)],
           bias=>[0.06,0.12,0.12,0.20,0.07,0.43] },
    K => { name=>"Lysine",        short=>"Lys", chem=>"C6H12N2O",
           idealM=>128.0950, aveM=>128.1767,
           codon=>[qw(AAA AAG)],
           bias=>[0.40,0.60] },
    M => { name=>"Methionine",    short=>"Met", chem=>"C5H9NOS",
           idealM=>131.0405, aveM=>131.1991,
           codon=>[qw(ATG)],
           bias=>[1.00] },
    F => { name=>"Phenylalanine", short=>"Phe", chem=>"C9H9NO",
           idealM=>147.0684, aveM=>147.1801,
           codon=>[qw(TTT TTC)],
           bias=>[0.43,0.57] },
    P => { name=>"proline",       short=>"Pro", chem=>"C5H7NO",
           idealM=>97.0528,  aveM=>97.1187,
           codon=>[qw(CCT CCC CCA CCG)],
           bias=>[0.29,0.33,0.27,0.11] },
    # Formula was C3H5N2O2; the serine residue contains one nitrogen.
    S => { name=>"Serine",        short=>"Ser", chem=>"C3H5NO2",
           idealM=>87.0320,  aveM=>87.0793,
           codon=>[qw(TCT TCC TCA TCG AGT AGC)],
           bias=>[0.18,0.23,0.15,0.06,0.14,0.25] },
    T => { name=>"Threonine",     short=>"Thr", chem=>"C4H7NO2",
           idealM=>101.0477, aveM=>101.1066,
           codon=>[qw(ACT ACC ACA ACG)],
           bias=>[0.23,0.38,0.27,0.12] },
    W => { name=>"Tryptophan",    short=>"Trp", chem=>"C11H10N2O",
           idealM=>186.0793, aveM=>186.2176,
           codon=>[qw(TGG)],
           bias=>[1.00] },
    Y => { name=>"Tyrosine",      short=>"Tyr", chem=>"C9H9NO2",
           idealM=>163.0633, aveM=>163.1794,
           codon=>[qw(TAT TAC)],
           bias=>[0.42,0.58] },
    V => { name=>"Valine",        short=>"Val", chem=>"C5H9NO",
           idealM=>99.0684,  aveM=>99.1347,
           codon=>[qw(GTT GTC GTA GTG)],
           bias=>[0.17,0.25,0.10,0.48] },
    # NOTE(review): END and NNN carry the same mass values as Valine, and
    # NNN a three-entry bias for 61 codons -- presumably placeholders;
    # kept unchanged, confirm before relying on them.
    END => { name=>"End",         short=>"End",
             idealM=>99.0684,  aveM=>99.1347,
             codon=>[qw(TGA TAG TAA)],
             bias=>[0.61,0.17,0.22] },
    NNN => { name=>"Unknown",     short=>"Unknown",
             idealM=>99.0684,  aveM=>99.1347,
             codon=>[qw(
               NNN
               NAA NAC NAG NAT
               NCA NCC NCG NCT
               NGA NGC NGG NGT
               NTA NTC NTG NTT
               ANA ANC ANG ANT
               CNA CNC CNG CNT
               GNA GNC GNG GNT
               TNA TNC TNG TNT
               AAN ACN AGN ATN
               CAN CCN CGN CTN
               GAN GCN GGN GTN
               TAN TCN TGN TTN
               TNN CNN GNN ANN
               NAN NCN NGN NTN
               NNA NNC NNG NNT
             )],
             bias=>[0.61,0.17,0.22] },
  );

  # At this point %aa holds only the canonical (upper-case) keys.
  foreach my $code (keys %aa){
    my $rec = $aa{$code};

    # Cumulative codon-usage distribution; the last entry is pinned to 1
    # so rounding in the bias values cannot leave a gap at the top.
    my @cum = @{$rec->{bias}};
    for my $i (1..$#cum){ $cum[$i] += $cum[$i-1]; }
    $cum[-1]=1;
    $rec->{prob}=[@cum];

    # Reverse lookup: register both spellings of every codon.
    foreach my $cod (@{$rec->{codon}}){
      $codon{uc $cod}=$code;
      $codon{lc $cod}=$code;
    }
  }

  # Lower-case aliases for the twenty one-letter codes (END and NNN get
  # none).  Added after the loop above so aliases never reach %codon.
  foreach my $code (grep { length($_)==1 } keys %aa){
    $aa{lc $code} = $aa{$code};
  }
  return;
}


1;

=head1 NAME

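Gramene::Util::myProt - amino-acid/codon tables and 3- and 6-frame translation.

B<Note:> the sections below appear to have been carried over from
SNP::myUtil.pm (a set of utilities for my use); they describe that module's
methods, not the ones defined in this package.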

=head1 SYNOPSIS

    use SNP::myUtil;
    $mUtil =  SNP::myUtil->new();
    $mUtil->prettyPrint($PH_tmp,$body,$prefix);
    $PH_tmp = $mUtil->iniReader();
    $retVal = $mUtil->isNumber($number);
    ($time,$errCode,$errMessage) = $mUtil->UTCnow();
    ($time,$errCode,$errMessage) = $mUtil->oracleTime($TimeString);

=head1 REQUIRES

B<Needs the following to be installed> I<Time::CTime, Time::ParseDate,
Time::Timezone>

=head1 EXPORTS

nothing

=head1 METHODS AND USAGE

There are several methods in this package for making life easier.
They do not fit into any other package, so appear here.

=over 6

=item SNP::myUtil->new()

No special inputs required

=item $myUtil->prettyPrint($PH_tmp,\$body,[$prefix]);

I<$PH_tmp> is a reference to the structure to be printed. 
I<$body> is where the pretty print string is placed, print
it from your program, I<$prefix> if used, could be something
like "\t" or "***" to beautify output.

=item  $myUtil->iniReader()

iniReader reads *.ini files in the working directory and returns a 
reference to a Hash which contains the key/value pairs. Nice thing 
about this is the files can have blank lines, comments are marked 
with # and you can store many alternative definitions for variables, 
the last one is always chosen (or the value in the alphabetically 
last file). Translates all the Yes to 1 and all No to 0, since this 
is the code used internally in the programs.

=item  $myUtil->isNumber($number)

isNumber returns 1 if $number is a string that can be
interpreted as a number by Perl, else returns 0.

=item  $myUtil->UTCnow()

returns an array ($time,$format,$errCode,$errMessage), with time
in the format recognised by Oracle, $format the format string and
$errCode (0,1) for error (yes,no) and an error message if there is one.

=item  $myUtil->oracleTime($timeString)

oracleTime takes as input a string which can reasonably be taken
to be a time value, and returns an array
($time,$format,$errCode,$errMessage) with the time in the format
recognised by Oracle, $format the format string,
$errCode (0,1) for error (yes,no) and an error message if there
is one. $time and $format can be used in the sqlplus function sysdate.


=back

=head1 AUTHOR

Ravi Sachidanandam, CSHL. ravi@cshl.org


=cut


