#!/usr/local/bin/perl

package transview;

use strict;
use ExtIndex;
use ExtURL;
use Apache;
use EnsWeb;
use CGI qw(:standard :html3);
use GramenePage;
use SiteDefs;

use strict;

my $q = new CGI;
my $geneid = SiteDefs::idfix($q->param('gene'));
my $transid = SiteDefs::idfix($q->param('transcript'));

$|=1;

my $r;
$r = Apache->request();
$r->err_header_out('ensembl_headers_out'=>1);
print CGI::header();
my ($head,$onload,$js_divs)=&EnsWeb::cgi_header_info(('initfocus'=>1));
my $grpg=GramenePage->new($r);
print $head,$grpg->start_body(  -ensembl=>1, -bodyattr=>$onload, -bodyfirst=>$js_divs  );
print "<br><br><center>\n";

my $db;
my $gene;
my $got_clones =0;
my @clones;
my @trans;
my @tseq;
my $trans;
my $exon_count = 1;

my $indexer = new ExtIndex;

unless ($indexer) {
    print EnsWeb::print_form($transid, "transcript");
    print &ensembl_exception("The external indexes are not available. Please contact the webmaster\n");
print $grpg->end_body;
    #print EnsWeb::make_cgi_footer();
    Apache::exit();
}
	
#########################################
# Connect to database
# wrap all the db manipulation in an eval.
#########################################
eval {
    my $locator = &EnsWeb::get_locator();
    $db =  Bio::EnsEMBL::DBLoader->new($locator);
};

if( $@ ) {
    print EnsWeb::print_form($transid, "transcript");
    print &ensembl_exception("The Ensembl database is not available. Please contact the webmaster\n",$@);
print $grpg->end_body;
    print EnsWeb::make_cgi_footer();
    Apache::exit();
}

###################################################
# Find the gene corresponding to this transcript id 
###################################################
eval{
    if (!defined $geneid){
        $gene = $db->gene_Obj->get_Gene_by_Transcript_id($transid, 'evidence');	# load evidence as well
        $geneid = $gene->id;
    }
};

if( $@ ) {
    print EnsWeb::print_form($transid, "transcript");
    print &ensembl_exception("The transcript $transid cannot be found or an DB error occured during the fetch",$@);
print $grpg->end_body;
    print EnsWeb::make_cgi_footer();
    Apache::exit();
}

######################################################
# Now fetch the transcript object from the gene object
######################################################
eval{
    if (! $gene){
        $gene = $db->gene_Obj->get($geneid, 'evidence');
    }
        foreach my $temp_trans ( $gene->each_Transcript) {
	if( $temp_trans->id eq $transid ) {
	    $trans = $temp_trans;
	    last;
	}
    } 
};

if( $@ ) {
    print EnsWeb::print_form($transid, "transcript");
    print &ensembl_exception("An error occurred during the database query.",$@);
print $grpg->end_body;
    print EnsWeb::make_cgi_footer();
    Apache::exit();
}

if( ! defined $trans ) {
    print EnsWeb::print_form($transid, "transcript");
    print &ensembl_exception("Cannot find transcript $transid belonging to gene $geneid","",1);
print $grpg->end_body;
    print EnsWeb::make_cgi_footer();
    Apache::exit();
}

$db->gene_Obj->get_supporting_evidence_direct($trans->each_Exon);

print EnsWeb::print_form($transid, "transcript");

print("<h3>Transcript " . $trans->id . "</h3>\n");
print("<TABLE ALIGN=\"CENTER\" BGCOLOR=\"#ffffff\" WIDTH=\"100%\" BORDER=\"0\" CELLPADDING=\"3\" CELLSPACING=\"0\">\n");
print "<TR class=\"yellow2\">\n";
print "<th>No.</th><th>Exon</th><th>Start</th><th>End</th><th>Strand</th><th>Gene ID</th><th>Contig ID</th>\n";
print "</TR>\n";

foreach my $exon ($trans->each_Exon) {
    my $contig_id = $exon->contig_id;
    if ($exon->isa("Bio::EnsEMBL::StickyExon")) {
         my @tmp = $exon->each_component_Exon;
        $contig_id = $tmp[0]->contig_id;
    }
    print "<TR>\n";
    print "<td align=\"center\">$exon_count</td><td align=\"center\">" . $exon->id . "</td>\n";
    print "<td align=\"center\">" . $exon->start . "</td><td align=\"center\">" . $exon->end . "</td><td align=\"center\">" . $exon->strand ."</td>";
    print "<td align=\"center\"><a href=\"/perl/geneview?gene=" . $gene->id . "\">". $gene->id . "</a></td>\n";
    print "<td align=\"center\"><a href=\"/perl/contigview?clone=".$exon->clone_id."&id=" . $exon->clone_id ."\">".$contig_id."</a></td>\n";
    $exon_count++;
    print "</tr>\n";
}
print("</TABLE>\n");

# Stuff added by AS

my $this_exon;          # loop var of type exon
my $this_feature;       # loop var of type featurePair
my %all_datalibs;       # list of all datalibs keyed by seq_name
my %exon_ids;		# lists of all exonids keyed by seq_name
my $exon_count=0;	# count the number of exons
my %all_scores;         # lists of all scores keyed by seq_name
my %norm_all_scores;	# lists of all scores keyed by seq_name and normalised
my %total_scores=();	# Totals of %all_scores keyed by seq_name
my %top_score=();	# Top score for a sequence irrespective of which exon hit
my $dl_seq_name;        # Supporting feature Seq ID or ACC
my $score;		# a supporting feature score 
my $prev_score;		# the score for the previous exon linked to this seq
my $score_list;		# Reference to the array of scores for a seq_name
my %num_exons_hit=();	# count num of exons linked to this sequence
my %all_exons_len=();
my $i;			# loop var
my $j;			# another loop var
my $whole_len;		# total genomic length from which this score is linked
my $score_x;		# loop var for score

my $linked_datalib;	# Data lib to link
my $linked_seq_name;	# Sequence ID for linking
my $srs_result;		# Result from query to SRS
my %keywords;		# All keywords - stuff returned from SRS commandline getz
my $pfam_acc;		# Pfam accession number
my %links;		# links to SRS/PFAM keyed by sequence name
my $complete_srs_search=undef; # Full SRS search line
my $temp;		# temp string
my %ids;		# Sequence identifers
my $colour;		# row colour

#Retrieve info from relevant ensembl objects
foreach $this_exon ( $trans->each_Exon )
{
  $exon_count++;
  foreach $this_feature ( $this_exon->each_Supporting_Feature )
  {
    $exon_ids{$this_feature->hseqname}[$exon_count - 1 ] = $this_exon->id;
    if ( ! defined( $all_datalibs{$this_feature->hseqname} ) )
    {
      # Create array to hold the feature top-score for each exon
      $all_scores{$this_feature->hseqname} = [];
      # Hold the data library that this feature is from
      $all_datalibs{$this_feature->hseqname} = $this_feature->analysis->db;
      # This next line is just to force the output ordering
      $all_datalibs{$this_feature->hseqname} =~ s/swir/Swir/;
    }
    # Compare to see if this is the top-score
    if ( $this_feature->hscore > $all_scores{$this_feature->hseqname}[$exon_count - 1 ] )
    {
      # Adjust the top-score for this hit sequence
      # Subtract old score for this exon and add new score
      $total_scores{$this_feature->hseqname} = 
        $total_scores{$this_feature->hseqname} - $all_scores{$this_feature->hseqname}[$exon_count - 1 ] + $this_feature->hscore;
      # Keep this new top-score
      $all_scores{$this_feature->hseqname}[$exon_count - 1] =
        $this_feature->hscore;
      $all_exons_len{$this_feature->hseqname}[$exon_count - 1] =
        $this_exon->length;
      if ( $this_feature->hscore > $top_score{$this_feature->hseqname} )
      {
        $top_score{$this_feature->hseqname} = $this_feature->hscore;
      }
    } # END if 
  } # END foreach $this_feature
} # END foreach $this_exon

unless ( defined( %all_datalibs ) )
{
  print("</ul><P>\n");
  print "The supporting evidence for these transcripts has not yet been placed in the database.<P>\n";
  #print "Please use the <A HREF=\"/feedback.html\">Feedback</A> web page to send comments.</P>\n";
  print $grpg->end_body;
  #print EnsWeb::make_cgi_footer();
  Apache::exit;
}

print<<END;
<h4>Supporting evidence for exons</h4>
<p>Below is a table of database hits having overlaps with each exon in the
transcript.  The database hits are the results of a series of blast runs
against genscan predicted peptides.  They are ordered by;
</p>
<UL TYPE=DISC>
  <LI> Data library.
  <LI> Top scoring, exon vs sequence, hit.
  <LI> Number of exons.
</UL>
<P><B> A threshold has been applied to the supporting evidence table.</B>
<UL TYPE=DISC>
  <LI> For Blast hits this value is 80.
</UL>
<P><B>Note:</B><BR><UL><LI>An exon without supporting evidence means that it was generated by aligning a protein
      to the genomic sequence using genewise.</LI>
      <li>Low scoring evidence is greyed out.</LI>
</UL></P>

END

# Find how many exons hit this sequence
# Get the whole length of the hit for each sequence
while ( ( $dl_seq_name, $score_list ) = each ( %all_scores ) )
{
  $num_exons_hit{$dl_seq_name} = 0;
  for ( $i = 0; $i <= $#{$score_list}; $i++ )
  {
    $score  = @{$score_list}[$i];
    if ( defined( $score ) )
    {
      $num_exons_hit{$dl_seq_name}++;
    } # END if
  } # END for $i
} # END while


# Print out header for Supporting Evidence Table
print <<"HEADER";
<TABLE ALIGN="CENTER" WIDTH="100%" BORDER="0" CELLPADDING="0" CELLSPACING="0">
  <TR class="yellow2">
    <TH>&nbsp;&nbsp;</TH>
    <TH>Data Library</TH>
    <TH>&nbsp;&nbsp;</TH>
    <TH>Sequence</TH>
    <TH>&nbsp;&nbsp;</TH>
    <TH WIDTH="100%">Definition</TH>
    <TH>&nbsp;&nbsp;</TH>
HEADER
print"    <TH COLSPAN=\"$exon_count\" WIDTH=\"", $exon_count*38, "\">Exons</TH>\n";
print"  </TR>\n  <TR class=\"yellow2\">\n"; 
print "    <TH>&nbsp;&nbsp;</TH><TH>&nbsp;&nbsp;</TH><TH>&nbsp;&nbsp;</TH><TH>&nbsp;&nbsp;</TH><TH>&nbsp;&nbsp;</TH><TH>&nbsp;&nbsp;</TH><TH>&nbsp;&nbsp;</TH>";
# Create a COL for each Exon
for ( my $i = 1; $i <= $exon_count; $i++ )
{
  print "<TH WIDTH=\"38\">${i}</TH>";
}
print "\n  </TR>\n";

##############################################
# Retrieve SRS data and build link to web SRS
##############################################

my $urls = ExtURL->new;

foreach $dl_seq_name ( keys( %all_datalibs ) ){

  ( $linked_datalib, $linked_seq_name ) =
    ( $dl_seq_name =~ /^(\w+)(?:\:|\|[\w\.]+\|)([\w\.]+)/ );

  $ids{$dl_seq_name} = "$linked_seq_name";

    $ids{$dl_seq_name} = $dl_seq_name;
    if ( $all_datalibs{$dl_seq_name} =~ /vert/i ) # DNA
    {
	$keywords{$dl_seq_name} = &get_srs_desc_by_id("emblnew",$dl_seq_name);
	unless ($keywords{$dl_seq_name}){
	    $keywords{$dl_seq_name} = &get_srs_desc_by_id("embl",$dl_seq_name); 
	}
	$links{$dl_seq_name} = qq(<A HREF=").$urls->get_url('EMBL',$dl_seq_name).qq(">$dl_seq_name</A>);
    }
    # special case for PFAM which is linked, surprisingly enough, to PFAM
    elsif ( $all_datalibs{$dl_seq_name} =~ /pfamfrag/i ) # PFAM
    {
     $keywords{$dl_seq_name} = &get_srs_desc_by_id("pfam",$dl_seq_name);
	$links{$dl_seq_name} = qq(<A HREF=").$urls->get_url('PFAM',$dl_seq_name).qq(">$dl_seq_name</A>);
    }
  elsif ( $all_datalibs{$dl_seq_name} =~ /SPTR/i ) # SWISSPROT
  {
    $keywords{$dl_seq_name} = &get_srs_desc_by_id("swissnew",$dl_seq_name);
    unless ($keywords{$dl_seq_name}){
	$keywords{$dl_seq_name} = &get_srs_desc_by_id("swissprot",$dl_seq_name); 
    }
	$links{$dl_seq_name} = qq(<A HREF=").$urls->get_url('SWISSPROT',$dl_seq_name).qq(">$dl_seq_name</A>);
  }  
  
  elsif ( $all_datalibs{$dl_seq_name} =~ /TR/i ) # TREMBL
  {
    $keywords{$dl_seq_name} = &get_srs_desc_by_id("tremblnew",$dl_seq_name);
    unless ($keywords{$dl_seq_name}){
	$keywords{$dl_seq_name} = &get_srs_desc_by_id("sptrembl",$dl_seq_name); 
    }
    $links{$dl_seq_name} = qq(<A HREF=").$urls->get_url('SPTREMBL',$dl_seq_name).qq(">$dl_seq_name</A>);
  }
  
  elsif ( $all_datalibs{$dl_seq_name} =~ /WP/i ) # WORMPEP 
  {
    $keywords{$dl_seq_name} = "WORMPEP homolog.";
    $links{$dl_seq_name} = qq(<A HREF=").$urls->get_url('WORM',$dl_seq_name).qq(">$dl_seq_name</A>);
  }
  
  else	#DNA 
  {
    $keywords{$dl_seq_name} = &get_srs_desc_by_acc("emblnew",$dl_seq_name);
    unless ($keywords{$dl_seq_name}){
	$keywords{$dl_seq_name} = &get_srs_desc_by_acc("embl",$dl_seq_name); 
    }
    $links{$dl_seq_name} = qq(<A HREF=").$urls->get_url('EMBL',$dl_seq_name).qq(">$dl_seq_name</A>);
  }
    $keywords{$dl_seq_name} =~ s/DE\s+//g ;
    $keywords{$dl_seq_name} =~ tr/\n/ /;
} 

# Fill the TABLE
foreach $dl_seq_name ( sort { $all_datalibs{$a} cmp $all_datalibs{$b} ||
                         $top_score{$b} <=> $top_score{$a} ||
                         $num_exons_hit{$b} <=> $num_exons_hit{$a} ||
                         $total_scores{$b} <=> $total_scores{$a} }
                         keys( %all_datalibs ) )
{
  my $exon_gif = "/gfx/green.gif";
  #( $colour ) = ( $colour =~ /violet1/ ? "<TR class=\"yellow1\">" : "<TR>" );
  ( $colour ) = "<TR class=\"yellow1\">";
  if ( ( $all_datalibs{$dl_seq_name} =~ /pfamfrag/i && $top_score{$dl_seq_name} < 25 ) || ( $all_datalibs{$dl_seq_name} !~ /pfamfrag/i && $top_score{$dl_seq_name} < 80 ) )
  {
    $colour = "<TR BGCOLOR=\"#CCCCCC\">";
    $exon_gif = "/gfx/gray.gif";
  }
  print "$colour\n";
  print "<TD>&nbsp;</TD>";
  print "  <TD ALIGN=LEFT VALIGN=TOP>$all_datalibs{$dl_seq_name}&nbsp;&nbsp;&nbsp;&nbsp;</TD>";
  print "<TD>&nbsp;</TD>";
  print "<TD ALIGN=LEFT VALIGN=TOP>$links{$dl_seq_name}</TD>";
  print "<TD>&nbsp;</TD>";
  if ( defined( $keywords{$dl_seq_name} ) )
  {
    print "<TD><FONT SIZE=\"-2\">$keywords{$dl_seq_name}&nbsp;</FONT></TD>";
  }
  else
  {
    print "<TD>&nbsp;</TD>";
  }
  print "<TD WIDTH=\"3\">&nbsp;</TD>";
    
  for ( $i = 0; $i <= ( $exon_count - 1 ); $i++ )
  { 
    $score = $all_scores{$dl_seq_name}[$i];
    print "    <TD>";
    if ( defined( $score ) )
    {
      # Link to alignments
      if ( $all_datalibs{$dl_seq_name} !~ /pfamfrag/i )
      {
        if ( $all_datalibs{$dl_seq_name} =~ /swir/i )
        {
          # This is a WORMPEP entry id that ends in a letter
          # There are no sequences for these !!!!!!!
          if ( $dl_seq_name =~ /^WP.+[A-Z]$/i )
          {
            print "<IMG SRC=\"${exon_gif}\" HEIGHT=\"15\" WIDTH=\"35\" BORDER=\"0\" ALT=\"Score=$all_scores{$dl_seq_name}[$i]\">";
          }
          else
          {
            print "<A HREF=\"/perl/alignview?gene=$geneid&transcript=$transid&exon=$exon_ids{$dl_seq_name}[$i]&sequence=$dl_seq_name&seq_type=P\" TARGET=\"ALIGN\">";
            print "<IMG SRC=\"${exon_gif}\" HEIGHT=\"15\" WIDTH=\"35\" BORDER=\"0\" ALT=\"View Alignment: Score=$all_scores{$dl_seq_name}[$i]\">";
            print "</A>";
          }
        }
        else # DNA hit
        {
          print "<A HREF=\"/perl/alignview?gene=$geneid&transcript=$transid&exon=$exon_ids{$dl_seq_name}[$i]&sequence=$dl_seq_name&seq_type=N\" TARGET=\"ALIGN\">";
          print "<IMG SRC=\"${exon_gif}\" HEIGHT=\"15\" WIDTH=\"35\" BORDER=\"0\" ALT=\"View Alignment: Score=$all_scores{$dl_seq_name}[$i]\">";
          print "</A>";
        }
      }
      else # This is a PFAM hit
      {
        print "<IMG SRC=\"${exon_gif}\" HEIGHT=\"15\" WIDTH=\"35\" ALT=\"Score=$all_scores{$dl_seq_name}[$i]\">";
      }
      # If there is a following score add a linker gif
      #if ( check_for_more_scores($i, $all_scores{$dl_seq_name}, "+" ) )
      if ( $all_scores{$dl_seq_name}[$i+1] )
      {
        print "<IMG SRC=\"/gfx/space.gif\" HEIGHT=\"15\" WIDTH=\"3\">";
      }
    }
    else
    {
      print "<IMG SRC=\"/gfx/blank.gif\" HEIGHT=\"15\" WIDTH=\"38\">";
    }
    print "</TD>\n";
  }
  print "  </TR>\n";
}
print("</TABLE>\n");

print "<BR>\n";

print $grpg->end_body;


# **********************************************************************
# SUBROUTINE: check_for_more_scores
#
# Checks if there is a scoring exon upstream or downstream of the
# current exon position.
#
# RETURN:
# 1 = found neighbour
# 0 = for not found
#
# USAGE: check_for_more_scores( $num, $array_ref, $direction )
# $num       = current exon position
# $array_ref = reference to the exon score list for this sequence
# $direction = either "-" or "+"
# **********************************************************************

sub check_for_more_scores
{
  my $pos = $_[0];
  my $score_list = $_[1];
  my $dir = $_[2];
  ( my $op ) = ( $dir =~ /\+/ ? "<=" : ">=" );
  ( my $end ) = ( $dir =~ /\+/ ? $#{$score_list} : 0 );

  my $code = "\$pos$dir$dir;
          while ( \$pos $op $end )
          {
            if ( defined( \@{\$score_list}[\$pos] ) )
            {
              return 1;
            }
            \$pos$dir$dir;
          }
          return 0;";
  eval $code;
} # END sub check_for_more_scores



# **********************************************************************
# SUBROUTINE: get_srs_desc_by_id
#
# gets description of a sequence name from external index
#
# RETURN:
# String containing the description, or undef if not found
#
# USAGE: get_srs_desc_by_id($database_name, $sequence_name )
#
# **********************************************************************

sub get_srs_desc_by_id{
	my ($db, $id)=@_;
	$db=uc($db);	
	my $result_ref= $indexer->get_seq_by_id({
						DB	=> $db,
					 	ID	=> $id,
						OPTIONS => 'desc'
					 	});

	if ($result_ref){
	    $result_ref=join("<br>",@$result_ref);
	}
	
	return $result_ref;
	
}


# **********************************************************************
# SUBROUTINE: get_srs_desc_by_acc
#
# gets description of a sequence name from external index
#
# RETURN:
# String containing the description, or undef if not found
#
# USAGE: get_srs_desc_by_acc($database_name, $sequence_name )
#
# **********************************************************************

sub get_srs_desc_by_acc{
	my ($db, $acc)=@_;
	$db=uc($db);
    	my $result_ref= $indexer->get_seq_by_acc({
						DB	=> $db,
						ACC 	=> $acc,
						OPTIONS	=> 'desc'
					 	});

	if ($result_ref){
	    $result_ref=join("<br>",@$result_ref);
	}

	return $result_ref;
}

############################################################
