#!/usr/local/bin/perl
###############################################################################
#
#   Name:           contigview
#
#   Description:    mod_perl script to display a virtual contig showing genes,
#                   and a zoomed in virtual contig showing the detailed
#                   features of a region of the first vc.
#
#   History:        *cough*     as2/avc -   original versions
#                   2000-11-02  jws -   changes to work with static_golden_path
#                                       virtual contigs, and also clean up the
#                                       script interface a little.
#                   
#                   2000-11-10  jws     Added SSAHA feature handling.
#                   2000-11-17  jws     Modified SSAHA handling for generic
#                                       "feature" handling.
#		    2001-01-04  avc	Added markers to top vc
#		    2001-01-10	jws	ssaha now works slightly differently...
#		    2001-03-08	jws	Add checks for external feature factory
#					connections to ensure ctgview still 
#					works when they are not available
#		    2001-03-29	jws	Major changes.  Integrated in rmp and 
#					avc new drawing code.
#
###############################################################################

package contigview;

use strict;
use ExtIndex;
use ExtURL;
use Apache;
use EnsWeb;
use SiteDefs;
use CGI qw(:standard :html3 :cookie);
use Bio::EnsEMBL::DBSQL::ExternalWrapper;
use Bio::EnsEMBL::Map::DBSQL::Map;
use Bio::EnsEMBL::ExternalData::SNPSQL::WebSNPAdaptor;
#use Bio::EnsEMBL::ExternalData::SNPSQL::FullSNPAdaptor;
use Bio::EnsEMBL::ExternalData::TCORESQL::DBAdaptor;
use Bio::EnsEMBL::ExternalData::EXONERATESQL::DBAdaptor;
use WebUserConfig;
use Time::Local;
use Digest::MD5;
use Bio::EnsEMBL::Utils::Eprof qw(eprof_start eprof_end eprof_dump);
$|=1;

###############################################################################
&eprof_start('contigview');

my $cgi		= new CGI;
my $Config      = new WebUserConfig($cgi, 'contigviewbottom');
my $Config_top  = new WebUserConfig($cgi, 'contigviewtop');
my $r           = Apache->request();
my $type	= 'gif';

#################
# CGI parameters
#################
my $geneid          = $cgi->param('geneid');    # Gene ID passed from geneview.
my $contigid        = $cgi->param('contig');    # Contig ID passed from <FORM>
my $cloneid         = $cgi->param('clone');     # Clone ID passed from <FORM>.
my $chr             = $cgi->param('chr');       # Chromosome name.
my $genes = 1;
my $features = 1;
my $repeat   = 1;
my $contigs = 1;
my $snp  = 1;
my $threshold = 100;

my $jarURL = "http://ensrv1.sanger.ac.uk/applets/scoobynew.jar";
my $code   = "scooby.gui.GFFApplet.class";

if ($cgi->param('applet')) {
    print $cgi->header . "\n";;
    print "<applet archive=\"$jarURL\" code=\"$code\" width=200 height=60>\n";
    print "<param name=file value=\"http://ensrv1.sanger.ac.uk/perl/micheleview?chr=1&vc_start=1&vc_end=10000\">\n";
    print "<param name=name value=\"chr1 1-10000\">\n";
    print "</applet><p>\n";
    exit(0);
} 

my $vc_start        = $cgi->param('vc_start');  # Start-point (abs bp) for the
                                                # top virtual contig.
my $vc_end          = $cgi->param('vc_end');    # End-point (abs bp) for the
                                                # top virtual contig.

if ($vc_start>$vc_end){							# Make sure they are the right
    ($vc_start,$vc_end)=($vc_end,$vc_start);	# way around...
}

my $TMP = "/mysql/ensembl/www/server/htdocs/gfx/image_tmp";

$r->err_header_out('ensembl_headers_out'=>1);

my $vc = undef;	    # Main window (bottom) Virtual Contig
my $sa = undef;	    # StaticGoldenPathAdaptor
my $db = undef;	    # Ensembl Database


######################################
# Get a hold of the database handles
######################################
eval {
    my $locator = &EnsWeb::get_locator();
    $db =  Bio::EnsEMBL::DBLoader->new($locator);


    if ($ENSEMBL_EMBL){
	my $emblext= Bio::EnsEMBL::DBSQL::DBAdaptor->new(       
			    -user   => $ENSEMBL_DBUSER,
			    -dbname => $ENSEMBL_EMBL,
			    -host   => $ENSEMBL_HOST,
			    -port   => $ENSEMBL_HOST_PORT,
			    ); 
                        
	my $embldb= new Bio::EnsEMBL::DBSQL::ExternalWrapper($emblext);
	$db->add_ExternalFeatureFactory($embldb);
    }

    my $snpdb = Bio::EnsEMBL::ExternalData::SNPSQL::WebSNPAdaptor->new(   
			     -dbname => $ENSEMBL_SNP,
			     -user   => $ENSEMBL_DBUSER,
			     -host   => $ENSEMBL_HOST,
			     -port   => $ENSEMBL_HOST_PORT,
			     );

    $db->add_ExternalFeatureFactory($snpdb);

    if ($ENSEMBL_MOUSE){
	my $mouse = Bio::EnsEMBL::ExternalData::EXONERATESQL::DBAdaptor->new( 
			    -user   => $ENSEMBL_DBUSER,
			    -dbname => $ENSEMBL_MOUSE,
			    -host   => $ENSEMBL_HOST,
			    -port   => $ENSEMBL_HOST_PORT,
			    ); 

	my $mouse_ext_feature_factory = $mouse->get_ExonerateAdaptor();
	$db->add_ExternalFeatureFactory($mouse_ext_feature_factory);
    }

    $db->static_golden_path_type('UCSC');
    $sa = $db->get_StaticGoldenPathAdaptor();
};

if ($@){
    print &ensembl_exception("The Ensembl database is not available. Please contact the webmaster\n",$@);
    print &EnsWeb::make_cgi_footer();
    &Apache::exit();
}


########################
# Clean-up entry params
########################
if ($chr){      
    $chr=~s/\s//g;              # Fix up the chromosome if
    unless($chr=~/^chr/){       # necessary - ensembl calls
    $chr="chr".$chr;            # take "chrn", but we need to
    }				# be able to pass in just "n"
}


###############################################################################
# SET-UP FOR VIRTUAL CONTIG
#
# We can come into contigview by reposition click, clone, contig, contig & 
#   featureposition or by # chr, start, and end.  Also by geneid.
#
# The precedence order is:
#   vc_click
#   chr, start, end
#   fpos_start, fpos_end, contig
#   Geneid
#   Contig
#   Clone.
#
#   NB fpos_start and fpos_end are offsets from a RAW contig start/end
#
###############################################################################

##############################################################################
# Find out how we've been initialised, and build the vc.  At the end of this
# conditional block we will have a virtual contig ($vc) and will have defined
# $vc_size, $chr, $vc_start, and $vc_end
##############################################################################
my $vc_size;
if (defined($chr) && defined($vc_start) && defined($vc_end)) {    
    $vc_size = $vc_end - $vc_start;
    
    ##############################
    # Going in by Chr, Start, End
    ##############################
    $vc_size=$vc_end-$vc_start;
    if ($vc_size <= 10){$vc_end = $vc_start + 10};

    eval {  
	$vc = $sa->fetch_VirtualContig_by_chr_start_end($chr, 
							$vc_start, 
							$vc_end
							);
    };
    if ($@){
	print &ensembl_exception("Cannot create virtual contig from chromosome $chr, start $vc_start, end $vc_end",$@);
    &EnsWeb::ensembl_exit();
    }
}
elsif (defined $geneid) {
    #####################
    # Going in by Gene
    #####################
    $vc_size=1000000;

    unless ($geneid=~/${ENSEMBL_PREFIX}G\d{11}/){
        my @genes=$db->gene_Obj->get_Gene_by_DBLink($geneid);
        unless($genes[0]){
            print &ensembl_exception( "Cannot find Ensembl entry for gene $geneid","No gene from \$db->gene_Obj->get_Gene_by_DBLink(\$geneid)",1);
	    &EnsWeb::ensembl_exit();
        }
        $geneid=$genes[0]->id;
    }
    eval {
        $vc = $sa->fetch_VirtualContig_by_gene($geneid, $vc_size);
    };
    if ($@){
	print &ensembl_exception("Cannot create virtual contig by this gene",$@);
	&ensembl_exit();
    }
    $chr	= $vc->_chr_name;
    $vc_start	= $vc->_global_start;
    $vc_end	= $vc->_global_end;
}
elsif (defined $contigid) {
    #####################
    # Going in by Contig
    #####################
    $vc_size=1000000;
       
    eval {
            $vc = $sa->fetch_VirtualContig_by_contig($contigid, $vc_size);
        };
    if ($@){
        print &ensembl_exception("Cannot create virtual contig by contig $contigid",$@);
        &ensembl_exit();
    }
    $chr	= $vc->_chr_name;
    $vc_start	= $vc->_global_start;
    $vc_end	= $vc->_global_end;
}
elsif (defined $cloneid){
    ####################
    # Going in by Clone
    ####################
    $vc_size    = 1000000;

    eval {
        $vc = $sa->fetch_VirtualContig_by_clone($cloneid, $vc_size);
    };
    if ($@){
		print &ensembl_exception("Sorry, Cannot display clone $cloneid. It is not part of the current 'golden path' assembly",$@,1);
		&ensembl_exit();
    }
    $chr        = $vc->_chr_name;
    $vc_start   = $vc->_global_start;
    $vc_end     = $vc->_global_end;
}
else {
    #######################################################################
    # Presumably either no params have been passed, or the chr, start, end
    # params are incomplete, or some other catastrophe has arisen.
    # Whatever, we are going no further, so let's hit it and quit.
    #######################################################################
    print &ensembl_exception("Not enough information has been provided to build a virtual contig","",1);
    &ensembl_exit();
}

my @map_contigs = $vc->_vmap->each_MapContig;

foreach my $contig (@map_contigs) {
    my $strand = "+";
    if ($contig->orientation == -1) {
      $strand = "-";
    }
    if ($contig->orientation == 1) {
    print $contig->contig->id . 
    "\tsequence\tsimilarity\t" . 
    ($contig->start) . "\t" . 
    ($contig->end) . 
    #($contig->start-$contig->rawcontig_start+1) . "\t" . 
    #($contig->start-$contig->rawcontig_start+$contig->contig->length) . 
    "\t100\t" . $strand . "\t.\t" . 
    $contig->contig->id . "\t" . 
    $contig->rawcontig_start . "\t" . 
    $contig->rawcontig_end . "\n";
    } else {
    print $contig->contig->id . "\tsequence\tsimilarity\t" . 
    #($contig->start-$contig->rawcontig_start+1) . "\t" . 
    #($contig->end-$contig->rawcontig_start+$contig->contig->length) . 
    ($contig->start) . "\t" . 
    ($contig->end) . 
    "\t100\t" . $strand. "\t.\t" . 
    $contig->contig->id . "\t" . 
    $contig->rawcontig_start . "\t" . 
    $contig->rawcontig_end . "\n";
   }

}

print STDERR "Time after contigs " . time . "\n";
my @genes    = $vc->get_all_Genes_exononly           if ($genes);
my @preds    = $vc->get_all_PredictionFeatures      if ($genes);

print STDERR "Time after genes " . time . "\n";
my @features;
if ($features && $vc->length < 5000000) {
push(@features ,$vc->get_all_SimilarityFeatures_above_score('cpg',25));
push(@features ,$vc->get_all_SimilarityFeatures_above_score('sptr',100));
push(@features ,$vc->get_all_SimilarityFeatures_above_score('tRNA',80));
push(@features ,$vc->get_all_SimilarityFeatures_above_score('unigene.seq',100));
push(@features ,$vc->get_all_SimilarityFeatures_above_score('embl_vertrna',100));
}

print STDERR "Time after features " . time . "\n";

my @snp      = $vc->get_all_ExternalFeatures       if ($snp && $vc->length < 5000000);

print STDERR "Time after snps " . time . "\n";
my @repeats  = $vc->get_all_RepeatFeatures         if ($repeat && $vc->length < 5000000);

print STDERR "Time after repeats " . time . "\n";

 GENE: foreach my $g (@genes) {
    my $genetype = "ensembl";
    if (defined($g->type)) {
     $genetype = $g->type;
    }
    print STDERR "Time before links " . time . "\n";
    my @dblinks = $g->each_DBLink;
    print STDERR "Time after links " . time . "\n";
    my $geneid = $g->id;

    LINK :foreach my $link (@dblinks ) {
         if ($link->database eq "HUGO") {
             $geneid = $link->display_id;
             $genetype = "hugo";
             last LINK;
         } elsif ($link->database eq "SWISS") {
             $geneid = $link->display_id;
             $genetype = "swiss";
         } elsif ($link->database eq "SPTREMBL") {
             $geneid = $link->display_id;
             $genetype = "sptrembl";
         }
    }

    foreach my $tran ($g->each_Transcript) {
        foreach my $exon ($tran->each_Exon) {
            my $strand = "+";
            if ($exon->strand == -1) {
                $strand = "-";
            }
            my $phase = ".";
            if (defined($exon->phase)) {
               $phase = $exon->phase;
            }
            if ($exon->seqname eq $vc->id) {
            print $exon->id . "\t$genetype\texon\t" . $exon->start . "\t" . $exon->end
 . "\t100" . "\t" . $strand . "\t" . $phase . "\t" . $geneid . "\n";
        }
	}
    }
}

print STDERR "Time after dump genes " . time . "\n";

foreach my $p (@preds) {
    foreach my $f ($p->sub_SeqFeature) {
       my $strand = "+";
       if ($f->strand == -1) {
           $strand = "-";
       }
       print $f->id . "\t" . $f->source_tag . "\texon\t" . 
             $f->start . "\t" . $f->end . "\t100\t" . $strand . "\t.\t" . $p->id . "\n";
    }
}
foreach my $f (@repeats) {
    $f->primary_tag('similarity');
    $f->source_tag('RepeatMasker');
    print $f->gffstring . "\n";
}

print STDERR "Time after dump repeats " . time . "\n";

foreach my $f (@features) {
    $f->seqname("static0");
    print $f->gffstring . "\n";
}
foreach my $snp (@snp) {
   if ($snp->primary_tag eq "Variation") {
       $snp->source_tag("Variation");
   }
   print "static0\t" . $snp->source_tag . "\t" . $snp->primary_tag . "\t" . $snp->start . "\t" . $snp->end . "\t" . $snp->score . "\t" . $snp->strand . "\t.\n";
}
print STDERR "Time after dump features " . time . "\n";


