#!/usr/local/bin/perl 

###############################################################################
#
#	Name:		dumpview
#
#	Description:	mod_perl script to dump sequence data from a vc, clone,
#			contig, gene or genscan peptide.
#
#	History:	2000-11-09	jws - original version	
#				2001-04-01  avc - added support for dumping traces
#
###############################################################################

package dumpview;

use SiteDefs;
use EnsWeb;
use CGI qw(:standard :html3);
use Bio::EnsEMBL::DBLoader;
use Bio::EnsEMBL::DBSQL::Feature_Obj;
use Bio::EnsEMBL::ExternalData::SNPSQL::FullSNPAdaptor;
use Bio::EnsEMBL::ExternalData::TCORESQL::DBAdaptor;
use Bio::EnsEMBL::ExternalData::EXONERATESQL::DBAdaptor;
use Bio::EnsEMBL::ExternalData::GMAPAdaptor;
use Bio::EnsEMBL::EMBL_Dump;
use Bio::SeqIO;
use Text::Wrap;
use IO::Socket;
#use GramenePage;

use strict;
#	use diagnostics;

##################
# Read parameters
##################

my $q		= new CGI;
my $type	= $q->param('type');	    # gene|contig|clone|vc|genscan|trace ?
my $id		= $q->param('id');	    # if gene|contig|clone, then id
my $context	= $q->param('context')||0;  # bp either side of gene to include
my $vc_chr	= $q->param('chr');	    # Chromosome for vc
my $vc_start	= $q->param('start');	    # start bp for vc
my $vc_end	= $q->param('end');	    # end bp for vc
my $format	= $q->param('format')||'fasta';	# embl or genbank
my @include	= $q->param('include');	    # Features to include in the dump
my $noclip	= $q->param('noclip');	    # Features to include in the dump
#########################################
# fix the passed chromosome if necessary
#########################################
#if ($vc_chr){
#    unless($vc_chr=~/^chr/){
#    $vc_chr="chr".$vc_chr;
#    }
#}

#############################
# Variable declaration thang
#############################
my $db;		# ensembl database handle
my $stadp;	# static golden path adaptor
my $seqout;	# sequence output object
my $vc;		# virtual contig object.  Both golden, and static.

###############
# Print Header
###############
my $r = Apache->request();
print header();
$r->err_header_out('ensembl_headers_out'=>1);

# Don't do this because then they can't cut and paste the fasta sequence cleanly:
#my ($head,$onload,$js_divs)=&EnsWeb::cgi_header_info(('initfocus'=>1));
#my $grpg=GramenePage->new($r);
#print $head,$grpg->start_body(  -ensembl=>1, -bodyattr=>$onload, -bodyfirst=>$js_divs  );

print &EnsWeb::make_cgi_header(('initfocus'=>1));


#######################
# Connect to databases
#######################
eval{
    my $locator = &EnsWeb::get_locator();
    $db =  Bio::EnsEMBL::DBLoader->new($locator);
    $db->static_golden_path_type($ENSEMBL_GOLD);   # set the static golden path type
    $stadp = $db->get_StaticGoldenPathAdaptor();

	if ($ENSEMBL_GMAP){
	    my $gmapdb= Bio::EnsEMBL::ExternalData::GMAPAdaptor->new(       
				-driver => $ENSEMBL_DRIVER,
				-user   => $ENSEMBL_DBUSER,
				-pass   => $ENSEMBL_DBPASS,
				-dbname => $ENSEMBL_GMAP,
				-host   => $ENSEMBL_HOST,
				-port   => $ENSEMBL_HOST_PORT,
				); 
			    
	    $db->add_ExternalFeatureFactory($gmapdb);
	}
	if ($ENSEMBL_SNP){
    my $snpdb = Bio::EnsEMBL::ExternalData::SNPSQL::FullSNPAdaptor->new(
                                                  -dbname => $ENSEMBL_SNP,
                                                  -user   => $ENSEMBL_DBUSER,
						  -driver => $ENSEMBL_DRIVER,
                                                  -host   => $ENSEMBL_HOST,
						  -pass   => $ENSEMBL_DBPASS,
                                                  -port   => $ENSEMBL_HOST_PORT,
                                                    );
    $db->add_ExternalFeatureFactory($snpdb); 
	}
	if ($ENSEMBL_MOUSE){
	my $mouse = Bio::EnsEMBL::ExternalData::EXONERATESQL::DBAdaptor->new( 
				-user   => $ENSEMBL_DBUSER,
				-dbname => $ENSEMBL_MOUSE,
			        -driver => $ENSEMBL_DRIVER,
				-host   => $ENSEMBL_HOST,
			        -pass   => $ENSEMBL_DBPASS,
				-port   => $ENSEMBL_HOST_PORT,
				); 

	my $mouse_ext_feature_factory = $mouse->get_ExonerateAdaptor();
	$db->add_ExternalFeatureFactory($mouse_ext_feature_factory);
	}
};


if ($@){
    print &ensembl_exception("The Ensembl database is currently unavailable.",$@);
    &ensembl_exit();
}


########################################################
# Find out what we're doing, and get the appropriate vc
########################################################

##################################################################
# Genscan, cdna, and peptide types are special cases - not via VC
# Handle these first.
##################################################################
if ($type =~ /genscan/i){
    ################
    # Print Heading
    ################
    print "<h3>Genscan volatile id $id</h3>\n";
    print "<pre>\n";

    ###########################################################################
    # Get the genscan feature and do some munging to get all the bits in phase
    ###########################################################################
    eval {
	my $feature_obj=Bio::EnsEMBL::DBSQL::Feature_Obj->new($db);
	my $transcript=$feature_obj->get_PredictionFeature_as_Transcript($id);
	my $seq = $transcript->dna_seq;
	my $found;
	my $seq0    = $seq->translate('*','X',0);
	my $seqstr0 = $seq0->seq; chop($seqstr0);

	if ($seqstr0 !~ /\*/) {
	    $found = $seqstr0;
	}

	my $seq1    = $seq->translate('*','X',2);
	my $seqstr1 = $seq1->seq; chop($seqstr1);

	if ($seqstr1 !~ /\*/) {
	  $found = $seqstr1;
	}

	my $seq2    = $seq->translate('*','X',1);
	my $seqstr2 = $seq2->seq; chop($seqstr2);

	if ($seqstr2 !~ /\*/) {
	  $found = $seqstr2;
	}

	if (defined($found) && (length($found) > 0)) {
	    $Text::Wrap::columns=60;
	    print wrap('','',">" .$transcript->id . "$id\n$found\n");
	} else {
	    $transcript->warn("Couldn't translate " . $transcript->id . "\n");
	}
    };
    if ($@){
	print &ensembl_exception("Genscan peptide for id $id could not be retrieved.",$@);
	&ensembl_exit();
    }
}
elsif ($type =~ /cdna/i){
    ################
    # Print Heading
    ################
    print "<h3>cDNA sequence for transcript $id</h3>\n";
    print "<pre class=dna>\n";

    ###############
    # Get cDNA seq
    ###############
    my ($transcript,$seq);
    eval {
        my $gene = $db->gene_Obj->get_Gene_by_Transcript_id($id);
        foreach my $tempt ( $gene->each_Transcript ) {
            if( $tempt->id eq $id ) {
                $transcript = $tempt;
                last;
            }
	}
        if( !defined $transcript ) {
           &ensembl_exception("Unable to find transcript");
        }
	$seq = $transcript->dna_seq->seq;
	if (defined($seq) && (length($seq) > 0)) {
	    $Text::Wrap::columns=60;
	    print wrap('','',">" .$transcript->id . "\n$seq\n");
	} else {
	    $transcript->warn("Couldn't translate " . $transcript->id . "\n");
	}
    };
    if ($@){
	print &ensembl_exception("cDNA sequence for transcript $id could not be retrieved.",$@);
	&ensembl_exit();
    }
    
}
elsif ($type =~ /peptide/i){
    ################
    # Print Heading
    ################
    print "<h3>Peptide sequence for transcript $id</h3>\n";
    print "<pre class=peptide>\n";

    ##################
    # Get peptide seq
    ##################
    my ($transcript,$seq);

    eval {
        my $gene = $db->gene_Obj->get_Gene_by_Transcript_id($id);
        foreach my $tempt ( $gene->each_Transcript ) {
            if( $tempt->id eq $id ) {
                $transcript = $tempt;
                last;
	    }
        }
        if( !defined $transcript ) {
           &ensembl_exception("Unable to find transcript");
        }
	
	$seq = $transcript->translate->seq;
	if (defined($seq) && (length($seq) > 0)) {
	    $Text::Wrap::columns=60;
	    print wrap('','',">" .$transcript->id . "\n$seq\n");
	} else {
	    $transcript->warn("Couldn't translate " . $transcript->id . "\n");
	}
    };
    if ($@){
	print &ensembl_exception("Peptide sequence for transcript $id could not be retrieved.",$@);
	&ensembl_exit();
    }
}
else {
    ##########################################################################
    # Not genscan - the rest work by building an appropriate vc and dumping it
    ##########################################################################
    if ($type =~ /region/i){
		eval {
	    	$vc = $stadp->fetch_VirtualContig_by_chr_start_end( $vc_chr,
									$vc_start, 
									$vc_end,
							    	);
		};
		if ($@){
	    	print &ensembl_exception("Cannot create virtual contig from $vc_chr:$vc_start to $vc_end",$@);
	    	&ensembl_exit();
		}
		$id = "$vc_chr bases $vc_start to $vc_end";
    }
    elsif ($type =~ /trace/i){
		my @servers = (qw(plato.sanger.ac.uk infosrv1.sanger.ac.uk));
		my $server;
		my $port   = 22100;
		my $s;
		foreach (@servers){
        		$s = IO::Socket::INET->new(PeerAddr => $_,
                        		   PeerPort => $port,
                        		   Proto    => 'tcp',
                        		   Type     => SOCK_STREAM,
                        		   Timeout  => 10,
                        		   );
        		if($s) {
            		$s->autoflush(1);
                	$server = $_;
            		last;
        		} 
		}
        unless ($s) {
            print  STDERR "Sorry, the trace server on $server is currently unreachable.\n";
            print  "Sorry, the trace server on $server is currently unreachable.\n";
	    	&ensembl_exit();
        }

		my $id = "$vc_chr bases $vc_start to $vc_end";		
    	print "<h3>Trace dump for $id</h3>\n";
		unless($noclip == 1){
			my $url = &CGI::url(-query=>1) . "&noclip=1";
			print qq(<P><B>Note that sequence quality clipping has been applied to all traces</B><BR>
					</P>
			);
			#[<A HREF="$url">View unclipped sequences</A>]
		} else {
			print qq(<P><B>Note that trace sequences have not been quality clipped</B><BR>	);	
		}
    	print "<pre class=dna>\n";
		eval {
	    	$vc = $stadp->fetch_VirtualContig_by_chr_start_end( $vc_chr,
									$vc_start, 
									$vc_end,
							    	);
		};
		if ($@){
	    	print &ensembl_exception("Cannot create virtual contig from $vc_chr:$vc_start to $vc_end",$@);
	    	&ensembl_exit();
		}
		# need to get traces and export them as fasta...
    	my @xf=$vc->get_all_ExternalFeatures();
		my @trace;
		my $i = 0;
		foreach my $f (@xf){
			if ($f->isa("Bio::EnsEMBL::FeaturePair")) {
				# An Exonerate trace match
				if ($f->analysis->dbID == 7) { # its an exonerate mouse trace match
					push (@trace, $f->id());
					$i++;
				}
			}
			last if($i >= 1000);	# put a limit on trace dumps;
		}

		if($noclip == 1){
			print $s "-t -c ", join(" ", @trace), "\n";
		} else {
			print $s "-t ", join(" ", @trace), "\n";		
		}
		while(<$s>) {
				if (/no match/i){
					print STDERR ".";
					next;
				}
        		print $_;
		}
		print STDERR "\n";
    	
		print "</pre>\n";
		if ($1 < 1000){
			print "<P>Dumped ", $i, " traces<P>\n"; 
		} else {
			print "<P><B>Cannot dump more than 1000 traces at a time. Please zoom in and re-dump!</B><P>\n";
		}
	    &ensembl_exit();
    }
    elsif ($type =~ /clone/i){
		$id=~s/\.\d\d\d//;	#fix it if they gave us a clone id
		eval {
	    	$vc = $stadp->fetch_VirtualContig_of_clone($id,$context);
		};
		if ($@){
	    	print &ensembl_exception("Cannot create virtual contig from clone with accession=$id",$@);
	    	&ensembl_exit();
		}
    }
    elsif ($type =~ /contig/i){
		$id .= ".001" unless $id =~ /\./;
		eval {
	    	$vc = $stadp->fetch_VirtualContig_of_contig($id,$context);
		};
		if ($@){
	    	print &ensembl_exception("Cannot create contig $id",$@);
	    	&ensembl_exit();
		}
    }
    elsif ($type =~ /gene/i){
		unless ($id=~/${ENSEMBL_PREFIXG}\d+$/){   # ${ENSEMBL_PREFIX}G\d{11}
	    	my @genes=$db->gene_Obj->get_Gene_by_DBLink($id);
	    	unless($genes[0]){
			print &ensembl_exception("Cannot find Ensembl entry for gene $id","",1);
			&ensembl_exit();
	    	}
	    	$id=$genes[0]->id;
		}	
		eval {
	    	$vc = $stadp->fetch_VirtualContig_of_gene($id,$context);
		};
		if ($@){
	    	print &ensembl_exception("Cannot create virtual contig from gene $id",$@);
	    	&ensembl_exit();
		}
    }
    else {
	print &ensembl_exception("Unknown object type $type requested.","",1);
	&ensembl_exit();
    }

    ########################
    # Set-up dumping object
    ########################
    if ($format =~ /embl|genbank/i) {
	$seqout = Bio::SeqIO->new( '-format' => $format, -fh => \*STDOUT);
	&Bio::EnsEMBL::EMBL_Dump::ensembl_annseq_output($seqout);
    } 
    elsif ($format =~ /fasta/i) {
	$seqout = Bio::SeqIO->new( '-format' => 'Fasta' , -fh => \*STDOUT);
    }
    
    ################
    # Print Heading
    ################
    print "<h3>Gramene ",($type eq 'gene'?'': "\u$type")," $id in \U$format\E format</h3>\n";
    print "<pre>\n";
    
    
    #######
    # Dump
    #######
    $vc->id($id);

    ###############################################
    # build a list of things to skip and skip them
    ###############################################
    my @skippablefeatures = ("similarity","repeat","external","contig","gene");
    my %dontskip;
    @dontskip{@include} = ();

    foreach my $skippy(@skippablefeatures){
	$vc->skip_SeqFeature("$skippy",1) unless exists $dontskip{$skippy};
    }

    if ( $format =~ /fasta/ ) {
	$seqout->write_seq($vc->primary_seq());
    } 
    elsif ( $format =~ /embl/ ) {
	&Bio::EnsEMBL::EMBL_Dump::add_ensembl_comments($vc);
	$seqout->write_seq($vc);
    } 
    elsif ( $format =~ /genbank/ ) {
	&Bio::EnsEMBL::EMBL_Dump::add_ensembl_comments($vc);
	$seqout->write_seq($vc);
    }
    else {
	print &ensembl_exception("Unknown format $format requested.","");
	&ensembl_exit();
    }
}
print "</pre>";
print<<EOS;
<center>
    <B>[ <A HREF="javascript:window.history.go(-1);">BACK</A> | <A HREF="javascript:window.close();">CLOSE</A>]</B>
</CENTER>
<br><br>
EOS
#print $grpg->end_body;
print EnsWeb::make_cgi_footer();
1;

###############################################################################
################################# END OF SCRIPT ###############################
###############################################################################
