#!/usr/local/bin/perl
########################################################################
# Ensembl SSAHA Server
# Tony Cox, Sanger Centre, November,2000.
########################################################################

package ssahaview;


BEGIN{
		unshift (@INC, "../ensembl/modules");
		unshift (@INC, "../bioperl-live");
		unshift (@INC, "../perl");
		unshift (@INC, "../modules");
		unshift (@INC, "../conf");
		unshift (@INC, "../ensembl-view");
}

use strict;
use SiteDefs;
use EnsWeb;
use CGI qw(:standard :html3);
use Sys::Hostname;
use IO::Socket;

$| = 1;
my %URL 			= (); # SAX parsing, global requiring, headache inducing HACK!
my %MATCHES 		= (); 
my $SUBJECT 		= "";
my $SERVER_URL 		= "http://www.ensembl.org";
my $SSAHA_PORT   	= 24006;
my $SSAHA_SERVER 	= 'tcs1a.sanger.ac.uk';
my $SSAHA_CONTEXT 	= 1000;
my $SSAHA_CUTOFF 	= 14;
my $MATCH_COUNT	= 0;
my $RUNTYPE		= $0;

my $q 			= new CGI;
print $q->header();

#print STDERR "PARAMS:\n";
#print STDERR join("\n", $q->param());

# HTML header
if( defined( $ENV{'MOD_PERL'} ) ){
        my $r = Apache->request();
        $r->err_header_out('ensembl_headers_out'=>1);
}


my $llama=<<EOL;
>ENST00000062943
ATGTGCCGCGTTTGCACCAAAACCGTGAAGAAGGCGGCCCGGGTCATCAT
AGAAAAGTACTACACACGCCTGGGCAACGACTTCCACACGAACAAGCGCG
TGTGCAAGGAGATCGCCATTATCCCCAGCAAGAAGCTCCGCAACAAGATA
GCAGGCTATGTCACGCATCTGATGAAATGGATTCAGAGAGGCCCAGTAAG
AGGTATCTCCATCAAGCTGCAGGAGGAGGAGAGAGAAAGGAGAGACAATT
ATGTTCCTGAGGTCTCAGCCTTGGATCAGGAGATAATTGAAGTAGATCCT
GACACTAAGGAAATGCTGAAGCTTTTGGACTTCGGCAGTCTGTCCAACCT
GCAGCCTCATCTTTCCTGCGGCCTGAGTGTCTGGCTGGCTTTTTTCTTTC
TCGGGTGTTCTGCTAACAAGGGCCCCTTTAGTGACCAGATCCTGGTTTTG
ATTGGCACCAATGACGGCAAGAGAGAGTCCAACAGGATCCTACCAGTGAG
CAGTGACCCAGCAGCTCAGGCCAGCTGCATGGGAGCAG
EOL
my $camel=<<EOC;
>CAMEL
LNLLLAHWFSMKTLSCFILNSSLLFCSHRLLFFF
CHWLCSVLLKLPCPAFLYFRHWGSVAQWLRARAL
EOC

if (defined $q->param('sequence')){
	if ($q->param('type') eq "ssaha"){
		## we are doing a SSAHA search
		print &EnsWeb::make_cgi_header();
		print EnsWeb::print_form('', "ssaha");
		&do_ssaha_search();
	} 
	else{
		## we panic...
	}
}
else{
	## we just echo the correct form back
	if ($RUNTYPE =~ /ssahaview/){
		print &EnsWeb::make_cgi_header();
		print EnsWeb::print_form('', "ssaha");
		print &ssahaform($SUBJECT);
	} 
}

print &EnsWeb::make_cgi_footer();

1;

#######################################################################################
sub do_ssaha_search {

	if ($q->param('datasource') eq 'latestgp'){
		$SSAHA_PORT   = 24006;
		$SSAHA_SERVER = 'tcs1a.sanger.ac.uk';
		$SUBJECT 	  = "Latest Ensembl \"golden path\"";
	}
	if ($q->param('datasource') eq 'TraceRepos'){
		$SSAHA_PORT   = 21006;
		$SSAHA_SERVER = 'tcs1a.sanger.ac.uk';
		$SUBJECT 	  = "Ensembl Trace Repository";
	}
	if (defined $q->param('ssaha_context')){
		$SSAHA_CONTEXT = $q->param('ssaha_context');
		$SSAHA_CONTEXT *=2;
	}
	if (defined $q->param('ssaha_cutoff')){
		$SSAHA_CUTOFF = $q->param('ssaha_cutoff');
	}
	my $inseq = undef;
	if (defined $q->param('sequence') && $q->param('sequence') ne ""){
		$inseq = $q->param('sequence');
		$inseq = $llama if ($inseq eq 'llama');
	} elsif (defined $q->param('uploadfile') && $q->param('uploadfile') ne ""){
		local $^W=0;
		my $fh = $q->upload('uploadfile');
		while(<$fh>){
			$inseq .= $_;
		}
	} else {
		print qq(<H3>Ensembl SSAHA Server: $SUBJECT</H3>);
		print qq(<P>No FASTA sequence supplied!</P><BR>);
		print &EnsWeb::make_cgi_footer();
		#&ensembl_exit;			
	}

	## start the web page
	print qq(<H3>Ensembl SSAHA Server: $SUBJECT</H3>);
	print qq(<H4>Graphical match will be displayed centred in $SSAHA_CONTEXT base pairs. );
	print qq(Minimum length of match: $SSAHA_CUTOFF base pairs.</H4>);


	my @seqs = split(">", $inseq);
	foreach my $seq (@seqs){
		if ($seq =~ /\s*(.*)\n([\w\W]*)/){	# sanitize the sequence
			my $name = ">".$1;				# put back the ">" we split the sequence on....
			my $s = $2;
			$s =~ s/[\s\W\d]//ig;
			$seq = "$name\n$s\n";
			#print STDERR "SSAHA SEQ: \n$seq\n";
		} else {
			my $name = ">USER_QUERY\n";
			$seq =~ s/[\s\W\d]//ig;
			$seq = $name.$seq."\n";
			#print STDERR "SSAHA SEQ: \n$seq\n";
		}
        
		## get a server socket and send the query data
        my $socket;
		unless ($socket = &get_ssaha_server_socket($SSAHA_PORT, $SSAHA_SERVER)){
		    print qq(<H3>Ensembl SSAHA Server: $SUBJECT</H3>);
		    print qq(<P>Cannot connect to SSAHA server!</P><BR>);
            print &EnsWeb::make_cgi_footer();
            &ensembl_exit;
        }
        
        
		print $socket "$seq\xFF";

		&SSAHAparse($socket);
		close($socket);

	}

return();

}

#######################################################################################
sub SSAHAparse {
	my $socket = shift;
	my $c = 0;
	# ml2C-b109a03.p1c        AC000123.00001  256     269     +       22877   22890
	my ($hit, $contig, $hitstart, $hitend, $strand, $rcstart, $rcend);
	my $context = $q->param('ssaha_context');
	my $len;

	if($q->param('datasource') =~ /gp/){ # we are doing human
		while(<$socket>){
			next if(/[<>]/);
			($hit, $contig, $hitstart, $hitend, $strand, $rcstart, $rcend) = split ("\t", $_);
			$len = $rcend-$rcstart+1;
			next if ($len < $q->param('ssaha_cutoff'));


			print qq(<B>$hit bases $hitstart-$hitend matched contig $contig [${strand}ve strand] at position 
			<A TARGET="new" HREF="/perl/contigview?contig=$contig&fpos_start=$rcstart&fpos_end=$rcend&fpos_context=$context">$rcstart-$rcend</A> ($len bp)</B><BR>);
			$c++;
		}

	print qq(<BR><BR>);

}

}

#######################################################################################
sub get_ssaha_server_socket {

	my ($port, $server)		= @_;
	my $s 		  			= undef;
	my $DEBUG 	  			= 1;

		$s = IO::Socket::INET->new(PeerAddr => $server,
               		  PeerPort => $port,
               		  Proto    => 'tcp',
               		  Type     => SOCK_STREAM,
               		  Timeout  => 10,
               		  );
		if($s) {
			$s->autoflush(1);
		} 
		else {
			print STDERR "Warning: ssaha server on $server unreachable.\n" if ($DEBUG);
			return undef;
		}

	print STDERR "Connected to $server\n" if ($DEBUG);
	return $s;

}


##########################################################################################
sub ssahaform {

my ($data) = @_;

return<<END;

<SCRIPT LANGUAGE="JavaScript">
<!-- hide from old browsers

// **********************************************************************
// FUNCTION: checkSequence( sequence, search_type )
//
// Checks to see if the submitted sequence agrees with the requested
// search type eg If it is a "Prot vs Prot" query does the sequence
// contain less then 85% "ACGTNX"
// The FORM will only be submitted if this sequence returns TRUE
// **********************************************************************

  function checkSequence( sequence, search_type )
  {

    var i = 0;
    var count = 0;
    var residue = "";
    var percentage;
    var def_line_end;
    var sequence_to_check;
    var spaces = 0;
    var bases = "ACGTNX";
    var base_found;
    var space_or_digits = "01234 56789"
    var space_or_digit_found;

// **********************************************************************
// i                    loop variable
// count                holds the cumulative number of "ACGTNX"
// residue              single residue in the sequence 
// percentage           the % of the sequence that is "ACGTNX"
// def_line_end         position of the end of the definition line
// sequence_to_check    sequence without the definition line
// spaces               number of spaces or digits found
// bases                valid list of bases
// base_found           was a valid base found?
// space_or_digits      invalid chars 
// space_or_digit_found was an invalid char found?
// **********************************************************************

    // Check to see if it is FASTA
    if ( sequence[0] == '>' )
    {
      def_line_end = sequence.indexOf('\\n');
      sequence_to_check = sequence.substring( ( def_line_end + 1 ), sequence.length );
    }
    else // Raw Text
    {
      sequence_to_check = sequence;
    }

    sequence_to_check = sequence_to_check.toUpperCase();

    for ( var i = 0; i < sequence_to_check.length ; i++ )
    {
      residue = sequence_to_check.charAt(i);

      // Find all the ACGTNX chars - valid bases
      // If it is not found the return value is -1
      base_found = bases.indexOf( residue );

      if ( base_found >= 0 )
      {
        count++;
      } 

      // Find all the 1234 56789 chars 
      space_or_digit_found = space_or_digits.indexOf( residue )
      if ( space_or_digit_found >= 0 )
      {
        spaces++;
      }
    }

    percentage = ( count / ( sequence_to_check.length - spaces ) ) * 100;

    // Ask the user to confirm that the sequence they supplied is correct
    // window.confirm returns a BOOLEAN value which is used as the FUNCTION
    // return value

    if ( ( search_type == "wublastp" || search_type == "wutblastn" ) && ( percentage > 85 ) )
    {
      return window.confirm( "A search requiring a PROTEIN query sequence has\\nbeen selected, however the sequence looks to be DNA.\\n\\nPlease confirm that it is a PROTEIN sequence." );
    }
    else if ( ( search_type == "wublastn" || search_type == "wublastx" ) && ( percentage < 85 ) )
    {
      return window.confirm( "A search requiring a DNA query sequence has\\nbeen selected , however the sequence looks to be PROTEIN.\\n\\nPlease confirm that it is a DNA sequence." );
    }
    else
    {
      return true;
    }

  } // END checkSequence




// **********************************************************************
// FUNCTION: makeArray
//
// This is the constructor for objects of type makeArray it builds
// objects that are arrays
// **********************************************************************

  function makeArray()
  {
    // *.arguments is a standard method which each object has and contains the
    // arguments passed to it.

    var args = makeArray.arguments;

    for ( var i = 0; i < args.length; i++ )
    {
      this[i] = args[i];
    }
    this.length = args.length;

  } // END makeArray



// end script hiding -->
</SCRIPT>

<A HREF="http://www.digital.com"><IMG ALIGN="RIGHT" src="/gfx/alpha.gif" BORDER="0"></A>
<h2 align="center">Ensembl SSAHA Server</h2>

<center>
<table cellspacing="0" cellpadding="0" border="0" class="yellow1" width="450">


  <tr bgcolor="#ffffff"><td colspan="7"><img src="/gfx/blank.gif" width="1" height="20" alt=""></td></tr>

  <form NAME="ssaha_form" METHOD="POST" enctype='multipart/form-data'
  ACTION="/perl/ssahaview">
	<tr>
		<td rowspan="31" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
		<td colspan="5" class="grey1"><img src="/gfx/blank.gif" width="300" height="1"></td>
		<td rowspan="31" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	<tr valign="top">
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  		<td><small>Paste your <B>DNA</B> sequence(s) here in FASTA or plain text format.</small>
                <BR><BR>
                <center>
		  <INPUT TYPE="submit" VALUE="Search" class="red2">&nbsp;&nbsp;<input type="reset">
                </center></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  		<td align="left"><TEXTAREA NAME="sequence" ROWS="6" COLS="35"></TEXTAREA></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<td><small><B>OR</B> select the sequence file you wish to search</small></td>
	  	<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left"><INPUT TYPE="hidden" NAME="type" VALUE="ssaha" checked>
         <INPUT TYPE="FILE" NAME="uploadfile" SIZE="30"></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<!--td colspan="3" align="center" class="yellow2"><span class="h4">RESULTS</span></td-->
	  	<td colspan="3" align="center" class="yellow2"><span class="h4">SSAHA OPTIONS</span>&nbsp;&nbsp;&nbsp;
		  <a href="javascript:void(window.open('/perl/helpview?se=1&kw=ssahaview','helpview','width=400,height=500,resizable,scrollbars'));"><img src="/gfx/helpview/help.gif" border="0" align="absmiddle" alt="Help"></A> 
		</td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	<tr valign="middle">
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td>
		  Choose no. bp displayed either side of SSAHA matches
		  in contig displays (<B>human data only</B>).
		</td>
	  	<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<td align="left">
		  <!-- SSAHA context<br /> -->
		  <select name="ssaha_context">
			<option value="0">none</option>
			<option value="500">0.5Kbp</option>
			<option value="1000">1Kbp</option>
			<option value="2000">2Kbp</option>
			<option value="5000">5Kbp</option>
			<option value="10000" selected>10Kbp</option>
			<option value="50000">50Kbp</option>
			<option value="100000">100Kbp</option>
		  </select>&nbsp;&nbsp;&nbsp;(SSAHA context)
		</td>


		<td><img src="/gfx/blank.gif" width="16" height="22"></td>

	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	<tr valign="left">
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td>
		  Only display sequence matches longer than  
		</td>
	  	<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<td align="left">
		  <!-- SSAHA cutoff<br /> -->
		  <select name="ssaha_cutoff">
			<option value="14">14bp</option>
			<option value="28" selected>28bp</option>
			<option value="42">42bp</option>
			<option value="56">56bp</option>
			<option value="70">70bp</option>
			<option value="140">140bp</option>
		  </select>&nbsp;&nbsp;&nbsp;(minimum match length)
		</td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>

	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>


	<tr valign="middle">
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td>
		  Select the data source for the SSAHA search. 
		</td>
	  	<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<td align="left">
		  <!-- SSAHA data source<br /> -->
		  <select name="datasource">
			<option value="latestgp" selected>Latest Ensembl "golden path"</option>
			<!-- option value="TraceRepos">Ensembl Trace Repository</option -->
		  </select>
		</td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>

		<!--td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<td><INPUT TYPE="radio" NAME="return" VALUE="HTML" checked> Browser</td>
	  	<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td nowrap><b>OR</b>&nbsp;<INPUT TYPE="radio" NAME="return" VALUE="EMAIL"> Email to <INPUT NAME="address" SIZE="30"></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td-->
	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	<tr>
		<td colspan="5" class="grey1"><img src="/gfx/blank.gif" width="300" height="1"></td>
	</tr>
  </form>
</table>
</center>
<BR>

END

}

###############################################################
sub determine_input {

	if (defined $q->param('sequence') && $q->param('sequence') eq "llama"){
		return($llama);
	}
	if (defined $q->param('sequence') && $q->param('sequence') eq "camel"){
		return($camel);	
	}
    if ($q->param('uploadfile') ne "" && $q->param('sequence') ne "") {
	    print "<B>Warning: ambiguous sequence source (both file and text box contain data).<BR>";
	    print "Assuming you want to use the sequence in the box.</B><P>";
		#print STDERR "1\n";
		return ($q->param('sequence'));
	}
    if ($q->param('uploadfile') eq "" && $q->param('sequence') ne "") {
		#print STDERR "2\n";
		return ($q->param('sequence'));
	}
    if ($q->param('uploadfile') ne "" && $q->param('sequence') eq "") {
		local $^W = 0;
		my $inseq = "";
		my $fh = $q->upload('uploadfile');
		while(<$fh>){
			$inseq .= $_;
		}
		#print STDERR "3\n";
		return ("$inseq");
	}
    if ($q->param('uploadfile') eq "" && $q->param('sequence') eq "") {
	    print "<B>You have not supplied a sequence to search against!<BR>";
		print &EnsWeb::make_cgi_footer();
		exit;
	}
} # end of sub
