#!/usr/local/bin/perl
###############################################################################
#
# SCRIPT      : blastview
# 
# DESCRIPTION : Web blast submitting and parsing system
# 
# AUTHOR      : James Stalker (jws@sanger.ac.uk)
# 
# CREATED     : August 2000
#
# CHANGE      : 2001-01-03  jws:migrated to mod_perl
#
###############################################################################

package blastview;
use EnsWeb;
use SiteDefs;
use CGI qw(standard);
use Sys::Hostname;
use strict;
#use Bio::EnsEMBL::Utils::Eprof qw(eprof_start eprof_end eprof_dump);

###############################################################################
################################ GLOBALS ######################################
###############################################################################

# Environment handed on to the blast executables that are launched by
# submit_browser_job.
$ENV{'BLASTMAT'}    ="/data/blastdb";               # blast matrix location
$ENV{'BLASTFILTER'} ="/usr/local/pubseq/bin";       # set up the filtering
$ENV{'BLASTDB'}     ="/data/blastdb";               # where to find the Dbs

# File-scoped settings shared by the subs below.
# NOTE(review): $VERSION, $HALT and $DEBUG are not referenced anywhere in the
# visible part of this file - confirm whether they are still needed.
my $VERSION         = "0.2";
# Passed to blast as -P<n> by process_options.
my $PROCESSORS      = 4;            # No. of processors to use on plato (max=6)
my $HALT            = 0;            # Set to 1 to deactivate blast
my $DEBUG           = 0;            # print debugging information
# Queue name given to "bsub -q" and recorded in the server log.
my $BLASTQUEUE      = "offlineblastq"; # default batch queue
# Executable run on the query file by make_mask_file.
my $RM_BINARY       = "/nfs/disk100/humpub/scripts/RepeatMasker";
# Holds <id>.tmp (query), _<id> (job still running), <id> (finished output)
# and the _blastserver_log file.
my $BLAST_HOME      = "/nfs/WWW/data/blastqueue";       # output blast files 
# render_blast writes <id>.gif here and links to it through $GIF_HOME_URL.
my $GIF_HOME        = "${ENSEMBL_SERVERROOT}/htdocs/gfx/image_tmp";# output gif files
my $GIF_HOME_URL    = "/gfx/image_tmp";                 # output gif url
# HTML fragments appended by process_options (filtering / repeatmasker state).
my $BLAST_HEADER_TXT;

###############################################################################
################################### MAIN ######################################
###############################################################################

# Request flow:
#   1. parse the CGI parameters and print the page header;
#   2. bail out with a notice if the /nfs/WWW/NOLSF flag file exists;
#   3. if a sequence was posted, submit it and remember the new job id;
#   4. if we have an id, render the results when the output file exists,
#      otherwise show the "not ready" page;
#   5. with neither sequence nor id, show the blank submission form.

# unbuffer output
$|=1;

#&eprof_start('entire');
#&eprof_start('headers');
my %parameters=&parse_params;

print CGI::header();
print &EnsWeb::make_cgi_header(('initfocus'=>1, 'menus'=>1));
print &EnsWeb::print_form ( "", "blast" );

#&eprof_end('headers');
if (-e "/nfs/WWW/NOLSF"){
    # The heading is now properly closed (it used to open a second <h2>).
    print "<br><br><br><br><center><h2>The BLAST service is temporarily unavailable.  <br>Please try again later.</h2></center><br><br><br><br>";
    print &EnsWeb::make_cgi_footer();
    &EnsWeb::ensembl_exit; 
}

if ($parameters{sequence}){
    $parameters{id}=submit_blast(\%parameters);
    # make sure STDOUT is the selected, unbuffered handle again after the
    # submission helpers have been writing to their own files
    select STDOUT;
    $|=1;
}

if ($parameters{id}) {
    # The finished output is stored under the bare id; while the job is
    # still running only "_<id>" exists.
    if (-e "$BLAST_HOME/$parameters{id}"){
	#&eprof_start('render_total');
        &render_blast($parameters{id}, $parameters{format}, $parameters{hit});
	#&eprof_end('render_total');
    }
    else {
        &blast_not_ready($parameters{id},$parameters{format},$parameters{sequence});
    }
}
else {
    print &show_blank_form;
}

print &EnsWeb::make_cgi_footer();

#&eprof_end('entire');
#&eprof_dump(\*STDERR);

1;

###############################################################################
#   Exit stage left, pursued by bear...
###############################################################################


###############################################################################
################################## SUBS #######################################
###############################################################################

################################################################
# Parse the form parameters, and return them in a hash
#
# Takes no arguments; reads every field through CGI::param.
# Returns a flat key/value list (assign it to a hash).
#
# The magic sequence values "llama", "camel" and "pelican" are
# replaced by built-in test sequences.  Values that later end up
# in a file name (id) or on a command line (altoptions) are
# stripped of dangerous characters here.
################################################################
sub parse_params{

    # NB: this used to read "my %params={};", which stored the stringified
    # hash reference as a stray "HASH(0x...)" key.
    my %params;

    ########################################################
    # because I got tired of typing in test sequences...
    ########################################################
    my $llama=<<EOL;
>LLAMA
aatttaaaataaagcaggacagtgttttcacagagaaccagtgagcca
atgagacaggtaagt
EOL

    my $camel=<<EOC;
>CAMEL
LNLLLAHWFSMKTLSCFILNSSLLFCSHRLLFFF
CHWLCSVLLKLPCPAFLYFRHWGSVAQWLRARAL
EOC

    my $ram=<<EOR;
>RAM
METTLPLPFLRRVSVPPGLNDIKEGLSREEVSCLGCVFFEVKPQTLEKILRFLKRHNV
EFEPYFDVTALESIDDIITLLDAGARKCLSRPSSWPTSPHMVPRCPHCHWKQRCFAFL
RHRERPFALRLRSDCLRGCTVSGGGQRQENYPLLHQARSWGRSRTVHPGRRQRNAIPI
LPSTGLTTKKDEAGKLAISTILSSVWKSDRPDGLLPTVVVDEHDTALGLVYSSAARVN
EALRTQTGVYQSRKRGLWYKGATSGDTQELVRISLDCDNDALKFVVKQKGRFCHLDQS
GCFGQLKGLPKLEQTLISRKQSAPEGSYTARLFSDEKLVRAKIMEEAEELCTAQTPQE
IAFEAADLFYFALTRAVAAGVTLADIERSLDAKSWKVKRRTGDAKGKWAERRASTLRR
RLAATSAPVTKEAAQETTPEKITMRRFDASKVSTEELDAALKRPAQKSPMPSTMIIVP
IIEDVRKNGDKAVLSYTHKFEKATSLTSPVLKAPFPKELMQLPEETIAAIDVSFENIR
KFHAAQKEEKPLQVETMPGVVCSRFSRPIEAVGCYIPGGTAVLPSTALMLGVPAMVAG
CNKIVFASPPRADGTITPEIVYVAHKVGAESIVLAGGAQAVAAMVRPESITKVDKILG
PGNQFVTAANMFVSNDTNAAVGIDMPAGPSEVLVIAHKDANPAFVASDLLSRAEHGVD
SQVILIAIDLDEEHLQAIEDEVHRQATELPHVQIVRGSIAHSITVQIKTVEEAMELSN
KYAPEHLILQIKEAEKAVDLVMNAVVFIGATPSAWHYSAGVNHSLPTYGFAKQYSGVN
LASFVKHITSSNLTAEGLKNVGQAVMQLAKVGARGSRRAVSIRLEHMSKSNSITKVLS
EOR

    $params{'sequence'} = CGI::param('sequence');

    # Shortcuts for the canned test sequences ("pelican" selects >RAM).
    if (defined $params{'sequence'}) {
        if ($params{'sequence'} =~ /^llama$/i){
            $params{'sequence'} = $llama;
        }
        elsif ($params{'sequence'} =~ /^camel$/i){
            $params{'sequence'} = $camel;
        }
        elsif ($params{'sequence'} =~ /^pelican$/i){
            $params{'sequence'} = $ram;
        }
    }

    # form field name => key used in the returned hash
    my @fields = (
        [ 'format'       => 'format'       ],
        [ 'hit'          => 'hit'          ],
        [ 'id'           => 'id'           ],
        [ 'blast_type'   => 'blast_type'   ],   # executable to use
        [ 'uploadfile'   => 'file'         ],   # => open filehandle
        [ 'address'      => 'address'      ],   # email address
        [ 'database'     => 'database'     ],
        [ 'align'        => 'align'        ],
        [ 'filtering'    => 'filtering'    ],
        [ 'repeatmasker' => 'repeatmasker' ],
        [ 'return'       => 'outputtype'   ],
        [ 'matrix'       => 'matrixtype'   ],
        [ 'expect'       => 'expect'       ],
        [ 'descriptions' => 'descriptions' ],
        [ 'sort'         => 'sorttype'     ],
        [ 'filter'       => 'filtertype'   ],
        [ 'gencode'      => 'gencode'      ],
        [ 'statistics'   => 'statistics'   ],
        [ 'altoptions'   => 'altoptions'   ],   # cmd line options
        [ 'histogram'    => 'histogram'    ],
    );
    foreach my $field (@fields) {
        my ($cgi_name, $key) = @$field;
        # scalar assignment: a multi-valued field must not spill extra
        # elements into the hash
        $params{$key} = CGI::param($cgi_name);
    }

    # Remove shell metacharacters for security.  The dollar sign is escaped
    # explicitly: an unescaped "$\" inside a pattern is interpolated as the
    # output record separator, so "$" used to slip through.  Brackets,
    # redirections and line breaks are stripped as well.
    if (defined $params{'altoptions'}) {
        $params{'altoptions'} =~ s/[`';\$"\@{}\\\[\]~&*!|()<>\r\n]//g;
    }

    # The id is used as a file name under the blast output directory, so
    # keep only characters that cannot leave that directory.
    if (defined $params{'id'}) {
        $params{'id'} =~ s/[^\w.\-]//g;
        $params{'id'} = '' if $params{'id'} =~ /\A\.*\z/;
    }

    # Clean up some params
    foreach my $key ('address', 'sequence') {
        next unless defined $params{$key};
        $params{$key} =~ s/^\s+//;          # remove leading whitespace
    }

    return %params;
}

################################################################
# Submit a new blast job.
#
# Takes a reference to the parameter hash built by parse_params
# and returns the retrieval id of the queued job.  The caller's
# hash is left untouched: all clean-up happens on a private copy.
################################################################
sub submit_blast{
    my ($request) = @_;
    my %job = %{$request};

    # Use the uploaded file when no sequence was typed in.
    determine_input(\$job{sequence}, \$job{file});

    # Clean the sequence in place; learn its FASTA name and type (D/P).
    my ($fasta_name, $seq_type) = check_and_clean_sequence(\$job{sequence});

    # Refuse combinations of program and sequence type that make no sense.
    trap_errors($seq_type, \$job{blast_type}, \$job{address});

    # Build the blast command-line options.  The helper receives one
    # reference per field, in exactly this order.
    my @option_fields = qw(
        sequence     blast_type  align       filtering
        filtertype   repeatmasker matrixtype expect
        descriptions histogram   sorttype    statistics
        gencode      altoptions
    );
    my $options = process_options(map(\$job{$_}, @option_fields));

    # Unique id for this job; also the base name of all its files.
    my $id = generate_id();

    # Write the query to disk as input for blast ...
    make_temp_file($id, $fasta_name, \$job{sequence});

    # ... and optionally replace it with a repeatmasked version.
    make_mask_file($id) if $job{repeatmasker} eq "yes";

    # Queue the job and note it in the server log.
    submit_browser_job($id, $options, \$job{blast_type}, \$job{database});
    write_log($id, $options, \%job);

    # Returning the id makes the main flow go on to the results page.
    return $id;
}


###############################################################
# Resolve which input to use
#
#   $sequence - reference to the pasted sequence text
#   $file     - reference to the upload filehandle
#
# If no sequence text was given but a file was uploaded, the whole
# file is read into $$sequence.  Otherwise nothing is changed.
###############################################################
sub determine_input {
    my ($sequence,$file) = @_;

    return unless $$sequence eq "" && $$file;

    # The upload used to be *assigned* rather than read, which left the
    # stringified handle in the sequence instead of the file contents.
    my $upload   = $$file;
    my $contents = do {
        local $/;           # slurp mode; restored automatically on exit
        # NOTE(review): presumably an older CGI.pm can hand back a symbolic
        # handle name instead of a handle object - TODO confirm
        no strict 'refs';
        <$upload>;
    };

    # Leave the sequence empty when nothing could be read; the length check
    # in check_and_clean_sequence then reports the problem.
    $$sequence = $contents if defined $contents;

    return;
}


################################################################
# Work out the sequence type and clean the sequence up
#
#   $sequence - reference to the raw query text; modified in place:
#               FASTA title line and whitespace removed, upper-cased,
#               characters invalid for the detected type removed.
#
# Returns (fasta_name, type): the word after the first ">" (or
# "UNKNOWN-QUERY"), and "D" when at least 70% of the residues are
# A/C/G/T, otherwise "P".
################################################################
sub check_and_clean_sequence {
    my $sequence = shift;

    # Capture in list context: a failed match gives undef instead of
    # whatever an earlier, unrelated match left behind in $1.
    my ($fasta_name) = $$sequence =~ />(\w+)/;
    if (!defined $fasta_name || $fasta_name eq ""){
        $fasta_name = "UNKNOWN-QUERY";
    }

    $$sequence =~ s/>.+\n//; # remove the title of a FASTA sequence
    $$sequence =~ s/\s//g;
    my $templength = length($$sequence);

    if ($templength < 1){
        &report_error_and_die("Sequence length is zero. Search aborted!");
    }

    $$sequence = uc($$sequence);

    # tr with an empty replacement list only counts, so neither the
    # sequence nor the caller's $_ is touched.
    my $total = ($$sequence =~ tr/ACGT//);

    if(($total/$templength) >= 0.70){
        # it's probably DNA
        $$sequence =~ s/[^ACGTUNRYKMBDHVSWNX]//ig;
        return($fasta_name,"D");
    }

    # it's prolly protein
    $$sequence =~ s/[^ARNDCEQGHILKMFPSTWYVXZ]//ig;
    return($fasta_name,"P");
}


###############################################################
# Sanity-check a submission before it is queued
#
#   $sequence_type - "D" (DNA) or "P" (protein), as worked out by
#                    check_and_clean_sequence
#   $program_ref   - reference to the blast executable name
#   $address_ref   - reference to the e-mail address (may be empty)
#
# Every failed check is reported through report_error_and_die.
###############################################################
sub trap_errors {

    my ($sequence_type, $program_ref, $address_ref) = @_;

    my $program    = $$program_ref;
    my $is_dna     = ($sequence_type eq "D");
    my $is_protein = ($sequence_type eq "P");

    # programs that need a protein / a DNA query respectively
    my %wants_protein = map { $_ => 1 } ("wutblastn", "wublastp");
    my %wants_dna     = map { $_ => 1 } ("wublastn", "wublastx");

    report_error_and_die("Error: No Blast executable specified!")
        if $program eq "";

    # an address is optional, but when given it must contain an "@"
    report_error_and_die("Error: Invalid e-mail address specified")
        if $$address_ref ne "" && $$address_ref !~ /\@/;

    report_error_and_die("You appear to have submitted a DNA sequence<BR> but you cannot perform this type of search ($program) using a DNA sequence")
        if $wants_protein{$program} && $is_dna;

    report_error_and_die("You appear to have submitted a protein sequence but you cannot perform this type of search ($program) using a protein sequence")
        if $wants_dna{$program} && $is_protein;

    report_error_and_die("You appear to have submitted a protein sequence<BR> but you cannot perform this type of search ($program) using a protein sequence")
        if $program eq "wutblastx" && $is_protein;

}


###############################################################
# Turn the form settings into a blast command-line option string
#
# All fourteen arguments are scalar references, in this order:
# sequence, blast_type, align, filtering, filter_type, repeatmasker,
# matrixtype, expect, descriptions, histogram, sorttype, statistics,
# gencode, altoptions.
#
# Side effects: an empty align is set to "100", an empty filtering
# to "off", and the filtering / repeatmasker state is appended to
# $BLAST_HEADER_TXT.
#
# Returns the option string.
###############################################################
sub process_options {
    my ($seq_ref,          $type_ref,    $align_ref,
        $filtering_ref,    $filter_type_ref,
        $repeatmasker_ref, $matrix_ref,  $expect_ref,
        $descriptions_ref, $histogram_ref,
        $sort_ref,         $statistics_ref,
        $gencode_ref,      $altoptions_ref) = @_;

    my $program       = $$type_ref;     # consulted several times below
    my $is_nucleotide = ($program eq "wublastn" || $program eq "blastn");

    # The option string is collected piece by piece and glued together
    # at the end; every piece carries its own spacing.
    my @pieces = ("-P$PROCESSORS -warnings");

    # Decrease the cut-off if the sequence is very small
    push @pieces, " S=10 " if length($$seq_ref) < 30;

    # number of results to display
    $$align_ref = "100" if $$align_ref eq "";
    push @pieces, " B=${$align_ref} ";

    $$filtering_ref = "off" unless $$filtering_ref;

    # low complexity filtering is never applied for the blastn programs
    if (!$is_nucleotide && $$filtering_ref eq "on") {
        push @pieces, $$filter_type_ref ? " -filter=$$filter_type_ref "
                                        : " -filter=seg";
        $BLAST_HEADER_TXT.="<P><B>Low complexity filtering enabled</B>";
    }
    else {
        $BLAST_HEADER_TXT.="<P><B>Low complexity filtering disabled</B>";
    }

    $BLAST_HEADER_TXT .= ($$repeatmasker_ref eq "yes")
        ? "<BR><B>Repeatmasker enabled</B><P>"
        : "<BR><B>Repeatmasker disabled</B><P>";

    # a scoring matrix only applies to the non-blastn programs
    if (!$is_nucleotide && defined($$matrix_ref)) {
        push @pieces, " -matrix=$ENV{'BLASTMAT'}/$$matrix_ref ";
    }

    # values equal to 10 / 500 / -sort_by_pvalue / -sump are not passed on
    push @pieces, " E=$$expect_ref"
        if defined($$expect_ref) && $$expect_ref != 10;
    push @pieces, " V=$$descriptions_ref"
        if defined($$descriptions_ref) && $$descriptions_ref != 500;
    push @pieces, " H=1"
        if defined($$histogram_ref) && $$histogram_ref eq "yes";
    push @pieces, " $$sort_ref"
        if $$sort_ref ne "-sort_by_pvalue";
    push @pieces, " $$statistics_ref"
        if $$statistics_ref ne "-sump";

    # genetic code: only used by blastx and tblastx
    if ($program =~ /blastx/) {
        my $code = (defined $$gencode_ref && $$gencode_ref != 1)
                 ? $$gencode_ref
                 : 1;
        push @pieces, " C=$code";
    }

    # free-form extra options, if they contain anything at all
    push @pieces, " $$altoptions_ref" if $$altoptions_ref =~ /\w+/;

    return join('', @pieces);

}


###############################################################################
# Generate an id for this blast job
#
# The id is the last three characters of the host name followed by a compact
# encoding of the process id and the last seven digits of the current time.
###############################################################################
sub generate_id {
    my $host_tail = substr(hostname(), -3);
    my $pid       = $$;
    my $digits    = $pid . substr(time, -7);

    # Two-digit codes that are written as a single letter.
    # NOTE: the first range holds 27 codes, so "36" maps to 'a', the second
    # range is shifted by one letter, and "65"/"66" have no letter at all.
    my @codes   = (10 .. 36, 40 .. 66);
    my @glyphs  = ('A' .. 'Z', 'a' .. 'z');
    my %letter_for;
    @letter_for{@codes} = @glyphs;

    # Split the digit string into pairs; a pair without a letter is kept
    # as it is.
    my @pairs  = unpack("A2" x (1 + length($digits) / 2), $digits);
    my $packed = join('', map { $letter_for{$_} || $_ } @pairs);

    return $host_tail . $packed;
}


#################################################################
# Write the query sequence to <BLAST_HOME>/<id>.tmp in FASTA format
#
#   $id         - job id (base name of the file)
#   $fasta_name - name to put on the ">" title line
#   $sequence   - reference to the cleaned sequence
#
# Any failure to create or completely write the file is reported
# through report_error_and_die.
#################################################################
sub make_temp_file {
    my ($id, $fasta_name,$sequence)=@_;

    my $path    = "$BLAST_HOME/$id.tmp";
    my $failure = "Error: Couldn't create blast input file, try again later";

    # three-argument open with a lexical handle: the file name can never
    # be mistaken for an open mode, and the handle cannot leak
    my $temp;
    unless (open($temp, '>', $path)) {
        &report_error_and_die($failure);
        return;
    }

    my $written = print {$temp} ">$fasta_name\n", "${$sequence}\n";

    # buffered write errors (e.g. a full disk) only show up at close
    my $closed = close($temp);

    &report_error_and_die($failure) unless $written && $closed;
    return;
}


#####################################################################
# Replace the query file of job $id with its repeat-masked version
#
# Runs $RM_BINARY on <BLAST_HOME>/<id>.tmp, removes the by-products,
# copies <id>.tmp.masked over <id>.tmp and deletes the .masked file.
# Failures are reported through report_error_and_die.
#####################################################################
sub make_mask_file {
    my $id=shift;

    my $query  = "$BLAST_HOME/$id.tmp";
    my $masked = "$query.masked";

    # call repeatmasker on the temp query file; the list form runs the
    # program directly, without a shell interpreting the file name
    system($RM_BINARY, $query);

    ## remove unwanted files
    unlink("$query.RepMask", "$query.RepMask.cat", "$query.masked.log");

    my $mask_fh;
    unless (open($mask_fh, '<', $masked)) {
        &report_error_and_die("Cannot open mask file:$!");
        return;
    }

    my $query_fh;
    unless (open($query_fh, '>', $query)) {
        &report_error_and_die("Cannot create temp file: $!");
        return;
    }

    my $copied = 1;
    while (defined(my $line = <$mask_fh>)) {
        $copied = 0 unless print {$query_fh} $line;
    }

    close($mask_fh);
    # write errors may only surface when the output handle is closed
    $copied = 0 unless close($query_fh);

    &report_error_and_die("Cannot create temp file: $!") unless $copied;

    unlink($masked);
    return;
}


#####################################################################
# use lsf to submit the blast job
#
#   $id         - job id
#   $options    - blast option string built by process_options
#   $blast_type - reference to the blast executable name
#   $database   - reference to the database name
#
# Creates the placeholder <BLAST_HOME>/_<id>, then queues a job that
# writes the blast output to it and renames it to <BLAST_HOME>/<id>
# when finished.  Failures are reported through report_error_and_die.
#####################################################################
sub submit_browser_job {
    my ($id,$options,$blast_type,$database)=@_;

    my $failure = "Blast job submission failed.  Please try again later.";

    my $touch;
    unless (open($touch, '>', "$BLAST_HOME/_$id")) {
        &report_error_and_die($failure);
        return;
    }
    close($touch);

    # Everything below ends up inside a shell command line and comes from
    # the web form, so strip whatever a shell would interpret.
    my $program = defined $$blast_type ? $$blast_type : "";
    my $db      = defined $$database   ? $$database   : "";
    my $opts    = defined $options     ? $options     : "";
    $program =~ s/[^\w.\-]//g;              # plain executable name
    $db      =~ s{[^\w./\-]}{}g;            # e.g. ensembl/ensembl.pep
    $opts    =~ s/[`'"\$\\;&|<>(){}!\r\n]//g;

    my $status=system("bsub -q $BLASTQUEUE  \"$program $db $BLAST_HOME/$id.tmp $opts >$BLAST_HOME/_$id ; mv $BLAST_HOME/_$id $BLAST_HOME/$id\" >/dev/null ");

    # (this used to call the misspelt, non-existent report_error_ana_die)
    &report_error_and_die($failure) unless $status == 0;
    return;
}



#####################################################################
# Log the submission to file
#
#   $id        - job id
#   $options   - blast option string
#   $paramhash - reference to the job's parameter hash
#
# Appends one record to <BLAST_HOME>/_blastserver_log; if that file
# cannot be opened the record goes to <BLAST_HOME>/<id>.failed_write_log
# instead.  Logging is best effort: if neither file can be opened the
# record is dropped without raising an error.
#####################################################################
sub write_log {
    my ($id,$options,$paramhash)=@_;
    my %parameters = %$paramhash;

    my $logsequence = defined $parameters{'sequence'} ? $parameters{'sequence'} : "";
    $logsequence =~ s/[\n\r\t ]//g;         # make sequence a single line

    my $source = (defined $parameters{file} && $parameters{file} ne "")
               ? "upload"
               : "box";

    # A record is a single line, so line breaks smuggled in through a form
    # field must not be able to start a forged record.
    my %field = (
        options    => $options,
        address    => $parameters{address},
        blast_type => $parameters{blast_type},
        align      => $parameters{align},
        database   => $parameters{database},
        requester  => $ENV{'REMOTE_HOST'},
    );
    foreach my $name (keys %field) {
        $field{$name} = "" unless defined $field{$name};
        $field{$name} =~ s/[\r\n]//g;
    }

    my $record = "\nPID=$$:Seq=$logsequence:SeqSrc=$source:"
               . "Tmpfile=$BLAST_HOME/$id.tmp:Opts=$field{options}:Addr=$field{address}:"
               . "Type=$field{blast_type}:Alnmts=$field{align}:Db=$field{database}:"
               . "Q=$BLASTQUEUE:Req=$field{requester}";

    # open as briefly as possible....
    my $log;
    unless (open($log, '>>', "$BLAST_HOME/_blastserver_log")) {
        open($log, '>', "$BLAST_HOME/$id.failed_write_log") or return;
    }
    print {$log} $record;
    close($log);
    return;
}


################################################################
# Call the formatter on the file
#
#   $filename  - name of the blast output file under $BLAST_HOME
#                (the job id)
#   $formatter - name of a format module; defaults to karyo_format
#   $hit       - passed through to the formatter's constructor
#
# Loads <formatter>.pm, constructs the formatter on the output file,
# then prints an image map plus image (if the formatter can
# generate_image) and the parsed results (if it can parse).
# Problems are reported through report_error_and_die.
################################################################
sub render_blast{
    my ($filename, $formatter,$hit)=@_;
    my $gifpath="$GIF_HOME/$filename.gif";
    my $gifurl="$GIF_HOME_URL/$filename.gif";
    my $filepath="$BLAST_HOME/$filename";

    my $obj_format;

    #######################################################
    # Load the required formatter, and quit if problematic
    #######################################################
    #&eprof_start('require');

    $formatter = "karyo_format" unless $formatter;

    # The name comes straight from the request and is turned into a file
    # to load, so accept a plain module name only (no paths).
    unless ($formatter =~ /\A[A-Za-z0-9_]+\z/) {
        &report_error_and_die("Error: Invalid output format specified");
        return;
    }

    eval {
        require ("$formatter.pm");
    };
    if ($@){
        my $errstring = "Error loading the formatter $formatter";
        &report_error_and_die($errstring);
    }

    #&eprof_end('require');
    #######################################
    # Now create our formatter, or quit...
    #######################################
    #&eprof_start('parse');
    eval {
        $obj_format = $formatter->new($filepath,$hit);
    };

    if ($@){
        my $errstring = "Could not parse blast output";
        if ($@ =~ m/^BLASTERR/){
            $errstring = "No hits resulted from this blast query";
        }
        &report_error_and_die($errstring);
    }

    #&eprof_end('parse');
    ###########################################################
    # Draw any image that the formatter wishes to present
    ###########################################################
    #&eprof_start('drawing');
    if ($obj_format->can("generate_image")){
        print qq(<map name="blastmap">\n);
        my $gif;
        eval {
           $gif=$obj_format->generate_image(\*STDOUT); 
        };
        if ($@){
            &report_error_and_die("Error generating image of the blast results: $@");
        }
        print "</map>\n";

        #########################
        # Save off the image
        #########################
        my $saved = 0;
        if (open(my $gif_fh, '>', $gifpath)) {
            binmode($gif_fh);               # image data, not text
            my $written = print {$gif_fh} $gif;
            $saved = 1 if close($gif_fh) && $written;
        }

        if ($saved) {
            print qq(<p align="center"><img align="center" border="0" src="$gifurl" usemap="#blastmap"></p>\n);
        }
        else {
            # no image tag for a file that was never written; the textual
            # results below are still shown
            warn "blastview: could not save image $gifpath: $!\n";
        }
    }

    ###################################################
    # Finally, parse the blast and dump it to the page
    ###################################################
    if ($obj_format->can("parse")){
        print "<pre>\n"; 
        eval {
            $obj_format->parse(\*STDOUT);
        };

        if ($@){
            &report_error_and_die("Error parsing the blast results: $@");
        }

        print "</pre>\n"; 
        print "<br><br>\n";
    }

    #&eprof_end('drawing');

}


################################################################
# Print the page shown when no results exist (yet) for a job
#
#   $id       - blast retrieval id
#   $format   - formatter name to carry along in the retrieval link
#   $sequence - true when the job has only just been submitted
#
# With a sequence, the retrieval id and an explanation of how to
# fetch the results are printed; without one, a "not ready" notice
# with a retry button.  $id and $format are HTML-escaped before
# they are written into the page.
################################################################
sub blast_not_ready {
    my ($id,$format,$sequence) = @_;

    # Both values can come straight from the request, so neutralise
    # anything that would be interpreted as markup.
    my %entity = (
        '&' => '&amp;',  '<' => '&lt;',  '>' => '&gt;',
        '"' => '&quot;', "'" => '&#39;',
    );
    foreach my $value ($id, $format) {
        $value = "" unless defined $value;
        $value =~ s/([&<>"'])/$entity{$1}/g;
    }

    if ($sequence){
	##################################################################
	# We have just submitted a sequence, and need some explanation of
	# how this works...
	##################################################################
    print <<EOP;
    <br>
    <table cellspacing="0" cellpadding="0" border="0" class="yellow1" width="200" align="center">
	<tr>
	    <td rowspan="7" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	    <td colspan="3" class="grey1"><img src="/gfx/blank.gif" width="200" height="1"></td>
	    <td rowspan="7" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	</tr>
        <tr>
	    <td colspan="3"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
 	<tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td align="center" class="yellow2" nowrap><span class="h4">&nbsp;&nbsp;BLAST RETRIEVAL ID&nbsp;&nbsp;</span></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
        <tr>
	    <td colspan="3"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td align="center"><a href="/perl/blastview?id=$id&format=$format"><large><b>$id</b></large></a></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
        </tr>
        <tr>
	    <td colspan="3"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
        <tr>
	    <td colspan="3" class="grey1"><img src="/gfx/blank.gif" width="200" height="1"></td>
        </tr>
	</table>
	<br>
	
	<table cellspacing="0" cellpadding="0" border="0" class="yellow1" width="400" align="center">
	<tr>
	    <td rowspan="7" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	    <td colspan="3" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	    <td rowspan="7" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	</tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td><img src="/gfx/blank.gif" width="1" height="11"></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
        </tr>
 	<tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td align="center" class="yellow2" nowrap><span class="h4">&nbsp;&nbsp;HOW TO RETRIEVE BLAST RESULTS&nbsp;&nbsp;</span></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
        <tr>
	    <td colspan="3"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td width="100%"><p>Your BLAST query has been added to the queue of BLAST jobs.</p><p>You can use the ID in the box above to retrieve the results when they are ready.  The majority of BLASTs are completed within ten minutes.</p><p>To retrieve your results, either click on the link above (or bookmark the link for later), or take a note of the retrieval ID and enter it on the <a href="/perl/blastview">BLAST page</a>.</p><p>Your BLAST results are kept on our servers for one week after you submitted the query, and you can retrieve them as many times as you wish during this period.  After a week you will have to re-submit the query if you wish to re-examine the results.</p></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td><img src="/gfx/blank.gif" width="1" height="11"></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
        </tr>
        <tr>
	    <td colspan="3" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
        </tr>
	</table>
	<br><br>	
EOP
    }
    else {
    print <<EOP;
	<br><br>
	<table cellspacing="0" cellpadding="0" border="0" class="yellow1" width="400" align="center">
	<tr>
	    <td rowspan="10" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	    <td colspan="3" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	    <td rowspan="10" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	</tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td><img src="/gfx/blank.gif" width="1" height="11"></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
        </tr>
 	<tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td align="center" class="yellow2" nowrap><span class="h4">&nbsp;&nbsp;SORRY, BLAST RESULTS NOT READY&nbsp;&nbsp;</span></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
        <tr>
	    <td colspan="3"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td width="100%"><p>The results of BLAST query <b>$id</b> are not yet ready.  You can use the button below to try again in a few minutes, or you can bookmark this page to try later.</p></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
        <tr>
	    <td colspan="3"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td align="center"><form NAME="blast_retrieve" METHOD="GET" ACTION="/perl/blastview"><INPUT TYPE="hidden" NAME="id" value="$id"><INPUT TYPE="hidden" NAME="format" value="karyo_format"><INPUT TYPE="submit" VALUE="Retrieve Results" class="red2"></form></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
        </tr>
        <tr>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
	    <td><img src="/gfx/blank.gif" width="1" height="11"></td>
	    <td><img src="/gfx/blank.gif" width="16" height="22"></td>
        </tr>
        <tr>
	    <td colspan="3" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
        </tr>
	</table>
	<br><br>
EOP
    }
}


sub show_blank_form {

return<<END;

<SCRIPT LANGUAGE="JavaScript">
<!-- hide from old browsers

// **********************************************************************
// FUNCTION: checkSequence( sequence, search_type )
//
// Checks to see if the submitted sequence agrees with the requested
// search type eg If it is a "Prot vs Prot" query does the sequence
// contain less then 85% "ACGTNX"
// The FORM will only be submitted if this sequence returns TRUE
// **********************************************************************

  function checkSequence( sequence, search_type )
  {

    var i = 0;
    var count = 0;
    var residue = "";
    var percentage;
    var def_line_end;
    var sequence_to_check;
    var spaces = 0;
    var bases = "ACGTNX";
    var base_found;
    var space_or_digits = "01234 56789"
    var space_or_digit_found;

// **********************************************************************
// i                    loop variable
// count                holds the cumulative number of "ACGTNX"
// residue              single residue in the sequence 
// percentage           the % of the sequence that is "ACGTNX"
// def_line_end         position of the end of the definition line
// sequence_to_check    sequence without the definition line
// spaces               number of spaces or digits found
// bases                valid list of bases
// base_found           was a valid base found?
// space_or_digits      invalid chars 
// space_or_digit_found was an invalid char found?
// **********************************************************************

    // Check to see if it is FASTA
    if ( sequence[0] == '>' )
    {
      def_line_end = sequence.indexOf('\\n');
      sequence_to_check = sequence.substring( ( def_line_end + 1 ), sequence.length );
    }
    else // Raw Text
    {
      sequence_to_check = sequence;
    }

    sequence_to_check = sequence_to_check.toUpperCase();

    for ( var i = 0; i < sequence_to_check.length ; i++ )
    {
      residue = sequence_to_check.charAt(i);

      // Find all the ACGTNX chars - valid bases
      // If it is not found the return value is -1
      base_found = bases.indexOf( residue );

      if ( base_found >= 0 )
      {
        count++;
      } 

      // Find all the 1234 56789 chars 
      space_or_digit_found = space_or_digits.indexOf( residue )
      if ( space_or_digit_found >= 0 )
      {
        spaces++;
      }
    }

    percentage = ( count / ( sequence_to_check.length - spaces ) ) * 100;

    // Ask the user to confirm that the sequence they supplied is correct
    // window.confirm returns a BOOLEAN value which is used as the FUNCTION
    // return value

    if ( ( search_type == "wublastp" || search_type == "wutblastn" ) && ( percentage > 85 ) )
    {
      return window.confirm( "A search requiring a PROTEIN query sequence has\\nbeen selected, however the sequence looks to be DNA.\\n\\nPlease confirm that it is a PROTEIN sequence." );
    }
    else if ( ( search_type == "wublastn" || search_type == "wublastx" ) && ( percentage < 85 ) )
    {
      return window.confirm( "A search requiring a DNA query sequence has\\nbeen selected , however the sequence looks to be PROTEIN.\\n\\nPlease confirm that it is a DNA sequence." );
    }
    else
    {
      return true;
    }

  } // END checkSequence



// **********************************************************************
// FUNCTION: setOtherMenus( chosen_option )
//
// Changes the text and value entries in the DATALIB and FILTER menus
// depending on what search type has been selected
// **********************************************************************

  function setOtherMenu( chosen_option )
  {

    // All the arrays below are made up of TEXT VALUE alternating pairs
    // eg "BLASTN (DNA vs. DNA)" = TEXT
    //    "wublastn"             = VALUE

    if ( chosen_option == "ensembl/ensembl.pep" || chosen_option == "ensembl/ensembl.genscan.fa" )
    {
    	// Search against a Prot data library
    	var new_options = new makeArray(
        "BLASTP (protein vs. protein)    ", "wublastp",
        "BLASTX (transl. DNA vs. protein)", "wublastx" );
    }
    else
    {
    	// Search against a DNA data library
    	var new_options = new makeArray(
    	"BLASTN (DNA vs. DNA)            ", "wublastn",
		"TBLASTN (protein vs. transl. DNA)", "wutblastn",
		"TBLASTX (transl. DNA vs. transl DNA)", "wutblastx" );
    }

    // Set new lengths for the menus which is 1/2 of the length of the option
    // arrays as they hold both text and value entries
    document.blast_form.blast_type.options.length = ( new_options.length / 2 );

    // Copy the data library options to the DATALIB menu
    for ( var i = 0; i < new_options.length; i+=2 )
    {
      document.blast_form.blast_type.options[i/2].text = new_options[i];
      document.blast_form.blast_type.options[i/2].value = new_options[i+1];
    }
    document.blast_form.blast_type.options[0].selected = true;

  } // END setOtherMenus



// **********************************************************************
// FUNCTION: makeArray
//
// Constructor for simple array-like objects.  Call it with "new": each
// argument is stored under its numeric position (0, 1, 2, ...) and the
// "length" property is set to the number of arguments received.
// **********************************************************************

  function makeArray()
  {
    // "arguments" is the local array-like object holding the values
    // passed to this call.  It replaces the deprecated
    // makeArray.arguments property, which cannot be read from
    // strict-mode code.

    var args = arguments;

    for ( var i = 0; i < args.length; i++ )
    {
      this[i] = args[i];
    }
    this.length = args.length;

  } // END makeArray



// end script hiding -->
</SCRIPT>

<A HREF="http://www.digital.com"><IMG ALIGN="RIGHT" src="/gfx/alpha.gif" BORDER="0"></A>
<h2 align="center">Ensembl BLAST Server</h2>

<center>
<table cellspacing="0" cellpadding="0" border="0" class="yellow1" width="450">
    <tr bgcolor="#ffffff"><td colspan="7"><img src="/gfx/blank.gif" width="1" height="10" alt=""></td></tr>
    <form NAME="blast_retrieve" METHOD="GET" ACTION="/perl/blastview">
	<tr>
		<td rowspan="7" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
		<td colspan="5" class="grey1"><img src="/gfx/blank.gif" width="300" height="1"></td>
		<td rowspan="7" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	</tr>
        <tr>
                <td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
 	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td colspan="3" align="center" class="yellow2"><span class="h4">RETRIEVE BLAST RESULTS</span>&nbsp;&nbsp; <a href="javascript:void(window.open('/perl/helpview?se=1&kw=blast#retrieve','helpview','width=400,height=500,resizable,scrollbars'));"><img src="/gfx/helpview/help.gif" border="0" align="absmiddle" alt="Help"></a></td>
  		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
        <tr>
	    <td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<td><small>Enter the blast retrieval ID: </small></td>
	  	<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left" nowrap><INPUT NAME="id" SIZE="20"><INPUT TYPE="hidden" NAME="format" value="karyo_format">&nbsp;&nbsp;<INPUT TYPE="submit" VALUE="Retrieve" class="red2"></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	</form>
        <tr>
	    <td colspan="5" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
        </tr>

  <tr bgcolor="#ffffff"><td colspan="7"><img src="/gfx/blank.gif" width="1" height="20" alt=""></td></tr>

  <form NAME="blast_form" METHOD="POST" enctype='multipart/form-data' ACTION="/perl/blastview" onSubmit="return checkSequence( this.sequence.value, this.blast_type.options[this.blast_type.options.selectedIndex].value )">
	<tr>
		<td rowspan="31" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
		<td colspan="5" class="grey1"><img src="/gfx/blank.gif" width="300" height="1"></td>
		<td rowspan="31" class="grey1"><img src="/gfx/blank.gif" width="1" height="1"></td>
	</tr>
        <tr>
                <td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
 	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td colspan="3" align="center" class="yellow2"><span class="h4">SUBMIT A BLAST QUERY</span>&nbsp;&nbsp; <a href="javascript:void(window.open('/perl/helpview?se=1&kw=blast#submit','helpview','width=400,height=500,resizable,scrollbars'));"><img src="/gfx/helpview/help.gif" border="0" align="absmiddle" alt="Help"></a></td>
  		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
        <tr>
	    <td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
	<tr valign="top">
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  		<td><small>Paste your sequence here in FASTA or plain text format.</small>
                <BR><BR>
                <center>
		  <INPUT TYPE="submit" VALUE="Search" class="red2">&nbsp;&nbsp;<input type="reset">
                </center></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  		<td align="left"><TEXTAREA NAME="sequence" ROWS="6" COLS="35"></TEXTAREA>
	    				 <INPUT TYPE="hidden" NAME="type" VALUE="blast">
	    				 <INPUT TYPE="hidden" NAME="format" VALUE="karyo_format">
	    				 <!-- INPUT TYPE="hidden" NAME="basehref" VALUE="http://us.ensembl.org/" --></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	  	<td><small><B>OR</B> select the sequence file you wish to search</small></td>
	  	<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left"><INPUT TYPE="FILE" NAME="uploadfile" SIZE="30"></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>

        <tr>
                <td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
        </tr>
 	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td colspan="3" align="center" class="yellow2"><span class="h4">BLAST OPTIONS</span>&nbsp;&nbsp; <a href="javascript:void(window.open('/perl/helpview?se=1&kw=blast#options','helpview','width=400,height=500,resizable,scrollbars'));"><img src="/gfx/helpview/help.gif" border="0" align="absmiddle" alt="Help"></a></td>
  		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
  	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left">Database</td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left">
            <SELECT NAME="database" onChange="setOtherMenu( this.options[selectedIndex].value )">
				<option value="ensembl/latestgp" selected>Latest Ensembl "golden path"</option>
				<option value="ensembl/ensembl.cdna"> Ensembl confirmed cDNAs</option>			    
				<option value="ensembl/ensembl.pep"> Ensembl confirmed peptides</option>
				<option value="ensembl/ensembl.genscan.fa" >Ensembl predicted peptides</option>
				<option value="ensembl/ensembl.genscan.cdna.fa" >Ensembl predicted cDNAs</option>
			</SELECT></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  	</tr>
 	<tr>	
  		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left">Executable</td>
  		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left">
		  <select name="blast_type">
		    <option value="wublastn" selected>BLASTN (DNA vs.DNA)</option>
		    <option value="wutblastn">TBLASTN (protein vs. transl. DNA)</option>
		    <option value="wutblastx">TBLASTX (transl. DNA vs. transl DNA)</option>
		  </SELECT>
		</td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>

		<td align="left" nowrap>Report <INPUT NAME="align" SIZE="3" VALUE="100"> alignments.</td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td align="left">
			<INPUT TYPE="checkbox" checked NAME="repeatmasker" VALUE="yes">&nbsp;&nbsp;Mask repetitive sequences using Repeatmasker.<br>
			<INPUT TYPE="checkbox" checked NAME="filtering" VALUE="on">&nbsp;&nbsp;<A href="/filterinfo.html">Filter</a> low complexity regions.<br>
			<INPUT TYPE="checkbox" NAME="histogram" VALUE="yes">&nbsp;&nbsp;Display histogram of score statistics.</td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>

  	<tr>
  		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  		<td colspan="3" align="center" class="yellow2"><span class="h4">ADVANCED BLAST
		OPTIONS</span>&nbsp;&nbsp; <a href="javascript:void(window.open('/perl/helpview?se=1&kw=blast#advancedoptions','helpview','width=400,height=500,resizable,scrollbars'));"><img src="/gfx/helpview/help.gif" border="0" align="absmiddle" alt="Help"></a></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
  	</tr>
	<tr>
		<td colspan="5"><img src="/gfx/blank.gif" width="1" height="11"></td>
	</tr>
	<tr>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
		<td colspan="3">
		<table width="100%" border="0" cellpadding="0" cellspacing="0" class="yellow1">
			<tr>
				<td><p align="right">Matrix</p></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><select name="matrix" size="1">
					<option value="BLOSUM30"> blosum30 </option>
					<option value="BLOSUM40"> blosum40 </option>
					<option value="BLOSUM50"> blosum50 </option>
					<option value="BLOSUM60"> blosum60 </option>
					<option selected value="BLOSUM62"> blosum62 </option>
					<option value="BLOSUM70"> blosum70 </option>
					<option value="BLOSUM80"> blosum80 </option>
					<option value="BLOSUM90"> blosum90 </option>
					<option value="BLOSUM100"> blosum100 </option>
					<option value="DAYHOFF"> dayhoff </option>
					<option value="DNA_MAT"> dna_mat </option>
					<option value="GONNET"> gonnet </option>
					<option value="IDENTITY"> identity </option>
					<option value="PAM30"> pam30 </option>
					<option value="PAM60"> pam60 </option>
					<option value="PAM90"> pam90 </option>
					<option value="PAM120"> pam120 </option>
					<option value="PAM150"> pam150 </option>
					<option value="PAM180"> pam180 </option>
					<option value="PAM210"> pam210 </option>
					<option value="PAM240"> pam240 </option>
				</select></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><p align="right">Expect (E)</p></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><select name="expect" size="1">
					<option value="0.0001"> 0.0001 </option>
					<option value="0.01"> 0.01 </option>
					<option value="1"> 1 </option>
					<option selected value="10"> 10 </option>
					<option value="100"> 100 </option>
					<option value="1000"> 1000 </option>
				</select></td>
			</tr>
			<tr>
				<td><p align="right">Descriptions</p></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><select name="descriptions" size="1">
					<option value="10"> 10 </option>
					<option value="50"> 50 </option>
					<option selected value="100"> 100 </option>
					<option value="250"> 250 </option>
					<option value="500"> 500 </option>
				</select></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><p align="right"><nobr>HSP score </nobr></p></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><select name="statistics" size="1">
					<option selected value="-sump"> sump </option>
					<option value="-poissonp">poissonp </option>
				</select></td>
			</tr>
			<tr>
				<td><p align="right"><nobr>Sort results by</nobr></p></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><select name="sort" size="1">
					<option selected value="-sort_by_pvalue"> pvalue </option>
					<option value="-sort_by_count"> count </option>
					<option value="-sort_by_highscore"> highscore </option>
					<option value="-sort_by_totalscore"> totalscore </option>
				</select></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><p align="right">Filter type</p></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td><select name="filtertype" size="1">
					<option selected value="seg"> seg </option>
					<option value="xnu"> xnu </option>
					<option value="seg+xnu"> seg+xnu </option>
				</select></td>
			</tr>
			<tr>
				<td colspan="7"><img src="/gfx/blank.gif" width="1"
				height="11"></td>
			</tr>
			<tr>
				<td align="right" nowrap>Genetic Code</td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td align="left"><select name="gencode" size="1">
					<option selected value="1"> Standard </option>
					<option value="2"> Vertebrate Mitochondrial </option>
					<option value="3"> Yeast Mitochondrial </option>
					<option value="4"> Mold Mitochondrial </option>
					<option value="5"> Invertebrate Mitochondrial </option>
					<option value="6"> Ciliate Nuclear </option>
					<option value="9"> Echinoderm Mitochondrial </option>
					<option value="11"> Eubacterial </option>
					<option value="12"> Alternative Yeast Nuclear </option>
					<option value="13"> Ascidian Mitochondrial </option>
					<option value="14"> Flatworm Mitochondrial </option>
				</select></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td align="left">&nbsp;(<i><nobr>blastx only</nobr></i>)</td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td align="center">&nbsp;</td>
			</tr>
			<tr>
				<td align="right" nowrap><I><B>other options</B></I></td>
				<td><img src="/gfx/blank.gif" width="16" height="22"></td>
				<td colspan="5" align="left"><INPUT NAME="altoptions" SIZE="35">
			&nbsp;(<i>not validated</i>)</td>
			</tr>
			<tr>
				<td colspan="7"><img src="/gfx/blank.gif" width="1"
				height="11"></td>
			</tr>
  		</table></td>
		<td><img src="/gfx/blank.gif" width="16" height="22"></td>
	</tr>
	<tr>
		<td colspan="5" class="grey1"><img src="/gfx/blank.gif" width="300" height="1"></td>
	</tr>
  </form>
</table>
</center>
<BR>

END

}


# Print an HTML error report on STDOUT, followed by the page footer from
# EnsWeb::make_cgi_footer, then hand control to EnsWeb::ensembl_exit.
#
#   $complaint - message shown to the user inside an <h3> heading.  It is
#                interpolated into the page as-is, with no HTML escaping.
#                NOTE(review): confirm that callers never pass raw user
#                input here, or escape it before calling.
sub report_error_and_die {

    my $complaint=shift;

    # Closes a <pre> element; presumably one is still open when an error
    # is reported part-way through the output -- TODO confirm.
    print STDOUT "</pre>";
    print STDOUT "<h3>$complaint</h3>\n";

    # The href needs the "mailto:" scheme; without it the address is
    # treated as a relative URL and the link leads nowhere useful.
    print STDOUT "<p>If the problem persists, please mail <a href=\"mailto:webmaster\@sanger.ac.uk\">webmaster\@sanger.ac.uk</a> with a description of the problem, including the database you were trying to blast, and the sequence you were trying to blast with.</p>";
    print &EnsWeb::make_cgi_footer();

    # Called with "&" and no parentheses, so whatever is left in @_ after
    # the shift above is passed on to ensembl_exit unchanged.
    &EnsWeb::ensembl_exit; 
}

