#!/usr/bin/perl
# -*-mode: perl-*-
# blast search script used at the CSHL site.
# This contains some hard-coded path names, which will
# need to be edited for your site.  By default, this
# script is not included among the other search scripts.
# Also see util/dump_genomic.pl for the script used to
# make the DNA dump searched by this script.
# $Id: blast,v 1.20 2002/04/19 03:51:55 lenny Exp $


use lib '../lib';
use strict;
use vars qw/$STOP_AT %BLAST_OPTS/;
use Ace;
use CGI qw/:standard escapeHTML Sub/; 
use IO::File;
use Ace::Browser::AceSubs qw(:DEFAULT Configuration AceHeader);
use Ace::Browser::SearchSubs;
use Ace::Browser::GeneSubs 'NCBI';
use GrameneSubs;
use GramenePage;
use LWP;
use CSHL::Config;
use DBI;

use vars '$DB';

$DB = OpenDatabase() || AceError("Couldn't open database.");

%BLAST_OPTS= ('blastn'   =>  [qw/-progress 2 -hspmax 10/],
	      'tblastn'  =>  [qw/-progress 2 -hspmax 10/],
	      'blastp'  =>  [qw/-progress 2 -hspmax 10/],
	      'blastx'  =>  [qw/-progress 2 -hspmax 10/]
	     );

my $cutoff   = defined param('cutoff')   ? param('cutoff')   : Configuration->Blast_cutoff;
my $max_hits = defined param('max_hits') ? param('max_hits') : Configuration->Blast_maxhits;

$cutoff = 9999 unless $cutoff=~/^[.eE\d-]+$/;

AceHeader();
my $page = GramenePage->new(Apache->request) || AceError("Can't open Gramene page configurator");

print start_html(-Title=>'Gramene Blast Search',
		 -Style  => {-src => $page->stylesheet},
		 -Target=>'_top',
		 -Class=>'search');

print $page->start_body;

my $done;

do_autoload() if param('autoload');

if (param('sequence') && param('action')=~/BLAST/) {
  # check that the search program matches the search database
  my %ok_databases = map { $_=>1 } @{Configuration->Blast_ok->{param('program')}};
  if ($ok_databases{param('db')}) {
    do_search( param('sequence'),param('name'),param('program'),param('db') );
    $done++;
  } else {
    print p({-class=>'error'},
	     "You have chosen an incompatible combination of search program and database. Please adjust."
	   );
  }
}

do_prompt() unless $done;
print  $page->end_body;

sub do_prompt {
  my $labels = Configuration->Blast_labels;
  my ($def_program,$def_db) = @{Configuration->Blast_default};
  print start_form(),
  table({-class=>'databody',width=>'100%'},
	TR(
	   th ({-class=>'searchtitle'},'BLAST Search', '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(', a({-href=>"/documentation/blast_help.html", -class=>'gopage'}, 'Help' ), ')')
	  ),
	TR(
	   td({-align=>'center',-class=>'searchbody'},
	      'Seq. name (Opt.):',
	      textfield(-name=>'name', -size=>'10'),
	      'Program: ',
	      popup_menu(-name=>'program',
			 -value=>[sort keys %BLAST_OPTS] ,
			 -default=>$def_program),
	      'Database: ',
	      popup_menu(-name=>'db',-value=>$labels,-default=>$def_db)
	     )
	  ),
	TR(
	   td({-align=>'center',-class=>'searchbody'},
	      'P value cutoff: ',
	      popup_menu(-name=>'cutoff',
			 -value=>[qw/1E-100 1E-50 1E-10 1E-5 0.001 0.01 0.1 NONE/],
			 -default=>$cutoff), 
	      '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Show max hits:',
	      popup_menu(-name=>'max_hits',
			 -value=>[qw/1 5 10 15 20 50 100 ALL/],
			 -default=>$max_hits,
			),
	     )
	  ),
	TR(
	   td({align=>'center',-class=>'searchbody'},
	      b('Sequence (raw or FASTA format):'),
	      br,
	      textarea(-name=>'sequence',-rows=>12,-cols=>40),
	      br,br,
	      reset(-name=>'Reset'), '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;',
	      submit(-name=>'action',-value=>'Run BLAST'),
	      br,br,
	     )

	  ),
       ),
	 end_form;

  my $valid_programs = Configuration->Blast_ok;
  print
  table(Tr({-class=>'datatitle'},
	   th ['Program','Search Seq','Database(s)']),
	Tr({-class=>'databody'},
	   td ['blastn','nucleotide','all EST databases, irgsp clones, rice bacends, indica scaffolds, <a href="http://www.tigr.org/tdb/ogi/">TIGR Rice Gene Index</a>']),
	Tr({-class=>'databody'},
	   td ['tblastn','protein','all EST databases, irgsp clones, rice bacends, indica scaffolds, <a href="http://www.tigr.org/tdb/ogi/">TIGR Rice Gene Index</a>']),
	Tr({-class=>'databody'},
	   td ['blastp','protein','Rice_Swissprot']),
	Tr({-class=>'databody'},
	   td ['blastx','nucleotide','Rice_Swissprot']),
       );
  print p '<b>To download the fasta database files please click <a href="ftp://www.gramene.org/pub/gramene/databases">here</a></b>.';
}

sub do_search {
  my ($sequence,$name,$program,$db) = @_;
  my @blast_results;
  my $path = Configuration()->Blast_bin . "/$program";
  my $useragent  ||= LWP::UserAgent->new;
 # my $request=HTTP::Request->new( POST=>'http://'.EnsemblServerName.'/perl/blast/remote_blast.pl');
 my $request=HTTP::Request->new( POST=>'http://'.RemoteBlastScript);
  $request->content('sequence='.$sequence.'&path='.$path.'&blastmat='.Configuration->Blast_matrix.'&blastfilter='.Configuration->Blast_filter.'&blastdb='.Configuration->Blast_db.'&program='.$program."&db=".$db);

  my $response=$useragent->request($request);
  if(  $response->is_success ) {
    @blast_results=split(/\n/,$response->content);
    print "<PRE>\n";
    while (defined(my $item=shift(@blast_results))) {
      chomp;
      print(escapeHTML($item),"\n");
      last if $item=~m/total letters/;
    }
    print "</PRE>\n";
    while (defined(my $item=shift(@blast_results))) {
      chomp;
      last if $item=~m/done/;
    }
    
    print h2('Results Summary',);
    my $blanks = 0;
    my $hits = 0;
    my $count = 0;
    my @rows=TR(th(['Sequence','Description','Details','High Score','P'. Sub('N'),'N']));
    my %align;

    while (defined(my $item=shift(@blast_results))) {
      chomp;
      $_=$item;

      $blanks = 0 if $_;

      last if !$_ && $blanks++ > 1;  # quit when we see two blank lines in a row (hack)
      next unless my($hit,$description,$score,$probability,$n) =
	/^(\S+)\s+(.+)\s+(\d+)\s+([0-9e.-]+)\s+(\d+)$/;
      $hits++;
      $description=~s/\s+$//; # trim whitespace
      $description='&nbsp;' unless $description; # trim whitespace
      
      unless ( ($count++ >= $max_hits) || ($probability > $cutoff) ) {
	$align{$hit}++;
	push(@rows,
	     TR(th({-align=>'LEFT'},a({-href=>seq_url($hit),-name=>$hit},$hit)),
		td($description),
		td(a{-href=>"\#${hit}_hsp"},'Alignment'),
		td($score),
		td($probability),
		td($n))
	    );
      }
      
    }
    
    print table({-border=>undef,-width=>'100%'},
		join("\n",@rows)),"\n" if $hits > 0;
    
    $count = scalar keys %align;
    print p(strong({-style=>'color: red'},"$hits hits total ($count shown)"));
    
    # return to search page button
    print start_form;
    print hidden($_) foreach qw(sequence name db program cutoff max_hits);
    print submit(-name=>'action',-value=>'Search Again'),
      end_form;
    
    # print the HSP alignments
    my $actual_alignments=join ("\n",@blast_results);
   # print $actual_alignments;
    @blast_results=split (/\n\n\n/,$actual_alignments);
    print hr,h2('Alignments') if $count > 0;
    while (defined(my $item=shift(@blast_results))) {
      $_=$item;
      chomp;
      next unless /^>/;
      print_hsp($_,\%align);
    }
    
  }
  else{
    print "no response from ",EnsemblServerName,"\n";
  }
}

sub print_hsp {
  my $data = shift;
  my $show = shift;

  my ($title,@paras) = split("\n\n",$data);
  my ($hit) = $title=~/^>(\S+)\s+/;
  return unless $show->{$hit};

  my $description = $';
  print h3(a({-href=>seq_url($1),-name=>"$1_hsp"},escapeHTML($hit)));
  # fix something in the elegans database that I don't like
  $description =~ s!/cds=!/cds_count=!g;
  $description = escapeHTML($description);
  $description =~ s/gb=(\w+)/'gb='.
                    a({-href=>NCBI . "?db=n&form=1&field=Sequence+ID&term=$1"},$1)/meg;
  $description =~ s/map=(\S+)/
                    'map=' . a({-href=>Object2URL($1,'Map')},$1)/meg;
  $description =~ s!/(\w+)=!b({-Style=>'color: red'},$1).'='!ge;
  print p({-class=>'small'},$description);
  print pre(escapeHTML(join("\n\n",@paras))),"\n";
  print a({-href=>"#$hit"},"Summary table"),hr;
  return 1;
}
sub seq_url{
  my $hit=shift;
  my $url;  
  if (param('db')eq'Rice_Genome_Japonica'){
    #parse out the accession
    $hit=~s/^.*\|(.*)\|.*/\1/;
    $hit=~s/^.*\|(.*)$/\1/;
    #cut the version numbers, that is AC02.2->AC02
    $hit=~s/(.*?)\.\d+/\1/;
    $url="/perl/contigview?clone=$hit";
  }
  elsif(param('db')eq'Osgi'){
    $url="http://www.tigr.org/docs/tigr-scripts/nhgi_scripts/tc_report.pl?tc=$hit&species=Rice";
  }
  elsif(param('db')eq'Rice_Bac_End' ){
    $hit=~s/.*\|(.*)\|.*/\1/;
    #cut the version numbers, that is AC02.2->AC02
    $hit=~s/(.*?)\.\d+/\1/;
    if(is_mapped($hit)){$url="/perl/unisearch?type=bacend&q=$hit&x=14&y=12";}
    else{$url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=Nucleotide&term=$hit";}
  }
  elsif(param('db')eq'Rice_Genome_Indica' ){
    $hit=~s/.*\|(.*)\|.*/\1/;
    #cut the version numbers, that is AC02.2->AC02
    $hit=~s/(.*?)\.\d+/\1/;
    $url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=Nucleotide&term=$hit";
  }
  elsif(param('db')=~m/EST/){
    #cut the version numbers, that is AC02.2->AC02
    $hit=~s/(.*?)\.\d+/\1/;
    if(is_mapped($hit)){$url="/perl/unisearch?type=bacend&q=$hit&x=14&y=12";}
    else{$url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=Nucleotide&term=$hit";}
  }
  elsif(param('db')eq'Rice_Swissprot'){
    $url="/perl/protein_search?word=$hit";
  }
  return $url;
}
sub is_mapped{
  my $feature_name=shift;
  my $mapped=0;
  my $dbh=DBI->connect(SequenceDataSource,SequenceDBUser,SequenceDBPassword);
  my $sql_statement='SELECT * FROM feature where name=?';
  my $sth=$dbh->prepare($sql_statement);
  $sth->execute($feature_name);
  my @data=$sth->fetchrow_array();
  if(@data){
    $mapped=1;
  }
  return $mapped;
}
sub to_tree {
  my $hit = shift;
  return Object2URL($hit,'Sequence');
}


sub do_autoload {
  my $object = param('autoload');
  my ($id,$db) = $object =~ /^Blast (\S+) against (\S+)/;
  return unless $id && $db;
  my ($obj) = $DB->fetch(Sequence => $id);
  return unless $obj;
  param(name => "$obj");
  param(db => $db);
  if ($db eq 'WormPep') {
    param(program => 'blastp');
    param(sequence => $obj->asPeptide);
  } else {
    param(program => 'blastn');
    param(sequence => $obj->asDNA);
  }
}


