#!/usr/local/bin/perl 
##############################################################################
#   
#   Name:           unisearch           
#   
#   Description:    A universal searcher for ensembl - a passed string is used
#                   to look up all types of ensembl object.  If only one match
#                   is found, the user is redirected to that object.  If more 
#                   than one match is found, the user is presented with a page
#                   of matches, sorted by type.
#                   The number of matches/type is limited.
#
#   Author:         jws
#
#   History:        2000-10-05  jws:    initial version
#                   2000-10-20  jws:    improved portability
#                   2000-11-29  jws:    added Protein Feature searches
#                   2000-12-05  jws:    Added golden checking for clone|contig
#                   2001-02-12  jws:    Added Protein Family searches
#                   2003-03-15  tmw:    Added EST searches
#
##############################################################################
package unisearch;
use SiteDefs;
#use SpeciesDefs; #SNP only
use CGI;
use DBI;
use EnsWeb;
use EnsEMBL::DB::Core;
use EnsEMBL::Web::HelpView;
use EnsEMBL::HTML::Page;
use Gramene::Protein::GetProteinData; 
use Gramene::Config;
#use Bio::GMOD::CMap::Config; #for cmap feature search (marker)
#use Bio::GMOD::CMap; #for cmap feature search (marker)

use Carp qw(cluck);

use DiseaseKwIndex;
use strict;

#use vars qw( $SPECIES_DEFS );
#$SPECIES_DEFS = SpeciesDefs->new();

#use constant CMAP_DATA_SOURCE =>Gramene::Config->new->get('ensembl')->{cmap_datasource};

# Upper bound on rows returned by any single search query; interpolated as
# the SQL "LIMIT" value by fudge_comparator(), search_disease() and
# search_protein().
use constant SAFETY_LIMIT => 1000;      # for queries

# Human-readable descriptions keyed by species name.
# NOTE(review): not referenced in the part of the file visible here --
# presumably used by the output code further down; confirm before removing.
my $Species_Description = { 
                     Zea_mays  => 'Zea mays FPC Map',
                     Zea_mays2 => 'Zea mays Sequenced BACs',
                };
  # use constant here leads to warnings
  #     "Constant subroutine unisearch::SPECIES_DESCRIPTION redefined "

# Turn on autoflush for the currently selected output handle so that any
# output already produced is not lost in a buffer if the script fails.
$|=1;   #in case of error

############################
# Retrieve the passed query
############################

# Raw query string as supplied in the CGI parameter 'q'.
my $q           = &CGI::param('q');
# Strip trailing spaces; when any were removed, store the trimmed value back
# into the CGI parameter (output_results() re-reads param('q') itself).
&CGI::param(-name=>'q',-value=>$q) if $q=~s/ +$//;
# Kind of object to search for: 'type' takes precedence over 'idx', and the
# default is 'all'.  The value is lower-cased.
my $search_type = lc(&CGI::param('type')|| &CGI::param('idx') || 'all');
# True when the 'table' parameter is set.  It is passed to output_results()
# and suppresses the single-match redirect further down.
my $table_only = &CGI::param('table') || 0;

#warn "perl/multi/unisearch:$q,$search_type,$table_only\n";

# Redundant with the assignment above (already lower-cased and defaulted).
$search_type = lc ($search_type);
$search_type ||='all';

# clean up input
# Leading whitespace is removed from the local copy only; unlike the
# trailing-space case it is not written back to the CGI parameter.
$q=~s/^\s*//;

unless ($q ne ''){
    # Nothing to search with, so forget it.
    # NOTE(review): the code that follows appears to rely on output_results()
    # not returning (it is not followed by an exit here) -- confirm.
    &output_results();
}

##################
# Escape Query
##################
# query is escaped in each search sub


# Paging defaults handed to every search sub.  Note that the search subs
# which call fudge_comparator() have both values overwritten there (offset
# emptied, limit replaced by the SAFETY_LIMIT clause).
my $offset  = 0;                            # offset from start of matches
my $limit   = 10;                           # max number of matches to return
unless ($search_type eq 'all'){$limit=30};

##########################
# Set up database handles
##########################



# Names of the Ensembl databases to open; only 'core' is requested.
my @databases = ('core');
#if( $SPECIES_DEFS->databases->{'ENSEMBL_SNP'} ){
#  push @databases, 'SNP';
#}
            #(Skip 'family', 'est', 'disease' )
# Hashref keyed by database name; an 'error' key is checked below.
my $databases = &EnsEMBL::DB::Core::get_databases( @databases );

# The protein database is optional: a failure to connect is logged and the
# 'protein' entry is left undefined, which makes the dispatch code below
# skip protein searches.
eval {
    my $pro=Gramene::Protein::GetProteinData->new();
    $pro->connect_to_ora( );
    $databases->{'protein'}=$pro->db;
    #warn  $ENV{GrameneConfPath}." +++ ".$pro->db->{Name};
};
if($@) {
    warn "protein db:$@";
    $databases->{'protein'}=undef;
}

# An error reported by get_databases() is handed to ensembl_exception().
# NOTE(review): presumably ensembl_exception() does not return -- the next
# statement dereferences $databases->{'core'} unconditionally; confirm.
&ensembl_exception("An error occurred while loading the database"
                           ,$databases->{'error'}) 
       if($databases->{'error'});
#warn "Non fatal error $databases->{'non_fatal_error'}\n" 
#        if($databases->{'non_fatal_error'});
##########################################################################
# Set up Ensembl DB connection for golden-path checking on clones/contigs
##########################################################################
# Slice adaptor passed to is_golden_static_clone() and output_results().
my $sa = $databases->{'core'}->get_SliceAdaptor();

######################
# Do all the searches
######################
# URL prefix for each result type.  The matched identifier is supplied
# separately to jump_to_result()/output_results() together with the prefix.
# The keys of this hash also define which search types are attempted for an
# 'all' search and which explicit search types are accepted.
my %urls   =(   gene        => '/'.$ENV{'ENSEMBL_SPECIES'}.'/geneview?gene=',
                chromosome  => '/'.$ENV{'ENSEMBL_SPECIES'}.'/mapview?chr=',
                clone       => '/'.$ENV{'ENSEMBL_SPECIES'}.'/contigview?clone=',
                contig      => '/'.$ENV{'ENSEMBL_SPECIES'}.'/cytoview?mapfrag=',
                domain      => '/'.$ENV{'ENSEMBL_SPECIES'}.'/domainview?domainentry=',  
                feature     => '/'.$ENV{'ENSEMBL_SPECIES'}.'/featureview?type=DnaAlignFeature&id=',
                marker      => '/'.$ENV{'ENSEMBL_SPECIES'}.'/markerview?marker=',
                peptide     => '/'.$ENV{'ENSEMBL_SPECIES'}.'/protview?peptide=',  
                pro_feature => '/'.$ENV{'ENSEMBL_SPECIES'}.'/protview?peptide=',  
                protein     => '/db/protein/protein_search?acc=',
                sequence    => '/'.$ENV{'ENSEMBL_SPECIES'}.'/contigview?region=',
                transcript  => '/'.$ENV{'ENSEMBL_SPECIES'}.'/transview?transcript=',
#                contig      => '/'.$ENV{'ENSEMBL_SPECIES'}.'/contigview?contig=',
#                disease     => '/'.$ENV{'ENSEMBL_SPECIES'}.'/diseaseview?disease=',
#                snp        => '/'.$ENV{'ENSEMBL_SPECIES'}.'/snpview?snp=',
#               est         => '/'.$ENV{'ENSEMBL_SPECIES'}.'/contigview?highlight=',  
#               family      => '/'.$ENV{'ENSEMBL_SPECIES'}.'/familyview?family=',  
);


        # Exons not searchable yet...
        #       exon        => '',
        #   );

# Name of the entry in $databases that each search type needs.  A type is
# only searched when that entry is true.  'pro_feature' and 'sequence' have
# a URL above but no entry here, so the 'all' loop skips them; they are
# result types that search_domain() and search_clone() can return instead.
my %dbneeded  =(   gene        => 'core',
#                   contig      => 'core',
                   clone       => 'core',
                   marker      => 'core',
                   transcript  => 'core',
                   chromosome  => 'core',
#                   snp        => 'SNP',
#                   disease     => 'disease',
                   domain      => 'core',
#                  family      => 'family',
                   peptide     => 'core',
                   feature     => 'core',
                   protein     => 'protein',
                   est         => 'core',
                   contig     => 'core',
);

# Result hashes keyed by result type; filled in by search_function().
my %matches;

# Total number of result rows over all types; updated by search_function().
my $match_count=0;

if ($search_type eq 'all'){
    # Try every type that has a URL and whose database is available.
    foreach my $type(keys %urls){
        $databases->{$dbneeded{$type}} or next;
        next if $type eq "protein" && $table_only; #Added for browser (to get rid of the duplicate protein search results).
        #warn( "$type search") ;
        &search_function( $databases, $type, \%matches, \$match_count, $q, $offset, $limit );
    }
} elsif ($search_type eq 'sequence'){
    # 'sequence' is a group of three searches rather than a searcher of its
    # own.  No database availability check is made on this path.
    foreach my $type( 'clone',  'chromosome', 'contig' ) { #'contig' is
                                                           # now misc_attrib
        &search_function( $databases, $type, \%matches, \$match_count, $q, $offset, $limit );
    }
} else {
    # A single explicit type: it must have a URL entry, otherwise the
    # "no results" page is produced.
    # NOTE(review): relies on output_results() not returning -- confirm.
    &output_results() unless ($urls{$search_type});
    if( $databases->{$dbneeded{$search_type}} ) {
        &search_function( $databases, $search_type, \%matches, \$match_count, $q, $offset, $limit );
    } else {
        #not available
        # The second argument normally carries the URL hashref; here it is
        # used to carry a message instead.
        &output_results(undef,$dbneeded{$search_type}." database not available",undef,$table_only);
                                #abusing that second argument
    }
}

#warn( "$match_count $table_only");

# Decide how to answer, based on the total number of matches:
#   no matches                    -> "no results" page
#   exactly one, not table mode   -> redirect straight to the matching object
#   anything else                 -> page listing the matches
if ($match_count==0){ ############## No matches
    &output_results(undef,undef,undef,$table_only);
} elsif ($match_count==1 && !$table_only){ ###### find the result and jump to the correct page
    foreach my $type (keys %matches){
        # Only the type that holds the single match is of interest.
        next unless scalar @{$matches{$type}{'results'}}==1;
#       warn "one match type=$type\n";
        if ($type eq 'clone') {
            # Clone rows are [accession, label, chromosome, start, end]
            # (see search_clone).
            my $pvalue=$matches{$type}{'results'}[0];
            if( is_golden_static_clone($sa,$pvalue->[0],$pvalue->[2])
                && $pvalue->[2]>0 ) {
                # Golden clone on a real (numeric) chromosome: jump to its
                # coordinates.  The end coordinate is passed as the second
                # argument, matching the (url prefix, value, type) shape of
                # the other jump_to_result() calls.
                &jump_to_result(
                    '/'.$ENV{'ENSEMBL_SPECIES'}.'/contigview?chr='
                       .CGI::escape($pvalue->[2])
                       .'&vc_start='.CGI::escape($pvalue->[3])
                       .'&vc_end=',CGI::escape($pvalue->[4]),'clone');
            } else {
                # Either not golden, or golden but without a usable
                # chromosome.  The latter case used to fall through every
                # branch and produce no output at all; show the match list
                # instead.
#               print STDERR $pvalue->[0], " not golden\n";
                &output_results(\%matches,\%urls, $sa,$table_only);
            }
        } elsif ($type eq 'gene'){
            # Gene ids from search_gene look like "stable_id:aliases";
            # keep only the part before the first colon.
            $matches{'gene'}{'results'}[0][0]=~s/([^:]*).*/$1/;
            &jump_to_result($urls{'gene'},$matches{'gene'}{'results'}[0][0],'gene');
        } else {
            &jump_to_result($urls{$type},$matches{$type}{'results'}[0][0],$type);
        }
    }
} else { ########################### display list of matches
    &output_results(\%matches,\%urls, $sa, $table_only);
}

# Run the searcher for one object type and record what it found.
#
# Arguments:
#   $databases       - hashref of database handles, passed to the searcher
#   $type            - search type; the searcher is the sub "search_$type"
#                      in this package
#   $matches_ref     - hashref that receives the searcher's result hash
#   $match_count_ref - reference to a running total, increased by the number
#                      of result rows
#   $q               - query string
#   $offset, $limit  - paging values, passed to the searcher
#
# A searcher may put a 'type' key in its result to have the result filed
# under a different type than the one requested.
#
# A type with no searcher is skipped with a warning rather than aborting the
# whole request with "Undefined subroutine".
#
# Returns the running total after the update.
sub search_function {
    my( $databases, $type, $matches_ref, $match_count_ref, $q, $offset, $limit ) = @_;

    # Look the searcher up by name without resorting to symbolic references.
    my $searcher = __PACKAGE__->can("search_$type");
    unless ($searcher) {
        warn "unisearch: no search routine for type '$type'\n";
        return ${$match_count_ref};
    }

    my $result = $searcher->($databases, $q, $offset, $limit);
    return ${$match_count_ref} unless ref $result;

    #allow search script to change type
    my $rtype = $result->{type} || $type;
    $matches_ref->{$rtype} = $result;
    ${$match_count_ref} += scalar @{ $result->{'results'} || [] };
    return ${$match_count_ref};
}
# Go quietly...

###############################################################################
################################### SUBS ######################################
###############################################################################
# Prepare a keyword and its paging arguments for use in SQL, IN PLACE.
#
# The sub works directly on @_, so the caller's own variables are changed:
#   $_[0]  keyword - every '*' becomes '%'
#   $_[1]  offset  - set to the empty string
#   $_[2]  limit   - set to "LIMIT <SAFETY_LIMIT>"
#
# Returns 'LIKE' when the keyword contained at least one '*', else '='.
sub fudge_comparator {
    my $wildcards_replaced = ( $_[0] =~ s/\*/\%/g );

    # The caller's offset/limit are ignored; only the safety cap is applied.
    ( $_[1], $_[2] ) = ( '', 'LIMIT ' . SAFETY_LIMIT );

    return $wildcards_replaced ? 'LIKE' : '=';
}

# Search DNA alignment features by hit name, plus QTL features by trait
# text or published symbol.
#
# Arguments: ($databases, $keyword, $offset, $limit); only the 'core' entry
# of $databases is used, and offset/limit are replaced by fudge_comparator().
#
# Steps:
#   1. match hit_name against the keyword as given
#   2. match qtl_helper search_text / published_symbol (errors are only
#      warned about)
#   3. if neither found anything, and the keyword does not already end in
#      ".<digits>" or an unescaped '%', retry as a prefix search, also in
#      upper case when that differs
#
# Returns { count => N, results => [ [hit_name, label], ... ] }.
sub search_feature {
    my ($databases, $keyword, $offset, $limit) = @_;

    # A lone '*' would match everything; restrict it to names starting 'A'.
    if ($keyword eq '*') {
        $keyword = 'A*';
    }
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = $dbh->quote($keyword);

    # Prefix form of the quoted keyword: '%' inserted before the closing quote.
    my $prefix_pattern = $keyword;
    $prefix_pattern =~ s/'$/%'/;

    #not all, since have hit_names like 'Barley_00014'
    my $keyword_upper        = uc $keyword;
    my $prefix_pattern_upper = uc $prefix_pattern;

    my $hit_sql = "
                SELECT distinct dna_align_feature.hit_name,analysis.db
                               ,analysis.logic_name
                FROM dna_align_feature  USE INDEX (hit_idx),analysis
                WHERE  hit_name $comparator $keyword
                AND dna_align_feature.analysis_id=analysis.analysis_id
                ORDER BY analysis.db,dna_align_feature.hit_name
                $limit
                ";
    my $hit_rows = $dbh->selectall_arrayref($hit_sql);

    # max(binary ...) will usually prefer trait_name to trait_symbol.
    my $qtl_sql =
           "SELECT distinct dna_align_feature.hit_name,'QTL'
                           ,concat( ifnull(max(qh.published_symbol),'')
                                ,' ' ,max(binary qh.search_text) )
            FROM dna_align_feature  ,qtl_helper qh
            WHERE  ( qh.search_text $comparator $keyword
                   OR  qh.published_symbol $comparator $keyword )
              AND  qh.dna_align_feature_id=
                    dna_align_feature.dna_align_feature_id
            GROUP BY dna_align_feature.hit_name
            $limit
            ";
    my $qtl_rows = [];
    eval {
        $qtl_rows = $dbh->selectall_arrayref($qtl_sql);
    };
    warn "qtl_helper:$@" if $@;

    my $nothing_found = !( scalar(@$hit_rows) || scalar(@$qtl_rows) );
    if ( $nothing_found && $keyword !~ /\.\d+'$|[^\\]\%'$/ ) {
        my $upper_clause = $keyword eq $keyword_upper
            ? ''
            : " OR hit_name LIKE $prefix_pattern_upper";
        my $prefix_sql =
               "SELECT distinct dna_align_feature.hit_name,analysis.db
                               ,analysis.logic_name
                FROM dna_align_feature  USE INDEX (hit_idx),analysis
                WHERE ( hit_name LIKE $prefix_pattern"
                . $upper_clause
                . ")
                AND dna_align_feature.analysis_id=analysis.analysis_id
                ORDER BY analysis.db,dna_align_feature.hit_name
                $limit
                ";
        $hit_rows = $dbh->selectall_arrayref($prefix_sql);
    }

    # Label is "name ( source )", or "name ( source detail )" when the third
    # column differs from the second.
    my @results;
    for my $row (@$hit_rows, @$qtl_rows) {
        my $label = "$row->[0] ( $row->[1]";
        $label .= " $row->[2]" unless $row->[1] eq $row->[2];
        $label .= " )";
        push @results, [ $row->[0], $label ];
    }

    return {
        count   => scalar(@results),
        results => \@results,
    };
}



# Search genes by stable id or external name, falling back to a search of
# gene descriptions when neither matches.
#
# Arguments: ($databases, $keyword, $offset, $limit); offset/limit are
# replaced by fudge_comparator().
#
# Returns { count => N, results => [ [id, text], ... ] }.  For id/name
# matches the id has the form "stable_id:alias1=alias2..." (aliases may be
# empty) and the text is the start of the gene description; for description
# matches the id is the plain stable id.
sub search_gene {
    my ($databases, $keyword, $offset, $limit) = @_;

    # Query adjustments for particular naming schemes.
    if ($keyword eq '*') {
        $keyword = 'A*';                #don't go crazy
    }
    elsif ( $keyword !~ /\*/ && $keyword =~ /^At \d g \d+/xi ) {
        $keyword .= '*';                #arabidopsis: will have suffix -TIGR-G or .version
    }
    elsif ( $keyword =~ /^Os \d\d g \d/xi ) {
        $keyword = "Loc_$keyword";      #TIGR rice
    }

    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    # The keyword is passed as a bind value, so it is not quoted here.
    my $dbh = $databases->{'core'}->get_db_adaptor('core')->dbc()->db_handle();

    my $id_sql =
        "(select stable_id gnen,'' external_name,substring(description,1,14)
                from  gene_stable_id left join gene 
                    on  gene.gene_id=gene_stable_id.gene_id
                where stable_id $comparator ?
         )
         UNION
         (
         select stable_id gnen, external_name ,substring(gd.description,1,14)
                from gramene_gene_ext_helper h left join gene gd
                                                on gd.gene_id=h.gene_id
                where h.external_name $comparator ?
         )
         ORDER BY gnen,external_name
         $limit
        ";
    my $id_rows = $dbh->selectall_arrayref( $id_sql, {}, $keyword, $keyword );

    # Collapse the rows to one entry per stable id, gathering its distinct
    # external names (other than the id itself).
    my (%description_of, %aliases_of);
    for my $row (@$id_rows) {
        my ($stable_id, $external_name, $description) = @$row;
        $description_of{$stable_id} = $description;
        if ( $external_name ne '' && $external_name ne $stable_id ) {
            $aliases_of{$stable_id}{$external_name} = 1;
        }
    }
    my @merged;
    for my $stable_id (sort keys %description_of) {
        my $aliases = $aliases_of{$stable_id}
            ? join( "=", sort keys %{ $aliases_of{$stable_id} } )
            : '';
        push @merged, [ $stable_id . ":" . $aliases, $description_of{$stable_id} ];
    }

    my $rows_ref = \@merged;
    unless (@merged) {
        #look through helper made from gene_description
        my $pattern = "%" . lc($keyword) . "%";
        $pattern =~ s/%%$/%/;
        $pattern =~ s/^%%/%/;
        $rows_ref = $dbh->selectall_arrayref(
          "select stable_id,  substring(egh.description,1,54)
                from gene_stable_id , ensembl_gene_helper egh
                where gene_stable_id.gene_id=egh.gene_id 
                  and egh.description like ?
                order by stable_id
                $limit", {}, $pattern );
    }

    my @results = map { [ $_->[0], $_->[1] ] } @$rows_ref;

    return { 'count' => scalar(@$rows_ref), 'results' => \@results };
}

# Search translation (peptide) stable ids.
#
# Arguments: ($databases, $keyword, $offset, $limit); offset/limit are
# replaced by fudge_comparator().
# Returns { count => N, results => [ [stable_id], ... ] }.
sub search_peptide {
    my ($databases, $keyword, $offset, $limit) = @_;
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = $dbh->quote($keyword);

    my $sql =
        "SELECT stable_id
           FROM translation_stable_id
          WHERE stable_id $comparator $keyword
          order by stable_id
          $limit";
    my $rows = $dbh->selectall_arrayref($sql);

    # One single-element row per matching id.
    my @results;
    for my $row (@$rows) {
        push @results, [ $row->[0] ];
    }

    return { 'count' => scalar(@results), 'results' => \@results };
}



# Search clones by EMBL accession or BAC name; if none match, search
# sequence regions by name instead.
#
# Arguments: ($databases, $keyword, $offset, $limit); offset/limit are
# replaced by fudge_comparator().  The quoted keyword is upper-cased.
#
# Returns, for clone matches,
#   { count => N, results => [ [acc, label, chromosome, start, end], ... ] }
# and for sequence-region matches,
#   { count => N, results => [ [name, "coord_system name"], ... ],
#     type => 'sequence' }
# so that the caller files the result under 'sequence'.
sub search_clone {
    my ($databases, $keyword, $offset, $limit) = @_;
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = uc( $dbh->quote($keyword) );

    #upper for bacname because cap of parts of names not consistent
    my $clone_rows = $dbh->selectall_arrayref(
          "SELECT distinct embl_acc,bacname,chromosome,clone_chr_start,clone_chr_end
             FROM seqtable_helper
            WHERE embl_acc $comparator $keyword
               OR upper(bacname) $comparator $keyword
         ORDER BY embl_acc,chromosome,clone_chr_start
         $limit
         ");

    my @clone_results;
    for my $row (@$clone_rows) {
        my ($acc, $bacname, $chromosome, $chr_start, $chr_end) = @$row;
        my $label = $acc." ($bacname)";
        # Position is only shown when a chromosome is recorded.
        $label .= " chr ".$chromosome." ".$chr_start."-".$chr_end if $chromosome;
        push @clone_results, [ $acc, $label, $chromosome, $chr_start, $chr_end ];
    }
    if (@clone_results) {
        return { 'count' => scalar(@clone_results), 'results' => \@clone_results };
    }

    # No clone matched: try the sequence regions.
    my $region_rows = $dbh->selectall_arrayref(
            "SELECT seq_region.name , coord_system.name
               FROM seq_region , coord_system
              WHERE seq_region.name $comparator $keyword
                AND seq_region.coord_system_id=coord_system.coord_system_id
              order by seq_region.name
                    $offset $limit");
    my @region_results = map { [ $_->[0], "$_->[1] $_->[0]" ] } @$region_rows;
    if (@region_results) {
        return { 'count'   => scalar(@region_results),
                 'results' => \@region_results,
                 'type'    => 'sequence' };
    }

    # Nothing found at all: empty clone result.
    return { 'count' => scalar(@clone_results), 'results' => \@clone_results };
}

# Search transcript stable ids.
#
# Arguments: ($databases, $keyword, $offset, $limit); offset/limit are
# replaced by fudge_comparator().
# Returns { count => N, results => [ [stable_id], ... ] }.
sub search_transcript {
    my ($databases, $keyword, $offset, $limit) = @_;
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = $dbh->quote($keyword);

    my $sql =
        "SELECT stable_id 
           FROM transcript_stable_id
          WHERE stable_id $comparator $keyword
          order by stable_id
          $limit
          ";
    my $rows = $dbh->selectall_arrayref($sql);

    # Wrap each id in its own row.
    my @results;
    push @results, [ $_->[0] ] for @$rows;

    return { 'count' => scalar(@results), 'results' => \@results };
}

# Search exon ids.
#
# Arguments: ($databases, $keyword, $offset, $limit); offset/limit are
# replaced by fudge_comparator().
# Returns { count => N, results => [ [id], ... ] }.
sub search_exon {
    my ($databases, $keyword, $offset, $limit) = @_;
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = $dbh->quote($keyword);

    my $rows = $dbh->selectall_arrayref(
        "SELECT id 
           FROM exon
          WHERE id $comparator $keyword 
          order by id
          $limit");

    my @results;
    foreach my $row (@$rows) {
        my $exon_id = $row->[0];
        push @results, [ $exon_id ];
    }

    return { 'count' => scalar(@results), 'results' => \@results };
}

# Search marker synonyms by name.
#
# Arguments:
#   $databases      - hashref of database adaptors; only 'core' is used
#   $keyword        - user query; '*' acts as a wildcard
#   $offset, $limit - replaced by fudge_comparator()
#
# Besides the keyword as typed, the search also tries:
#   * the upper-cased keyword (when that differs)
#   * "<anything>-KEYWORD"  - ssr names like RFjell-MRG0068, mwalton-MRG0032;
#                             everything else is uppercase
#   * "<anything>=KEYWORD" and "KEYWORD=<anything>"
#                           - marker names like RM225=RM584
#
# All decisions and patterns are derived from the UNQUOTED keyword and each
# pattern is quoted exactly once.  Previously the patterns were built from
# the already-quoted keyword, which embedded literal quote characters in
# them, and the leading/trailing '%' checks were made against the quoted
# string and so could never succeed.
#
# Returns { count => N, results => [ [name], ... ] }.
sub search_marker {
    my ($databases, $keyword, $offset, $limit)=@_;
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );
    my $dbh = $databases->{'core'}->dbc->db_handle;

    my $keyword_uc = uc($keyword);
    my $quoted     = $dbh->quote($keyword);
    my $quoted_uc  = $dbh->quote($keyword_uc);

    my @conditions = ( "name $comparator $quoted" );
    push @conditions, "name $comparator $quoted_uc" if $keyword ne $keyword_uc;

    # A keyword that already starts (or ends) with a wildcard covers the
    # corresponding prefixed (or suffixed) forms by itself.
    my $open_start = ( $keyword =~ /^%/ );
    my $open_end   = ( $keyword =~ /%$/ );

    if ( $keyword !~ /-/ && !$open_start ) {
        push @conditions, "name LIKE " . $dbh->quote("%-$keyword_uc");
    }
    if ( $keyword !~ /=/ ) {
        push @conditions, "name LIKE " . $dbh->quote("%=$keyword_uc")
            unless $open_start;
        push @conditions, "name LIKE " . $dbh->quote("$keyword_uc=%")
            unless $open_end;
    }

    my $query = "SELECT distinct name
               FROM marker_synonym
              WHERE " . join( "\n                 OR ", @conditions );
#    print STDERR "marker query $query\n";
    my $result_ref = $dbh->selectall_arrayref( $query."
                    $offset $limit" );

    my @results = map { [ $_->[0] ] } @{ $result_ref || [] };

    ################################## build data structure to return
    return { 'count'   => scalar(@results), 'results' => \@results };
}

# Search the configured chromosome names.
#
# Arguments:
#   $databases      - unused
#   $keyword        - chromosome name to look for; '*' matches any run of
#                     characters, everything else is matched literally
#   $offset, $limit - unused
#
# The keyword comes straight from the user, so every part of it other than
# '*' is escaped before it is used in a pattern.  Unescaped, input such as
# '(' aborted the script with a pattern compilation error and '.' silently
# acted as a wildcard.
#
# Returns { count => N, results => [ [name], ... ] } with names in the order
# given by the species configuration.
sub search_chromosome {
    my ($databases, $keyword, $offset, $limit)=@_;

    # Limit of -1 keeps trailing empty fields, so a trailing '*' survives.
    my $pattern = join '.*', map { quotemeta } split /\*/, $keyword, -1;
    my $matcher = qr/\A$pattern\z/;

    my @results=  map { [ $_ ] } grep { $_ =~ $matcher }
                 @{ EnsWeb::species_defs->ENSEMBL_CHROMOSOMES || [] };
    return { 'count' => scalar(@results), 'results' => \@results };
}

# Search miscellaneous-feature attributes ("contigs") by attribute value.
#
# Arguments: ($databases, $keyword, $offset, $limit); offset/limit are
# replaced by fudge_comparator().
#
# Returns { count => N, results => [ [name, label], ... ] }.  The label is
# the name alone when the matching attribute is 'name', otherwise
# "name ( attribute value)".
sub search_contig {
    my ($databases, $keyword, $offset, $limit) = @_;
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = $dbh->quote($keyword);

    #'superctg' not needed - all these values occur as 'name'
    my $sql = qq{
        select name,attrib_value,attrib_name from 
           misc_attrib_helper
           where attrib_value $comparator $keyword
           $offset $limit
        };
    my $rows = $dbh->selectall_arrayref($sql);

    my @results;
    for my $row (@$rows) {
        my $label = $row->[0];
        $label .= " ( $row->[2] $row->[1])" unless $row->[2] eq 'name';
        push @results, [ $row->[0], $label ];
    }

    return { 'count' => scalar(@results), 'results' => \@results };
}

# Seems pointless can just go to snpview  for id
#sub search_snp {
#    my ($databases, $keyword, $offset, $limit)=@_;
#    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );
#    $keyword= $databases->{'SNP'}->dbc->db_handle->quote($keyword);
#
##    my ($result_count)= $databases->{'SNP'}->db_handle->selectrow_array(
##        "SELECT COUNT(id) 
##           FROM RefSNP 
##          WHERE id $comparator $keyword");
#    my $result_ref = $databases->{'SNP'}->dbc->db_handle->selectall_arrayref(
#        "SELECT distinct id
#           FROM RefSNP  
#          WHERE id $comparator $keyword 
#                $offset $limit");
#    my @results= map {[$_->[0]]} @$result_ref;
#    
#    ################################## build data structure to return
##    return { 'count'   => $result_count, 'results' => \@results };
#    return { 'count'   => scalar(@results), 'results' => \@results };
#}



# Search InterPro domains.
#
# Arguments:
#   $databases      - hashref of database adaptors; only 'core' is used here
#   $keyword        - user query; '*' acts as a wildcard
#   $offset, $limit - replaced by fudge_comparator()
#
# Lookups are tried in order and the first that matches wins:
#   1. a keyword containing an InterPro accession (IPR + 6 digits) is
#      returned as-is without touching the database -- domainview works out
#      whether it exists
#   2. InterPro xrefs by accession or display label
#   3. InterPro xrefs by description
#   4. the interpro table by id
#   5. protein features, via search_pro_feature(); that result is returned
#      with type 'pro_feature' so the caller files it under that type
#
# Returns { count => N, results => [ [accession, "accession: label"], ... ] }
# where N is the total number of matching rows (the rows themselves are
# capped by the limit clause).  For case 1 the single row holds just the
# keyword.
sub search_domain {
    my ($databases, $keyword, $offset, $limit)=@_;
    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );

    #############################################
    # If we're an Interpro id then just return -
    # let domainview work out if it exists...
    #############################################
    # Checked BEFORE quoting: the id handed back is used to build a URL, so
    # it must not carry the SQL quote characters.
    return { 'count' => 1, 'results' => [[$keyword]] } if ($keyword=~/IPR\d{6}/);

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = $dbh->quote($keyword);
    #warn "search_domain(,$keyword,$offset,$limit) $comparator\n";

    my $interpro_xref =
        "FROM xref as x, external_db as e
          WHERE e.db_name = 'Interpro' and e.external_db_id = x.external_db_id and";

    # Each lookup: [ columns, FROM/WHERE part, trailing clauses ].
    my @lookups = (
        [ 'x.dbprimary_acc , x.display_label',
          "$interpro_xref
                (  x.dbprimary_acc $comparator $keyword
                or x.display_label  $comparator $keyword
                )",
          "ORDER by x.dbprimary_acc
             $limit" ],
        [ 'x.dbprimary_acc , x.display_label',
          "$interpro_xref
                x.description $comparator $keyword",
          "$offset $limit" ],
        [ 'interpro_ac,id',
          "FROM interpro
          WHERE id $comparator $keyword",
          "$offset $limit" ],
    );

    for my $lookup (@lookups) {
        my ($columns, $from_where, $tail) = @$lookup;

        my ($result_count) = $dbh->selectrow_array(
            "SELECT count(*)
           $from_where");
        next unless $result_count && $result_count > 0;

        my $result_ref = $dbh->selectall_arrayref(
            "SELECT $columns
           $from_where
             $tail");
        my @results = map { [ $_->[0], "$_->[0]: $_->[1]" ] }
                      @{ $result_ref || [] };
        return { 'count' => $result_count, 'results' => \@results };
    }

    #do  protein feature search & force result type
    my $res = search_pro_feature(@_);
    if ( $res && $res->{count} > 0 ) {
#        warn $res->{count}." pro features instead ofdomain\n";
        $res->{type} = 'pro_feature';
        return $res;
    }

    ################################## build data structure to return
    return { 'count' => 0, 'results' => [] };
}
   

# Search protein family stable ids.
#
# Arguments:
#   $databases      - hashref of database adaptors; the 'family' entry is used
#   $keyword        - user query; '*' acts as a wildcard
#   $offset, $limit - replaced by fudge_comparator()
#
# Returns { count => N, results => [ [stable_id], ... ] }; an empty result
# when no family database is available.
sub search_family {
    my ($databases, $keyword, $offset, $limit)=@_;

    # Check for the database BEFORE using it.  The check used to come after
    # the first method call on it, so a missing family database died with
    # "Can't call method ... on an undefined value" instead of returning an
    # empty result.
    return { 'count' => 0, 'results' => [] } unless($databases->{'family'});

    my ($comparator) = &fudge_comparator( $keyword, $offset, $limit );
    my $dbh = $databases->{'family'}->dbc->db_handle;
    $keyword= $dbh->quote($keyword);

    my $result_ref = $dbh->selectall_arrayref(
        "SELECT stable_id
                   FROM family
                  WHERE stable_id $comparator $keyword
                $offset $limit");
    my @results= map {[$_->[0]]} @{ $result_ref || [] };

    ################################## build data structure to return
    return { 'count'   => scalar(@results), 'results' => \@results };
}

# Search disease names through the disease keyword index.
#
# Arguments: ($databases, $keyword, $offset, $limit); the 'disease' entry of
# $databases is used, and offset/limit have no effect on the query.
#
# The quoted keyword is handed to a DiseaseKwIndex search -- as a regular
# expression search when it contains '*', otherwise as a boolean AND search.
# The ids that come back are then looked up in the disease table.
#
# Returns { count => N, results => [ [disease], ... ] }; an empty result
# when there is no disease database or the index finds nothing.
#
# NOTE(review): the database handle is reached in three different ways below
# (dbc->db_handle, _dbc->db_handle, _db_handle); left exactly as found --
# confirm which accessors the adaptor really provides.
sub search_disease {
    my ($databases, $keyword, $offset, $limit) = @_;

    my $disease_db = $databases->{'disease'};
    return { 'count' => 0, 'results' => [] } unless $disease_db;

    $keyword = $disease_db->dbc->db_handle->quote($keyword);

    my %search_args = ( words => $keyword );
    if ( $keyword =~ m/\*/ ) {
        $search_args{re} = 1;
    }
    else {
        $search_args{boolean} = 'AND';
    }

    my $index = DiseaseKwIndex->new(
        { dbh => $disease_db->_dbc->db_handle, index_name => 'disease_index' }
    );
    my $disease_ids = $index->search( \%search_args );

    #######################################################################
    # $disease_ids should now be a reference to an array of disease id
    # numbers, so we need to pull the disease names out by these...
    #######################################################################
    my @results = @$disease_ids;
    if (@results) {
        my $name_rows = $disease_db->_db_handle->selectall_arrayref(
            "SELECT distinct disease
               FROM disease 
              WHERE disease.id in (" .join(',', @$disease_ids).") 
              LIMIT ".SAFETY_LIMIT
        );
        @results = map { [ $_->[0] ] } @$name_rows;
    }

    return { 'count' => scalar(@results), 'results' => \@results };
}


# Search gene products by name, SwissProt accession or SwissProt id.
#
# Arguments:
#   $pdbh           - hashref of database handles; the 'protein' entry is
#                     used directly as a statement-preparing handle
#   $word           - user query; '*' acts as a wildcard
#   $offset, $limit - unused; the query is capped at SAFETY_LIMIT rows
#
# The query is upper-cased and always matched as a substring (wrapped in
# '%', with runs of '%' collapsed to one).
#
# Returns { count => N, results => [ [accession, label], ... ] } sorted by
# accession, then SwissProt id, then name.  The label reads
# "accession (swissprot_id): name", without the bracketed part when the id
# equals the accession.  Rows are keyed by gene product id, so a later row
# with the same id replaces an earlier one.
sub search_protein {
    my ($pdbh, $word, $offset, $limit) = @_;

    $word =~ s/\*/\%/g;
    $word = '%' . uc($word) . '%';
    $word =~ s/%%+/%/g;

    #  don't need UPPER( )  for mysql
    my $sql =
            "select 
               gene_product_id,gene_product_name,swissprot_acc,swissprot_id 
               from gene_product_helper
               where gene_product_name like ?
                  or swissprot_acc like ?
                  or swissprot_id like ?
               LIMIT ".SAFETY_LIMIT;
    my $sth = $pdbh->{'protein'}->prepare($sql);

    # The same pattern is bound to all three placeholders.
    for my $position (1 .. 3) {
        $sth->bind_param( $position, $word );
    }
    $sth->execute();

    my %product_for;
    while ( my ($product_id, $name, $acc, $swissprot_id) = $sth->fetchrow_array() ) {
        #                              0      1     2
        $product_for{$product_id} = [ $name, $acc, $swissprot_id ];
    }
    $sth->finish;

    my @ordered = sort {
               $a->[1] cmp $b->[1]
            || $a->[2] cmp $b->[2]
            || $a->[0] cmp $b->[0]
        } values %product_for;

    my @results;
    for my $product (@ordered) {
        my ($name, $acc, $swissprot_id) = @$product;
        my $label = $acc;
        $label .= " ($swissprot_id)" if $swissprot_id ne $acc;
        $label .= ": $name";
        push @results, [ $acc, $label ];
    }

    return {
        count   => scalar(@results),
        results => \@results,
    };
}




sub output_results{
    #######################################################################
    # Print the results page for a search and finish the request.
    #
    # Arguments:
    #   $match_hashref - { type => { count => N, results => [ ... ] } };
    #                    each result is handed to make_link() together
    #                    with the url prefix for its type.  False when
    #                    there are no matches to show.
    #   $url_hashref   - { type => url prefix } when $match_hashref is set.
    #                    Without $match_hashref, a true value here is
    #                    printed as an error message instead.
    #   $sa            - passed through to make_link()
    #   $table_only    - true: omit the page header, search heading and
    #                    footer, and skip result types with nothing to show
    #
    # With neither matches nor an error message a "No Results" page is
    # printed.  The sub always finishes by calling Apache::exit.
    #######################################################################
    my ($match_hashref,$url_hashref, $sa, $table_only)=@_;
    my $q=&CGI::param('q');
    $q = '' unless defined $q;

    # The search string comes straight from the request, so it must be
    # HTML-escaped before being echoed back into the page.
    my $q_html = CGI::escapeHTML($q);

    ####################
    # Print page header
    ####################
    unless($table_only) {
        if( defined( $ENV{'MOD_PERL'})){
            my $r = Apache->request();
            print CGI::header();
            $r->err_header_out('ensembl_headers_out'=>1);
            print EnsEMBL::HTML::Page->ensembl_page_header();
            $ENV{ENSEMBL_SCRIPT}='search';  #So Help links to correct page
            print EnsEMBL::HTML::Page::ensembl_search_table($q||"",&CGI::param('type'));
            print "<br>";
        }

        # Links for retrying the same search in every other species
        my $this_search="unisearch?q=".&CGI::escape($q);
        my $searches= join "&nbsp;&nbsp;", 
                        map { 
                              ('&nbsp;' x 3 ) .
                              CGI::a({href=> "/$_/$this_search"} , 
                                         $Species_Description->{$_} || $_ ) 
                            }
                          grep { $_ ne $ENV{ENSEMBL_SPECIES} } 
                      @{$SiteDefs::ENSEMBL_SPECIES};

        my $retry_rows = $searches ? qq(
            <tr>
                <td>&nbsp;</td>
                <td><span class="h5">Retry in $searches</span></td>
                <td><img src="/gfx/blank.gif" height="8" width="1"></td>
                <td>&nbsp;</td>
            </tr>
            <tr>
                <td colspan="4"><img src="/gfx/blank.gif" height="6"></td>
            </tr>) : qq();

        #######################
        # Print Search Heading
        #######################
        print qq(
    <table cellspacing="0" cellpadding="0" border="0" width="100%" class="background1">
        <tr>
            <td colspan="4" class="black"><img src="/gfx/blank.gif" width="1" height="1"></td>
        </tr>   
        <tr>
            <td colspan="4"><img src="/gfx/blank.gif" height="6"></td>
        </tr>
        <tr>
            <td>&nbsp;</td>
            <td><span class="h4">Search for "$q_html" in $ENV{'ENSEMBL_SPECIES'}</span></td>
            <td><img src="/gfx/blank.gif" height="8" width="1"></td>
            <td>&nbsp;</td>
        </tr>
        <tr>
            <td colspan="4"><img src="/gfx/blank.gif" height="6"></td>
        </tr>
            ).$retry_rows
        .qq(
        <tr>
            <td colspan="4" class="black"><img src="/gfx/blank.gif" width="1" height="1"></td>
        </tr>
    </table>
    );

    }   # end unless $table_only

    ##################################
    # Got matches, so display them...
    ##################################
    if($match_hashref){
        ######################
        # Print table heading
        ######################
        print( "&nbsp;" );      #why?
        print qq|<table border="0" width="100%" cellspacing="0" cellpadding="0">|;
        
        my %matches=%$match_hashref;
        my %urls=ref($url_hashref) ? %$url_hashref : ();

        my $spacer=qq(<td><img src="/gfx/blank.gif" width="5" height="22"></td>\n);
        
        foreach my $type(sort keys %matches){

            my @values=@{ $matches{$type}{'results'} || [] };

            # Fall back to the number of results when no count was supplied
            my $type_count=$matches{$type}{'count'} || scalar(@values);
            next if($table_only && !$type_count) ;

            #####################
            # Print type heading
            #####################
            # Only the number of results shown is reported; the older
            # "N of M results shown" text (and the disease browse link)
            # was always overwritten by this string before being printed.
            my $countstring = "(".scalar(@values)." result".
                              (scalar(@values)==1?"":"s").")";

            print qq|<tr class="background2">\n|;
            print qq|$spacer<td><b>\u$type</b></td>\n|;
            print qq(<td colspan=3 class="smarial">&nbsp;&nbsp;&nbsp;$countstring</td>);
            print "</tr>\n";

            #####################
            # Print Matches
            #####################
            if (scalar @values){

                my @td=map { 
                          qq(<td class="arial">).
                          make_link($sa,$urls{$type},$_,$type).qq(</td>)
                       } @values;

                # Two columns: the first half (the larger one when the
                # count is odd) goes on the left, the rest on the right.
                # The extra spacer pads the right column when it is short.
                my @td2=splice(@td,int($#td/2)+1);
                push @td2,$spacer;
                foreach my $td(@td){
                    print qq(<tr class="background1">$spacer$td$spacer).(shift @td2).
                        qq($spacer</tr>\n);
                }
            }
                    
            #####################
            # Print No Matches
            #####################
            else {
                print qq(
                    <tr class="background1">$spacer
                      <td colspan=3 class="smarial">&nbsp;&nbsp;&nbsp;No match for this search type</td>
                      $spacer
                    </tr>   
                                );
            }
        }
        #########################
        # Close up results table
        #########################
        print qq(
        <tr>
            <td colspan="5" class="black"><img src="/gfx/blank.gif" width="1" height="1"></td>
        </tr>
        </table>
        <br>
                );
    }
    elsif($url_hashref) {       #excuse me, it's an error message
        # NOTE(review): the message is printed as-is; confirm that callers
        # never put unescaped user input into it.
        print "<h3>$url_hashref</h3>\n";
    }
    else {
        # Help is looked up under the bare script name
        my $SELF   = qq(/$ENV{'ENSEMBL_SPECIES'}/$ENV{'ENSEMBL_SCRIPT'});
        $SELF =~ s|.*/(.*)|$1|;
        my $help_link = "";
        eval{
            $help_link = &EnsEMBL::Web::HelpView::helplink($SELF);
        };
        if ($@){
            # Fall back to the generic help page
            $help_link = "/$ENV{'ENSEMBL_SPECIES'}/helpview";    
            warn "Error in helpview link creation: $@\n";
        }
        print qq(
    <h3>Your Search has Returned No Results</h3>
    <p>You can try another search by typing a string in the box above.
        Try adding a "*" character to do a wildcard search.</p>
    <p>Click <A HREF="$help_link"><img border="0" align="top" alt="Click for help" src="/gfx/helpview/help.gif"></A>
        for more help on Ensembl identifier searches.</p>
        );
    }
    
    unless($table_only) {
        print EnsEMBL::HTML::Page::ensembl_page_footer();
    }
    Apache::exit;

}

sub search_pro_feature {
    #######################################################################
    # Find translations carrying a protein feature whose hit_id matches
    # $keyword.
    #
    # Arguments:
    #   $databases - hashref; the 'core' entry provides the db handle
    #   $keyword   - hit id to look for
    #   $offset, $limit - handed to fudge_comparator() and then
    #                interpolated at the end of the SELECT
    #                (NOTE(review): presumably fudge_comparator rewrites
    #                them in place into SQL fragments - confirm there)
    #
    # Returns { count => rows in protein_feature matching the hit id,
    #           results => [ [ stable_id, "hit_id in stable_id" ], ... ] }
    #######################################################################
    my ($databases, $keyword, $offset, $limit) = @_;
    my ($comparator) = fudge_comparator( $keyword, $offset, $limit );

    my $dbh = $databases->{'core'}->dbc->db_handle;
    $keyword = $dbh->quote($keyword);

    # Cheap count first; the join is only run when something matched.
    my ($total) = $dbh->selectrow_array(
        "SELECT count(*)
           FROM protein_feature
          WHERE hit_id $comparator $keyword");

    my $rows = [];
    if ( $total > 0 ) {
        $rows = $dbh->selectall_arrayref(
            "SELECT     tsi.stable_id,hit_id
               FROM translation_stable_id as tsi, protein_feature pf
              WHERE hit_id $comparator $keyword
                AND tsi.translation_id=pf.translation_id
                        $offset $limit
                ");
    }

    my @results = map { [ $_->[0], "$_->[1] in $_->[0]" ] } @$rows;

    ################################## build data structure to return
    return { 'count' => $total, 'results' => \@results };
}
   
sub search_est {
    #######################################################################
    # Locate an EST on the genome by its accession.
    #
    # Arguments:
    #   $databases - hashref; the 'est' entry provides the db handle
    #   $est_acc   - EST accession, with or without a ".version" suffix.
    #                Without one, the highest version found in
    #                dna_align_feature.hit_name is used.
    #   $offset    - accepted but not used
    #   $limit     - maximum number of rows: an integer (or
    #                "offset,count").  Anything else is ignored and no
    #                LIMIT clause is added.
    #
    # Returns { count => N, results => [ string, ... ] }, one string per
    # sequence region the EST hits, of the form
    #   <quoted accession>&contig=<region>&fpos_start=<min>&fpos_end=<max>&fpos_context=20000
    #######################################################################
    my ($databases, $est_acc, $offset, $limit) = @_;
    my $dbh = $databases->{'est'}->dbc->db_handle;
    $est_acc = '' unless defined $est_acc;

    # see if est acc has version
    if ( index($est_acc, '.') < 0 ) {
        # no version, get latest version.  The LIKE pattern is built from
        # the raw accession and quoted exactly once.
        my $like        = $dbh->quote("$est_acc.%");
        my $version_sql = "substring( hit_name, INSTR(hit_name, '.')+1 )";

        # Highest version first: compared numerically so that 10 beats 9,
        # then as text for suffixes that are not numbers.  No row at all
        # means the accession is unknown.
        my ($version) = $dbh->selectrow_array(
            "SELECT $version_sql
               FROM dna_align_feature
              WHERE hit_name LIKE $like
              ORDER BY $version_sql + 0 DESC, $version_sql DESC
              LIMIT 1" );

        # couldn't find max version, returning no results
        return { 'count' => 0, 'results' => [] } unless defined $version;

        $est_acc .= ".$version";
    }

    my $quoted_acc = $dbh->quote($est_acc);

    # $limit is interpolated into the SQL, so only accept plain numbers.
    my $limit_sql = '';
    if ( defined $limit && $limit =~ /\A\s*\d+(?:\s*,\s*\d+)?\s*\z/ ) {
        $limit_sql = "LIMIT $limit";
    }

    # get name and min/max position for the est on genome piece
    my $result_ref = $dbh->selectall_arrayref(
        "SELECT 
             seq_region.name,
             MIN(LEAST(seq_region_start, seq_region_end)),
             MAX(GREATEST(seq_region_start, seq_region_end))
         FROM 
             dna_align_feature,  seq_region
         WHERE 
             dna_align_feature.hit_name = $quoted_acc and
             seq_region.seq_region_id = dna_align_feature.seq_region_id
         GROUP BY
             seq_region.name
         $limit_sql");

    # got results, construct cgi param value
    # NOTE(review): the accession is emitted in its SQL-quoted form, as it
    # always has been; confirm whether the consumer really wants the
    # quote characters in the URL.
    my @results = map {
        "$quoted_acc&contig=" . $_->[0]
            . "&fpos_start=" . $_->[1]
            . "&fpos_end="   . $_->[2]
            . "&fpos_context=20000"
    } @{ $result_ref || [] };

    ################################## build data structure to return
    return { 'count' => scalar(@results), 'results' => \@results };
}


sub jump_to_result {
    # Redirect to the page for a single search hit.
    #   $url   - url prefix for the result type
    #   $value - identifier appended to $url
    #   $type  - result type; only 'disease' values are URL-escaped first
    # The value of CGI::redirect is passed back to the caller.
    my ($url, $value, $type) = @_;

    my $target = $url . ( $type eq 'disease' ? CGI::escape($value) : $value );
    return CGI::redirect($target);
}


sub make_link {
    #######################################################################
    # Build the HTML for one search hit.
    #
    # Arguments:
    #   $sa      - passed to is_golden_static_clone() for clone hits
    #   $urltype - url prefix the hit's value is appended to
    #   $pvalue  - array ref [ value, text, ... ]; text defaults to value.
    #              Clone hits also read elements 2..4 (chr, start, end).
    #   $type    - result type; 'clone', 'gene' and 'est' get special
    #              treatment, everything else is a plain link
    #
    # Returns the link (or plain text for an unavailable clone), or ""
    # with a stack-trace warning when $pvalue is not a reference.
    #######################################################################
    my ($sa, $urltype, $pvalue, $type) = @_;

    unless ( ref $pvalue ) {
        cluck("need array ref");
        return "";
    }

    my ($value, $text) = @$pvalue;
    $text = $value unless $text;

    my $anchor = sub {
        my ($href, $label) = @_;
        return qq(<a href="$href">$label</a>);
    };

    if ( $type eq 'clone' ) {
        # Only the part of the value before the first space names the clone
        my $clone = $value;
        $clone =~ s/ .*//;

        return "$text (unavailable - not on golden path)"
            unless is_golden_static_clone( $sa, $clone, $pvalue->[2] );

        # Without a usable chromosome fall back to the plain clone link
        return $anchor->( $urltype . CGI::escape($value), $text )
            unless $pvalue->[2] > 0;

        #use chr and position
        my $href = '/' . $ENV{'ENSEMBL_SPECIES'} . '/contigview?chr='
                 . CGI::escape( $pvalue->[2] )
                 . '&vc_start=' . CGI::escape( $pvalue->[3] )
                 . '&vc_end='   . CGI::escape( $pvalue->[4] );
        return $anchor->( $href, $text );
    }

    if ( $type eq 'gene' ) {
        # value is "internal:external"; link on the internal id and show
        # "external=internal" (or whichever single id is present)
        my ($int, $ext) = split( ":", $value );
        if ( $int && $ext ) {
            $ext = "$ext=$int";
        }
        else {
            $ext ||= $int;
        }
        $ext .= " $text" if $text ne $value;
        return $anchor->( $urltype . CGI::escape( $int || $ext ), $ext );
    }

    if ( $type eq 'est' ) {
        # value is "external:internal"; it goes into the url unescaped
        my ($ext, $int) = split( ":", $value );
        $ext = "$ext ($int)" if $int;
        return $anchor->( $urltype . $value, $ext );
    }

    return $anchor->( $urltype . CGI::escape($value), $text );
}


sub display_error_and_exit {
    # Log $error, print it as an error page and end the request.
    #   $error - message; warned to the error log and handed to
    #            ensembl_exception() for display
    # Finishes by calling ensembl_exit.
    my $error = shift;
    warn "unisearch:$error\n";
    ####################
    # Print page header
    ####################
    if( defined( $ENV{'MOD_PERL'})){
        my $r = Apache->request();
        # CGI is loaded without importing anything, so header() has to be
        # called by its full name, as output_results does.
        print CGI::header();
        $r->err_header_out('ensembl_headers_out'=>1);
        print EnsWeb::make_cgi_header(('initfocus'=>0, 'menus'=>0));
        # Same search box as output_results prints; the function name was
        # missing from this call.
        print EnsEMBL::HTML::Page::ensembl_search_table(&CGI::param('q')||"",&CGI::param('type'));

        print "<br>";
    }
    print &ensembl_exception($error);
    &ensembl_exit;
}

sub is_golden_static_clone {
    # Tell whether a clone can be linked to.
    #   $sa  - adaptor providing fetch_by_region()
    #   $acc - clone accession
    #   $chr - chromosome, when already known (from SeqTable)
    # Returns 1 when $chr is true or when the adaptor finds the region,
    # otherwise 0.
    my ($sa, $acc, $chr) = @_;

    return 1 if $chr;    #from SeqTable

    # A lookup can fail in two ways: by throwing, or by quietly returning
    # nothing.  Both mean the clone is not available.
    my $region;
    my $ok = eval {
        $region = $sa->fetch_by_region( undef, $acc );
        1;
    };
    return ( $ok && $region ) ? 1 : 0;
}

# Tidy up before you go...
# END {
#     $ensembl_dbh->disconnect if $ensembl_dbh;
#     $marker_dbh->disconnect if $marker_dbh;
#     $snp_dbh->disconnect if $snp_dbh;
#     $disease_dbh->disconnect if $disease_dbh;
# }

1;


=head1 NOTES

For clones, links use contigview?clone= for orphans and unique results;
otherwise they use contigview?chr= with &vc_start= and &vc_end=.

=cut

