#!/usr/local/bin/perl

package textview;

use SiteDefs;
use EnsWeb;
use Apache;
use CGI qw(:standard :form);
use AVIndexUtils;
use AltaVista::PerlSDK qw(
	AVS_OK
	avs_create_options
	avs_search
	avs_getsearchresults
	avs_search_getdata
	avs_errmsg
	avs_search_close
);

use strict;
my $AVINDEXROOT = '/mysql/ensembl/www/server/modules/AltaVista/ens100_index';
my $DEBUG = 0;
my $q = new CGI;
my $MODE = 'r';
my $query = $q->param('q');
my $num   = $q->param('num');
my $fmt   = $q->param('fmt');
my $page  = $q->param('page');
my $idx   = $q->param('idx');

if(defined $q->param('type')) {			# we came from the top search menu
	$idx  = ucfirst($q->param('type'));
}

$query =~ s/^\s+//;

if( defined( $ENV{'MOD_PERL'} ) ){
	my $r = Apache->request();
	$r->err_header_out('ensembl_headers_out'=>1);
}
print $q->header();
print EnsWeb::make_cgi_header();
#print EnsWeb::print_form('', "gene");

#opendir(IDXDIR, "$AVINDEXROOT") or die "Cannot open index directories\n";
#my @idx_list = sort grep !/^\.\.?/,readdir IDXDIR;
#closedir(IDXDIR);

my @idx_list = (qw( Gene.idx Clone.idx Contig.idx Peptide.idx Feature.idx Family.idx SNP.idx Marker.idx 
				Genscan.idx Domain.idx Disease.idx IPI.idx Chromosome.idx External.idx));
#print STDERR "@idx_list\n";

if ($query eq ""){ 
	print_form($query, $num, $fmt, $idx);
	print "<P><B>No search string specified!</B><P>\n";
	#print "Indexes: ", @idx_list;
	print_syntax();
	print &EnsWeb::make_cgi_footer();
	Apache::exit();
} elsif ($query =~ /^\w{1,2}[\*\?\.]+/){ 
	print_form($query, $num, $fmt, $idx);
	print "<P><B>Search string must have at least 3 characters before a wildcard!</B><P>\n";
	print_syntax();
	print &EnsWeb::make_cgi_footer();
	Apache::exit();
} elsif ($query =~ /^.+\*$/){ 
	$query =~ s/^(.*?)\*+$/$1\*\*/;
	print STDERR "<P><B>Search string modified!</B><P>\n";
	&do_search($AVINDEXROOT, \@idx_list, $query, $num, $fmt, $page, $idx);
}else {
	&do_search($AVINDEXROOT, \@idx_list, $query, $num, $fmt, $page, $idx);
}

print &EnsWeb::make_cgi_footer();

1;

#####################################################
## Search the index
## Returns 1 else -1 on error
#####################################################

sub do_search {

	my ($IDXROOT, $idx_list, $query, $num, $fmt, $page, $use_index) = @_;
	my ($options, $found, $returned, $termcount, $search, $status, $day, $month, $year);
	my $matches = 0;
	my $totdocs = 0;
	print_form($query, $num, $fmt, $use_index);
	

	my $Hidx = undef;
	my $d;
	my $index_name;
	my $f = 0;
	my $alldocs = 0;

	if ($use_index && $use_index ne "All"){	# single index search...
		my $maxdocs = 1000;     
		my $timeout = 100;
		$options = avs_create_options ($maxdocs,$timeout,1);
		my $idx = "$use_index.idx";
		eval{
			$Hidx = ens_avs_open("$IDXROOT/$idx") or die "Fatal: cannot open index: $idx\n";
			$status = avs_search($Hidx, "**", "", $options, $alldocs, $returned, $termcount, $search);
			if ($query =~ /AND|NOT|OR|NEAR|WITHIN|BEFORE|AFTER|WITHIN|ATLEAST/o){
				$status = avs_search($Hidx, "", $query, $options, $f, $returned, $termcount, $search);
			}else{
				print STDERR "Single index query: $query on $idx\n";
				$status = avs_search($Hidx, $query, "", $options, $f, $returned, $termcount, $search);
				print STDERR "found: $f\n";
			}
		};
		if ($@){
			print STDERR "ERROR: cannot search AV index $idx\n";
			return;
		}
		if ($f > 0){
			print qq(<P><B>$f</B> documents match your query <small><I>(Documents searched: $alldocs)</I></small><P>\n);
			full_index_search($search, $f, $query, $num, $fmt, $page, $use_index);
			$matches += $f;
		} else {
			# no matches
		}
		ens_avs_close($Hidx);

	} else {								# loop over all indexes...
		my $maxdocs = 5;  
		my $timeout = 100;
		$options = avs_create_options ($maxdocs,$timeout,1);
		foreach my $i (@$idx_list){
			eval{
				$Hidx = ens_avs_open("$IDXROOT/$i") or die "Fatal: cannot open index: $i\n";
				$status = avs_search($Hidx, "**", "", $options, $alldocs, $returned, $termcount, $search);
				if ($query =~ /AND|NOT|OR|NEAR|WITHIN|BEFORE|AFTER|WITHIN|ATLEAST/o){
					$status = avs_search($Hidx, "", $query, $options, $f, $returned, $termcount, $search);
				}else{
					print STDERR "Multi-index query: $query on $i\n";
					$status = avs_search($Hidx, $query, "", $options, $f, $returned, $termcount, $search);
					print STDERR "found: $f\n";
				}
			};
			if ($@){
				print STDERR "ERROR: cannot connect to AV index $i\n";
				next;
			}
			my $name = $i;
			$name =~ s/\.idx//;
			$found += $f;
			if ($f > 0){
				#print STDERR "Searching $i\n";
				single_index_search($search, $f, $name);
				$matches += $f;
			} else {
			# no matches
				$totdocs += $alldocs;
			}
			ens_avs_close($Hidx);
		}
	}
	if ($matches == 0){
		print qq(<P><B>Your query found no matches</B> <small><I>(Documents searched: $totdocs)</I></small><P>\n);
	}
	return;
}

########################################################################################################
sub full_index_search {

	my ($search, $f, $query, $num, $fmt, $page, $idx) = @_;

	my $pagemax = 20;				# number of hits to show on one page
	my $pagelinks;
	my $startdoc;
	my $status;
	my $maxdocs = $num;				# max no. of results to display
	$maxdocs ||= 100;				# default number of documents to display
	if ($maxdocs < $pagemax){
		$pagemax = $maxdocs;		# reduce in case only want 10 answers and page length is 20
	}
	if (!defined $page){
		$startdoc = 0;
	}else{
		$startdoc = $page;
	}
	if ($f%10 == 0 && $f%$pagemax == 0){ 
    	$pagelinks = int($f/$pagemax);
	}else{
    	$pagelinks = int($f/$pagemax)+1;
	}

	LOOP:for (my $i=$startdoc;$i<= ($startdoc+($pagemax-1));$i++){
		if ($i == ($startdoc+$pagemax)){ 		# fall through if reached document limit ($maxdoc)
	    	print qq(<P><BR><B>Reached limit of documents. 
			    	 You should consider refining your search query.</B><P>\n);
	    	return(1);
		}
		if ($i == $f){                          # fall through if reached document limit ($maxdoc)
        	unless ($f == 0){
				print "<P>End of results</P>";
            	if ($f > 20){
                	my $prevpage = $page - 20;
                	print qq(<A HREF="/perl/textview?idx=$idx&num=$num&fmt=$fmt&q=),
							&CGI::escape($query),
							qq(&page=$prevpage">[<B>&lt;&lt; Prev</B>]</A>&nbsp;\n</FONT>\n<BR>\n);
            	}
        	}
	    	return(1);
		}
		$status = avs_getsearchresults($search,$i);
		if ($status eq AVS_OK){
	    	my $header_info = avs_search_getdata($search);
	    	my %XML_tags = parse_XML_header($header_info);
			$XML_tags{'ABSTRACT'} = highlight_matches($XML_tags{'ABSTRACT'},$query);
			if ($XML_tags{'ABSLEN'} == 0){$XML_tags{'ABSLEN'} = "&lt;1";}
	    	my $count = $i+1;
	    	
			print qq(<BLOCKQUOTE>);
	    	print qq(<B>$count. $XML_tags{'TITLE'}: </B><A HREF="$XML_tags{'URL'}">$XML_tags{'NAME'}</A>);

	    	unless ($fmt eq "c"){ # want compact results so omit summary
		    	print qq(<BR><font size="-1" face="helvetica">$XML_tags{'ABSTRACT'}</font>);
	    	}

	    	print qq(<BR><img src = "/gfx/avshit.gif">&nbsp;\n);
	    	print qq(<font size="-1"><A HREF="$XML_tags{'URL'}">$XML_tags{'URL'}</A></font>);

	    	print qq(</P>);
			#print qq(<font size="-1" face="helvetica"> - Last Modified: <B>$XML_tags{'LASTMOD'}</B> - \n);
	    	#print qq(Size: <B>$XML_tags{'ABSLEN'}K</B></font></P>);

	    	print qq(</BLOCKQUOTE>);
		}else{
			my $error = avs_errmsg($status);
			print "Search error: cannot get search results: $error\n";
			return (-1);
		}

    } # end of for..

    $status = avs_search_close($search);

    #############################################
    ## Paging code here                        ##
    ## The vars are calculated above           ##
    #############################################

	#print "Pagemax: $pagemax\n";
	#print "Maxdocs: $maxdocs\n";
	#print "Found: $found<BR>\n";
	$query = &CGI::escape($query);

    unless ($f <= $pagemax or $pagemax >= $maxdocs){		# only show pagelinks if we have more than $pagemax results
        my $linksum =0;
		#print "<B>Results Pages</B>\n";
		print qq(<center>);
        if ($page >= 20){ # print the [<<prev] tag
            my $prevpage = $page - 20;
            print qq([<A HREF="/perl/textview?idx=$idx&num=$num&fmt=$fmt&q=$query&page=$prevpage"><B>&lt;&lt; Prev</B></A>]&nbsp;\n</FONT>\n);
        }
		for (my $j=1;$j<=$pagelinks;$j++){
	    	if ($j*$pagemax > $maxdocs){last;}
	    	if ($j*$pagemax-20 == $page){
		    	if ($j < 10){
			    	print qq([&nbsp;&nbsp;<B>$j</B>&nbsp;]);
		    	}
		    	else{
			    	print qq([&nbsp;<B>$j</B>&nbsp;&nbsp;]);
		    	}
	    	}
	    	else{
		    	if ($j < 10){
                	print qq(&nbsp;[&nbsp;<A HREF="/perl/textview?idx=$idx&num=$num&fmt=$fmt&q=$query&page=$linksum">$j</A>&nbsp]\n);
		    	}
		    	else{
                	print qq(&nbsp;[<A HREF="/perl/textview?idx=$idx&num=$num&fmt=$fmt&q=$query&page=$linksum">$j</A>]&nbsp;\n);
		    	}
	    	}
	    	$linksum = $j*$pagemax;
	    	last if ($j == 20);	# only display a max of 25 page links
		}
        my $nextpage = $page+20;
        if ($nextpage < $maxdocs){
            print qq([&nbsp;<A HREF="/perl/textview?idx=$idx&num=$num&fmt=$fmt&q=$query&page=$nextpage"><B>Next &gt;&gt;</B></A>]\n);
        }
        print "</font><BR>\n";
		print qq(<center>);
    }

} # end of sub


########################################################################################################
sub single_index_search {

	my ($search, $f, $iname) = @_;
	my $status;
	print STDERR "searching $iname\n";
	print qq(<BLOCKQUOTE><h3>$f matches in the Ensembl $iname index);
	print qq( [first 5 matches shown]) if $f > 5;
	print qq(:</h3></BLOCKQUOTE>\n);

	for (my $i=0;$i<=$f-1;$i++){
		last if ($i == 5);
		$status = avs_getsearchresults($search,$i);
		if ($status eq AVS_OK){
	    	my $header_info = avs_search_getdata($search);
			#print STDERR "$header_info\n";
	    	my %XML_tags = parse_XML_header($header_info);

			#foreach my $key (keys %XML_tags){
			#	print "$key ==> $XML_tags{$key}<BR>\n";
			#}

			$XML_tags{'ABSTRACT'} = highlight_matches($XML_tags{'ABSTRACT'},$query);
			if ($XML_tags{'ABSLEN'} == 0){$XML_tags{'ABSLEN'} = "&lt;1";}
	    	my $count = $i+1;
	    	print "<BLOCKQUOTE>";
	    	print qq(<B>$count. $XML_tags{'TITLE'}.</b> 
				<A HREF="$XML_tags{'URL'}">$XML_tags{'ENSACC'}</A></B>);
				
	    	unless ($fmt eq "c"){ # want compact results so omit summary
		    	print qq(<BR><font size="-1" face="helvetica">$XML_tags{'ABSTRACT'}</font>);
	    	}
	    	print qq(<BR><img src = "/gfx/avshit.gif">&nbsp;\n);
	    	print qq(<font size="-1"><A HREF="$XML_tags{'URL'}">$XML_tags{'URL'}</A></font>);

	    	print qq(</P>);
			#print qq(<font size="-1" face="helvetica"> - Last Modified: <B>$XML_tags{'LASTMOD'}</B> - \n);
	    	#print qq(Size: <B>$XML_tags{'ABSLEN'}K</B></font></P>);

	    	print qq(</BLOCKQUOTE>);
		}else{
			my $error = avs_errmsg($status);
			print "Search error ($i): cannot get search results: $error\n";
			return (-1);
		}

    } # end of for..
	if ($f > 5){
		&CGI::delete('idx'); # remove the old IDX parameter
		my $URL = &CGI::url(-query=>1) . qq(&idx=$iname);
	    print qq(<BLOCKQUOTE><P><B>[<A HREF="$URL">View more $iname matches...</A>]</B></P></BLOCKQUOTE>\n);
	    return(1);
	}

    $status = avs_search_close($search);

} # end of sub

# ########################################################################### #
# The XML is so small we might as well parse it ourselves...

sub parse_XML_header {
	my ($XML_string) = @_;
	#print "$XML_string\n\n";
	$XML_string =~ s/<\/?AV_IDX_HDR>//g;
	my %fields = ();
	$XML_string =~ s/<(.*?)>(.*?)<\/(.*?)>/$fields{$1}=$2/ieg;
	$fields{'ABSTRACT'} =~ s/ \*FIELD\* //g;
	$fields{'ABSTRACT'} =~ s/\*RECORD\*//g;
	return (%fields);
}

# ########################################################################### #
sub highlight_matches {

    my ($string, $query) = @_;
	my $color = "red";
	$query =~ s/WITHIN \d+//g; # This removes (eg) "WITHIN 5" clause from a search wordlist
	$query =~ s/ATLEAST \d+//g; # This removes (eg) "WITHIN 5" clause from a search wordlist
    $query =~ s/AND|NOT|OR|NEAR|BEFORE|AFTER|\(|\)|\*|\?|\"|\+//g; # remove AV-specific terms$
    foreach my $word (split(/\s+/,lc($query))){
		$word =~ s/\[/\\[/ig;	 # need to escape brackets in the regex or strangeness results...
		$word =~ s/\]/\\]/ig;    # (eg if a search string is: "[wbg104.7]"
		$word =~ s/\./\\./ig;    # (eg if a search string is: "[wbg104.7]"
		$string =~ s/($word)/<font color=\"$color\">$1<\/font>/ig;
    }
    return ($string);
} # end of sub
                
####################################3
sub print_syntax {

print qq(

<table align="center" width="600" border="0" cellspacing="0">
  <tr valign="top">
    <th class="yellow2">
      Quick Guide to Query Syntax
    </th>
  </tr>
  <tr valign="top">
    <td class="yellow1">
      <p>
	The Ensembl free-text search index contains database abstracts 
	from OMIM, Swissprot and Interpro.
      </p>
	wildcards are allowed after any 3 alphanumerics:<BR>
	? matches any trailing single characters<BR>
	** matches unlimited trailing characters.<BR>
	eg: ser*ip?t* could match serendipity<BR>
      </p>
    </td>
  </tr>
  <tr><td>&nbsp;</td></tr>

  <tr valign="top">
    <th class="yellow2">
      General Query Syntax
    </th>
  </tr>
  <tr valign="top">
    <td class="yellow1">
	<PRE>
	+clathrin (also -clathrin)
	clathrin AND vesicles
	clathrin AND NOT vesicles
	(clathin OR protein) AND vesicles
	clathrin NEAR vesicles
	clathrin WITHIN 2 vesicles
	</PRE>
    </td>
  </tr>
  <tr><td>&nbsp;</td></tr>

  <tr valign="top">
    <th class="yellow2">
      Case Sensitivity
    </th>
  </tr>
  <tr valign="top">
    <td class="yellow1">
	<PRE>
	adx (case insensitive search) or ADX (case sensitive)
	</PRE>
    </td>
  </tr>
  <tr><td>&nbsp;</td></tr>

  <tr valign="top">
    <th class="yellow2">
      Index Subsets
    </th>
  </tr>
  <tr valign="top">
    <td class="yellow1">
	To limit searches to a particular subset of the index prefix your query with: 
	<PRE>
	db:SWISSPROT AND (clathrin OR vesicles)
	  [subset identifiers OMIM, PDB and INTERPRO are also available]<BR> 
	</pre>
    </td>
  </tr>
  <tr><td>&nbsp;</td></tr>

  <tr valign="top">
    <th class="yellow2">
      Exact Matches
    </th>
  </tr>
  <tr valign="top">
    <td class="yellow1">
	<PRE>
	"Find this exact phrase"
	</PRE>
    </td>
  </tr>
</table>
</center>
<br />
);


}

####################################3
sub print_form {

my ($txt, $num, $fmt, $idx) 	= @_;

print qq(
	<BR><BR>
	<center>
	  <table border="0" cellpadding=0 cellspacing=0 width="545">
    	<tr>
    	  <td colspan="3" bgcolor="#aaaaaa" width="545"><img src="/gfx/blank.gif" height="1" width="1" border="0" ></td>
    	</tr>
    	<tr>
    	  <td bgcolor="#aaaaaa" width="2"><img src="/gfx/blank.gif" height="1" width="1" border="0"></td>
    	  <td class="yellow2" width="543">
        	<table border="0" class="yellow2" cellspacing="4" cellpadding="4" width="100%">
        	  <form name="IndexForm" method=GET action="/perl/textview">
            	<tr valign="top">
            	  <td width="100%">
                	&nbsp;&nbsp;<font size="-1" face="arial, helvetica">Display up to 
                	  <select name="num">
);

foreach (qw(20 40 60 100 500 1000)){
	if ($num == $_){
		print qq(<option value="$_" selected>$_</option>);
	} else {
		print qq(<option value="$_">$_</option>);
	}
}

print qq(
	</select>results in
	<select name="fmt">
);

if ($fmt eq "s"){
	print qq(
        <option value="s" selected>standard</option>
        <option value="c">compact</option>
	);
} elsif ($fmt eq "c"){
	print qq(
        <option value="s">standard</option>
        <option value="c" selected>compact</option>
	);
} else {
	print qq(
        <option value="s">standard</option>
        <option value="c">compact</option>
	);
}

print qq(
                	  </select>format
                	</font>
            	  </td>
            	</tr>
            	<tr valign="top">
            	  <td class="yellow2" align="center">
                	&nbsp;Search 
                	  <select name="idx">
);

#foreach (qw(All Gene Clone Contig Transcript SNP Feature Marker 
#			Genscan Domain Family Disease IPI IDArchive Chromosome External)){
foreach (qw(All Gene Clone Contig Peptide SNP Feature Family Marker 
			Genscan Domain Disease IPI Chromosome External)){

	if ($idx eq $_){
		print qq(<option value="$_" selected>$_</option>);
	} else {
		print qq(<option value="$_">$_</option>);
	}
}

print qq(
					  </select> indexes for:&nbsp;&nbsp;<input name="q" size="35" maxlength="800" value="$txt">&nbsp;&nbsp;
            	  </td>
            	</tr>
            	<tr>
            	  <td class="yellow2" align=right>
                	<img src="/gfx/altavista_icon.gif" alt="Powered By AltaVista" border="0" align=left>
			&nbsp;[<A href="/perl/textview">Help with AV query syntax</A>]&nbsp;
			<a href="javascript:void(window.open('/perl/helpview?se=1&kw=textview','textview','width=400,height=500,resizable,scrollbars'));">
			<img src="/gfx/helpview/help.gif" border="0" align="right" alt="Search Help"></a>                  
                	<input type=image src="/gfx/buttons/lookup.gif" border=0 alt="Search" name="search">
            	  </td>
            	</tr>
        	  </form>
        	</table>
    	  </td>
    	  <td bgcolor="#aaaaaa" width="1"><img src="/gfx/blank.gif" height="1" width="1" border="0"></td>
    	</tr>
    	<tr>
    	  <td colspan="3" bgcolor="#aaaaaa" width="545"><img src="/gfx/blank.gif" height="1" width="1" border="0" ></td>
    	</tr>
	  </table>
	</center>
	<BR><BR>
);

}
(1);


