#!/bin/perl -w

use strict;

#use CGI;
use CGI qw(:standard *table *TR *th *td *ul *ol);
use Gramene::Protein::GetProteinData; 
use Gramene::Page; 


# Request-scoped objects used by every output mode of this script.
my $page_title = 'Advanced Protein Search';
my $q          = CGI->new();
my $doc_root   = Apache->request->document_root;

my $text_ids = $q->param("text_ids");

if( $text_ids ) {

    # Plain-text mode: dump the id lists for one family.  Nothing else
    # (no header call, no page frame) is printed by this branch.
    ( my $pfam = $text_ids ) =~ tr/a-z/A-Z/;
    display_id_list( $pfam, $doc_root );

}
else {

    my $page = Gramene::Page->new( Apache->request );

    # table=1 requests the result tables only: the HTTP header, the page
    # frame, the navigation bar and the search form are all skipped.
    my $table_flag = $q->param("table");
    my $full_page  = !( $table_flag && $table_flag eq "1" );

    if( $full_page ) {

        print $q->header;
        print $page->start_html( -title => $page_title );
        print $page->start_body();

        # Navigation bar shared by the protein pages.
        print <<TOPBAR;
<table>
  <tr>
    <td>&nbsp;&nbsp;<img src="/images/icons/grain_icon.jpg" alt="grain_icon" height=16 width=16 align="top">
    </td>
    <td align="LEFT">
       &nbsp;<a href="/protein/index.html" class="gopage" ><b>Protein Home</b></a>&nbsp;
      |&nbsp;<a href="/db/protein/protein_search" class="gopage" ><b>Advanced Search</b></a>&nbsp;
      |&nbsp;<b>Search by Pfam or PROSITE</b>&nbsp;      
      |&nbsp;<a href="/protein/index.html#browse" class="gopage"><b>Browse by GO Slim</b></a>&nbsp; 
      |&nbsp;<a href="/protein/index.html#doc" class="gopage"><b>Documents</b></a>&nbsp;
      |&nbsp;<a href="/tutorials/proteins.html" class="gopage"><b>Tutorial</b></a>&nbsp;  
      |&nbsp;<a href="/fom/cache/1.html" class="gopage"><b>FAQ</b></a>&nbsp;
      |&nbsp;<a href="/documentation/protein_help.html" class="gopage"><b>Help</b></a>&nbsp;

    </td>
  </tr>
  <tr><td>&nbsp;</td></tr>  
</table>
TOPBAR

        # Example queries shown under the search box, separated by ",&nbsp;".
        my @example_links;
        foreach my $example ( qw(PF00281 Ribosomal_L5 PS50255 CYTOCHROME_B5_2) ) {
            push @example_links, ",&nbsp;" if @example_links;
            push @example_links,
                $q->a( {-href=>"/db/protein/protein_advanced?pfam_query=$example"}, $example );
        }

        # The form calls stay nested, in this order, inside the table() call
        # so every generator runs in the same order and context as before.
        print $q->table(
            {
                -border      => '0',
                -cellspacing => '0',
                -cellpadding => '3',
                -width       => '98%',
                -align       => 'CENTER',
            },
            $q->start_form( -method => 'GET' ),
            $q->Tr(
                { -align => 'CENTER', -nowrap => 'nowrap' },
                $q->th(
                    { -align => 'CENTER', -class => 'searchtitle' },
                    "Protein Database"
                )
            ),
            $q->Tr(
                {
                    -align  => 'CENTER',
                    -valign => 'CENTER',
                    -nowrap => 'nowrap',
                    -class  => 'searchbody',
                },
                $q->td(
                    { -align => 'CENTER' },
                    "<span class=\"alert\">Search by Pfam or PROSITE:</span>&nbsp;&nbsp;&nbsp;&nbsp;",
                    $q->textfield( -name => 'pfam_query', -size => 30, -maxlength => 60 ),
                    "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;",
                    $q->submit( -name => 'pfam_search', -value => 'Search' ),
                    $q->p( "[ e.g.", @example_links, "]" )
                )
            ),
            $q->end_form()
        );
    }

    # The rule is printed in both modes.
    print $q->hr();

    # Free-text search over Pfam/PROSITE ids and names.
    my $typed = $q->param("pfam_query");
    if( $typed ) {

        ( my $word = $typed ) =~ tr/a-z/A-Z/;

        # Drop the first run of non-word characters; if nothing is left the
        # query is treated as invalid.
        ( my $probe = $word ) =~ s/(\W+)//;

        if( !$probe ) {
            print $q->h2( {-align=>'CENTER'}, "Sorry! The input is not valid. Please try again." );
        }
        else {
            my $db = Gramene::Protein::GetProteinData->new();
            $db->connect_to_ora( );

            my ( $key_ref, $desc_ref, $dbname_ref ) = $db->search_pfam_all( $word );
            display_list( $q, $key_ref, $desc_ref, $dbname_ref, $typed );
        }
    }

    # Detail views, handled in this order: [ query parameter, lookup method
    # on the data object, sub that renders the protein table ].
    my @detail_views = (
        [ 'pfam',    'pfam_to_desc',    \&display_pfam    ],
        [ 'prosite', 'prosite_to_desc', \&display_prosite ],
    );

    foreach my $view ( @detail_views ) {

        my ( $param, $lookup, $render ) = @$view;

        my $id = $q->param( $param );
        next unless $id;
        $id =~ tr/a-z/A-Z/;

        my $db = Gramene::Protein::GetProteinData->new();
        $db->connect_to_ora( );

        my ( $key, $desc ) = $db->$lookup( $id );
        if( $key ) {
            $render->( $q, $key, $desc, $doc_root );
        }
        else {
            print $q->h2( {-align=>'CENTER'}, "Sorry! The search returned no results." );
        }
    }

    print $page->end_body if $full_page;

} # End else
#################################


# display_list( $q, $key_ref, $desc_ref, $dbname_ref, $original )
#
# Prints the result table of a Pfam/PROSITE text search.
#
#   $q          - CGI-style object used to generate every HTML tag
#   $key_ref    - array ref of matching identifiers
#   $desc_ref   - array ref of descriptions, parallel to $key_ref
#   $dbname_ref - array ref of source names, parallel to $key_ref; the
#                 lower-cased name is used as the query parameter of the
#                 link placed on each identifier
#   $original   - the search text as typed; each occurrence is highlighted
#                 (case-insensitively) in the ID and Name columns
#
# Prints a "no results" heading when there are no keys.  Returns nothing.
sub display_list {

    my( $q, $key_ref, $desc_ref, $dbname_ref, $original ) = @_;

    # Tolerate missing lists instead of dying on an undef dereference.
    my @keys    = @{ $key_ref    || [] };
    my @descs   = @{ $desc_ref   || [] };
    my @dbnames = @{ $dbname_ref || [] };

    unless( @keys ) {
        print $q->h2( {-align=>'CENTER'}, "Sorry! The search returned no results." );
        return;
    }

    my $header = ( @keys == 1 )
               ? "1 matching record has been found."
               : scalar( @keys )." matching records have been found.";

    print $q->table( {-border=>'0',
                      -cellpadding=>'2',
                      -width=>'98%',
                      -align=>'CENTER'},
                     $q->Tr( $q->th( {-class=>'resultstitle',
                                      -align=>'CENTER'},
                                     $header ) ) );

    # Tags are generated through $q so the sub does not depend on which
    # functions happen to be imported into the caller's namespace.
    print $q->start_table( {-width=>'98%', -cellpadding=>'4', -border=>'1', -align=>'CENTER'} );
    print $q->Tr( map { $q->td( {-class=>'resultstitle', -align=>$_->[0]}, $_->[1] ) }
                  ( [ 'CENTER', "#" ], [ 'LEFT', "Type" ], [ 'LEFT', "ID" ], [ 'LEFT', "Name" ] ) );

    # The search text comes from the user and must be matched literally:
    # interpolated raw, characters such as "(" or "[" abort the request with
    # a regex compile error and "." or "*" highlight the wrong text.
    my $needle = ( defined $original && length $original ) ? quotemeta( $original ) : undef;

    for my $i ( 0 .. $#keys ) {

        my $dbname = defined $dbnames[$i] ? $dbnames[$i] : '';
        ( my $param_name = $dbname ) =~ tr/A-Z/a-z/;

        my $shown_key  = $keys[$i];
        my $shown_desc = defined $descs[$i] ? $descs[$i] : '';

        if( defined $needle ) {
            $shown_key  =~ s/($needle)/<span class='matching'>$1<\/span>/ig;
            $shown_desc =~ s/($needle)/<span class='matching'>$1<\/span>/ig;
        }

        # Rows 1, 3, 5, ... carry the shaded 'resultsbody' class.
        my %shade = ( $i % 2 == 0 ) ? ( -class=>'resultsbody' ) : ();

        # The link uses the untouched key; only the visible text is marked up.
        my $link = $q->a( {-href=>"/db/protein/protein_advanced?$param_name=$keys[$i]"}, $shown_key );

        print $q->Tr( $q->td( { %shade, -align=>'CENTER' }, $i + 1 ),
                      $q->td( { %shade, -align=>'LEFT' }, $dbname ),
                      $q->td( { %shade, -align=>'LEFT' }, $link ),
                      $q->td( { %shade, -align=>'LEFT' }, $shown_desc ) );
    }

    print $q->end_table();
    return;
}


##################################


# display_pfam( $q, $pfam, $desc, $doc_root )
#
# Prints the table of proteins that belong to one Pfam family.
#
#   $q        - CGI-style object used to generate every HTML tag
#   $pfam     - family identifier; upper-cased and used to build the names
#               of the data files
#   $desc     - family description shown in the table title
#   $doc_root - directory that contains protein_curation/pfam/pfam10/
#
# Reads <doc_root>/protein_curation/pfam/pfam10/<ID>_full.txt.  Lines that
# are empty or start with "//" or "PF" are skipped.  The remaining lines are
# tab separated: protein id, accession, name, organism ("species   cultivar"
# plus one trailing character that is dropped), then zero or more GenBank ids.
#
# Dies when the identifier contains a path separator or the data file
# cannot be opened or closed.  Returns nothing.
sub display_pfam {

    my ( $q, $pfam, $desc,$doc_root ) = @_;
    $pfam =~ tr/a-z/A-Z/;

    # The id becomes part of a file name: never let it leave the data directory.
    die "invalid Pfam identifier\n" if $pfam =~ m{[/\\\0]};

    my $list_file = $pfam."_protein.txt";
    my $matrix_file = $pfam."_matrix.txt";
    my $full_file = $pfam."_full.txt";

    my $list = $q->a( {-target=>'text_file', -class=>'gopage', -href=>"/protein_curation/pfam/pfam11/$list_file"}, "protein ids(1)" );
    my $matrix = $q->a( {-target=>'text_file', -class=>'gopage', -href=>"/protein_curation/pfam/pfam12/$matrix_file"}, "matrix" );
    my $list_2 = $q->a( {-target=>'text_file', -class=>'gopage', -href=>"/db/protein/protein_advanced?text_ids=$pfam"}, "protein ids(2)" );

    # Title row with links to the downloadable lists.
    print $q->br();
    print $q->table( {-border=>'1',
                      -cellpadding=>'3',
                      -width=>'98%',
                      -align=>'CENTER'},
                     $q->Tr( $q->th( {-class=>'resultstitle',
                                      -align=>'CENTER'},
                                     "Proteins that belong to ", $q->a( {-target=>'pfam', -href=>"http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?$pfam"}, $pfam, "; ", $desc ), "&nbsp;&nbsp;&nbsp;&nbsp;(&nbsp;", $list, "&nbsp;)&nbsp;",
                                     "&nbsp;(&nbsp;", $list_2, "&nbsp;)&nbsp;",
                                     "&nbsp;(&nbsp;", $matrix, "&nbsp;)&nbsp;" ) ) );

    print $q->start_table( {-width=>'98%', -cellpadding=>'4', -border=>'1', -align=>'CENTER'} );

    print $q->Tr( map { $q->td( {-class=>'resultstitle', -align=>'CENTER'}, $_ ) }
                  ( "#", "Name", "Organism<br>(Cultivar)", "SPTrembl Ac", "GenBank id", "Extended<br>Pfam Info." ) );

    # Three-argument open on a lexical handle: the path is always treated as
    # a plain file name and the handle cannot clash with another PFAM handle.
    my $full_path = "$doc_root/protein_curation/pfam/pfam10/$full_file";
    open( my $fh, '<', $full_path ) or die "cannot open $full_file for reading: $!";

    my $index = 0;
    while( my $line = <$fh> ) {
        chomp $line;
        next unless $line;
        next if index( $line, "//" ) == 0;
        next if index( $line, "PF" ) == 0;

        my @fields = split( /\t/, $line );
        $index++;

        # GenBank ids start at column 5 and end at the first empty column.
        my @gis;
        for my $m ( 4 .. $#fields ) {
            last unless $fields[$m];
            push @gis, $fields[$m];
        }

        my $genbank_link = @gis
            ? join( ",&nbsp;&nbsp;<br>",
                    map { $q->a( {-target=>'genbank', -href=>"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=protein&list_uids=$_&dopt=GenPept"}, $_ ) } @gis )
            : "Not available";

        # Short lines leave columns undefined; treat them as empty so a
        # single incomplete record cannot abort the page half way through.
        my ( $p_id, $acc, $name, $orga ) = map { defined $_ ? $_ : '' } @fields[ 0 .. 3 ];

        # Drop the last character of the organism column.  chop is safe on an
        # empty string, unlike assigning to substr( $orga, -1, 2 ).
        chop $orga;
        my ( $species, $cultivar ) = split( /   /, $orga );
        $species  = '' unless defined $species;
        $cultivar = '' unless defined $cultivar;
        $orga = "<i>".$species."</i>"."<br>(".$cultivar.")";
        $orga =~ s/Unknown/Not available/ig;

        print $q->Tr( $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $index ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $q->a( {-target=>'to_gramene', -href=>"/db/protein/protein_search?protein_id=$p_id"}, $name ) ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $orga ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $q->a( {-target=>'swissprot', -href=>"http://us.expasy.org/cgi-bin/niceprot.pl?$acc"}, $acc ) ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $genbank_link ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $q->a( {-target=>'pfam', -href=>"http://www.sanger.ac.uk/cgi-bin/Pfam/swisspfamget.pl?name=$acc"}, $acc ) ) );
    }

    print $q->end_table();
    close( $fh ) or die "can't close $full_file: $!\n";
    return;
}


#########################################


# display_prosite( $q, $pfam, $desc, $doc_root )
#
# Prints the table of proteins that match one PROSITE entry.
#
#   $q        - CGI-style object used to generate every HTML tag
#   $pfam     - PROSITE identifier (the parameter keeps its historical
#               name); upper-cased and used to build the data file names
#   $desc     - entry description shown in the table title
#   $doc_root - directory that contains protein_curation/prosite/prosite10/
#
# Reads <doc_root>/protein_curation/prosite/prosite10/<ID>_full.txt.  Lines
# that are empty or start with "//" or "PS" are skipped.  The remaining
# lines are tab separated: protein id, accession, name, organism
# ("species   cultivar" plus one trailing character that is dropped), then
# zero or more GenBank ids.
#
# Dies when the identifier contains a path separator or the data file
# cannot be opened or closed.  Returns nothing.
sub display_prosite {

    my ( $q, $pfam, $desc,$doc_root ) = @_;
    $pfam =~ tr/a-z/A-Z/;

    # The id becomes part of a file name: never let it leave the data directory.
    die "invalid PROSITE identifier\n" if $pfam =~ m{[/\\\0]};

    my $list_file = $pfam."_protein.txt";
    my $matrix_file = $pfam."_matrix.txt";
    my $full_file = $pfam."_full.txt";

    my $list = $q->a( {-target=>'text_file', -class=>'gopage', -href=>"/protein_curation/prosite/prosite11/$list_file"}, "protein ids" );
    my $matrix = $q->a( {-target=>'text_file', -class=>'gopage', -href=>"/protein_curation/prosite/prosite12/$matrix_file"}, "matrix" );

    # Title row with links to the downloadable lists.
    print $q->br();
    print $q->table( {-border=>'1',
                      -cellpadding=>'3',
                      -width=>'98%',
                      -align=>'CENTER'},
                     $q->Tr( $q->th( {-class=>'resultstitle',
                                      -align=>'CENTER'},
                                     "Proteins that belong to ", $q->a( {-target=>'pfam', -href=>"http://kr.expasy.org/cgi-bin/nicesite.pl?$pfam"}, $pfam, "; ", $desc ), "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(&nbsp;&nbsp;", $list, "&nbsp;&nbsp;)&nbsp;&nbsp;",
                                     "&nbsp;&nbsp;(&nbsp;&nbsp;", $matrix, "&nbsp;&nbsp;)&nbsp;&nbsp;" ) ) );

    print $q->start_table( {-width=>'98%', -cellpadding=>'4', -border=>'1', -align=>'CENTER'} );

    print $q->Tr( map { $q->td( {-class=>'resultstitle', -align=>'CENTER'}, $_ ) }
                  ( "#", "Name", "Organism<br>(Cultivar)", "Swissprot/Trembl Acc.", "GenBank id" ) );

    # Three-argument open on a lexical handle: the path is always treated as
    # a plain file name and the handle cannot clash with another PFAM handle.
    my $full_path = "$doc_root/protein_curation/prosite/prosite10/$full_file";
    open( my $fh, '<', $full_path ) or die "cannot open $full_file for reading: $!";

    my $index = 0;
    while( my $line = <$fh> ) {
        chomp $line;
        next unless $line;
        next if index( $line, "//" ) == 0;
        next if index( $line, "PS" ) == 0;

        my @fields = split( /\t/, $line );
        $index++;

        # GenBank ids start at column 5 and end at the first empty column.
        my @gis;
        for my $m ( 4 .. $#fields ) {
            last unless $fields[$m];
            push @gis, $fields[$m];
        }

        my $genbank_link = @gis
            ? join( ",&nbsp;&nbsp;<br>",
                    map { $q->a( {-target=>'genbank', -href=>"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=protein&list_uids=$_&dopt=GenPept"}, $_ ) } @gis )
            : "Not available";

        # Short lines leave columns undefined; treat them as empty so a
        # single incomplete record cannot abort the page half way through.
        my ( $p_id, $acc, $name, $orga ) = map { defined $_ ? $_ : '' } @fields[ 0 .. 3 ];

        # Drop the last character of the organism column.  chop is safe on an
        # empty string, unlike assigning to substr( $orga, -1, 2 ).
        chop $orga;
        my ( $species, $cultivar ) = split( /   /, $orga );
        $species  = '' unless defined $species;
        $cultivar = '' unless defined $cultivar;
        $orga = "<i>".$species."</i>"."<br>(".$cultivar.")";
        $orga =~ s/Unknown/Not available/ig;

        print $q->Tr( $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $index ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $q->a( {-target=>'to_gramene', -href=>"/db/protein/protein_search?protein_id=$p_id"}, $name ) ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $orga ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $q->a( {-target=>'swissprot', -href=>"http://us.expasy.org/cgi-bin/niceprot.pl?$acc"}, $acc ) ),
                      $q->td( {-class=>'resultsbody',
                               -align=>'CENTER'}, $genbank_link ) );
    }

    print $q->end_table();
    close( $fh ) or die "can't close $full_file: $!\n";
    return;
}


###############################################

# display_id_list( $pfam, $doc_root )
#
# Prints, as plain text, the identifiers recorded for one Pfam family:
# the Gramene ids (one per line), the SPTrEMBL accessions (joined by " | ")
# and the GI numbers (one line per protein, comma separated, a single space
# when the protein has none).
#
#   $pfam     - family identifier; upper-cased and used to build the name
#               of the data file
#   $doc_root - directory that contains protein_curation/pfam/pfam10/
#
# Reads <doc_root>/protein_curation/pfam/pfam10/<ID>_full.txt, skipping
# lines that are empty or start with "//" or "PF".
#
# Dies when the identifier contains a path separator or the data file
# cannot be opened or closed.  Returns nothing.
sub display_id_list {

    my ( $pfam,$doc_root ) = @_;
    $pfam =~ tr/a-z/A-Z/;

    # The identifier arrives straight from the query string and becomes part
    # of a file name: a "/" or "\" would let the request read *_full.txt
    # files outside the data directory.
    die "invalid Pfam identifier\n" if $pfam =~ m{[/\\\0]};

    my $full_file = $pfam."_full.txt";
    my $full_path = "$doc_root/protein_curation/pfam/pfam10/$full_file";

    open( my $fh, '<', $full_path ) or die "cannot open $full_file for reading: $!";

    my ( @pids, @accs, @gis );
    while( my $line = <$fh> ) {
        chomp $line;
        next unless $line;
        next if index( $line, "//" ) == 0;
        next if index( $line, "PF" ) == 0;

        my @fields = split( /\t/, $line );

        push @pids, defined $fields[0] ? $fields[0] : '';
        push @accs, defined $fields[1] ? $fields[1] : '';

        # GI numbers start at column 5 and end at the first empty column.
        my @record_gis;
        for my $m ( 4 .. $#fields ) {
            last unless $fields[$m];
            push @record_gis, $fields[$m];
        }
        push @gis, @record_gis ? join( ", ", @record_gis ) : " ";
    }

    # Everything is in memory now; release the handle before printing.
    close( $fh ) or die "can't close $full_file: $!\n";

    print( "\n//\n$pfam\n\n" );
    print( "Gramene IDs:\n" );
    print( "$_\n" ) for @pids;

    print( "\n\nSPTrEMBL ACCs:\n" );
    print( join( " | ", @accs )."\n" );

    print( "\n\nGI numbers:\n" );
    print( "$_\n" ) for @gis;

    return;
}

