#!/usr/bin/perl

# $Id: protein_search,v 1.103 2007/03/21 14:53:32 liya Exp $

=head1 NAME

protein_search 

=head1 DESCRIPTION

A script for searching and displaying protein information.

=cut

# -------------------------------------------------------

use strict;
use Apache::Request;
use Template;

use Gramene::Config;
use Gramene::Utils;
use Gramene::Page;
use Gramene::Protein::ProteinDB;

# Template Toolkit files used by the three page types and the error page.
use constant DETAIL_TEMPLATE  => 'protein_detail.tmpl';
use constant DISPLAY_TEMPLATE => 'protein_display.tmpl';
use constant ERROR_TEMPLATE   => 'protein_error.tmpl';
use constant SEARCH_TEMPLATE  => 'protein_search.tmpl';

# Paging of search results: entries per page and page links per page set.
use constant PAGE_SIZE => 25;
use constant MAX_PAGES => 10;

# ---- per-request objects ------------------------------------------------
my $apr            = Apache::Request->new( Apache->request );
my $page           = Gramene::Page->new($apr);
my $config         = Gramene::Config->new;
my $protein_config = $config->get('protein');

my $db = Gramene::Protein::ProteinDB->new;
$db->connect_to_db;

# Filled in by the request dispatch below.
my $template;
my $html;
my $pager;

# Set by the Gramene quick search; handed to the templates as 'table_only'.
my $table_only = $apr->param('table');

# ---- search form ---------------------------------------------------------
# Labels offered in the "search by" selector, in display order.
# 'EC Number' is deliberately not offered, although a handler for it
# is still registered in %searches.
my $search_fields = [
	'Name',
	'Accession',
	'Protein_ID(PID)',
	'SPTrEMBL ID',
	'GenBank ID(GI)',
	'Cultivar',
	'All Fields',
];

# Dispatch table: selector label => search routine taking ( $apr, $db ).
my %searches = (
	'Name'            => \&search_by_name,
	'Accession'       => \&search_by_accession,
	'Protein_ID(PID)' => \&search_by_pid,
	'SPTrEMBL ID'     => \&search_by_sptrembl_id,
	'GenBank ID(GI)'  => \&search_by_gi,
	'EC Number'       => \&search_by_ec,
	'Cultivar'        => \&search_by_cultivar,
	'All Fields'      => \&search_by_all_fields,
);

# Species selector: each entry is [ label shown to the user, value used
# internally for searching ].
my $species_fields = [
	[ 'Oryza (rice)',              'Oryza' ],
	[ 'Zea (corn)',                'Zea' ],
	[ 'Triticum (wheat)',          'Triticum' ],
	[ 'Hordeum (barley)',          'Hordeum' ],
	[ 'Avena (oats)',              'Avena' ],
	[ 'Secale (rye)',              'Secale' ],
	[ 'Aegilops',                  'Aegilops' ],
	[ 'Sorghum',                   'Sorghum' ],
	[ 'Pennisetum (pearl millet)', 'Pennisetum' ],
	[ 'Saccharum',                 'Saccharum' ],
	[ 'All Poaceae Family',        '' ],
];

# ---- detail page ---------------------------------------------------------
# Cross-reference database URLs; consulted by get_xref_url() with the
# upper-cased database name as key.
my $xref_url_links = $db->get_all_xref_db_urls();

# Dispatch table: request parameter name => routine taking ( $apr, $db )
# that turns that parameter into a protein id.
my %search_id_by_fields = (
	'protein_id' => \&get_protein_id_by_id,
	'acc'        => \&get_protein_id_by_acc,
	'swall'      => \&get_protein_id_by_swall,
	'pid'        => \&get_protein_id_by_pid,
	'gi'         => \&get_protein_id_by_gi,
);

# Request dispatch.  Exactly one of three pages is rendered into $html:
#   * search results  - when the 'word' parameter is present
#   * protein detail  - when one of the id parameters is present
#   * empty search form otherwise
# Anything that dies in here is reported by the error handling that follows
# the eval.
eval {

	# Minimal HTML escaping for request data that is interpolated into the
	# page title.  The title strings below already contain literal &quot;
	# entities, i.e. they are emitted as raw HTML.
	my $escape_html = sub {
		my ($text) = @_;
		$text = '' unless defined $text;
		$text =~ s/&/&amp;/g;
		$text =~ s/</&lt;/g;
		$text =~ s/>/&gt;/g;
		$text =~ s/"/&quot;/g;
		return $text;
	};

	# Template->new returns a false value on failure; fail here with the
	# real reason instead of a later "method on undefined value" error.
	$template = Template->new(
		{
			INCLUDE_PATH => $protein_config->{'template_dir'},
			PRE_CHOMP    => 1,
			POST_CHOMP   => 1,
			TRIM         => 1,
			FILTERS      => { commify => \&Gramene::Utils::commify }
		}
	) or die Template->error(), "\n";

	if ( $apr->param('word') ) {    # search
		my $page_no      = $apr->param('page_no')      || 1;
		my $query        = $apr->param('word');
		my $search_field = $apr->param('search_field') || 'All Fields';

		# Anything that is not a plain positive integer falls back to page 1.
		$page_no = 1 unless $page_no =~ /\A[0-9]+\z/;

		# Only labels registered in %searches are accepted, which also
		# guarantees that $search_field is safe to print in the title.
		my $search_action = $searches{$search_field}
		  or die "Unknown search field\n";
		my @proteins = $search_action->( $apr, $db );

		if (@proteins) {

			# The file does not load Data::Pageset itself; make sure it is
			# available before using it.
			require Data::Pageset;
			$pager = Data::Pageset->new(
				{
					total_entries    => scalar @proteins,
					entries_per_page => PAGE_SIZE,
					current_page     => $page_no,
					pages_per_set    => MAX_PAGES,
				}
			);
			@proteins = $pager->splice( \@proteins );
		}

		$template->process(
			DISPLAY_TEMPLATE,
			{
				gramene_page   => $page,
				apr            => $apr,
				search_fields  => $search_fields,
				species_fields => $species_fields,
				proteins       => \@proteins,
				pager          => $pager,
				title          => "Search for Protein &quot;"
				  . $escape_html->($query)
				  . "&quot;"
				  . " by $search_field",
				table_only => $table_only
			},
			\$html
		  )
		  or $html = $template->error;

	}
	elsif ($apr->param('protein_id')
		|| $apr->param('acc')
		|| $apr->param('swall')
		|| $apr->param('pid')
		|| $apr->param('gi')
		|| $apr->param('gene_product_id') )
	{                               # protein detail
		my $protein_id = $apr->param('gene_product_id') || '';
		if ( !$protein_id ) {

			# Fixed priority order, so the outcome does not depend on hash
			# ordering when several id parameters are supplied at once.
			foreach my $field (qw( protein_id acc swall pid gi )) {
				next unless $apr->param($field);
				my $get_id_action = $search_id_by_fields{$field};
				$protein_id = $get_id_action->( $apr, $db );
				last;
			}
		}

		my $protein_obj;
		if ($protein_id) {
			$protein_obj = get_protein_object( $db, $protein_id );
		}

		# When no protein was found the template still receives an (empty)
		# hash reference.  This used to happen implicitly through
		# autovivification while the title was being built.
		$protein_obj ||= {};

		$template->process(
			DETAIL_TEMPLATE,
			{
				gramene_page   => $page,
				apr            => $apr,
				search_fields  => $search_fields,
				species_fields => $species_fields,
				get_xref_url   => \&get_xref_url,
				protein        => $protein_obj,
				title          => "Summary for Protein &quot;"
				  . $escape_html->( $protein_obj->{'accession'} )
				  . "&quot;"
			},
			\$html
		  )
		  or $html = $template->error;

	}
	else {                          # plain search form

		$template->process(
			SEARCH_TEMPLATE,
			{
				gramene_page   => $page,
				apr            => $apr,
				search_fields  => $search_fields,
				species_fields => $species_fields,
				title          => "Protein Search",
				search_page    => "1"
			},
			\$html
		  )
		  or $html = $template->error;
	}

};

# Report whatever the eval block died with.  The error template is used when
# a Template object exists; otherwise the message is sent as bare text.
if ( my $err = $@ ) {
	if ( !$template ) {
		$html = "Error: $err";
	}
	else {
		my %error_vars = (
			gramene_page  => $page,
			error_message => $err,
			table_only    => $table_only,
		);
		$template->process( ERROR_TEMPLATE, \%error_vars, \$html )
		  or $html = $template->error;
	}
}

# Send the response: header first, then the rendered page.
$apr->content_type('text/html');
$apr->send_http_header;
$apr->print($html);

# Search proteins through the 'swissprot_acc' helper field.
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_helper_field.
sub search_by_accession {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_helper_field(
		'swissprot_acc',
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Search proteins through the 'swissprot_id' helper field.
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_helper_field.
sub search_by_sptrembl_id {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_helper_field(
		'swissprot_id',
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Search proteins by name.
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_name.
sub search_by_name {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_name(
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Search proteins by protein id (PID).
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_pid.
sub search_by_pid {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_pid(
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Search proteins by GenBank id (GI).
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_gi.
sub search_by_gi {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_gi(
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Search proteins by EC number.
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_ec.
sub search_by_ec {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_ec(
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Search proteins by cultivar.
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_cultivar.
sub search_by_cultivar {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_cultivar(
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Search proteins across all search fields.
#
# Arguments: $apr - request object, read for 'word', 'species', 'order_by'
#            $db  - protein database handle
# Returns the list produced by $db->get_proteins_by_all_search_fields.
sub search_by_all_fields {
	my ( $apr, $db ) = @_;

	# scalar() matters: in list context param() yields every value of a
	# parameter and an empty list when it is absent, which would shift or
	# inject positional arguments of the database call.
	my @proteins = $db->get_proteins_by_all_search_fields(
		scalar( $apr->param('word') ),
		scalar( $apr->param('species') ),
		scalar( $apr->param('order_by') ),
	);
	return @proteins;
}

# Resolve the 'protein_id' request parameter to a protein id by passing it
# through $db->validate_id.
#
# Arguments: $apr - request object, $db - protein database handle
# Returns whatever $db->validate_id returns, in scalar context.
sub get_protein_id_by_id {
	my ( $apr, $db ) = @_;

	# scalar() so that exactly one argument reaches validate_id even when the
	# parameter is repeated or missing in the request.
	my $id = $db->validate_id( scalar( $apr->param('protein_id') ) );
	return $id;
}

# Resolve the 'acc' request parameter to a protein id.
#
# The accession is upper-cased (ASCII letters only) before it is handed to
# $db->get_id_by_acc.  Returns undef when the parameter is missing or false.
sub get_protein_id_by_acc {
	my ( $apr, $db ) = @_;

	my $acc = $apr->param('acc');

	# Declared unconditionally: "my $x = ... if COND" has undefined behaviour
	# in Perl (see perlsyn, Statement Modifiers).
	my $id;
	if ($acc) {
		$acc =~ tr/a-z/A-Z/;
		$id = $db->get_id_by_acc($acc);
	}
	return $id;
}

# Resolve the 'swall' request parameter to a protein id.
#
# The value is upper-cased (ASCII letters only) before it is handed to
# $db->get_id_by_swall.  Returns undef when the parameter is missing or
# false.
sub get_protein_id_by_swall {
	my ( $apr, $db ) = @_;

	my $swall_id = $apr->param('swall');

	# Declared unconditionally: "my $x = ... if COND" has undefined behaviour
	# in Perl (see perlsyn, Statement Modifiers).
	my $id;
	if ($swall_id) {
		$swall_id =~ tr/a-z/A-Z/;
		$id = $db->get_id_by_swall($swall_id);
	}
	return $id;
}

# Resolve the 'pid' request parameter to a protein id.
#
# The value is upper-cased (ASCII letters only) before it is handed to
# $db->get_id_by_pid.  Returns undef when the parameter is missing or false.
sub get_protein_id_by_pid {
	my ( $apr, $db ) = @_;

	my $pid = $apr->param('pid');

	# Declared unconditionally: "my $x = ... if COND" has undefined behaviour
	# in Perl (see perlsyn, Statement Modifiers).
	my $id;
	if ($pid) {
		$pid =~ tr/a-z/A-Z/;
		$id = $db->get_id_by_pid($pid);
	}
	return $id;
}

# Resolve the 'gi' request parameter to a protein id.
#
# The value is upper-cased (ASCII letters only) and looked up through
# $db->get_id_by_pid.  Returns undef when the parameter is missing or false.
sub get_protein_id_by_gi {
	my ( $apr, $db ) = @_;

	my $gi = $apr->param('gi');

	# Declared unconditionally: "my $x = ... if COND" has undefined behaviour
	# in Perl (see perlsyn, Statement Modifiers).
	my $id;
	if ($gi) {
		$gi =~ tr/a-z/A-Z/;

		# NOTE(review): this is the same lookup get_protein_id_by_pid uses.
		# It may be a copy-paste slip for a GI-specific method - confirm
		# against Gramene::Protein::ProteinDB before changing it.
		$id = $db->get_id_by_pid($gi);
	}
	return $id;
}

# Collect everything the protein detail template displays for one protein.
#
# Arguments:
#   $db         - protein database handle
#   $protein_id - internal protein id
#
# Returns the hash reference obtained from $db->get_protein_object, extended
# with the results of the per-protein lookups below (accession, organism,
# cross references, associations, features, references, ...).  Returns
# nothing when $protein_id is false.
sub get_protein_object {
	my ( $db, $protein_id ) = @_;
	return unless $protein_id;

	my $protein_obj = $db->get_protein_object($protein_id);
	my $acc         = $db->get_protein_acc_by_id($protein_id);

	# Each lookup is assigned to a hash element on its own so that it runs in
	# scalar context.
	$protein_obj->{'accession'}     = $acc;
	$protein_obj->{'swissprot_id'}  = $db->get_swall($protein_id);
	$protein_obj->{'synonyms'}      = $db->get_synonyms($protein_id);      # a string
	$protein_obj->{'gene_names'}    = $db->get_gene_names($protein_id);    # a string
	$protein_obj->{'organelle'}     = $db->get_organelle($protein_id);
	$protein_obj->{'sequence'}      = $db->get_sequence($protein_id);
	$protein_obj->{'ensembl_xrefs'} = $db->get_ensembl_xrefs($protein_id); # an array ref
	$protein_obj->{'ecs'}           = $db->get_ecs($protein_id);           # an array ref
	$protein_obj->{'pids'}          = $db->get_pids($protein_id);
	$protein_obj->{'acc_source'}    = $db->get_protein_xref_db($protein_id);

	my ( $genus, $spe, $taxa_id, $cul ) = $db->get_organisms($protein_id);
	my ($gr_taxa_id) =
	  $db->get_xrefs( $protein_id, 'gramene.ontology.taxonomy', 'acc' );
	$protein_obj->{'genus'}           = $genus;
	$protein_obj->{'species'}         = $spe;
	$protein_obj->{'ncbi_taxa_id'}    = $taxa_id;
	$protein_obj->{'cultivar'}        = $cul;
	$protein_obj->{'gramene_taxa_id'} = $gr_taxa_id;

	# Maize proteins link to MaizeGDB by accession.
	if ( $genus =~ /Zea/i ) {
		my $xdb = 'MaizeGDB';
		my $url = 'http://www.maizegdb.org/cgi-bin/displayseqrecord.cgi?id='
		  . $acc;
		push @{ $protein_obj->{'dbxrefs'} },
		  [ $xdb, "<a href=\"$url\">$acc</a>" ];
	}

	# Wheat, barley, oat and rye proteins link to GrainGenes, one link per
	# EMBL accession.  (The genus pattern used to read "horderum", which
	# never matched Hordeum, and the URL contained a stray space in the
	# "class" parameter name.)
	if ( $genus =~ /Triticum|hordeum|avena|secale/i ) {
		my $xdb       = 'GrainGenes';
		my @embl_accs = $db->get_xrefs( $protein_id, 'EMBL', 'ACC' );
		my $grain_url =
'http://wheat.pw.usda.gov/cgi-bin/graingenes/report.cgi?class=sequence&name=';

		my @xurls;
		foreach my $embl_acc (@embl_accs) {
			my $xurl = $grain_url . $embl_acc;
			push @xurls, "<a href=\"$xurl\">$embl_acc</a>";
		}

		push @{ $protein_obj->{'dbxrefs'} },
		  [ $xdb, join( ", ", @xurls ) ];
	}

	$protein_obj->{'phenotype_genes'} =
	  $db->get_phenotype_genes($protein_id);    # an array ref of hashes
	$protein_obj->{'public_comment'} = $db->get_comment($protein_id);

	# First GI (if any) drives the similarity links.  Declared without a
	# statement modifier: "my $x = ... if COND" is undefined behaviour.
	my $gis = $db->get_gis($protein_id);
	my $gi  = $gis ? shift @{$gis} : undef;

	my ( $assocs, $terms ) = $db->get_associations($protein_id);
	$protein_obj->{'associations'}     = $assocs;
	$protein_obj->{'associated_terms'} = $terms;

	$protein_obj->{'keywords'} = $db->get_keywords($protein_id);

	$protein_obj->{'similarity_links'} =
	  get_similarity_links( $protein_obj->{'accession'}, $gi );

	$protein_obj->{'pfams'}    = $db->get_pfams($protein_id);
	$protein_obj->{'prosites'} = $db->get_prosites($protein_id);
	$protein_obj->{'features'} = $db->get_features($protein_id);

	$protein_obj->{'references'} = $db->get_references($protein_id);

	# The key spelling ("aditional") is kept as is; it is part of the data
	# handed to the template.
	$protein_obj->{'aditional_references'} =
	  $db->get_related_refs($protein_id);

	return $protein_obj;
}

# Build the URL for one cross reference.
#
# Looks up the upper-cased database name in $xref_url_links, takes that
# entry's 'url_syntax' and replaces the first "[%?%]" placeholder with
# $xref_value.  Returns undef when the database name has no entry.
sub get_xref_url {
	my ( $xref_dbname, $xref_value ) = @_;

	my $db_entry = $xref_url_links->{ uc($xref_dbname) };

	my $xref_url;
	if ($db_entry) {
		( $xref_url = $db_entry->{'url_syntax'} ) =~ s/\Q[%?%]/$xref_value/;
	}

	return $xref_url;
}

# Build the "similarity links" table for the protein detail page.
#
# Arguments:
#   $acc - protein accession (currently unused; it was only needed by a
#          link that has been disabled)
#   $gi  - GenBank id of the protein, may be false
#
# Returns a hash reference keyed by taxon group name.  Without a $gi the
# values are the plain display labels; with a $gi _add_plant_link_url()
# rewrites them into links.  The extra key 'structure_link' holds the
# "BLink from NCBI" link, or '' without a $gi.
sub get_similarity_links {
	my ( $acc, $gi ) = @_;

	# Display label per taxon group.
	my %plants = (
		'viridiplantae'  => 'Viridiplantae Green plants',
		'embryophytes'   => 'Embryophytes (plants)',
		'magnoliophytes' => 'Magnoliophytes (flowering plants)',
		'monocots'       => 'Monocots',
		'grasses'        => 'Grasses',
		'rice'           => 'Rice',
		'maize'          => 'Maize',
		'sorghum'        => 'Sorghum',
		'wheat'          => 'Wheat',
		'barley'         => 'Barley',
		'oat'            => 'Oat',
		'rye'            => 'Rye',
		'sugarcane'      => 'Sugarcane',
		'dicots'         => 'Dicots',
		'brassicaceae'   => 'Brassicaceae',
		'arabidopsis'    => 'Arabidopsis',
		'fabaceae'       => 'Fabaceae (Legumes)',
		'solanaceae'     => 'Solanaceae',
		'cucurbitaceae'  => 'Cucurbitaceae',
		'fungi'          => 'Fungi',
		'metazoa'        => 'Metazoa',
	);

	# Taxon number per taxon group, used as the "tax" URL parameter.
	my %plants_to_taxon = (
		'viridiplantae'  => 33090,
		'embryophytes'   => 3193,
		'magnoliophytes' => 3398,
		'monocots'       => 4447,
		'grasses'        => 4479,
		'rice'           => 4527,
		'maize'          => 4575,
		'sorghum'        => 4557,
		'wheat'          => 4564,
		'barley'         => 4512,
		'oat'            => 4496,
		'rye'            => 4549,
		'sugarcane'      => 4546,
		'dicots'         => 71240,
		'brassicaceae'   => 3700,
		'arabidopsis'    => 3701,
		'fabaceae'       => 3803,
		'solanaceae'     => 4070,
		'cucurbitaceae'  => 3650,
		'fungi'          => 4751,
		'metazoa'        => 33208,
	);

	my $structure_link = '';
	if ($gi) {
		_add_plant_link_url( \%plants, \%plants_to_taxon, $gi );
		$structure_link = "<a target='cross_reference' href=\"http://www.ncbi.nlm.nih.gov:80/sutils/blink.cgi?pid=$gi&pdb=1&cut=100\">BLink from NCBI</a>&nbsp;&nbsp;";
	}
	$plants{'structure_link'} = $structure_link;

	return \%plants;
}


# Turn every taxon label into a link for the given GI.
#
# Arguments:
#   $plants_ref          - hash ref: taxon group name => display label;
#                          modified in place, each label becomes an <a> tag
#   $plants_to_taxon_ref - hash ref: taxon group name => taxon number
#   $gi                  - GenBank id put into the "pid" URL parameter
#
# Returns nothing; the result is the modified %$plants_ref.
sub _add_plant_link_url {
	my ( $plants_ref, $plants_to_taxon_ref, $gi ) = @_;

	foreach my $plant ( keys %{$plants_to_taxon_ref} ) {
		my $taxon        = $plants_to_taxon_ref->{$plant};
		my $plant_string = $plants_ref->{$plant};
		my $url =
"http://www.ncbi.nlm.nih.gov/sutils/blink.cgi?pid=$gi&tax=$taxon&org=0&pdb=0&sort=1&cut=100&all=0";

		# The href value is quoted: it contains '=' and '&', which are not
		# valid in an unquoted HTML attribute value.
		$plants_ref->{$plant} =
		  "<a href=\"$url\" target='cross_reference'>$plant_string</a>";
	}

	return;
}

# ------------------------------------------------------------

=pod

=head1 AUTHORS

Liya Ren E<lt>ren@cshl.eduE<gt>

=cut

