#!/bin/perl -w

# This script takes one or more protein accession numbers (separated by spaces) and generates a text dump for each of the accs.
# Text dumps will be named "XXXXXX_dump.txt" ( XXXXXX represents the acc ) and will be put in the "by_acc" directory.
# Wei Zhao
# Sep. 23, 2002


use lib '/export/share_more/ensembl/gramene_ensembl/load-scripts/protein_dump';

use strict;
use Gramene::ProteinDump;


# Prompt for the accessions to dump and read a single line of answers from
# STDIN.  The result, @accs, holds one upper-cased accession per element.
print( "Please enter the protein accession numbers, and please separate multiple accs with spaces:" );
my $acc_string = <STDIN>;

# <STDIN> yields undef at end-of-file (closed or empty input).  Treat that as
# "no accessions given" rather than passing undef on to chomp/tr/split.
$acc_string = '' unless defined $acc_string;
chomp( $acc_string );

# Upper-case the input (ASCII letters only, as before) ahead of its use for
# database lookups and output file names.
$acc_string =~ tr/a-z/A-Z/;

# split ' ' skips leading whitespace and splits on any run of whitespace.
# The former / +/ pattern turned a leading blank into an empty accession (and
# so into a bogus "by_acc/_dump.txt" file) and ignored tabs or a trailing "\r".
my @accs = split( ' ', $acc_string );


# Create the ProteinDump accessor used for every lookup below and ask it to
# connect before the first accession is processed.
# NOTE(review): connect_to_ora presumably opens the Oracle connection; its
# failure behaviour is defined in Gramene::ProteinDump -- confirm there.
my $db = Gramene::ProteinDump->new;
$db->connect_to_ora;

# Dump every requested accession.
#
# For each accession the same text is written to STDOUT and to the file
# "by_acc/<ACC>_dump.txt".  The record starts with a "//<ACC>" line.  When the
# accession resolves to a database id, it is followed by one "TAG:   value"
# line per field and by the GOAS / PFAM / PROS / POAS / REFS / FEAT sections.
# A section without data is written as a single placeholder line (for example
# "PFAM:   KEY|DESC").  The output file is created even for an accession the
# database does not know; it then contains only the "//<ACC>" header.
#
# Dies if the output file cannot be opened or closed.
foreach my $acc ( @accs ) {

    my $dump_file = "by_acc/" . $acc . "_dump.txt";

    # Three-argument open with a lexical handle: the file name is built from
    # user input, so it must never be parsed for mode or pipe characters.
    open( my $dump_fh, '>', $dump_file )
        or die "cannot open $dump_file for writing: $!";

    # Every piece of output goes to both the terminal and the dump file.
    my $emit = sub {
        my ( $text ) = @_;
        print( $text );
        print {$dump_fh} $text;
    };

    $emit->( "//$acc\n\n" );

    my $gp_id = $db->get_id_by_acc( $acc );
    if( $gp_id ) {

        # The SPAC line reports the accession returned by the database, which
        # is kept under its own name instead of shadowing the loop variable.
        my ( $db_acc, $swall ) = $db->get_acc( $gp_id );
        my ( $name, $symbol ) = $db->get_symbol( $gp_id );
        my $synonyms = $db->get_synonyms( $gp_id );
        my $organelle = $db->get_organelle( $gp_id );
        my $gns = $db->get_gns( $gp_id );
        my $ecs = $db->get_ecs( $gp_id );

        my ( $genus, $spe, $taxa_id, $sub, $cul ) = $db->get_organisms( $gp_id );
        my $trans = $db->get_trans( $gp_id );

        my @gis = $db->get_gis( $gp_id );
        my $gis = @gis ? join( "|", @gis ) : "Not available";

        my @genbank_pids = $db->get_pids( $gp_id );
        my $genbank_pid = @genbank_pids ? join( "|", @genbank_pids ) : "Not available";

        my $keyword = $db->get_keywords( $gp_id );
        my $exp = $db->get_exp( $gp_id );

        # Single-valued fields, in output order.
        my @header_fields = (
            [ GRID => $gp_id ],
            [ SPAC => $db_acc ],
            [ SPID => $swall ],
            [ GINM => $gis ],
            [ GPID => $genbank_pid ],
            [ NAME => $name ],
            [ SYMB => $symbol ],
            [ SYNN => $synonyms ],
            [ GNNM => $gns ],
            [ OGNE => $organelle ],
            [ ECNM => $ecs ],
            [ OSPE => "$genus|$spe|$taxa_id" ],
            [ OSUB => $sub ],
            [ OCUL => $cul ],
            [ TRAN => $trans ],
            [ KEYW => $keyword ],
            [ EXPR => $exp ],
        );
        foreach my $field ( @header_fields ) {
            my ( $tag, $value ) = @$field;
            $emit->( "${tag}:   ${value}\n" );
        }

        #######################################
        # GO associations: one GOAS line per distinct evidence (key, db name)
        # of each association.
        my ( $asso_ref, $term_ref, $go_ref, $tid_ref ) = $db->get_associations( $gp_id );

        my @assos = @$asso_ref;
        my @terms = @$term_ref;
        my @gos = @$go_ref;
        my @tids = @$tid_ref;

        if( @assos ) {

            for my $i ( 0 .. $#assos ) {

                my ( $key_ref, $db_name_ref, $code_ref ) = $db->get_evidences( $assos[$i] );
                my @keys = @$key_ref;
                my @db_names = @$db_name_ref;
                my @codes = @$code_ref;

                # Collapse evidences sharing the same (key, db name) into one
                # row whose codes are comma-joined.  %row_of remembers which
                # row a pair was first stored in, so a repeated pair extends
                # the row it actually matches.  (The earlier code appended to
                # the last row instead, which attached codes to the wrong
                # evidence whenever duplicates were not adjacent.)
                my ( @new_keys, @new_names, @new_codes );
                my %row_of;
                for my $j ( 0 .. $#keys ) {
                    my $row = $row_of{ $db_names[$j] }{ $keys[$j] };
                    if( defined $row ) {
                        $new_codes[$row] = $new_codes[$row] . "," . $codes[$j];
                    } else {
                        push @new_keys, $keys[$j];
                        push @new_names, $db_names[$j];
                        push @new_codes, $codes[$j];
                        $row_of{ $db_names[$j] }{ $keys[$j] } = $#new_keys;
                    }
                }

                # An association without any evidence still gets its GOAS
                # line, with an empty code field, as it always did -- but
                # without building that line from undefined values.
                unless( @new_keys ) {
                    @new_keys = ( "" );
                    @new_names = ( "" );
                    @new_codes = ( "" );
                }

                # Term types 2, 3 and 4 have display names; any other (or a
                # missing) type leaves the field empty.
                my $t_type = $db->get_term_type( $tids[$i] );
                my $type_name = !defined $t_type ? ""
                              : $t_type == 2     ? "Biological Process"
                              : $t_type == 3     ? "Cellular Component"
                              : $t_type == 4     ? "Molecular Function"
                              :                    "";

                for my $j ( 0 .. $#new_keys ) {

                    # Only these two evidence sources are named in the
                    # output; any other source contributes just its codes.
                    my $source = "";
                    if( $new_names[$j] eq "gramene.ontology.reference" ) {
                        $source = "GR_REF:$new_keys[$j]|";
                    } elsif( $new_names[$j] =~ /interpro/i ) {
                        $source = "InterPro:$new_keys[$j]|";
                    }

                    $emit->( "GOAS:   $type_name|$terms[$i]|$gos[$i]|" . $source . "$new_codes[$j]\n" );
                }
            }
        } else {
            $emit->( "GOAS:   TYPE|NAME|GOAC|DB:KEY|CODE,CODE,CODE\n" );
        }

        ########################################
        # Pfam entries.
        my ( $pfam_ref, $pfam_extra_ref ) = $db->get_pfam( $gp_id );
        my @pfams = @$pfam_ref;
        my @pfam_extras = @$pfam_extra_ref;

        if( @pfams ) {
            for my $i ( 0 .. $#pfams ) {
                $emit->( "PFAM:   $pfams[$i]|$pfam_extras[$i]\n" );
            }
        } else {
            $emit->( "PFAM:   KEY|DESC\n" );
        }

        #######################################
        # Prosite entries.
        my ( $prosite_key_ref, $prosite_desc_ref, $sequence_ref ) = $db->get_prosite( $gp_id );
        my @prosite_keys = @$prosite_key_ref;
        my @prosite_descs = @$prosite_desc_ref;
        my @sequences = @$sequence_ref;

        if( @prosite_keys ) {
            for my $i ( 0 .. $#prosite_keys ) {
                $emit->( "PROS:   $prosite_keys[$i]|$prosite_descs[$i]|$sequences[$i]\n" );
            }
        } else {
            $emit->( "PROS:   KEY|DESC|SEQUENCE\n" );
        }

        #########################################
        # Tissue.
        my $tissue = $db->get_tissue( $gp_id );
        if( $tissue ) {
            $emit->( "POAS:   $tissue\n" );
        } else {
            $emit->( "POAS:   TISSUE\n" );
        }

        ########################################
        # Reference ids.
        my @ref_ids = $db->get_ref_ids( $gp_id );
        if( @ref_ids ) {
            my $ref_ids = join( "|", @ref_ids );
            $emit->( "REFS:   $ref_ids\n" );
        } else {
            $emit->( "REFS:   0000\n" );
        }

        ###########################################
        # Features with their from-to ranges.
        my ( $feature_ref, $from_ref, $to_ref ) = $db->get_features( $gp_id );
        my @features = @$feature_ref;
        my @froms = @$from_ref;
        my @tos = @$to_ref;

        if( @features ) {
            for my $i ( 0 .. $#features ) {
                $emit->( "FEAT:   $features[$i]|$froms[$i]-$tos[$i]\n" );
            }
        } else {
            $emit->( "FEAT:   TYPE|FROM-TO\n" );
        }

        ########################################
        # Blank lines terminate the record.
        $emit->( "\n\n" );
    }

    # Buffered write errors only surface at close, so the result is checked.
    close( $dump_fh ) or die "cannot close: $!";
}



##################################

# Every accession has been handled: have the accessor shut its database
# session down, then leave with the default (zero) exit status.
$db->terminate_database;

exit 0;


