#!/bin/perl -w

use strict;

use CGI;
use CGI qw(:standard *table *TR *th *td *ul *ol);
use GramenePage; 
use Gramene::RefCuration;


use Carp;
use LWP;
use Gramene::Medline;


# Text placed in the HTML <title> element.
my $page_title = "Curation tool";

# The HTTP header goes out first; the GramenePage object supplies the
# stylesheet and the body wrapper used for the rest of the page.
my $q = CGI->new();
print $q->header();
my $page = GramenePage->new( Apache->request );

print $q->start_html(
    -title => $page_title,
    -style => { 'src' => $page->stylesheet },
);
print $page->start_body();


########################
#print("<center><h1>Literature Database</h1></center>" );

# Static navigation bar linking the literature tools.  Nothing is interpolated
# into this markup, so a non-interpolating heredoc is used.
print <<'SMALLBAR';
<table align="CENTER">
  <tr>
    <td>&nbsp;<img src="/images/icons/grain_icon.jpg" alt="grain_icon" height=16 width=16 align="top">
    </td>
    <td>
      &nbsp;<b>[</b>&nbsp;<b>Literature Curation Tool</b>&nbsp;<b>]</b>&nbsp;&nbsp;
      &nbsp;<b>[</b>&nbsp;<a href="/perl/pub_search" class="gopage"><b>Literature Database</b></a>&nbsp;<b>]</b>&nbsp;&nbsp;
      &nbsp;<b>[</b>&nbsp;<a href="/newsletters/rice_genetics/" class="gopage"><b>Rice Genetics Newsletters</b></a>&nbsp;<b>]</b>&nbsp;&nbsp;
    </td>
  </tr>
</table>
SMALLBAR

# Attributes shared by the three body rows of the PubMed id entry form.
my %search_row_attr = (
    -align  => 'CENTER',
    -valign => 'CENTER',
    -nowrap => 'nowrap',
    -class  => 'searchbody',
);

# The entry form: a title row, a prompt row, the textarea for the ids and the
# submit button.  The arguments are kept in the original order so the CGI
# calls run in the same sequence (start_form ... end_form).
print $q->table(
    {
        -border      => '0',
        -cellspacing => '0',
        -cellpadding => '3',
        -width       => '98%',
        -align       => 'CENTER',
    },
    $q->start_form(),
    $q->Tr(
        { -align => 'CENTER', -nowrap => 'nowrap' },
        $q->th(
            { -align => 'CENTER', -class => 'searchtitle' },
            "Literature Curation"
        )
    ),
    $q->Tr(
        +{%search_row_attr},
        $q->td(
            { -align => 'CENTER', -valign => 'CENTER' },
            "<span class='alert'>Enter PubMed ids:</span>",
            "&nbsp;&nbsp;&nbsp;"
        )
    ),
    $q->Tr(
        +{%search_row_attr},
        $q->td(
            $q->textarea(
                -name    => 'original_ids',
                -rows    => 10,
                -columns => 40
            )
        )
    ),
    $q->Tr(
        +{%search_row_attr},
        $q->td(
            $q->submit(
                -name  => 'do_it',
                -value => 'Get References'
            )
        )
    ),
    $q->end_form()
);

print $q->hr();

###########################

# Landing state: the request carries no parameters at all.  The help text that
# used to be printed here is commented out, so this conditional currently
# does nothing.
unless( $q->param() ) {

#print <<FRONTPAGE;
#<hr>
#<table border="0" cellspacing="0" cellpadding="0" width="98%" align="CENTER"><tr><td>
#<ul>
#  <li>Enter one or more search terms.</li>
#  <li>Enter author names as "smith je", "smith j", or "smith". Initials are optional.</li>
#  <li>Enter journal titles.</li>
#</ul>
#</td></tr></table>
#FRONTPAGE

}  


# Handle a "Get References" submission.
#
# The submitted text is reduced to a list of numeric ids.  Ids the database
# already knows are listed with their internal ids; the remaining ids are
# fetched with pm_journal(), staged in tab-delimited files under PUBMED/ by
# process(), and shown with a "Load References" button.
#
# NOTE(review): the staging files have fixed names, so two curators submitting
# at the same time would overwrite each other's files -- confirm whether that
# can happen in this deployment.
if ( $q->param("original_ids") ) {

    my $original_ids = $q->param("original_ids");

    # Collect the digit runs directly instead of split(/\D+/): split keeps a
    # leading empty field when the text starts with a non-digit (for example
    # a blank first line), which put an empty id into the list.
    my @original_ids = $original_ids =~ /([0-9]+)/g;

    if (@original_ids) {

        my $db = Gramene::RefCuration->whatever();
        $db->connect_to_ora();

        my ( $old_pid_ref, $old_rid_ref, $new_pid_ref ) =
            $db->get_exist_pubmeds( \@original_ids );
        my @old_pids = @$old_pid_ref;
        my @old_rids = @$old_rid_ref;
        my @new_pids = @$new_pid_ref;

        # --- ids that are already in the database ---------------------------
        if (@old_pids) {

            print $q->table(
                {
                    -border      => '0',
                    -cellpadding => '2',
                    -width       => '98%',
                    -align       => 'CENTER'
                },
                $q->Tr(
                    $q->th(
                        { -class => 'resultstitle', -align => 'CENTER' },
                        "We have the following records in our database."
                    )
                )
            );

            print start_table(
                {
                    -width       => '98%',
                    -cellpadding => '4',
                    -border      => '1',
                    -align       => 'CENTER'
                }
            );
            print $q->Tr(
                $q->td(
                    { -class => 'resultstitle', -align => 'CENTER' },
                    "&nbsp;"
                ),
                $q->td(
                    { -class => 'resultstitle', -align => 'CENTER' },
                    "PubMed id"
                ),
                $q->td(
                    { -class => 'resultstitle', -align => 'CENTER' },
                    "Gramene internal id"
                )
            );

            # @old_pids and @old_rids are parallel arrays; the first column is
            # a 1-based row counter.
            for my $i ( 0 .. $#old_pids ) {
                print $q->Tr(
                    { -valign => 'TOP' },
                    $q->td(
                        { -class => 'resultsbody', -align => 'CENTER' },
                        $i + 1
                    ),
                    $q->td(
                        { -class => 'resultsbody', -align => 'CENTER' },
                        $old_pids[$i]
                    ),
                    $q->td(
                        { -class => 'resultsbody', -align => 'CENTER' },
                        $old_rids[$i]
                    )
                );
            }

            print end_table();
        }

        # --- ids that still have to be fetched ------------------------------
        if (@new_pids) {

            # Step 1: download every record into one file, each record
            # preceded by a "=====>id" marker line.
            my $pm_file = "PUBMED/pubmed_out.txt";
            open( my $pm_out, '>', $pm_file )
                or die "cannot open $pm_file for writing: $!";

            foreach my $oid (@new_pids) {
                my $pmentry = pm_journal($oid) or next;
                print {$pm_out} "=====>$oid\n$pmentry\n";
            }

            close($pm_out) or die "cannot close $pm_file: $!";

            # Step 2: split the download into the four staging files.  These
            # handles stay package barewords because process() prints to them
            # by name.
            open( REF, '>', 'PUBMED/Reference_pm.txt' )
                or die "Reference_pm.txt:$!";
            open( AUTHOR, '>', 'PUBMED/Author_pm.txt' )
                or die "Author_pm.txt:$!";
            open( DBX, '>', 'PUBMED/Ref_dbxref_pm.txt' )
                or die "Ref_dbxref_pm.txt:$!";
            open( URL, '>', 'PUBMED/Ref_URL_pm.txt' )
                or die "Ref_URL_pm.txt:$!";

            print AUTHOR "#reference\tcount\tauthor\n";
            print DBX "#reference\tdatabase\taccession\n";
            print REF
"#reference\tsource\ttitle\tvolume\tyear\tstartpage\tendpage\tlanguage\tabstract\n";
            print URL "#reference\tURL\n";

            open( my $pm_in, '<', $pm_file ) or die "pubmed_out.txt:$!";

            # Accumulate the lines between two markers and hand each complete
            # record to process() together with the id from its marker.
            my $entry = '';
            my $refid = '';
            while ( my $line = <$pm_in> ) {
                if ( $line =~ /^=====>(.*)/ ) {
                    my $newrefid = $1;
                    process( $refid, $entry ) if $refid;
                    $refid = $newrefid;
                    $entry = '';
                }
                else {
                    $entry .= $line;
                }
            }
            process( $refid, $entry ) if $refid;

            # The original never closed this handle.
            close($pm_in) or die "cannot close $pm_file: $!";

            close(REF)    || die "cannot close Reference_pm.txt: $!";
            close(AUTHOR) || die "cannot close Author_pm.txt: $!";
            close(DBX)    || die "cannot close Ref_dbxref_pm.txt: $!";
            close(URL)    || die "cannot close Ref_URL_pm.txt: $!";

            # Step 3: read the staged references back and show them with a
            # button that triggers the actual load.
            open( my $ref_in, '<', 'PUBMED/Reference_pm.txt' )
                or die "Reference_pm.txt:$!";

            print $q->start_form();
            print start_table(
                {
                    -width       => '98%',
                    -cellpadding => '4',
                    -border      => '0',
                    -align       => 'CENTER'
                }
            );
            print $q->Tr(
                $q->th(
                    { -class => 'resultstitle', -align => 'CENTER' },
                    "The following records have been retrieved from PubMed."
                )
            );

            while ( my $row = <$ref_in> ) {
                chomp $row;

                # Skip blank lines and the "#..." column-header line.
                next if $row eq '' or index( $row, "#" ) == 0;

                # Limit -1 keeps trailing empty columns.  The text comes from
                # an external service, so it is HTML-escaped before output.
                my @fields = split( /\t/, $row, -1 );
                my ( $id, $source, $title, $volume, $year, $first, $last ) =
                    map { $q->escapeHTML( defined $_ ? $_ : '' ) }
                    @fields[ 0 .. 6 ];

                print("<tr><td class=\"resultsbody\"><p>$id: $title</p>");
                print(
"<p>$source, $year, $volume, $first-$last</p><p></p></td></tr>"
                );
            }

            # The original left this read handle open as well.
            close($ref_in) or die "cannot close Reference_pm.txt: $!";

            print $q->hidden( -name => 'load', -value => '1', -override => '1' );
            print $q->Tr(
                {
                    -align  => 'CENTER',
                    -valign => 'CENTER',
                    -nowrap => 'nowrap',
                    -class  => 'resultsbody'
                },
                $q->td(
                    $q->submit(
                        -name  => 'load_it',
                        -value => 'Load References'
                    )
                )
            );
            print end_table();
            print $q->end_form();
        }
    }

}


# Handle a "Load References" submission (hidden field load=1): run the
# RefCuration load steps, then list what get_new_load() reports.
#
# The parameter is tested for definedness first and compared as a string:
# the old numeric "== 1" emitted a warning under -w on every request that
# had no "load" parameter or a non-numeric one.
my $load = $q->param("load");

if ( defined $load && $load eq '1' ) {

    my $db = Gramene::RefCuration->whatever();
    $db->connect_to_ora();

    $db->load_references();
    $db->load_authors();
    $db->load_dbxref();
    $db->do_extra_work();

    my ( $o_ref, $r_ref, $title_ref ) = $db->get_new_load();
    my @oids   = @$o_ref;
    my @rids   = @$r_ref;
    my @titles = @$title_ref;

    if (@rids) {

        print $q->table(
            {
                -border      => '0',
                -cellpadding => '2',
                -width       => '98%',
                -align       => 'CENTER'
            },
            $q->Tr(
                $q->th(
                    { -class => 'resultstitle', -align => 'CENTER' },
                    "The following references have been loaded into our database."
                )
            )
        );

        print start_table(
            {
                -width       => '98%',
                -cellpadding => '4',
                -border      => '1',
                -align       => 'CENTER'
            }
        );
        print $q->Tr(
            $q->td(
                { -class => 'resultstitle', -align => 'CENTER' },
                "PubMed id"
            ),
            $q->td(
                { -class => 'resultstitle', -align => 'CENTER' },
                "Title"
            ),
            $q->td(
                { -class => 'resultstitle', -align => 'CENTER' },
                "Gramene internal id"
            )
        );

        # @oids, @titles and @rids are used as parallel arrays; the internal
        # id links to the literature search page for that reference.
        # NOTE(review): titles are printed as returned by get_new_load();
        # confirm whether they need HTML escaping.
        for my $i ( 0 .. $#oids ) {
            print $q->Tr(
                { -valign => 'TOP' },
                $q->td(
                    { -class => 'resultsbody', -align => 'CENTER' },
                    $oids[$i]
                ),
                $q->td(
                    { -class => 'resultsbody', -align => 'CENTER' },
                    $titles[$i]
                ),
                $q->td(
                    { -class => 'resultsbody', -align => 'CENTER' },
                    $q->a(
                        { -href => "/perl/pub_search?ref_id=$rids[$i]" },
                        $rids[$i]
                    )
                )
            );
        }

        print end_table();
    }

}


# Finish the page body started with $page->start_body above.
print $page->end_body;


#######################################


# pm_journal( $pmid )
#
# Fetch one PubMed record as MEDLINE-format text.
#
#   $pmid - PubMed id; must consist of digits only
#
# Returns the response body on success.  On an invalid id or a failed request
# a message is written to STDERR and nothing is returned (undef in scalar
# context, the empty list in list context).
sub pm_journal {
    my ($pmid) = @_;

    # An empty or non-numeric id cannot name a record; reject it before
    # spending a request and the delay below on it.
    unless ( defined $pmid && $pmid =~ /\A[0-9]+\z/ ) {
        print STDERR "pm_journal: skipping invalid PubMed id '",
            ( defined $pmid ? $pmid : 'undef' ), "'\n";
        return;
    }

    # NOTE(review): pmfetch.fcgi is a legacy Entrez endpoint -- confirm it is
    # still served, or migrate to the E-utilities efetch service.
    my $url =
"http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch.fcgi?db=PubMed&id=$pmid&report=medline&mode=text";

    my $agent    = LWP::UserAgent->new;
    my $response = $agent->request( HTTP::Request->new( GET => $url ) );

    # Pause after every request, presumably to throttle a batch of ids.
    sleep 2;

    if ( $response->is_success ) {
        return $response->content;
    }

    print STDERR "$url failed: ", $response->message, "\n";
    return;
}


# process( $ref, $stuff )
#
# Parse one MEDLINE-format record and append it to the staging files.
#
#   $ref   - reference id written in the first column of every output row
#   $stuff - raw record text, handed to Gramene::Medline->new
#
# Output goes to the package filehandles DBX, AUTHOR, URL and REF, which the
# caller must already have opened for writing.  The callers in this file
# ignore the return value.
sub process {
    my ( $ref, $stuff ) = @_;

    #$ref=$reftx{$ref} if exists $reftx{$ref};
    my $ml = Gramene::Medline->new($stuff);

    # Journal abbreviation, minus a leading "In: " prefix.
    my $source = $ml->TA;
    $source = '' unless defined $source;
    $source =~ s/^In:\s+//;

    #$source=$jstd{$source} if exists $jstd{$source};
    print DBX "$ref\tPubMed\t", $ml->PMID, "\n" if $ml->PMID;

    # Page range.  Only the first two hyphen-separated parts are kept: a value
    # with more hyphens used to add extra columns to the REF row and shift
    # language/abstract out of place.
    my $page_range = $ml->PG;
    $page_range = '' unless defined $page_range;
    my ( $first_page, $last_page ) = split /-/, $page_range;
    $first_page = '' unless defined $first_page;
    $last_page  = '' unless defined $last_page;

    # Expand a shortened end page by borrowing the leading characters of the
    # start page ("1234-41" becomes 1234 / 1241).  A missing end page ends up
    # equal to the start page, as before.
    if ( length($last_page) < length($first_page) ) {
        my $borrowed = length($first_page) - length($last_page);
        $last_page = substr( $first_page, 0, $borrowed ) . $last_page;
    }

    # One AUTHOR row per author, numbered from 1 in the order returned.
    my $count = 0;
    for my $author ( $ml->AU ) {
        $count++;
        print AUTHOR "$ref\t$count\t$author\n";
    }

    #my $comment=join("\n",$ref->Remark,$ref->Comment,$ref->Remarks);
    #print CMT "$ref\n$comment$comment_end" if $comment;
    print URL "$ref\t" . $ml->URLF . "\n" if $ml->URLF;
    print URL "$ref\t" . $ml->URLS . "\n" if $ml->URLS;

    my $title = $ml->TI || '';
    $title =~ s/\.$//;    # drop the full stop that ends the title
    my $volume = $ml->VI || '';

    # The year is the first four-digit run in the publication date.
    my $pub_date = $ml->DP;
    my $year = ( defined $pub_date && $pub_date =~ /(\d{4})/ ) ? $1 : '';

    my $language = $ml->LA || '';
    my $abs      = $ml->AB || '';

    # Always exactly nine columns, matching the header written by the caller.
    print REF join( "\t",
        $ref,        $source,    $title,    $volume, $year,
        $first_page, $last_page, $language, $abs ),
        "\n";
}





