#!/bin/perl -w

use strict;

use CGI;
use CGI qw(:standard *table *TR *th *td *ul *ol);
use Gramene::Page; 
use Gramene::Literature::Pubmed;


use Carp;
use LWP;
use Gramene::Literature::Medline;


# Page bootstrap: create the CGI object, emit the HTTP header, and open the
# Gramene page wrapper. $doc_root is the web server's document root and is
# used below as the base directory for the tool's temporary files.
my $page_title = 'Curation tool';
my $q          = CGI->new;
print $q->header();

my $page     = Gramene::Page->new( Apache->request() );
my $doc_root = Apache->request()->document_root();

print $page->start_html( -title => $page_title );
print $page->start_body;


########################
#print("<center><h1>Literature Database</h1></center>" );

# Navigation bar shown at the top of the page. The entry for this tool itself
# is plain bold text (no link); the other entries link to sibling tools.
print <<SMALLBAR;
<table>
  <tr>
    <td>&nbsp;&nbsp;<img src="/images/icons/grain_icon.jpg" alt="grain_icon" height=16 width=16 align="top">
    </td>
    <td>
      &nbsp;<a href="/literature/" class="gopage"><b>Literature Home</b></a>&nbsp;
      |&nbsp;<a href="/newsletters/rice_genetics/" class="gopage"><b>Rice Genetics Newsletters</b></a>&nbsp;
      |&nbsp;<b>Tool for adding PubMed records</b>&nbsp;
      |&nbsp;<a href="/db/admin/literature/add_ref" class="gopage"><b>Tool for adding general references</b></a>&nbsp;
      |&nbsp;<a href="/db/admin/literature/edit_ref" class="gopage"><b>Tool
      for editing references</b></a>&nbsp;
    </td>
  </tr>
</table>
<p></p>
SMALLBAR

# Entry form: a single-column table holding a title row, a usage note, a
# textarea named 'original_ids' and the 'Get References' submit button.
# The form is submitted with GET back to this same script.

# Attributes shared by the three body rows of the form table.
my %search_row_attrs = (
    -align  => 'CENTER',
    -valign => 'CENTER',
    -nowrap => 'nowrap',
    -class  => 'searchbody',
);

# The pieces are collected in document order and every CGI call stays in
# list context, exactly as when they were passed inline as arguments.
my @search_form_parts = ( $q->start_form( -method => 'GET' ) );

push @search_form_parts,
    $q->Tr( { -align => 'CENTER', -nowrap => 'nowrap' },
            $q->th( { -align => 'CENTER', -class => 'searchtitle' },
                    "Please enter PubMed ids" ) );

push @search_form_parts,
    $q->Tr( { %search_row_attrs },
            $q->td( { -align => 'CENTER', -valign => 'CENTER' },
                    "<em><span class='alert'>Note: Please separate PubMed ids with spaces.</span></em>" ) );

push @search_form_parts,
    $q->Tr( { %search_row_attrs },
            $q->td( $q->textarea( -name    => 'original_ids',
                                  -rows    => 10,
                                  -columns => 40 ) ) );

push @search_form_parts,
    $q->Tr( { %search_row_attrs },
            $q->td( $q->submit( -name  => 'do_it',
                                -value => 'Get References' ) ) );

push @search_form_parts, $q->end_form();

print $q->table( { -border      => '0',
                   -cellspacing => '0',
                   -cellpadding => '3',
                   -width       => '98%',
                   -align       => 'CENTER' },
                 @search_form_parts );
print "<p></p>";
print $q->hr();

###########################

# Landing page (request without any CGI parameters): nothing beyond the form
# above is printed. The conditional that used to wrap the text below had an
# empty body, so it has been dropped; the retired help text is kept here for
# reference only.
#
#print <<FRONTPAGE;
#<hr>
#<table border="0" cellspacing="0" cellpadding="0" width="98%" align="CENTER"><tr><td>
#<ul>
#  <li>Enter one or more search terms.</li>
#  <li>Enter author names as "smith je", "smith j", or "smith". Initials are optional.</li>
#  <li>Enter journal titles.</li>
#</ul>
#</td></tr></table>
#FRONTPAGE


# Step 1 of the workflow: the curator submitted PubMed ids.
#
#   * ids reported by get_exist_pubmeds() as already present are listed with
#     a link to their internal record and are not fetched again;
#   * the remaining ids are fetched from PubMed (pm_journal), written to
#     pubmed_out.txt, converted by process() into the tab-separated
#     *_pm.txt files, and shown for confirmation with Load Them / Cancel.
#
# The tool lock is taken before any temporary file is written. On success it
# stays held until the curator answers the confirmation form; if anything
# dies while the files are being prepared the lock is released again so the
# tool is not blocked until the stale-lock timeout.
if( $q->param("original_ids") ) {

    my $original_ids = $q->param("original_ids");

    # split() returns a leading empty field when the text starts with a
    # non-digit (blank line, leading space); such fields are not ids.
    my @original_ids = grep { length $_ } split( /\D+/, $original_ids );
    if( @original_ids ) {

        my $db = Gramene::Literature::Pubmed->whatever();
        $db->connect_to_ora( );

        my ( $old_pid_ref, $old_rid_ref, $new_pid_ref ) = $db->get_exist_pubmeds( \@original_ids );
        my @old_pids = @$old_pid_ref;
        my @old_rids = @$old_rid_ref;
        my @new_pids = @$new_pid_ref;

        if( @old_pids ) {

            print $q->table( {-border=>'0',
                              -cellpadding=>'4',
                              -width=>'98%',
                              -align=>'CENTER'},
                             $q->Tr( $q->th( {-class=>'resultstitle',
                                              -align=>'CENTER'},
                                             "The following records will not be loaded, because we already have them in our database." ) ) );

            print start_table( {-width=>'98%', -cellpadding=>'4', -border=>'0', -align=>'CENTER'} );
            print $q->Tr( $q->td( {-class=>'resultstitle',
                                   -align=>'CENTER'}, "&nbsp;" ),
                          $q->td( {-class=>'resultstitle',
                                   -align=>'CENTER'}, "PubMed id" ),
                          $q->td( {-class=>'resultstitle',
                                   -align=>'CENTER'}, "Gramene internal id" ) );

            # The first column is a 1-based row counter.
            for my $i ( 0 .. $#old_pids ) {

                print $q->Tr( {-valign=>'TOP'},
                              $q->td( {-class=>'resultsbody',
                                       -align=>'CENTER'}, $i + 1 ),
                              $q->td( {-class=>'resultsbody',
                                       -align=>'CENTER'}, $old_pids[$i] ),
                              $q->td( {-class=>'resultsbody',
                                       -align=>'CENTER'}, $q->a( {-href=>"/db/literature/pub_search?ref_id=$old_rids[$i]"}, $old_rids[$i] ) ) );
            }

            print end_table;
        }

        if( @new_pids ) {

            if( is_locked($doc_root) ) {

                print ( "<center><h2>Sorry, another session is going on! Please try again later!</h2></center>" );

            } else {

                # NOTE(review): is_locked() followed by lock_db() is not
                # atomic; two simultaneous requests can both get here.
                lock_db($doc_root);

                my $prepared = eval {

                    my $pm_dir  = "$doc_root/tmp/literature/pubmed";
                    my $pm_file = "$pm_dir/pubmed_out.txt";

                    # Fetch every new record; entries are separated by a
                    # "=====>PMID" marker line. Failed fetches are skipped.
                    open( my $pm_out, '>', $pm_file ) or die "cannot open $pm_file for writing: $!";
                    foreach my $oid ( @new_pids ) {
                        my $pmentry = pm_journal( $oid, $doc_root ) or next;
                        print {$pm_out} "=====>$oid\n$pmentry\n";
                    }
                    close( $pm_out ) or die "cannot close $pm_file: $!";

                    # process() prints to these four package filehandles, so
                    # they must keep their bareword names.
                    open( REF,    '>', "$pm_dir/Reference_pm.txt" )  or die "Reference_pm.txt:$!";
                    open( AUTHOR, '>', "$pm_dir/Author_pm.txt" )     or die "Author_pm.txt:$!";
                    open( DBX,    '>', "$pm_dir/Ref_dbxref_pm.txt" ) or die "Ref_dbxref_pm.txt:$!";
                    open( URL,    '>', "$pm_dir/Ref_URL_pm.txt" )    or die "Ref_URL_pm.txt:$!";

                    print AUTHOR "#reference\tcount\tauthor\n";
                    print DBX "#reference\tdatabase\taccession\n" ;
                    print REF "#reference\tsource\ttitle\tvolume\tyear\tstartpage\tendpage\tlanguage\tabstract\n";
                    print URL "#reference\tURL\n" ;

                    # Re-read the fetched text and hand each entry, together
                    # with the id from its marker line, to process().
                    open( my $pm_in, '<', $pm_file ) or die "pubmed_out.txt:$!";

                    my $entry = '';
                    my $refid = '';
                    while ( my $line = <$pm_in> ) {
                        if ( $line =~ /^=====>(.*)/ ) {
                            my $newrefid = $1;
                            process( $refid, $entry ) if $refid;
                            $refid = $newrefid;
                            $entry = '';
                        } else {
                            $entry .= $line;
                        }
                    }
                    process( $refid, $entry ) if $refid;

                    close( $pm_in ) or die "cannot close $pm_file: $!";

                    close (REF) || die "cannot close Reference_pm.txt: $!";
                    close (AUTHOR) || die "cannot close Author_pm.txt: $!";
                    close (DBX) || die "cannot close Ref_dbxref_pm.txt: $!";
                    close (URL) || die "cannot close Ref_URL_pm.txt: $!";

                    # Show what was retrieved and ask for confirmation.
                    open( my $ref_in, '<', "$pm_dir/Reference_pm.txt" ) or die "Reference_pm.txt:$!";

                    print $q->start_form(-method=>'GET');
                    print $q->table( {-border=>'0',
                                      -cellpadding=>'4',
                                      -width=>'98%',
                                      -align=>'CENTER'},
                                     $q->Tr( $q->th( {-class=>'resultstitle',
                                                      -align=>'CENTER'},
                                                     "The following records have been retrieved from PubMed. They will be loaded at your command.<br>If you do not want them to be loaded, please click Cancel to unlock the tool." ) ) );
                    print start_table( {-width=>'98%', -cellpadding=>'4', -border=>'0', -align=>'CENTER'} );

                    print $q->Tr( $q->td( {-class=>'resultstitle',
                                           -align=>'CENTER'}, "PubMed id" ),
                                  $q->td( {-class=>'resultstitle',
                                           -align=>'CENTER'}, "PubMed record" ) );

                    while ( my $line = <$ref_in> ) {
                        chomp $line;

                        # Skip blank lines and the "#..." header line.
                        next unless $line;
                        next if index( $line, "#" ) == 0;

                        # Columns: reference, source, title, volume, year,
                        # startpage, endpage. The text comes from a remote
                        # service, so it is HTML-escaped before display;
                        # missing columns are shown as empty strings.
                        my @fields = map { defined $_ ? $q->escapeHTML($_) : '' }
                                     ( split( /\t/, $line, -1 ) )[ 0 .. 6 ];

                        print( "<tr><td class=\"resultsbody\">$fields[0]</td><td class=\"resultsbody\"><p>$fields[2]</p>" );
                        print( "<p>$fields[1], $fields[4], $fields[3], $fields[5]-$fields[6]</p><p></p></td></tr>" );
                    }
                    close( $ref_in ) or die "cannot close Reference_pm.txt: $!";
                    print end_table;

                    print ( "<br></br>" );
                    print $q->table( {-border=>'0',
                                      -cellpadding=>'2',
                                      -width=>'30%',
                                      -align=>'CENTER'},
                                     $q->Tr( {-align=>'CENTER',
                                              -valign=>'CENTER',
                                              -nowrap=>'nowrap',
                                              -class=>'resultsbody'},
                                             $q->td( {-colspan=>'2'},
                                                     $q->submit( -name=>'load_it',
                                                                 -value=>'Load Them' ),
                                                     "&nbsp;&nbsp;&nbsp;&nbsp;",
                                                     $q->submit( -name=>'load_it',
                                                                 -value=>'Cancel' ) ) ) );

                    print $q->end_form();

                    1;
                };

                unless ( $prepared ) {
                    # Preparation failed: give the lock back (best effort)
                    # and re-raise the original error.
                    my $error = $@ || "preparing PubMed records failed\n";
                    eval { unlock_db($doc_root) };
                    die $error;
                }
            }
        }
    }

}


# Step 2 of the workflow: the curator answered the confirmation form.
# "Load Them" loads the prepared references and lists what was loaded; any
# other value of load_it (the form offers "Cancel") loads nothing.
# In every case the tool lock is released afterwards -- including when one of
# the load steps dies, in which case the error is re-raised after unlocking.
if( $q->param("load_it") ) {

    my $finished = eval {

        if( $q->param("load_it") eq "Load Them" ) {

            my $db = Gramene::Literature::Pubmed->whatever();
            $db->connect_to_ora( );

            $db->load_references();
            $db->load_authors();
            $db->do_extra_work();

            my ( $o_ref, $r_ref, $title_ref ) = $db->get_new_load();
            my @oids   = @$o_ref;
            my @rids   = @$r_ref;
            my @titles = @$title_ref;

            if( @rids ) {

                print $q->table( {-border=>'0',
                                  -cellpadding=>'2',
                                  -width=>'98%',
                                  -align=>'CENTER'},
                                 $q->Tr( $q->th( {-class=>'resultstitle',
                                                  -align=>'CENTER'},
                                                 "The following references have been loaded into our database." ) ) );

                print start_table( {-width=>'98%', -cellpadding=>'4', -border=>'1', -align=>'CENTER'} );
                print $q->Tr( $q->td( {-class=>'resultstitle',
                                       -align=>'CENTER'}, "PubMed id" ),
                              $q->td( {-class=>'resultstitle',
                                       -align=>'CENTER'}, "Title" ),
                              $q->td( {-class=>'resultstitle',
                                       -align=>'CENTER'}, "Gramene internal id" ) );

                # One row per loaded record; the three arrays are parallel.
                # NOTE(review): titles are printed as stored, without HTML
                # escaping -- confirm the stored text is safe to emit.
                for my $i ( 0 .. $#oids ) {

                    print $q->Tr( {-valign=>'TOP'},
                                  $q->td( {-class=>'resultsbody',
                                           -align=>'CENTER'}, $oids[$i] ),
                                  $q->td( {-class=>'resultsbody',
                                           -align=>'CENTER'}, $titles[$i] ),
                                  $q->td( {-class=>'resultsbody',
                                           -align=>'CENTER'}, $q->a( {-href=>"/db/literature/pub_search?ref_id=$rids[$i]"}, $rids[$i] ) ) );
                }

                print end_table;
            }
        }

        1;
    };
    my $load_error = $@;    # capture before anything else can reset $@

    unlock_db($doc_root);

    die( $load_error || "loading PubMed references failed\n" ) unless $finished;

}


print $page->end_body;


#######################################


# pm_journal($pmid, $doc_root)
#
# Fetch one PubMed record as MEDLINE-format text over HTTP.
#
#   $pmid     - PubMed id; must consist of digits only.
#   $doc_root - accepted for call compatibility; not used here.
#
# Returns the response body on success. Returns nothing (false) when the id
# is not numeric or the request fails; a failed request is logged to STDERR.
#
# The tool lock is deliberately NOT released on failure: the caller skips the
# failed id and carries on with the same session, so the lock must stay held
# until that session is loaded or cancelled.
#
# NOTE(review): the pmfetch.fcgi endpoint may have been retired by NCBI in
# favour of E-utilities efetch -- confirm before relying on this URL.
sub pm_journal {
    my ( $pmid, $doc_root ) = @_;

    # Never build a request from an empty or non-numeric id.
    return unless defined $pmid && $pmid =~ /\A\d+\z/;

    my $url = "http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch.fcgi?db=PubMed&id=$pmid&report=medline&mode=text";

    my $agent    = LWP::UserAgent->new;
    my $response = $agent->request( HTTP::Request->new( GET => $url ) );

    # Pause after every request so consecutive fetches are spaced out.
    sleep 2;

    return $response->content if $response->is_success;

    print STDERR "$url failed: ", $response->message, "\n";
    return;
}


# process($ref, $stuff)
#
# Parse one MEDLINE-format entry and append it to the tab-separated load
# files. Output goes to the package filehandles REF, AUTHOR, DBX and URL,
# which the caller must have opened for writing.
#
#   $ref   - reference key written in the first column of every row.
#   $stuff - text of one entry, handed to Gramene::Literature::Medline->new.
#
# Rows written:
#   DBX    - "$ref  PubMed  <PMID>"      (only when PMID is set)
#   AUTHOR - "$ref  <n>  <author>"       (one per AU value, n counted from 1)
#   URL    - "$ref  <url>"               (for URLF and URLS when set)
#   REF    - $ref, source, title, volume, year, startpage, endpage,
#            language, abstract          (always exactly nine columns)
sub process {
    my ( $ref, $stuff ) = @_;

    my $ml = Gramene::Literature::Medline->new($stuff);

    # Journal abbreviation, without a leading "In: ".
    my $source = $ml->TA;
    $source = '' unless defined $source;
    $source =~ s/^In:\s+//;

    print DBX "$ref\tPubMed\t", $ml->PMID, "\n" if $ml->PMID;

    # Page range "start-end". Only the first two parts are kept so the REF
    # row always has the same number of columns, even for values that
    # contain more than one "-".
    my $page_range = $ml->PG;
    $page_range = '' unless defined $page_range;
    my ( $start_page, $end_page ) = split /-/, $page_range;
    $start_page = '' unless defined $start_page;
    $end_page   = '' unless defined $end_page;

    # An abbreviated end page borrows the leading digits of the start page
    # ("1234-41" -> "1241"). A missing end page therefore becomes a copy of
    # the start page, which matches the rows this tool has always written.
    if ( length($end_page) < length($start_page) ) {
        $end_page = substr( $start_page, 0,
                            length($start_page) - length($end_page) )
                  . $end_page;
    }

    my $count = 1;
    for my $author ( $ml->AU ) {
        print AUTHOR "$ref\t" . $count++ . "\t$author\n";
    }

    print URL "$ref\t" . $ml->URLF . "\n" if $ml->URLF;
    print URL "$ref\t" . $ml->URLS . "\n" if $ml->URLS;

    my $title = $ml->TI || '';
    $title =~ s/\.$//;          # drop the trailing full stop

    my $volume = $ml->VI || '';

    # The year is the first run of four digits in the publication date.
    my $pub_date = $ml->DP;
    my $year = ( defined $pub_date && $pub_date =~ /(\d{4})/ ) ? $1 : '';

    my $language = $ml->LA || '';
    my $abs      = $ml->AB || '';

    print REF join( "\t", $ref, $source, $title, $volume, $year,
                    $start_page, $end_page, $language, $abs ), "\n";
}


# is_locked($doc_root)
#
# Report whether the tool is currently locked by another session.
#
# The lock state is the first line of
# "$doc_root/tmp/literature/pubmed/LOCK_FLAG": a true value means locked.
# A flag file that does not exist yet means the lock was never taken, so the
# tool is reported as unlocked instead of dying. A lock whose flag file was
# last modified more than one hour ago is treated as stale: it is released
# via unlock_db() and reported as unlocked.
#
# Returns a true value when locked, a false value otherwise.
# Dies if an existing flag file cannot be opened or closed.
sub is_locked {
    my $doc_root  = shift;
    my $flag_file = "$doc_root/tmp/literature/pubmed/LOCK_FLAG";

    return 0 unless -e $flag_file;

    open( my $flag_fh, '<', $flag_file ) or die "can't open LOCK_FLAG:$!";
    my $is_locked = <$flag_fh>;
    close($flag_fh) || die "cannot close LOCK_FLAG: $!";

    # An empty flag file reads as undef; treat it as "not locked".
    $is_locked = '' unless defined $is_locked;
    chomp $is_locked;

    if ($is_locked) {
        my $last_mod_time = ( stat($flag_file) )[9];    # last modified time
        my $time_out      = 3600;                       # 1 hour

        if ( defined $last_mod_time && time() - $last_mod_time > $time_out ) {
            unlock_db($doc_root);
            $is_locked = 0;
        }
    }

    return $is_locked;
}


# lock_db($doc_root)
#
# Mark the tool as locked by writing "1" to
# "$doc_root/tmp/literature/pubmed/LOCK_FLAG", creating or truncating the
# file. Rewriting the file also refreshes its modification time.
#
# Returns nothing. Dies if the flag file cannot be opened or closed.
sub lock_db {
    my $doc_root  = shift;
    my $flag_file = "$doc_root/tmp/literature/pubmed/LOCK_FLAG";

    # Three-argument open with a lexical handle: the path is never parsed
    # for a mode, and the handle is not shared with other subs.
    open( my $flag_fh, '>', $flag_file ) or die "can't open LOCK_FLAG:$!";
    print {$flag_fh} "1\n";
    close($flag_fh) || die "can't close LOCK_FLAG: $!";
    return;
}


# unlock_db($doc_root)
#
# Mark the tool as unlocked by writing "0" to
# "$doc_root/tmp/literature/pubmed/LOCK_FLAG", creating or truncating the
# file.
#
# Returns nothing. Dies if the flag file cannot be opened or closed.
sub unlock_db {
    my $doc_root  = shift;
    my $flag_file = "$doc_root/tmp/literature/pubmed/LOCK_FLAG";

    # Three-argument open with a lexical handle: the path is never parsed
    # for a mode, and the handle is not shared with other subs.
    open( my $flag_fh, '>', $flag_file ) or die "can't open LOCK_FLAG:$!";
    print {$flag_fh} "0\n";
    close($flag_fh) || die "can't close LOCK_FLAG: $!";
    return;
}
    




