package Gramene::Pubmed; 

# A module for the add_ref_tool of Gramene.     --------Wei Zhao (03/08/02)  


use strict;
use DBI;
use CGI;
use CSHL::Config;

my %attr;
my $dbh;
my $old_ref_id;

# Constructor (despite its name): blesses a fresh, empty hash into the class
# passed as the first argument and returns it. The object carries no state of
# its own; the database handle used by the other subs is the file-level $dbh.
sub whatever {
    my ($class) = @_;

    return bless {}, $class;
}


# Destructor: when an instance is destroyed, disconnect the file-level
# database handle ($dbh) if one is currently set. Nothing is done otherwise.
sub DESTROY {
    my ($self) = @_;

    return unless $dbh;

    $self->terminate_database;
}


# Disconnects the file-level database handle ($dbh), if any, and then forgets
# it.
#
# Clearing $dbh matters because DESTROY calls this sub for every instance that
# is destroyed: without it, each later call would invoke disconnect() again on
# a handle that is already closed, and the "if $dbh" guards in this file could
# never become false once a connection had been made.
#
# Returns the result of disconnect(), or nothing when there was no handle.
sub terminate_database {
    my ($self) = @_;

    return unless $dbh;

    my $rc = $dbh->disconnect();
    $dbh = undef;

    return $rc;
}


# Opens the database connection used by every other sub in this file and
# stores it in the file-level $dbh.
#
# The data source, user and password come from the barewords
# OntologyDataSource, OntologyDBUser and OntologyDBPassword, which are not
# defined in this file (presumably exported by CSHL::Config -- confirm).
#
# Dies if the connection cannot be established; returns a true value otherwise.
sub connect_to_ora {

    ### Attributes to pass to DBI->connect()
    # RaiseError makes every failing DBI call die. None of the prepare/execute
    # calls visible in this file check their return value, so without it a
    # failed statement would go unnoticed.
    %attr = (
        #PrintError => 1,
        RaiseError => 1,
    );

    # Connect to the database. The attribute hash was previously built but
    # never handed to connect(), which left RaiseError switched off.
    $dbh = DBI->connect( OntologyDataSource, OntologyDBUser, OntologyDBPassword, \%attr )
        || die "Can't connect to Oracle database: $DBI::errstr\n";

    # Fetch at most 5000 bytes of a long column, and accept truncation of
    # longer values instead of failing the fetch.
    $dbh->{LongReadLen} = 5000;
    $dbh->{LongTruncOk} = 1;

    return 1;
}


################################# RefCuration ###########################

# Splits a list of PubMed ids into those already loaded and those still new.
#
# The first argument is ignored (the sub reads only its second argument, so it
# is presumably meant to be called as a method); the second must be a
# reference to an array of PubMed ids.
#
# Reads every (row_id, xref_key) pair from objectxref_temp whose table_name is
# 'gramene.ontology.reference' and whose xref_dbname is 'PubMed'. If an
# xref_key is fetched more than once, the row_id fetched last is kept.
#
# Returns three array references, each in input order:
#   1. the ids for which a true row_id was found,
#   2. the row_id of each of those ids (parallel to the first array),
#   3. the ids for which no (or a false) row_id was found.
sub get_exist_pubmeds {
    my ( undef, $pm_ref ) = @_;

    # Copy up front so that a missing array reference fails right here.
    my @pm_ids = @{$pm_ref};

    my $lookup = $dbh->prepare( "select row_id, xref_key
                               from objectxref_temp
                               where table_name = 'gramene.ontology.reference'
                               and xref_dbname = 'PubMed'" );
    $lookup->execute();

    my %row_id_for;
    while ( my ( $row_id, $xref_key ) = $lookup->fetchrow_array ) {
        $row_id_for{$xref_key} = $row_id;
    }
    $lookup->finish;

    my @old_pids = grep { $row_id_for{$_} } @pm_ids;
    my @old_rids = @row_id_for{@old_pids};
    my @new_pids = grep { !$row_id_for{$_} } @pm_ids;

    return ( \@old_pids, \@old_rids, \@new_pids );
}



# Loads the references listed in PUBMED/Reference_pm.txt into the database.
#
# Input: one tab-separated record per line; empty lines and lines starting
# with "#" are skipped. Columns as used below:
#   0  original id   1  source name   2  title     3  volume   4  year
#   5  start page    6  end page      7  language  8  abstract (optional)
#
# For every line that has a title:
#   - one leading "[" and one trailing "]" are removed from the title;
#   - the source is looked up by its upper-cased name in source,
#     source_synonym and old_source; if it is unknown, a new SOURCE row is
#     inserted with the next free id and logged to PUBMED/source_insert.txt;
#   - a REFERENCE row is inserted with the next free id and logged to
#     PUBMED/reference_insert.txt;
#   - the new reference id, the original id and the literal "pubmed" are
#     inserted into reference_load_help;
#   - a non-empty abstract is inserted into REFERENCE_ABSTRACT, split into two
#     parts after 3950 characters when it is longer than that.
# Lines without a title are only reported in PUBMED/reference_look.txt.
#
# Side effect: sets the file-level $old_ref_id to the highest reference_id
# that existed before the load (0 for an empty table); do_extra_work and
# get_new_load use it to find the rows added here.
#
# Dies if any of the files cannot be opened or closed.
sub load_references {

    my $ref_file       = "PUBMED/Reference_pm.txt";
    my $output_file    = "PUBMED/reference_insert.txt";
    my $source_file    = "PUBMED/source_insert.txt";
    my $reference_file = "PUBMED/reference_look.txt";

    open( my $out_fh, '>', $output_file )    || die "cannot create $output_file: $!";
    open( my $sou_fh, '>', $source_file )    || die "cannot create $source_file: $!";
    open( my $ref_fh, '>', $reference_file ) || die "cannot create $reference_file: $!";

    # Prepare INSERT statements
    my $insert_source = $dbh->prepare( "INSERT INTO SOURCE VALUES ( ?, ?, ? )" );

    my $insert_reference = $dbh->prepare(
        "INSERT INTO REFERENCE (REFERENCE_ID, SOURCE_ID, TITLE, VOLUME, YEAR, START_PAGE, END_PAGE, LANGUAGE)
         VALUES ( ?, ?, ?, ?, ?, ?, ?, ? )" );

    my $insert_abstract_a = $dbh->prepare(
        "INSERT INTO REFERENCE_ABSTRACT (REFERENCE_ID, ABSTRACT_PART_A)
         VALUES ( ?, ? )" );
    my $insert_abstract_ab = $dbh->prepare( "INSERT INTO REFERENCE_ABSTRACT VALUES ( ?, ?, ? )" );

    my $insert_load_help = $dbh->prepare( "INSERT INTO reference_load_help VALUES ( ?, ?, ? )" );

    # Current highest ids. MAX() yields NULL on an empty table, so fall back
    # to 0 (as load_dbxref does); otherwise $old_ref_id would be undef and the
    # "reference_id > ?" queries in do_extra_work/get_new_load would match
    # nothing after the very first load.
    my $max_source = $dbh->prepare( "SELECT MAX(source_id) FROM source" );
    $max_source->execute();
    my ($source_id) = $max_source->fetchrow_array();
    $source_id ||= 0;
    $max_source->finish;

    my $max_reference = $dbh->prepare( "SELECT MAX(reference_id) FROM reference" );
    $max_reference->execute();
    my ($reference_id) = $max_reference->fetchrow_array();
    $reference_id ||= 0;
    $max_reference->finish;

    $old_ref_id = $reference_id;

    my $find_source = $dbh->prepare(
        "SELECT source_id FROM source
         WHERE UPPER(source_name) = ?
         UNION
         SELECT source_id FROM source_synonym
         WHERE UPPER(source_synonym) = ?
         UNION
         SELECT source_id FROM old_source
         WHERE UPPER(old_name) = ?" );

    open( my $in_fh, '<', $ref_file ) || die "cannot open $ref_file for reading: $!";

    while ( my $line = <$in_fh> ) {
        chomp $line;
        next if !$line || index( $line, "#" ) == 0;

        my @fields = split( /\t/, $line );
        my ( $original_id, $source_name, $title, $volume, $year,
             $start_page, $end_page, $language, $abstract ) = @fields;

        unless ($title) {
            print {$ref_fh} "NO title: $original_id\n";
            next;
        }

        # Strip one enclosing bracket on each side. Anchored substitutions are
        # safe for a title that is just "[", where an lvalue substr() on the
        # then-empty string would be a fatal "substr outside of string".
        $title =~ s/\A\[//;
        $title =~ s/\]\z//;

        my $real_title = $title;
        $title =~ tr/a-z/A-Z/;

        # A duplicate check by upper-cased title used to sit here but had been
        # disabled (commented out); every titled line is inserted.

        my $source_key = $source_name;
        $source_key =~ tr/a-z/A-Z/;

        $find_source->execute( $source_key, $source_key, $source_key );
        my ($s_id) = $find_source->fetchrow_array();

        unless ($s_id) {
            $source_id = $source_id + 1;
            $insert_source->execute( $source_id, $source_name, "Not available" );
            $s_id = $source_id;
            print {$sou_fh} "$source_id\t$source_name\tNot available\n";
        }

        ###insert reference
        $reference_id = $reference_id + 1;
        $insert_reference->execute(
            $reference_id, $s_id,       $real_title, $volume,
            $year,         $start_page, $end_page,   $language
        );

        # NOTE(review): the log line carries the upper-cased title although
        # the row is inserted with the original-case title -- kept as is;
        # confirm whether that is intended.
        print {$out_fh} "$reference_id\t$s_id\t$title\t$volume\t$year\t$start_page\t$end_page\t$language\n";

        $insert_load_help->execute( $reference_id, $original_id, "pubmed" );

        # Insert records into table REFERENCE_ABSTRACT
        if ($abstract) {
            if ( length($abstract) <= 3950 ) {
                $insert_abstract_a->execute( $reference_id, $abstract );
            }
            else {
                $insert_abstract_ab->execute(
                    $reference_id,
                    substr( $abstract, 0, 3950 ),
                    substr( $abstract, 3950 )
                );
            }
        }
    }

    $insert_source->finish;
    $insert_reference->finish;
    $insert_abstract_a->finish;
    $insert_abstract_ab->finish;
    $insert_load_help->finish;
    $find_source->finish;

    close($in_fh)  || die "can't close $ref_file: $!\n";
    close($out_fh) || die "can't close $output_file: $!\n";
    close($ref_fh) || die "can't close $reference_file: $!\n";
    close($sou_fh) || die "can't close $source_file: $!\n";

    return;

}


#############################################################


# Loads the authors listed in PUBMED/Author_pm.txt into CONTRIBUTOR and
# AUTHOR.
#
# Input: one tab-separated record per line; empty lines and lines starting
# with "#" are skipped. Columns as used below:
#   0  original id of the reference (looked up in reference_load_help)
#   1  authorship position
#   2  author name, split on single spaces into a first part and an optional
#      second part of at most two characters
#
# For every usable line:
#   - the name is rewritten as "<first part>-<c1>[-<c2>]", where c1/c2 are the
#     characters of the second part;
#   - the contributor is looked up by the upper-cased name and inserted with
#     the next free id if unknown (logged to PUBMED/contributor_insert.txt);
#     this happens before the reference lookup, so a contributor may be
#     created even when the reference turns out to be missing;
#   - an AUTHOR row is inserted (logged to PUBMED/author_insert.txt) unless
#     the reference already has this contributor or already has an author at
#     this position.
# Every skipped line is reported, with its reason, in PUBMED/author_look.txt.
#
# Dies if any of the files cannot be opened or closed.
sub load_authors {

    my $ref_file         = "PUBMED/Author_pm.txt";
    my $output_file      = "PUBMED/author_look.txt";
    my $author_file      = "PUBMED/author_insert.txt";
    my $contributor_file = "PUBMED/contributor_insert.txt";

    open( my $out_fh, '>', $output_file )      || die "cannot create $output_file: $!";
    open( my $au_fh,  '>', $author_file )      || die "cannot create $author_file: $!";
    open( my $con_fh, '>', $contributor_file ) || die "cannot create $contributor_file: $!";

    # Prepare INSERT statements
    my $insert_contributor = $dbh->prepare(
        "INSERT INTO CONTRIBUTOR (CONTRIBUTOR_ID, CONTRIBUTOR_NAME, CONTRIBUTOR_EMAIL, CONTRIBUTOR_ORGANIZATION)
         VALUES ( ?, ?, ?, ? )" );
    my $insert_author = $dbh->prepare( "INSERT INTO AUTHOR VALUES ( ?, ?, ? )" );

    my $find_by_position = $dbh->prepare(
        "SELECT contributor_id FROM author
         WHERE reference_id = ?
         AND authorship_position = ?" );

    my $max_contributor = $dbh->prepare( "SELECT MAX(contributor_id) FROM contributor" );
    $max_contributor->execute();
    my $contributor_id = $max_contributor->fetchrow_array();

    my $find_contributor = $dbh->prepare(
        "SELECT contributor_id FROM contributor
         WHERE UPPER(contributor_name) = ?" );

    my $find_reference = $dbh->prepare(
        "SELECT reference_id FROM reference_load_help
         WHERE original_id = ?" );

    my $find_by_contributor = $dbh->prepare(
        "SELECT authorship_position FROM author
         WHERE reference_id = ?
         AND contributor_id = ?" );

    open( my $in_fh, '<', $ref_file ) || die "cannot open $ref_file for reading: $!";

    while ( my $line = <$in_fh> ) {
        chomp $line;
        next if !$line || index( $line, "#" ) == 0;

        my @fields = split( /\t/, $line );
        my ( $original_id, $position, $name ) = @fields;

        unless ($original_id) {
            print {$out_fh} "No original id: $line\n";
            next;
        }

        my @parts = split( / /, $name );

        if ( $parts[2] || ( length( $parts[1] ) > 2 ) ) {
            print {$out_fh} "Contributor name too long: $line\n";
            next;
        }

        # Build "<first part>-<c1>[-<c2>]"; the second part holds one or two
        # characters at this point.
        my $real_word = $parts[0];
        if ( $parts[1] ) {
            $real_word = join( "-", $parts[0], split( //, $parts[1] ) );
        }
        my $word = $real_word;
        $word =~ tr/a-z/A-Z/;

        $find_contributor->execute($word);
        my $c_id = $find_contributor->fetchrow_array();

        unless ($c_id) {
            $contributor_id = $contributor_id + 1;
            $insert_contributor->execute( $contributor_id, $real_word, "Not available", "Not available" );
            print {$con_fh} "$contributor_id\t$real_word\tNot available\tNot available\n";
            $c_id = $contributor_id;
        }

        $find_reference->execute($original_id);
        my $ref_id = $find_reference->fetchrow_array();

        unless ($ref_id) {
            print {$out_fh} "didn't find the ref: $line\n";
            next;
        }

        $find_by_contributor->execute( $ref_id, $c_id );
        if ( $find_by_contributor->fetchrow_array() ) {
            print {$out_fh} "Contributor already there: $line\n";
            next;
        }

        $find_by_position->execute( $ref_id, $position );
        if ( $find_by_position->fetchrow_array() ) {
            print {$out_fh} "Authorship_position already there: $line\n";
            next;
        }

        $insert_author->execute( $c_id, $ref_id, $position );
        print {$au_fh} "$c_id\t$ref_id\t$position\n";
    }

    $insert_contributor->finish;
    $insert_author->finish;
    $find_by_position->finish;
    $max_contributor->finish;
    $find_contributor->finish;
    $find_reference->finish;
    $find_by_contributor->finish;

    close($in_fh) || die "can't close $ref_file: $!\n";

    # The two insert logs were previously left open; closing them here makes
    # buffered write errors visible.
    close($out_fh) || die "can't close $output_file: $!\n";
    close($au_fh)  || die "can't close $author_file: $!\n";
    close($con_fh) || die "can't close $contributor_file: $!\n";

    return;

}


# Loads the cross references listed in PUBMED/Ref_dbxref_pm.txt.
#
# Input: one tab-separated record per line; empty lines and lines starting
# with "#" are skipped. Columns as used below:
#   0  original id of the reference (looked up in reference_load_help with
#      reference_source = 'pubmed')
#   1  value stored as xref_dbname
#   2  value stored as xref_key
#
# For every line whose reference is found:
#   - a DBXREF row is inserted with the next free id, unless one already
#     exists for (xref_key, 'acc', 'PubMed') (logged to
#     PUBMED/dbxref_insert.txt);
#   - an OBJECTXREF row linking the reference to that dbxref is inserted
#     (logged to PUBMED/object_insert.txt);
#   - a matching row is inserted into OBJECTXREF_TEMP.
# Lines whose reference is not found are reported in PUBMED/dbxref_look.txt;
# lines with a false value in column 0 are skipped without a report.
#
# Dies if any of the files cannot be opened or closed.
sub load_dbxref {

    my $ref_file    = "PUBMED/Ref_dbxref_pm.txt";
    my $output_file = "PUBMED/dbxref_look.txt";
    my $dbxref_file = "PUBMED/dbxref_insert.txt";
    my $object_file = "PUBMED/object_insert.txt";

    open( my $out_fh, '>', $output_file ) || die "cannot create $output_file: $!";
    open( my $dbx_fh, '>', $dbxref_file ) || die "cannot create $dbxref_file: $!";
    open( my $obj_fh, '>', $object_file ) || die "cannot create $object_file: $!";

    my $insert_dbxref = $dbh->prepare(
        "INSERT INTO DBXREF( dbxref_id, xref_key, xref_keytype, xref_dbname ) VALUES ( ?, ?, ?, ? )" );
    my $insert_objectxref = $dbh->prepare( "INSERT INTO OBJECTXREF VALUES ( ?, ?, ?, ? )" );
    my $insert_objectxref_temp = $dbh->prepare( "INSERT INTO OBJECTXREF_TEMP VALUES ( ?, ?, ?, ?, ? )" );

    # Current highest ids; MAX() yields NULL on an empty table, hence the 0.
    my $max_dbxref = $dbh->prepare( "SELECT MAX(dbxref_id) FROM dbxref" );
    $max_dbxref->execute();
    my $dbxref_id = $max_dbxref->fetchrow_array();
    $dbxref_id ||= 0;

    my $max_objectxref = $dbh->prepare( "SELECT MAX(objectxref_id) FROM objectxref" );
    $max_objectxref->execute();
    my $objectxref_id = $max_objectxref->fetchrow_array();
    $objectxref_id ||= 0;

    my $find_reference = $dbh->prepare(
        "SELECT reference_id FROM reference_load_help
         WHERE original_id = ?
         and reference_source = 'pubmed'" );

    my $find_dbxref = $dbh->prepare(
        "SELECT dbxref_id FROM dbxref
         WHERE xref_key = ?
         AND xref_keytype = ?
         AND xref_dbname = ?" );

    open( my $in_fh, '<', $ref_file ) || die "cannot open $ref_file for reading: $!";

    while ( my $line = <$in_fh> ) {
        chomp $line;
        next if !$line || index( $line, "#" ) == 0;

        my @fields = split( /\t/, $line );
        my ( $original_id, $xref_dbname, $xref_key ) = @fields;

        next unless $original_id;

        $find_reference->execute($original_id);
        my $ref_id = $find_reference->fetchrow_array();

        unless ($ref_id) {
            print {$out_fh} "didn't find the ref: $line\n";
            next;
        }

        # NOTE(review): the lookup uses the literal "PubMed" while the insert
        # below stores column 1 as xref_dbname; if the two ever differ, a new
        # DBXREF row is created on every run -- confirm the input always
        # carries "PubMed".
        $find_dbxref->execute( $xref_key, "acc", "PubMed" );
        my $db_id = $find_dbxref->fetchrow_array();

        unless ($db_id) {
            $dbxref_id = $dbxref_id + 1;
            $insert_dbxref->execute( $dbxref_id, $xref_key, "acc", $xref_dbname );
            $db_id = $dbxref_id;
            print {$dbx_fh} "$dbxref_id\t$xref_key\tacc\t$xref_dbname\n";
        }

        $objectxref_id = $objectxref_id + 1;
        $insert_objectxref->execute( $objectxref_id, "gramene.ontology.reference", $ref_id, $db_id );
        print {$obj_fh} "$objectxref_id\tgramene.ontology.reference\t$ref_id\t$db_id\n";

        $insert_objectxref_temp->execute(
            "gramene.ontology.reference", $ref_id, $xref_key, "acc", $xref_dbname
        );
    }

    $insert_dbxref->finish;
    $insert_objectxref->finish;
    $max_dbxref->finish;
    $max_objectxref->finish;
    $find_reference->finish;
    $find_dbxref->finish;
    $insert_objectxref_temp->finish;

    close($in_fh) || die "can't close $ref_file: $!\n";

    # The two insert logs were previously left open; closing them here makes
    # buffered write errors visible.
    close($out_fh) || die "can't close $output_file: $!\n";
    close($dbx_fh) || die "can't close $dbxref_file: $!\n";
    close($obj_fh) || die "can't close $object_file: $!\n";

    return;
}


# Fills reference_extra for every reference whose id is greater than the
# file-level $old_ref_id (set by load_references), in ascending id order.
#
# For each such reference one row is inserted, holding:
#   - the reference id;
#   - the title with every non-word character replaced by a space and one
#     space added on each side;
#   - the source name (empty string when the reference has no source id);
#   - the contributor names in authorship order, each preceded by a space,
#     with one trailing space;
#   - the year;
#   - both abstract parts with non-word characters replaced by spaces, part A
#     with a leading space and part B with a trailing space.
# The first five values are also written, tab-separated, to
# PUBMED/reference_extra.txt.
#
# Dies if that file cannot be opened or closed.
sub do_extra_work {

    my $source_sth = $dbh->prepare( "SELECT source_name FROM source
                               where source_id = ?" );

    my $authors_sth = $dbh->prepare( "select contributor_name from contributor, author
                           where contributor.contributor_id = author.contributor_id
                           and author.reference_id = ?
                           order by author.authorship_position" );

    my $abstract_sth = $dbh->prepare( "select abstract_part_a, abstract_part_b from reference_abstract
                           where reference_id = ?" );

    my $extra_sth = $dbh->prepare( "insert into reference_extra
                           values( ?, ?, ?, ?, ?, ?, ? )" );

    my $new_refs_sth = $dbh->prepare( "SELECT reference_id, source_id, title, year FROM reference
                               where reference_id > ?
                               order by reference_id" );
    $new_refs_sth->execute($old_ref_id);

    my $contributor_file = "PUBMED/reference_extra.txt";
    open( my $cout_fh, '>', $contributor_file )
        || die "cannot open $contributor_file for writing: $!";

    while ( my ( $id, $sid, $title, $year ) = $new_refs_sth->fetchrow_array ) {

        # Source name; stays empty when the reference has no source id.
        my $source = "";
        if ($sid) {
            $source_sth->execute($sid);
            $source = $source_sth->fetchrow_array;
        }

        # Author names; collection stops at the first false name, as the
        # fetch result itself is the loop condition.
        my @author_names;
        $authors_sth->execute($id);
        while ( my $name = $authors_sth->fetchrow_array ) {
            push @author_names, $name;
        }
        my $names = join( "", map { " " . $_ } @author_names ) . " ";

        $abstract_sth->execute($id);
        my ( $aa, $ab ) = $abstract_sth->fetchrow_array;

        # Normalise the text columns: non-word characters become spaces and
        # the values are padded with a space.
        $title =~ s/\W/ /g;
        $title = " " . $title . " ";

        $aa =~ s/\W/ /g;
        $aa = " " . $aa;

        $ab =~ s/\W/ /g;
        $ab = $ab . " ";

        $extra_sth->execute( $id, $title, $source, $names, $year, $aa, $ab );

        print {$cout_fh} "$id\t$title\t$source\t$names\t$year\n";
    }

    $_->finish() for ( $new_refs_sth, $source_sth, $authors_sth, $abstract_sth, $extra_sth );

    close($cout_fh) || die "can't close $contributor_file: $!\n";

    return;
}


# Returns the references whose id is greater than the file-level $old_ref_id
# (set by load_references), joined with their reference_load_help rows.
#
# Returns three parallel array references: the original ids, the reference
# ids and the titles, in the order the rows are fetched.
sub get_new_load {

    my $new_rows = $dbh->prepare( "select original_id, reference.reference_id, title
                               from reference_load_help, reference
                               where reference_load_help.reference_id = reference.reference_id
                               and reference.reference_id > ?" );
    $new_rows->execute($old_ref_id);

    my ( @oids, @rids, @titles );
    while ( my @row = $new_rows->fetchrow_array ) {
        push @oids,   $row[0];
        push @rids,   $row[1];
        push @titles, $row[2];
    }
    $new_rows->finish;

    return ( \@oids, \@rids, \@titles );
}


################################# RefCuration End #########################

1;	

