#!/usr/local/bin/perl -w

use strict;
use POSIX qw(strftime);
use Template;
use Gramene::Literature::GetRefD;
use Bio::GMOD::CMap;
use Readonly;

use lib '/opt/GDPDM/lib';
use GDPDM::Config qw( get_config );
use GDPDM::CDBI;


# Directory that holds the page template and receives the generated pages.
Readonly my $FILE_PATH => '/usr/local/gramene_internal/html/species/';

# Genus (lower case) => common name. The common name is shown on the page
# and, lower-cased with whitespace removed, names the output file.
Readonly my %GENUS => (
    avena        => 'Oat',
    brachypodium => 'False Bromes',
    hordeum      => 'Barley',
    oryza        => 'Rice',
    pennisetum   => 'Pearl Millet',
    secale       => 'Rye',
    setaria      => 'Foxtail Millet',
    sorghum      => 'Sorghum',
    triticum     => 'Wheat',
    zea          => 'Maize',
    zizania      => 'Wild Rice',
);

# Generation date handed to the template (abbreviated month, day, year).
my $date = strftime( '%b.%d, %Y', localtime );

# CMap handle; the data source must be selected before asking for the
# database handle.
my $cmap = Bio::GMOD::CMap->new();
$cmap->data_source('Build24');
my $cmap_db = $cmap->db();

# One handle per Gramene module database, created in this order.
# NOTE(review): Gramene::DB is not loaded by a "use" line in this file;
# presumably another module pulls it in - confirm.
my ( $protein_db, $gene_db, $qtl_db, $marker_db, $ontology_db )
    = map { scalar( Gramene::DB->new($_) ) }
      qw( protein genes qtl markers ontology );

# Diversity databases known to the GDPDM configuration; the per-genus loop
# reads the 'key' entry of each element.
my @div_dbs = GDPDM::Config->available_databases();

# Prepared statements, each taking a single bind value. The per-genus loop
# below executes them once per genus: the ontology query is given the bare
# genus name, every other query is given the LIKE pattern "<genus>%".

# Number of gene_product_helper rows whose organism matches the pattern.
my $protein_by_genus_sth = $protein_db->prepare(q[
    SELECT count(*) FROM gene_product_helper
    WHERE  organism like ?
]);

# Accession and summed association_count for the term whose term_name equals
# the bind value (exact match, not LIKE).
my $ontology_by_genus_sth = $ontology_db->prepare(q[
    SELECT T.term_accession, sum(association_count) as count
    FROM   object_association_count C, term T
    WHERE  T.term_id = C.term_id
    AND    T.term_name = ?
    GROUP BY C.term_id
]);

# Gene count for genera matching the pattern.
# NOTE(review): rows are grouped by genus only, yet species_id and species
# are selected as well; which species values come back is up to the database
# engine - confirm that one row per genus is what the template expects.
my $gene_by_genus_spe_sth = $gene_db->prepare(q[
    SELECT G.species_id, genus,species,count(*) as count FROM gene_gene G, gene_species S
    WHERE  G.species_id = S.species_id
    AND    genus like ?
    group by genus
]);

# Map-set count per CMap species whose full name matches the pattern,
# largest count first.
my $map_by_spe_sth = $cmap_db->prepare(q[
    SELECT species_acc, species_full_name,count(map_set_acc) as count
    FROM cmap_map_set M, cmap_species S
    WHERE M.species_id = S.species_id
    AND species_full_name like ? 
    GROUP BY species_acc
    ORDER BY count DESC
]);

# Marker count per source species matching the pattern, largest count first.
my $marker_by_spe_sth = $marker_db->prepare(q[
    SELECT S.species_id,species,count(*) as count
    FROM marker M, species S
    WHERE M.source_species_id = S.species_id
    AND  species like ?
    GROUP BY S.species_id
    ORDER BY count DESC
]);

# QTL count per species matching the pattern.
my $qtl_by_spe_sth = $qtl_db->prepare(q[
    SELECT species, count(*) as count FROM qtl, species
    WHERE  qtl.species_id = species.species_id
    AND    species like ?
    group by species
]);

# Generate one "<common name>_stat.html" page per genus in %GENUS.
#
# The Template object is built once: its configuration does not depend on
# the genus, so there is no need to rebuild it for every page.
my $tt = Template->new({
    INCLUDE_PATH => $FILE_PATH,
}) || die "$Template::ERROR\n";

# Genera are processed in sorted order so that successive runs behave the
# same way; each page is independent of the others.
foreach my $org ( sort keys %GENUS ) {
    my $common_name = $GENUS{$org};
    my $org_param   = $org . '%';    # LIKE pattern: names starting with the genus

    # Output file: common name, lower-cased, with all whitespace removed.
    ( my $file_name = lc $common_name ) =~ s/\s+//g;
    my $out_file = $FILE_PATH . $file_name . '_stat.html';

    # Single-value / single-row statistics.
    $protein_by_genus_sth->execute($org_param);
    my ($protein_count) = $protein_by_genus_sth->fetchrow_array;

    $ontology_by_genus_sth->execute($org);    # exact genus name, not a pattern
    my $ontology_association_count = $ontology_by_genus_sth->fetchrow_hashref;

    # Multi-row statistics: each is a reference to an array of row hashes.
    my $gene_counts   = _fetch_all_rows( $gene_by_genus_spe_sth, $org_param );
    my $map_counts    = _fetch_all_rows( $map_by_spe_sth,        $org_param );
    my $qtl_counts    = _fetch_all_rows( $qtl_by_spe_sth,        $org_param );
    my $marker_counts = _fetch_all_rows( $marker_by_spe_sth,     $org_param );

    # Diversity statistics come from every diversity database whose key
    # contains the common name (case-insensitive, matched literally). If
    # several databases match, the last one wins.
    my $div_counts = {};
    foreach my $div_db (@div_dbs) {
        my $db_name = $div_db->{'key'};
        next unless $db_name =~ /\Q$common_name\E/i;

        # Presumably selects the database that the retrieve_all calls below
        # read from - confirm against GDPDM::CDBI.
        GDPDM::CDBI->db_Main($db_name);

        my @passports   = GDPDM::CDBI::DivPassport->retrieve_all;
        my @div_markers = GDPDM::CDBI::CdvMarker->retrieve_all;

        $div_counts->{'db_name'}          = $db_name;
        $div_counts->{'germplasms_count'} = scalar @passports;
        $div_counts->{'markers_count'}    = scalar @div_markers;
    }

    # Literature is searched by genus and by common name. When both are the
    # same word the common-name search is skipped by passing ''. The
    # comparison has to ignore case because %GENUS keys are lower case while
    # its values are capitalised (sorghum => 'Sorghum'); a case-sensitive
    # test never matches and the same result would be listed twice.
    my $ref_common_name = lc($common_name) eq lc($org) ? '' : $common_name;
    my @literature_counts
        = get_literature_counts_by_species( $org, $ref_common_name );

    $tt->process(
        'species_template.tmpl',
        {
            date                       => $date,
            genus                      => ucfirst($org),
            common_name                => $common_name,
            qtl_counts                 => $qtl_counts,
            marker_counts              => $marker_counts,
            map_counts                 => $map_counts,
            gene_counts                => $gene_counts,
            protein_count              => $protein_count,
            ontology_association_count => $ontology_association_count,
            diversity_counts           => $div_counts,
            literature_counts          => \@literature_counts,
        },
        $out_file
    ) || die $tt->error(), "\n";
}

# Execute a prepared statement and collect every result row.
#
# Parameters:
#   $sth         - prepared statement handle
#   @bind_values - values for the statement's placeholders
# Returns:
#   a reference to an array of row hash references, in fetch order (empty
#   when the statement yields no rows).
sub _fetch_all_rows {
    my ( $sth, @bind_values ) = @_;

    $sth->execute(@bind_values);

    my @rows;
    while ( my $row = $sth->fetchrow_hashref ) {
        push @rows, $row;
    }
    return \@rows;
}

# Look up literature hit counts for a genus and for its common name.
#
# Parameters:
#   $genus       - genus name used as the first search keyword
#   $common_name - common name used as the second search keyword
# Returns:
#   a list of single-pair hash references { keyword => count }, genus first,
#   containing only the keywords whose search produced a positive count.
sub get_literature_counts_by_species {
    my ( $genus, $common_name ) = @_;

    my @hits;
    for my $term ( $genus, $common_name ) {
        my $found = search_literature_by_keyword($term);

        # Keep positive counts only; a false result means "nothing found".
        next unless $found && $found > 0;

        push @hits, { $term => $found };
    }

    return @hits;
}

# Count the literature references that match a keyword.
#
# The keyword is lower-cased and every character outside a-z is removed.
# The upper-cased result is then used three ways in one get_all_refs call:
# inside the author pattern "% TERM-%" (passed for both author arguments),
# as the keyword, and as the source.
#
# Parameters:
#   $word - search term, e.g. a genus or a common name; may be undef or ''.
# Returns:
#   the number of references returned by get_all_refs, or 0 when no letters
#   remain after normalisation (no database connection is made in that case).
#
# NOTE(review): spaces are removed together with all other non-letters, so a
# multi-word name is searched as a single token ("Wild Rice" -> "WILDRICE").
# That is the long-standing behaviour and is kept here; the former branch
# that split the term into two author tokens could never run for the same
# reason and has been dropped. Confirm whether multi-word names should be
# searched differently.
sub search_literature_by_keyword {
    my $word = shift;

    return 0 unless defined $word;

    $word =~ tr/A-Z/a-z/;    # lower case
    $word =~ tr/a-z//cd;     # delete everything that is not a letter
    return 0 if $word eq '';

    # Only a-z is left at this point, so uc() yields plain A-Z.
    my $term           = uc $word;
    my $author_pattern = "% $term-%";

    my $db = Gramene::Literature::GetRefD->whatever();
    $db->connect_to_ora();

    # Arguments: author pattern A, author pattern B, keyword, source.
    my @ref_ids
        = $db->get_all_refs( $author_pattern, $author_pattern, $term, $term );

    return scalar @ref_ids;
}
