#!/usr/local/bin/perl

# vim: tw=78: sw=4: ts=4: et: 

# $Id: get-qtl-stats.pl,v 1.13 2007/06/07 21:22:43 kclark Exp $

use strict;
use warnings;
use Data::Dumper;
use English qw( -no_match_vars );
use File::Basename;
use Getopt::Long;
use Gramene::CDBI::Markers;
use Gramene::CDBI::Qtl;
use Gramene::DB;
use Gramene::Marker::DB;
use Gramene::Utils qw( commify );
use List::Util qw( max );
use Pod::Usage;
use Readonly;
use Text::TabularDisplay;

Readonly my $VERSION   => sprintf '%d.%02d', 
                          qq$Revision: 1.13 $ =~ /(\d+)\.(\d+)/;
Readonly my $TAB       => "\t";
Readonly my $NL        => "\n";
Readonly my $EMPTY_STR => q{};

my $format = 'text';
my ( $help, $show_version, $man_page, $tabbed_output );
GetOptions(
    'f|format:s' => \$format,
    't|tab'      => \$tabbed_output,
    'help'       => \$help,
    'man'        => \$man_page,
    'version'    => \$show_version,
);

if ( $help || $man_page ) {
    pod2usage({
        -exitval => 0,
        -verbose => $man_page ? 2 : 1
    });
};

if ( $show_version ) {
    my $prog = basename( $PROGRAM_NAME );
    print "$prog v$VERSION\n";
    exit 0;
}

if ( $tabbed_output ) {
    $format = 'tab';
}

$format = lc $format;
unless ( $format eq 'text' || $format eq 'tab' || $format eq 'html' ) {
    pod2usage({ 
        -msg     => "Bad format ($format)",
        -exitval => 2,
        -verbose => 2
    });
}

#
# Gather data ...
#
my $db  = Gramene::DB->new('qtl');
my $mdb = Gramene::Marker::DB->new;

my $trait_cats = $db->selectcol_arrayref(
    'select trait_category from qtl_trait_category order by 1'
);

my $species = $db->selectcol_arrayref(
    'select species from species order by 1'
);

my $num_qtl_by_trait_cat = $db->selectall_hashref(q'
    select   tc.trait_category, count(q.qtl_id) as num_qtl
    from     qtl_trait_category tc, qtl_trait t, qtl q
    where    tc.qtl_trait_category_id=t.qtl_trait_category_id
    and      t.qtl_trait_id=q.qtl_trait_id
    group by trait_category',
    'trait_category'
);

my $num_traits_by_trait_cat = $db->selectall_hashref(q'
    select   tc.trait_category, count(t.qtl_trait_id) as num_traits
    from     qtl_trait_category tc, qtl_trait t
    where    tc.qtl_trait_category_id=t.qtl_trait_category_id
    group by trait_category',
    'trait_category'
);

my $num_traits_by_species = $db->selectall_hashref(q'
    select   s.species, count(distinct q.qtl_trait_id) as num_traits
    from     qtl q, qtl_trait t, species s
    where    q.qtl_trait_id=t.qtl_trait_id
    and      q.species_id=s.species_id
    group by species',
    'species'
);

my $num_qtl_by_species = $db->selectall_hashref(q'
    select   s.species, count(q.qtl_id) as num_qtl
    from     qtl q, species s
    where    q.species_id=s.species_id
    group by species',
    'species'
);

my %num_qtl_by_species_and_trait_cat;
for my $rec (
    @{ $db->selectall_arrayref(q'
        select   s.species, tc.trait_category, count(q.qtl_id) as num_qtls
        from     qtl q, qtl_trait t, qtl_trait_category tc, species s
        where    q.qtl_trait_id=t.qtl_trait_id
        and      t.qtl_trait_category_id=tc.qtl_trait_category_id
        and      q.species_id=s.species_id
        group by species, trait_category',
        { Columns => {} }
    ) }
) {
    $num_qtl_by_species_and_trait_cat{ 
        $rec->{'species'} }{ $rec->{'trait_category'} } = $rec->{'num_qtls'};
}

#
# Collate data ...
#
my @tables;
my @t1 = ( [ 'Trait Cat.', 'Num. Traits', 'Num. QTL' ] );
my ( $total_traits, $total_qtl );
for my $cat ( @$trait_cats ) {
    push @t1, [
        $cat, 
        commify( $num_traits_by_trait_cat->{ $cat }{'num_traits'} ),
        commify( $num_qtl_by_trait_cat->{ $cat }{'num_qtl'} ),
    ];

    $total_traits += $num_traits_by_trait_cat->{ $cat }{'num_traits'};
    $total_qtl    += $num_qtl_by_trait_cat->{ $cat }{'num_qtl'};
}

push @t1, [ 'Total', commify( $total_traits ), commify( $total_qtl ) ];
push @tables, { data => \@t1 };

# reset reused vars
( $total_traits, $total_qtl ) = ( 0, 0 );

my @t2 = ( [ 'Species', 'Num. Traits', 'Num. QTL' ] );
for my $s ( @$species ) {
    push @t2, [
        $s, 
        commify( $num_traits_by_species->{ $s }{'num_traits'} ),
        commify( $num_qtl_by_species->{ $s }{'num_qtl'} ),
    ];

    $total_traits += $num_traits_by_species->{ $s }{'num_traits'};
    $total_qtl    += $num_qtl_by_species->{ $s }{'num_qtl'};
}

push @t2, [ 'Total', commify( $total_traits ), commify( $total_qtl ) ];
push @tables, { data => \@t2 };

my @t3 = ( [ 'Species', @$trait_cats ] );
for my $s ( @$species ) {
    push @t3, [
        $s,
        map {
            commify($num_qtl_by_species_and_trait_cat{ $s }{ $_ } || 0),
        } @$trait_cats
    ];
}
push @tables, { data => \@t3 };

# 
# Number by trait cat/chromosome on QTL maps, rice genome
# 
my ($RiceGenome) = Gramene::CDBI::Markers::MapSet->search( 
    ensembl_species_name => 'Oryza_sativa'
);
my ($QtlMapType)  = Gramene::CDBI::Markers::MapType->search( 
    map_type      => 'QTL'
);
my (@QtlMapSets)  = Gramene::CDBI::Markers::MapSet->search( 
    map_type_id   => $QtlMapType->id
);

#my %trait_cat_by_symbol 
#    = map { @$_ }
#    @{ $db->selectall_arrayref(
#        q[
#            select t.trait_symbol, c.trait_category 
#            from   qtl_trait_category c, qtl_trait t
#            where  t.qtl_trait_category_id=c.qtl_trait_category_id
#        ],
#    ) }
#;
#
#my %qtl_cache;
#my @Qtls = Gramene::CDBI::Qtl::Qtl->retrieve_all;
#for my $Qtl ( @Qtls ) {
#    $qtl_cache{ $Qtl->qtl_accession_id } 
#        = $Qtl->qtl_trait->qtl_trait_category->trait_category;
#}
#
#my %num_by_trait_cat_and_map;
#my %chrs;
#for my $MapSet ( $RiceGenome, @QtlMapSets ) {
#    my $ms_acc = $MapSet->cmap_map_set_accession or next;
#    if ( $ms_acc ne $RiceGenome->cmap_map_set_accession ) {
#        $ms_acc = 'qtl';
#    }
#
#    for my $Map ( $MapSet->maps ) {
#        next unless $Map->cmap_map_accession;
#
#        my $chr;
#        if ( $Map->map_name =~ /(\d+)$/ ) {
#            $chr = $1; 
#            $chrs{ $ms_acc }{ $chr }++;
#        }
#        else {
#            next;
#        }
#
#        my $qtls = $mdb->db->selectcol_arrayref(
#            q[
#                select s.marker_name
#                from   mapping mp, marker m, 
#                       marker_synonym s, marker_type mt
#                where  mp.map_id=?
#                and    mp.marker_id=m.marker_id
#                and    m.marker_type_id=mt.marker_type_id
#                and    mt.marker_type=?
#                and    m.display_synonym_id=s.marker_synonym_id
#            ],
#            {},
#            ( $Map->id, 'QTL' )
#        );
#
#        for my $qtl_acc ( @$qtls ) {
#            my $cat = $qtl_cache{ $qtl_acc } or next;
#            
#            $num_by_trait_cat_and_map{ $cat }{ $ms_acc }{ $chr }++;
#        }
#    }
#}
#
#if ( %num_by_trait_cat_and_map ) {
#    my $genome_acc = $RiceGenome->cmap_map_set_accession;
#    for my $ms_acc ( $genome_acc, 'qtl' ) {
#        my @chrs = sort { $a <=> $b } keys %{ $chrs{ $genome_acc } };
#        my @t4   = ( [ 'Trait Category', @chrs ] );
#
#        for my $cat ( @$trait_cats ) {
#            my @nums;
#            for my $chr ( @chrs ) {
#                push @nums, 
#                    $num_by_trait_cat_and_map{ $cat }{ $ms_acc }{ $chr } || 0;
#            }
#
#            push @t4, [ $cat, @nums ];
#        }
#
#        my $map_type = $ms_acc eq 'qtl' ? 'QTL maps' : 'Genome';
#        push @tables, { 
#            data  => \@t4,
#            title => "Number QTLs mapped by trait category to $map_type", 
#        };
#    }
#}

#
# Create output ...
#
my $out;
if ( $format eq 'tab' ) {
    for my $table ( @tables ) {
        my $data = $table->{'data'};
        for my $line ( @$data ) {
            $out .= join( $TAB, @$line ) . $NL;
        }

        $out .= $NL;
    }
}
elsif ( $format eq 'html' ) {
    for my $table ( @tables ) {
        my $data = $table->{'data'};

        $out .= "<TABLE>$NL";
        my $i = 1;

        for my $line ( @$data ) {
            my $cell = $i == 1 ? 'TH' : 'TD';
            $out .= '  <TR>' . 
                join( $EMPTY_STR, map { "<$cell>$_</$cell>" } @$line ) .
            "</TR>$NL";
            $i++;
        }

        $out .= "</TABLE>$NL";
    }
}
else {
    for my $table ( @tables ) {
        my $data = $table->{'data'};

        # first format the number to be right-justified
        my $num_cols = scalar @{ $data->[0] };
        my @longest;
        for my $i ( 1 .. $num_cols ) {
            $longest[ $i ] = max( map { length($_->[ $i ] || '') } @$data );
        }

        for my $row ( 1..$#{ $data } ) {
            for my $col ( 1..$num_cols - 1 ) {
                $data->[ $row ][ $col ] 
                    = sprintf "%${longest[ $col ]}s", $data->[ $row ][ $col ];
            }
        }

        my $header = shift @$data;
        my $tab    = Text::TabularDisplay->new( @$header );

        for my $line ( @$data ) {
            $tab->add( @$line );
        }

        if ( my $title = $table->{'title'} ) {
            $out .= $NL . $title . $NL;
        }

        $out .= join $NL, $tab->render, $EMPTY_STR, $EMPTY_STR;
    }
}

print $out;

printf "Num. of QTL papers curated: %s\n",
    $db->selectrow_array(
        q[
            select count(distinct x.xref_value) 
            from   xref_type xt, xref x
            where  x.table_name='qtl'
            and    x.xref_type_id=xt.xref_type_id
            and    xt.xref_type='Gramene Literature'
        ]
    );

my ($qtl_type) = Gramene::CDBI::Markers::MapType->search( map_type => 'QTL' );
if ( $qtl_type ) {
    my @map_sets = Gramene::CDBI::Markers::MapSet->search( 
        map_type_id => $qtl_type->id
    );

    printf "Num. of QTL map sets: %s\n", scalar @map_sets;
}

if ( $RiceGenome ) { 
    my @markers     = $mdb->marker_search(
        marker_type => 'QTL',
        map_set_id  => $RiceGenome->id,
    );

    printf "Num. QTL mapped to rice genome: %s\n", scalar @markers;
}


__END__

# ----------------------------------------------------

=head1 NAME

get-qtl-stats.pl - reports stats on "qtl" db

=head1 SYNOPSIS

  get-qtl-stats.pl [options]

Options:

  -f|--format   Either "text," "tab" or "html" (default is "text")
  -t|--tab      Output tab-delimited data, not text table 
                [deprecated -- use "format=tab" instead]

  --help        Show brief help and exit
  --man         Show full documentation
  --version     Show version and exit

=head1 DESCRIPTION

Connects to the current QTL db (as defined in "gramene.conf") and
prints out statistics for release notes.  Output can be in a text
table ("text") suitable for display in a fixed-width font, 
tab-delimited suitable for viewing in a spreadsheet, or HTML suitable
for display on the Internet(s).

=head1 SEE ALSO

Gramene::DB, Text::TabularDisplay.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2005 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
