package Gramene::Diversity::DB;

# $Id: DB.pm,v 1.4 2005/10/20 16:06:14 steven Exp $

=head1 NAME

Gramene::Diversity::DB - a Gramene module

=head1 SYNOPSIS

  use Gramene::Diversity::DB;

  my $div_db = Gramene::Diversity::DB->new;

=head1 DESCRIPTION

Interface to germplasm, species, diversity.

=cut

# ----------------------------------------------------

use strict;
use Class::Base;
use Gramene::DB;
use Gramene::Utils qw[ iris_normalize similarity_search ];

use base 'Class::Base';

use vars qw[ $VERSION ];
$VERSION = sprintf "%d.%02d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/;

# ----------------------------------------------------
sub init {
    my ( $self, $config ) = @_;
    $self->params( $config, qw[ admin db_name ] );
    return $self;
}

# ----------------------------------------------------
sub db {

=pod

=head2 db

  my $db = $div_db->db or die $div_db->error;

Returns a database handle to the diversity database.

=cut

    my $self = shift;

    unless ( $self->{'db'} ) {
        my $db_name = 
            $self->{'admin'}   ? 'diversity_rw'     :
            $self->{'db_name'} ? $self->{'db_name'} :
            'diversity';
        $self->{'db'} = Gramene::DB->new( $db_name );
    }

    return $self->{'db'};
}

# ----------------------------------------------------
sub find_or_create_passport {

=pod

=head2 find_or_create_passport

  my $passport_id = $div_db->find_or_create_passport(
      accename        => 'W1519', # or just "name"
      accenumb        => '105159',
      source          => 'IRGC',
      div_taxonomy_id => 13,
      sampstat        => '...',
      germplasm_type  => '...',
      comments        => '...',
  ) or die $div_db->error;

Finds or creates a passport (germplasm).

=cut

    my $self = shift;
    my %args = @_;
    my ( $div_passport_id, $name, $source, $accenumb, $div_taxonomy_id ) = 
        $self->get_passport_id( %args ) or return;

    my $db = $self->db;
    unless ( $div_passport_id ) {
        $db->do(
            q[
                insert 
                into    div_passport 
                        (accename, source, accenumb, div_taxonomy_id)
                values  ( ?, ?, ?, ? )
            ],
            {},
            ( $name, $source, $accenumb, $div_taxonomy_id )
        );

        $div_passport_id = $db->selectrow_array('select last_insert_id()');
    }

    my $sampstat       = $args{'sampstat'}       || '';
    my $germplasm_type = $args{'germplasm_type'} || '';
    my $comments       = $args{'comments'}       || '';

    if ( $sampstat || $germplasm_type || $comments ) {
        my $sql = 'update div_passport set ';
        my ( @sets, @args );

        if ( $sampstat ) {
            push @sets, 'sampstat=?';
            push @args, $sampstat;
        }

        if ( $germplasm_type ) {
            push @sets, 'germplasm_type=?';
            push @args, $germplasm_type;
        }

        if ( $comments ) {
            push @sets, 'comments=?';
            push @args, $comments;
        }

        $sql .= join(', ', @sets) . 'where div_passport_id=?';
        push @args, $div_passport_id;

        $db->do( $sql, {}, @args );
    }

    return $div_passport_id;
}

# ----------------------------------------------------
sub find_or_create_taxonomy {

=pod

=head2 find_or_create_taxonomy

  my $taxonomy_id = $div_db->find_or_create_taxonomy(
      genus       => 'Oryza',
      species     => 'sativa',
      subspecies  => 'japonica' # optional 
      common_name => 'Rice'     # optional
  ) or die $div_db->error;

Or:

  my $taxonomy_id = $div_db->find_or_create_taxonomy(
      'Oryza sativa japonica'
  ) or die $div_db->error;

Finds or creates a species.

=cut

    my $self = shift;

    my ( $taxonomy_id, $genus, $species, $subspecies, $common_name ) = 
        $self->get_taxonomy_id( @_ ) or return;

    unless ( $taxonomy_id ) {
        my $db = $self->db;
        $db->do(
            q[
                insert
                into   div_taxonomy 
                       ( genus, species, subspecies, common_name )
                values ( ?, ?, ?, ? )
            ],
            {},
            ( $genus, $species, $subspecies, $common_name )
        );

        $taxonomy_id = $db->selectrow_array('select last_insert_id()');
    }

    return $taxonomy_id;
}

# ----------------------------------------------------
sub get_passport_id {

=pod

=head2 get_passport_id

  my $passport_id = $div_db->get_passport_id(
      accename        => 'W1519' # or just "name"
      accenumb        => '105159',
      source          => 'IRGC',
      div_taxonomy_id => 13,
  ) or die $div_db->error;

Returns a passport id.

=cut

    my ( $self, %args ) = @_;
    my $name            = $args{'name'} || $args{'accename'} || '';
    my $accenumb        = $args{'accenumb'}        || '';
    my $source          = $args{'source'}          || '';
    my $div_taxonomy_id = $args{'div_taxonomy_id'} || 0;
    my $db              = $self->db;

    return $self->error('Not enough arguments to find passport') 
        unless $name && $div_taxonomy_id;

    my ( $id, $genus, $species ) = $db->selectrow_array(q'
        select div_taxonomy_id, genus, species
        from   div_taxonomy
        where  div_taxonomy_id=?',
        {},
        ( $div_taxonomy_id )
    );

    return $self->error("Bad taxonomy id ($div_taxonomy_id)") unless $id;

    if ( lc $genus eq 'oryza' && lc $species eq 'sativa' ) {
        $name = iris_normalize( $name ); 
    }

    my $sql = q[
        select div_passport_id
        from   div_passport
        where  accename=?
    ];

    my @args = ( $name );

    if ( defined $accenumb ) {
        $sql .= ' and accenumb=? ';
        push @args, $accenumb;
    }

    if ( defined $source ) {
        $sql .= ' and source=? ';
        push @args, $source;
    }

    if ( defined $div_taxonomy_id ) {
        $sql .= ' and div_taxonomy_id=? ';
        push @args, $div_taxonomy_id;
    }

    my $div_passport_id = $db->selectrow_array( $sql, {}, @args );

    return wantarray 
        ? ( $div_passport_id, $name, $source, $accenumb, $div_taxonomy_id ) 
        : $div_passport_id;
}

# ----------------------------------------------------
sub get_taxonomy_id {

=pod

=head2 get_taxonomy_id

  my $taxonomy_id = $div_db->get_taxonomy_id(
      genus       => 'Oryza',
      species     => 'sativa',
      subspecies  => 'japonica' # optional 
      common_name => 'Rice'     # optional
  ) or die $div_db->error;

Or:

  my $species_id = $div_db->get_taxonomy_id(
      'Oryza sativa japonica'
  ) or die $div_db->error;

Returns the taxonomy_id of a species.

=cut

    my $self = shift;

    my ( $genus, $species, $subspecies, $common_name );
    if ( @_ == 1 ) {
        ( $genus, $species, $subspecies ) = split( /\s+/, shift(), 3 );
    }
    else {
        my %args     = @_;
        $genus       = $args{'genus'}       || '';
        $species     = $args{'species'}     || '';
        $subspecies  = $args{'subspecies'}  || '';
        $common_name = $args{'common_name'} || '';
    }

    return $self->error('Please supply at least genus and species')
        unless $genus && $species;

    my $db  = $self->db;
    my $sql = sprintf(
        q[
            select div_taxonomy_id
            from   div_taxonomy
            where  genus=?
            and    species=?
            %s
        ],
        $subspecies ? 'and subspecies=?' : ''
    );
    my @args = ( $genus, $species );
    push @args, $subspecies if $subspecies;

    my $taxonomy_id = $db->selectrow_array( $sql, {}, @args );

    return wantarray 
        ? ( $taxonomy_id, $genus, $species, $subspecies, $common_name ) 
        : $taxonomy_id;
}

# ----------------------------------------------------
sub passport_search {

=pod

=head2 passport_search

  my $germplasm    = $div_db->passport_search(
      search_field         => 'accename', # or undef to search all fields
      search_value         => 'IR 64, IR36',
      genus                => 'Oryza',
      order_by             => 'accename',
      similarity_threshold => .6, # default is .3
  ) or die $div_db->error;

Returns all the passport matching the criteria.  Automatically normalizes
the search value and adds as additional terms.  Separate multiple search
values with commas (spaces can't be used as they are parts of valid 
identifiers, e.g., "IR 64").

=cut

    my ( $self, %args ) = @_;
    my $search_value    = $args{'search_value'} || '';
    my $search_field    = $args{'search_field'} || '';
    my $genus           = $args{'genus'}        || 0;
    my $order_by        = $args{'order_by'}     || 'similarity';
    my $sim_threshold   = $args{'similarity_threshold'} || 0.3;

    $order_by = 'accename,genus,species' if $order_by eq 'accename';
    $order_by = 'genus,species' if $order_by eq 'genus';

    my $sql = qq[
        select   p.div_passport_id,
                 p.div_taxonomy_id,
                 p.accename,
                 p.source,
                 p.accenumb,
                 t.genus,
                 t.species,
                 t.subspecies,
                 '1.00' as similarity
        from     div_passport p, div_taxonomy t
        where    p.div_taxonomy_id=t.div_taxonomy_id
    ];

    # split on commas, trim whitespace, unique and uppercase values
    my %search_values = map { s/^\s+|\s+$//g; uc $_, 1 } 
        split( /,/, $args{'search_value'} || '' );

    # add any normalized terms that differ from the original
    for my $v ( keys %search_values ) {
        $search_values{ iris_normalize( $v ) }++;
    }

    my @fields = qw[
        accename source accenumb common_name genus species subspecies
    ];

#    my @wheres;
#    for my $v ( keys %search_values ) {
#        my $cmp = ( $v =~ s/\*/%/g ) ? 'like' : '=';
#        my $all    = join( '|', @fields );
#        if ( $search_field =~ /^($all)$/ ) {
#            push @wheres, "$search_field $cmp '$v' ";
#        }
#        else {
#            push @wheres, join( ' or ', map { " $_ $cmp '$v' " } @fields );
#        }
#    }
#
#    if ( @wheres ) {
#        $sql .= ' and ((' . ( shift @wheres ) . ') ';
#        $sql .= " or ($_) " for @wheres;
#        $sql .= ') ';
#    }

    $sql .= "and t.genus='$genus' " if $genus;
    $sql .= "order by $order_by";

    my $db  = $self->db or return;
    my $all = $db->selectall_arrayref( $sql, { Columns => {} } );

    my @passport = similarity_search(
        search_values => [ keys %search_values ],
        search_fields => [ $search_field || @fields ],
        threshold     => $sim_threshold,
        data          => $all,
    );

#    my @passport;
#    # get rid of wildcards now
#    if ( 
#        my @search_values = map { 
#            s/\*//g; s/^\s+|\s+$//g; $_ || () 
#        } keys %search_values 
#    ) {
#        for my $p ( @$all ) {
#            my @scores;
#            for my $s1 ( @search_values ) {
#                for my $s2 (
#                    map { $p->{ $_ } || '' } ( $search_field || @fields )
#                ) {
#                    my $score = sprintf( "%.02f", similarity( $s1, $s2 ) );
#                    if ( $score == 1 ) {
#                        @scores = ( $score );
#                        last;
#                    }
#                    else {
#                        push @scores, $score if $score >= $sim_threshold;;
#                    }
#                }
#
#                last if scalar @scores == 1 && $scores[0] == 1;
#            }
#
#            # take the highest score
#            if ( @scores ) {
#                $p->{'similarity'} = ( sort { $b <=> $a } @scores )[0];
#                push @passport, $p;
#            }
#        }
#    }
#    else {
#        @passport = @$all;
#    }

    if ( $order_by eq 'similarity' ) {
        @passport = sort { 
            $b->{'similarity'} <=> $a->{'similarity'} ||
            $a->{'accename'}   cmp $b->{'accename'} 
        } @passport;
    }

    return wantarray ? @passport : \@passport;
}

1;

# ----------------------------------------------------

=pod

=head1 SEE ALSO

Gramene::Diversity::DB.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2005 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
