package GDPDM::Utils;

# vim: set ts=4 sw=4 et:

# $Id: Utils.pm,v 1.3 2006/01/30 15:35:13 kycl4rk Exp $

use strict;
use Carp qw( croak );
use GDPDM::Config qw( get_config );
use List::Util qw( max );
use Log::Dispatch::File;
use String::Similarity;

use Exporter 'import';

use vars qw( @EXPORT_OK );

@EXPORT_OK = qw( table_name_to_class get_logger similarity_search );

# --------------------------------------------------------
sub table_name_to_class {
    my $table_name = shift or croak 'No table name';

    return join( '::', 'GDPDM', 'CDBI',
        join( '', map { ucfirst( lc( $_ ) ) } split( /_/, $table_name ) )
    );
}

# --------------------------------------------------------
sub get_logger {
    my $opts      = shift || {};
    my $config    = get_config();
    my $log_file  = $config->{'log'}{'file'} or croak 'No log file defined';
    my $log_level = $opts->{'log_level'} || $config->{'log'}{'level'} || 'warn';

    if ( -e $log_file && !-r _ ) {
        croak "Log file '$log_file' not writable";
    }

    my $logger = Log::Dispatch->new;
    $logger->add( 
        Log::Dispatch::File->new(
            name      => 'file',
            min_level => $log_level,
            filename  => $log_file,
            mode      => 'append',
            callbacks => sub { 
                my %p = @_; 
                return join '', 
                    '[', scalar localtime, ']: ', $p{'message'}, "\n";
            },
        )
    );

    return $logger;
}

# ----------------------------------------------------
sub similarity_search {

=pod

=head2 similarity_search

  my @matches = similarity_search(
      data          => \@data,    # arrayref of hashrefs
      threshold     => 0.5,       # a number 0-1, default 0.3
      search_fields => [ 'foo' ], # keys in the hashrefs
      search_values => [ 'bar' ]  # search strings
  );

Returns all the members of "data" where any of the "search_fields"
match any of the "search_values" with a similarity greater than or 
equal to the "threshold."  Each returned member will have a new 
field called "similarity" with a value of the highest similarity 
score for any of the search values in any of the search fields.

=cut

    my %args          = @_;
    my @search_values = @{ $args{'search_values'} || [] };
    my @search_fields = @{ $args{'search_fields'} || [] };
    my $threshold     =    $args{'threshold'}     || 0.3;
    my $data          =    $args{'data'};

    croak 'No data' unless UNIVERSAL::isa( $data, 'ARRAY' ) && @$data;

    # get rid of wildcards, trim values
    @search_values = map { s/\*//g; s/^\s+|\s+$//g; $_ || () } @search_values;

    # if nothing to search for, everything is a match, so bail
    unless ( @search_values ) {
        $_->{'similarity'} = 1 for @$data;
        return wantarray ? @$data : $data;
    }
    
    unless ( @search_fields ) {
        die 'Data not hashref' unless UNIVERSAL::isa( $data->[0], 'HASH' );
        @search_fields = keys %{ $data->[0] };
    }

    if ( !@search_fields ) {
        croak "Can't determine search fields\n";
    }

    my @return;
    for my $t ( @$data ) {
        my @scores;
        for my $s1 ( @search_values ) {
            for my $s2 ( map { $t->{ $_ } } @search_fields ) { 
                next unless $s2;
                my $score = sprintf( "%.02f", similarity( $s1, $s2 ) );
                if ( $score == 1 ) {
                    @scores = ( $score );
                    last;
                }
                else {
                    push @scores, $score if $score >= $threshold;;
                }
            }

            last if scalar @scores == 1 && $scores[0] == 1;
        }

        # take the highest score
        if ( @scores ) {
            $t->{'similarity'} = max @scores;
            push @return, $t;
        } 
    }

    return wantarray ? @return : \@return;
}

1;

# --------------------------------------------------------

__END__

=pod

=head1 NAME

GDPDM::Utils - exported utility functions

=head1 SYNOPSIS

  use GDPDM::Utils qw( table_name_to_class );

=head1 DESCRIPTION

Exported utility functions.

=head1 EXPORTABLE METHODS

=head2 table_name_to_class

  my $class = table_name_to_class('cdv_marker');
  # $class now has 'GDPDM::CDBI::CdvMarker'

Turns "foo_barbaz" into "GDPDM::CDBI::FooBarbaz"

=head2 get_logger

  my $logger = get_logger( log_level => 'debug' );
  $logger->info("Setting db to '$db_name'");

Returns a Log::Dispatch::File logger.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2006 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
