#!/usr/local/bin/perl

# $Id: quick_search,v 1.11.2.2 2007/06/11 16:48:56 kclark Exp $

use strict;
use CGI;
use Data::Dumper;
use Fcntl;
use File::Spec::Functions;
use Gramene::Config;
use Gramene::DB;
use Gramene::Marker::DB;
use Gramene::Page;
use Gramene::Utils qw( get_logger iterative_search_values commify 
    pager similarity_search table_name_to_gramene_cdbi_class );
use Readonly;
use Template;

use lib '/usr/local/ensembl-live/ensembl/modules';
use Bio::EnsEMBL::DBSQL::DBAdaptor;

Readonly my $AT_SIGN                 => q{@};
Readonly my $DBL_COLON               => q{::};
Readonly my $EMPTY_STR               => q{};
Readonly my $END_TOKEN               => chr(6);
Readonly my $START_TOKEN             => chr(7);
Readonly my $ITEMS_PER_PAGE          => 25;
Readonly my $MAX_CHARS_MATCH_CONTEXT => 40;
Readonly my $MIN_SEARCH_LEN          => 3;
Readonly my $PIPE                    => q{\|};
Readonly my $SLASH                   => q{/};
Readonly my $SPACE                   => q{ };
Readonly my $VERSION        
    => sprintf "%d.%02d", q$Revision: 1.11.2.2 $ =~ /(\d+)\.(\d+)/;

delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';

use vars qw( @Modules $Page );

my $q           = CGI->new;
my $output_type = 'text/html';
my $Logger      = get_logger;

my ( $t, $html );
eval { 
#    $Page          ||= Gramene::Page->new( Apache->request );

    my $r;
    if ( $ENV{'MOD_PERL'} ) {
        $r = Apache->request;
    }
    else {
        require Apache::FakeRequest;
        $r = Apache::FakeRequest->new;
    }
    $Page          ||= Gramene::Page->new( $r );
    my $search_for   = $q->param('search_for') 
                    || $q->param('query') 
                    || $EMPTY_STR;
    my $action       = !$q->param('action') 
                       ? $search_for ? 'list' : 'home'
                       : $q->param('action');
    my $cfile        = Gramene::Config->new;
    my $modules_conf = $cfile->get('modules');
    my $search_conf  = $cfile->get('gramene_search');
    my $template_dir = $search_conf->{'template_dir'} or die 'No template dir';
    my $search_db    = Gramene::DB->new('gramene_search');

    @Modules         
        = grep { !/gramene_search/ }
        ref $modules_conf->{'module'} eq 'ARRAY'
        ? @{ $modules_conf->{'module'} } 
        : ( $modules_conf->{'module'} );
    push @Modules, 'documents';

    my $display_order = 1;
    my %module_display_order 
        = map { $_, $display_order++ } 
        ( split /,/, $search_conf->{'module_display_order'} );

    for my $module ( @Modules ) {
        $module_display_order{ $module } ||= $display_order++;
    }

    @Modules = sort { 
        $module_display_order{ $a } <=> $module_display_order{ $b }
    } @Modules;

    my %module_search_url = %{ $search_conf->{'module_search_url'} || {} };

    $t               = Template->new( 
        INCLUDE_PATH => $template_dir,
        WRAPPER      => 'wrapper.tmpl',
        VARIABLES    => {
            cgi               => $q,
            gramene_page      => $Page,
            modules           => \@Modules,
            module_search_url => \%module_search_url,
        },
    );

    if ( $action eq 'list' ) {
        my ( $search_results, $errors ) = search({ 
            $q->Vars,
            db          => $search_db, 
            search_conf => $search_conf, 
            search_for  => $search_for, 
            config      => $cfile,
        });

        $t->process(
            'list.tmpl',
            {
                search_results => $search_results,
                errors         => $errors,
                view_link      => $search_conf->{'view_link'},
            },
            \$html
        ) or $html = $t->error;
    }
    elsif ( $action eq 'view' ) {
        my $module    = $q->param('module') or die 'No module name';
        my $object    = $q->param('object') or die 'No object name';
        my $id        = $q->param('id')     or die 'No id';
        my $class     = table_name_to_gramene_cdbi_class( $module, $object )
                        or die "Invalid object ($module.$object)";
        ( my $base    = $class ) =~ s/::\w+$//;

        eval "use $base";

        my @columns   = $class->columns('Ordered');
        my $item      = $class->retrieve( $id ) or die "Bad id '$id'"; 
        my %has_a     = meta_parse( $class->meta_info('has_a') );
        my $has_many  = $class->meta_info('has_many');

        my @link_baskets;
        LINK_TABLE:
        for my $method ( keys %$has_many ) {
            my $link_class   = $has_many->{$method}{'foreign_class'};
            my $fk_field     = $has_many->{$method}{'args'}{'foreign_key'};
            my @link_columns = $link_class->columns('Essential');
            my %link_has_a   = meta_parse( $link_class->meta_info('has_a') );
            my $link_table   = $link_class->table;

            my $entries_per_page = 10;
            my $pager            = $link_class->pager( $entries_per_page, 1 );
            my @items            = $pager->search( $fk_field => $id );

            next LINK_TABLE if scalar @items == 0;

            my $url = $q->param('url') . "?action=list&object=$link_table"
                    . "&where=$fk_field:" . $item->id;

            push @link_baskets, {
                object    => $link_table,
                items     => \@items,
                pager     => $pager,
                pager_url => $url,
                columns   => \@link_columns,
                has_a     => \%link_has_a,
                url       => $url,
            };
        }

        my $view_template =  "view-${object}.tmpl";
        my $template      = -e catfile( $template_dir, $view_template )
                             ? $view_template : 'view.tmpl';

        $t->process(
            $template,
            {
                module       => $module,
                item         => $item,
                columns      => \@columns,
                has_a        => \%has_a,
                link_baskets => \@link_baskets,
            },
            \$html
        ) or $html = $t->error;
    }
    else {
        $t->process( 'home.tmpl', {}, \$html ) or $html = $t->error;
    }
};

#
# Error handler
#
if ( $@ ) {
    if ( $t ) {
        $t->process(
            'error.tmpl',
            {
                title   => 'Error',
                err_msg => $@,
            },
            \$html
        ) or $html = $t->error;
    }
    else {
        $html = $@;
    }
}

my %headers = ( -content_type => $output_type );

print $q->header( %headers ), $html;
exit 0;

# ----------------------------------------------------
sub search {
    my $args           = shift;
    my $db             = $args->{'db'}          or die 'No search db';
    my $search_conf    = $args->{'search_conf'} or die 'No search config';
    my $config         = $args->{'config'}      or die 'No config';
    my $search_value   = $args->{'search_for'}  or die 'Missing search value';
    my $search_module  = $args->{'module'}     || $EMPTY_STR;
    my $related_to     = $args->{'related_to'} || $EMPTY_STR;
    my $where          = $args->{'where'}      || $EMPTY_STR;
    my $order_by       = $args->{'order_by'}   || $EMPTY_STR;
    my $page_num       = $args->{'page'}       || $EMPTY_STR;
    my %object_display_name = %{ $search_conf->{'object_display_name'} || {} };

    if ( length $search_value < $MIN_SEARCH_LEN ) {
        die "Sorry, the search value &quot;$search_value&quot; is too short.",
            "<br>Please try again with a string at least ",
            "$MIN_SEARCH_LEN characters long.\n"
    }

    $search_value =~ s/%/*/g; # change SQL wildcard to asterisk for FULLTEXT
    $search_value =~ s/^\s+|\s+$//g; # trim

    my $sql = sprintf(
        qq[
            select module_name, table_name, record_id, record_text,
                   match(record_text) against ('%s') as score
            from   module_search
            where  match(record_text) against ('%s' in boolean mode)
            %s
            order by score desc
        ],
        $search_value,
        $search_value,
        $search_module ? "and module_name='$search_module'"   : $EMPTY_STR
    );

    my $sth = $db->prepare( $sql );
    $sth->execute;

    my $entries_per_page = 10;
    my ( %basket, %error );
    while ( my $r = $sth->fetchrow_hashref ) {
        push @{ $basket{ $r->{'module_name'} }{ $r->{'table_name'} }{'data'} },
            $r;
    }

    if ( !$search_module || $search_module eq 'markers' ) {
        my $mdb = Gramene::Marker::DB->new;
        my @markers;
        for my $v ( 
            iterative_search_values($search_value, {no_leading_wildcard => 1 }) 
        ) {
            @markers = $mdb->marker_search( marker_name => $v );
            last if @markers;
        }

        for my $marker ( @markers ) {
            push @{ $basket{'markers'}{'marker'}{'data'} }, {
                record_id   => $marker->{'marker_id'},
                record_text => join($SPACE, 
                    $marker->{'species'},
                    $marker->{'marker_type'},
                ),
            };
        }
    }

    my @ensembl_modules 
        = !$search_module 
        ? grep { /^ensembl_/ } @Modules
        : $search_module =~ /^ensembl_/ 
            ? $search_module
            : ()
    ;

    ENS_MODULE:
    for my $ens_module ( @ensembl_modules ) {
        my $ens_conf = $config->get( $ens_module ) or next ENS_MODULE;
        my ($scheme, $driver, $attr_string, $attr_hash, $driver_dsn) 
            = DBI->parse_dsn($ens_conf->{'db_dsn'});

        my $host    = 'localhost';
        my $db_name = $driver_dsn;
        if ( $driver_dsn =~ /database=([^;]+)/ ) {
            $db_name = $1;
        }
        if ( $driver_dsn =~ /host=([^;]+)/ ) {
            $host = $1;
        }

        eval {
            my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
                -user   => $ens_conf->{'db_user'},
                -pass   => $ens_conf->{'db_pass'},
                -dbname => $db_name,
                -host   => $host,
                -driver => $driver,
            );

            if ( my $feat_adaptor = $dba->get_adaptor('DnaAlignFeature') ) {
                my @features = @{ 
                    $feat_adaptor->fetch_all_by_hit_name( $search_value ) 
                };

                my %seen;
                for my $f ( @features ) {
                    next if $seen{ $f->hseqname }++;

                    push @{$basket{ $ens_module }{'dna_align_feature'}{'data'}},
                    {
                        record_id   => $f->dbID,
                        record_text => join( $SPACE, 
                            $f->hseqname,
                            $f->analysis->display_label 
                                || $f->analysis->logic_name,
                            $f->feature_Slice->name
                        ),
                    };
                }
            }

            if ( my $marker_adaptor = $dba->get_adaptor('Marker') ) {
                my @markers = @{ 
                    $marker_adaptor->fetch_all_by_synonym( $search_value ) 
                };

                for my $marker ( @markers ) {
                    push @{ $basket{ $ens_module }{'marker'}{'data'} }, {
                        record_id   => $marker->dbID,
                        record_text => join( $SPACE,
                            map { $_->name() } 
                            @{ $marker->get_all_MarkerSynonyms }
                        ),
                    };
                }
            }
        };

        if ( my $err = $@ ) {
            $error{ $ens_module } = $err;
        }
    }

    for my $module ( keys %basket ) {
        next if $error{ $module };

        my %list_column_conf = %{ $search_conf->{'list_columns'} || {} };
        my %view_link_conf   = %{ $search_conf->{'view_link'}    || {} };

        while ( my ( $table, $basket ) = each %{ $basket{ $module } } ) {
            next if ref $basket ne 'HASH';
            my $view_link = $view_link_conf{"$module.$table"};
            my @list_columns     
                = split(/,/, $list_column_conf{"$module.$table"} || $EMPTY_STR);
            my $items = defined $basket->{'data'} ? $basket->{'data'} : [];
            my $pager = Data::Page->new( scalar @$items, 10, $page_num );
            $items    = [ $pager->splice( $items ) ];
            my $class = table_name_to_gramene_cdbi_class( $module, $table );
            ( my $base = $class ) =~ s/::\w+$//;

            eval "use $base";

            ITEM:
            for my $record ( @$items ) {
                my $object;
                eval { $object = $class->retrieve( $record->{'record_id'} ) };

                if ( my $err = $@ ) {
                    $error{ $module } .=
                        "Can't retrieve $class $record->{record_id}: $err\n";
                    next ITEM;
                };

                if ( $module eq 'markers' && $table eq 'marker' ) {
                    $record->{'record_text'} .= join( $SPACE,
                        $EMPTY_STR,
                        map { $_->marker_name } $object->marker_synonyms
                    );
                }

                my @view_urls;
                for my $tmpl ( split( /$PIPE/, $view_link ) ) {
                    my $link_text = 'View';
                    my $url       = $EMPTY_STR;

                    if ( $tmpl =~ /([^@]+)@(.+)/ ) {
                        $link_text = $1;
                        $tmpl      = $2;
                    }

                    my @sub_values;
                    while ( $tmpl =~ /\[(.*?)\]/g ) {
                        my ( $cur_class, @methods ) = split /\./, $1;
                        my $cur_obj     = $object;
                        my $last_method = pop @methods;

                        for (;;) {
                            if ( my $method = shift @methods ) {
                                ( $cur_obj ) = $cur_obj->$method();
                            }

                            if ( scalar @methods == 0 ) {
                                push @sub_values, $cur_obj->$last_method();
                                last;
                            }
                        }
                    }

                    if ( !@sub_values ) {
                        @sub_values = $object->id;
                        $tmpl      .= '[id]';
                    }

                    $tmpl =~ s/\[.*?\]/%s/g;
                    $url  = sprintf( $tmpl, @sub_values );

                    push @view_urls, {
                        link_text => $link_text,
                        url       => $url,
                    } if $url;
                }

                push @{ $basket->{'items'} }, {
                    object    => $object,
                    score     => $record->{'score'},
                    view_urls => \@view_urls,
                    context   => match_context(
                        $search_value, $record->{'record_text'}
                    ),
                };
            }

            $basket->{'pager'}       = $pager;
            $basket->{'columns'}     = 
                @list_columns ? \@list_columns : [$class->columns('Essential')];
            $basket->{'has_a'}       = meta_parse( $class->meta_info('has_a') );
            $basket->{'object_name'} 
                =  $object_display_name{ "$module.$table" }
                || $class->object_type;
            $basket->{'pager_url'}
                = $q->url . "?action=list&module=$module&object=$table"
                . "&search_for=$search_value";
        }
    }

    if ( 
        !$search_module || ($search_module && $search_module eq 'documents')
    ) {
        my $doc_sql = sprintf(
            qq[
                select   path, title, contents,
                         match(path, title, contents) against ('%s') as score
                from     doc_search
                where    match(path, title, contents) 
                against  ('%s' in boolean mode)
                order by score desc
            ],
            $search_value,
            $search_value,
        );

        my $items = $db->selectall_arrayref( $doc_sql, { Columns => {} } );

        if ( @$items > 0 ) {
            my $pager = Data::Page->new( scalar @$items, 10, $page_num );
            $items    = [ $pager->splice( $items ) ];

            for my $item ( @$items ) {
                $item->{'context'} 
                    = match_context( $search_value, $item->{'contents'} );
            }

            $basket{'documents'}{'files'} =  {
                items       => $items,
                pager       => $pager,
                object_name => 'Document',
                pager_url   => $q->url
                    . "?action=list&module=documents&search_for=$search_value",
            };
        }
    }

    return ( \%basket, \%error );
}

# ----------------------------------------------------
sub meta_parse {
    my $meta = shift;
    my %return;
    while ( my ( $fk_field, $meta ) = each %$meta ) {
        my $fk_class = $meta->{'foreign_class'};
        $return{ $fk_field } = $fk_class->table;
    }

    return %return;
}

# ----------------------------------------------------
sub match_context {
    my ( $find, $s ) = @_;

    # Remove punctuation used for MySQL boolean/fulltext searches, spaces
    my @find 
        = map { s/^["(\s+-]+|[")\s]+$//g; quotemeta($_) } 
        split /\s+/, $find;

    my $t = $s;
    for my $f ( @find ) {
        $s =~ s/($f)/${START_TOKEN}${1}${END_TOKEN}/gi;
    }

    #
    # For very long strings (e.g., literature abstracts), this limits
    # to just the first seven hits;  if there are that many or more,
    # this much context should help the user understand that this is 
    # a good match.
    #
    my $pos         = -1;
    my $match_num   = 0;
    my $max_matches = 7;
    my $last_match  = 0;
    while ( ( $pos = index( $s, $END_TOKEN, $pos )) > -1 ) {
        $match_num++;
        $last_match = $pos;
        if ( $match_num == 7 ) {
            my $stop = index( $s, $END_TOKEN, $pos + 1 ) || $pos + 1;
            $s = substr( $s, 0, $stop + 25 );
        }
        else {
            $pos++;
        }
    }

    # 
    # Even if there weren't too many matches, the string may still
    # be too long, so truncate it after the last match.
    # 
    if ( $match_num < $max_matches && length($s) - $last_match > 50 ) {
        $s = substr( $s, 0, $last_match + 25 );
    }

    $s =~ s/^[^$START_TOKEN]+((?:\S+\s+){2}\S*$START_TOKEN)/...$1/xms;
    $s =~ s/
        ($END_TOKEN\S*)
        ((?:\s+\S+){3})
        .{$MAX_CHARS_MATCH_CONTEXT,}?
        ((?:\S+\s+){3})
        (\S*$START_TOKEN)
        /$1$2...<br>$3$4/gxms;

    $s =~ s/$START_TOKEN/<b><span class="matching">/g;
    $s =~ s/$END_TOKEN/<\/span><\/b>/g;

    return $s;
}

# ----------------------------------------------------

=pod

=head1 NAME

diversity_view - CGI interface to GDPDM schema

=head1 DESCRIPTION

A simple CGI viewer for the GDPDM schema.

=head1 SEE ALSO

CGI, GDPDM::CDBI.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2006 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
