##############################################################################
#
#   Description:    Subclass of Bio::EnsEMBL::ExternalData::Disease::DBHandler
#                   extended to support keyword lookups
#
#   Author:         jws
#                   Disease objects by Arek
#
#   History:        2000-10-01
#
##############################################################################

package DiseaseHandler;

=head1 NAME

DiseaseHandler

=head1 SYNOPSIS

Subclass of Bio::EnsEMBL::ExternalData::Disease::DBHandler to support keyword lookups.

Typical usage:
my $diseasedb = new DiseaseHandler( -user   => 'ensembl', 
                                    -dbname => 'disease',
                                    -host   => 'myhost.ebi.ac.uk',
                                    -ensdb  => $ensembldb,
                                   );



my @diseases=$diseasedb->diseases_on_chromosome(22);
my @diseases=$diseasedb->diseases_without_genes;
my @diseases=$diseasedb->all_diseases;
my $disease =$diseasedb->disease_by_name("DiGeorge syndrome (2)");
my @diseases=$diseasedb->diseases_like("corneal");

The calls above behave exactly as in the superclass. This subclass adds keyword searches:

my @disease_names = $diseasedb->disease_names_by_keyword('cancer bowel');
my @diseases      = $diseasedb->diseases_by_keyword('cancer bowel');

=head1 AUTHOR

James Stalker - jws@sanger.ac.uk

=cut

use DiseaseKwIndex;
use Bio::EnsEMBL::ExternalData::Disease::DBHandler;
@ISA =('Bio::EnsEMBL::ExternalData::Disease::DBHandler');


# Constructor.
#
# Builds the object with the superclass constructor, attaches a
# DiseaseKwIndex keyword index (index name 'disease_index') that is given the
# object's database handle, and then calls update_index before returning.
#
# Args   : passed through unchanged to the superclass constructor
# Returns: the new object
# Throws : "Could not connect to index" (via $self->throw) when
#          DiseaseKwIndex->new returns a false value
sub new {
    my $class = shift;

    my $self = $class->SUPER::new(@_);

    my $kw_index = DiseaseKwIndex->new({
        dbh        => $self->_db_handle,
        index_name => 'disease_index',
    });
    $self->throw("Could not connect to index") unless $kw_index;

    $self->_index_handle($kw_index);
    $self->update_index;

    return $self;
}
        


# Look up the names of diseases whose index entries match ALL of the given
# keywords (the index is searched with boolean => 'AND').
#
# Args   : $keywords - search string (required)
#          $offset   - zero-based offset of the first hit wanted (optional;
#                      a missing offset is treated as 0)
#          $count    - handed to the index search as 'num' (optional)
# Returns: whatever _get_disease_names returns for the matching ids, or an
#          empty list when the search found nothing
# Throws : via $self->throw when no keywords are given, or with the index's
#          error message when the search returns undef
sub disease_names_by_keyword {
    my ($self, $keywords, $offset, $count) = @_;
    $keywords || $self->throw("I need some keywords");

    # KwIndex uses a 1-based offset, callers of this method a 0-based one.
    $offset = ($offset || 0) + 1;

    my $index   = $self->_index_handle();
    my $arr_ref = $index->search({
        words   => $keywords,
        boolean => 'AND',
        num     => $count,
        start   => $offset,
    });

    # The last error message lives in the index object's ERROR hash slot
    # (DBIx::KwIndex convention; DiseaseKwIndex is assumed to follow it).
    # It must be read with {...}: ->(...) would call the object as code.
    $self->throw($index->{ERROR}) if !defined($arr_ref);
    print STDERR  join(',', @$arr_ref),"\n";

    return () unless scalar(@$arr_ref);

    my $query_string = "select disease from disease where disease.id in ("
                     . join(',', @$arr_ref) . ")";
    return $self->_get_disease_names($query_string);
}



# Fetch disease objects whose index entries match the given keywords.
#
# If the keyword string contains a '*', the index is searched in regex mode
# (re => 1); otherwise every keyword must match (boolean => 'AND').
#
# Args   : $keywords - search string (required)
#          $offset   - zero-based offset of the first hit wanted (optional;
#                      a missing offset is treated as 0)
#          $count    - handed to the index search as 'num' (optional)
# Returns: whatever _get_disease_objects returns for the matching ids, or an
#          empty list when the search found nothing
# Throws : via $self->throw when no keywords are given, or with the index's
#          error message when the search returns undef
sub diseases_by_keyword {
    my ($self, $keywords, $offset, $count) = @_;
    $keywords || $self->throw("I need some keywords");

    # KwIndex uses a 1-based offset, callers of this method a 0-based one.
    $offset = ($offset || 0) + 1;

    my $index = $self->_index_handle();

    # Both search modes share these arguments; only the mode flag differs.
    my %search = (
        words => $keywords,
        num   => $count,
        start => $offset,
    );
    if ($keywords =~ m/\*/) {
        print STDERR "REGEX on $keywords\n";
        $search{re} = 1;
    }
    else {
        $search{boolean} = 'AND';
    }

    my $arr_ref = $index->search(\%search);

    # The last error message lives in the index object's ERROR hash slot
    # (DBIx::KwIndex convention; DiseaseKwIndex is assumed to follow it).
    # It must be read with {...}: ->(...) would call the object as code.
    $self->throw($index->{ERROR}) if !defined($arr_ref);
    print STDERR "DH disease",join(',', @$arr_ref),"\n";

    return () unless scalar @$arr_ref;

    my $id_string = join(',', @$arr_ref);

    my $query_string= "SELECT d.disease,g.id,g.gene_symbol,g.omim_id,g.start_cyto,g.end_cyto, g.chromosome 
                            FROM disease AS d,gene AS g 
                            WHERE g.id=d.id 
                            AND d.id IN ($id_string)";

    return $self->_get_disease_objects($query_string);
}



# Count the index hits for the given keywords.
#
# Uses the same search mode selection as diseases_by_keyword: regex mode
# (re => 1) when the keyword string contains a '*', otherwise an AND search.
# No offset or limit is passed to the index.
#
# Args   : $keywords - search string (required)
# Returns: number of ids returned by the index search
# Throws : via $self->throw when no keywords are given, or with the index's
#          error message when the search returns undef
sub diseases_by_keyword_count {
    my ($self, $keywords) = @_;
    $keywords || $self->throw("I need some keywords");

    my $index = $self->_index_handle();

    my %search = (words => $keywords);
    if ($keywords =~ m/\*/) {
        print STDERR "REGEX on $keywords\n";
        $search{re} = 1;
    }
    else {
        $search{boolean} = 'AND';
    }

    my $arr_ref = $index->search(\%search);

    # A failed search returns undef; report it like the other keyword
    # methods do rather than dereferencing undef and reporting zero hits.
    # The error message is read from the index object's ERROR hash slot
    # (DBIx::KwIndex convention; DiseaseKwIndex is assumed to follow it).
    $self->throw($index->{ERROR}) if !defined($arr_ref);

    return scalar(@$arr_ref);
}


# Rebuild the keyword index if it is out of date.
#
# Reads the 'disease' and 'indexes' values from the last_update table and
# compares them numerically.  Unless 'indexes' is at least as large as
# 'disease', load_index is called and last_update is touched afterwards.
sub update_index {
    my ($self) = @_;

    my ($disease_stamp, $index_stamp) =
        $self->_db_handle->selectrow_array('SELECT disease, indexes from last_update');

    unless ($index_stamp >= $disease_stamp) {
        $self->load_index;

        # Per the original author's note: assigning NULL fires the timestamp
        # on the 'indexes' field, while disease=disease leaves that one as is.
        my $touch = $self->_db_handle->prepare('UPDATE last_update SET disease=disease,indexes=NULL');
        $touch->execute;
    }
}
    



# Rebuild the keyword index from scratch.
#
# Empties the index, then asks it to index every document id from 1 up to
# the largest id found in the disease table.
#
# Throws : via $self->throw, with the index's error message, when
#          add_document returns a false value
sub load_index {
    my $self=shift;
    my $index=$self->_index_handle();
    $index->empty_index;
    my ($max_id)=$self->_db_handle->selectrow_array('SELECT MAX(id) FROM disease');

    # The last error message lives in the index object's ERROR hash slot
    # (DBIx::KwIndex convention; DiseaseKwIndex is assumed to follow it).
    # It must be read with {...}: ->(...) would call the object as code.
    $index->add_document([1..$max_id]) or $self->throw($index->{ERROR});
    print STDERR "Loading disease index - $max_id documents\n";
}




# Private accessor for the keyword index object.
#
# Args   : optional new index object; it is stored only when defined
# Returns: the value currently held in the '_index_handle' slot
sub _index_handle {
    my $self      = shift;
    my $new_index = shift;

    $self->{'_index_handle'} = $new_index if defined $new_index;

    return $self->{'_index_handle'};
}

1;
