#!/lab/bin/perl

use strict;
package IndexedFasta;



# Constructor: returns a new, empty IndexedFasta object (a blessed hash).
#
# May be called as a class method (IndexedFasta->new), on an existing
# instance ($obj->new), or as a plain function (IndexedFasta::new()).
# The object is blessed into the invoking class so that subclasses get
# instances of their own class rather than of this package.
sub new{
  my $invocant=shift;
  # Accept an instance or a class name; fall back to this package when the
  # sub is called as a plain function without an invocant.
  my $class=ref($invocant) || $invocant || __PACKAGE__;
  my $self={};
  return bless $self,$class;
}
# File-scoped lexical; not referenced by any sub in this file.
# NOTE(review): this assignment is the only statement executed at run time
# here, and the file has no trailing `1;`, so it is presumably what gives
# `require` its true return value -- confirm before removing it.
my $hello="HELLO";
# Look up one record of an indexed FASTA file and store it on the object.
#
# Named arguments:
#   FASTA  - path of the FASTA file (must be readable).
#   SEQ_ID - accession to find; compared with `eq` against the first column
#            of the index file.
#   INDEX  - path of the index file. Defaults to get_default_index(FASTA).
#            If the file does not exist it is created with build_index_file().
#   START,
#   STOP   - optional byte offsets into the record body (the bytes following
#            the header line). Only used when both are defined; they are
#            swapped when STOP < START. The range is clamped to the record.
#
# On a match the header line (without its newline) is stored through
# $self->header and the requested bytes through $self->sequence. Only the
# trailing newline is removed from the sequence; line breaks inside a
# multi-line record are returned as they appear in the file, and START/STOP
# count those bytes too.
#
# If SEQ_ID is not present in the index, header and sequence are left
# untouched.
#
# Dies when a file cannot be opened, positioned, read or closed.
# Returns 1.
sub do_sequence_search{
  my $self=shift;
  my %args=(
	    START=>undef,
	    STOP=>undef,
	    SEQ_ID=>'',
	    FASTA=>'',
	    INDEX=>'',
	    @_,
	   );

  my ($begin,$end)=@args{qw(START STOP)};
  my $have_range=defined $begin && defined $end;
  ($begin,$end)=($end,$begin) if $have_range && $end<$begin;
  my $seq_id=$args{SEQ_ID};

  # Three-argument open with lexical handles: the file name can never be
  # interpreted as a mode, and the handles are released even if we die.
  open(my $fasta_fh,'<',$args{FASTA}) or die "Can't open the file: $args{FASTA}. $!\n";
  my $index_name=$args{INDEX};
  $index_name=$self->get_default_index($args{FASTA}) unless $index_name;
  unless (-e $index_name){$self->build_index_file(FASTA=>$args{FASTA},INDEX=>$index_name);}
  open(my $index_fh,'<',$index_name) or die "Can't open the file: $index_name. $!\n";

  # Each index line is: accession, byte offset of the record, size of the
  # whole record (header line included), size of the header line.
  while (my $line=<$index_fh>) {
    chomp $line;
    my ($id,$offset,$record_size,$header_size)=split(/\t/,$line);
    next unless defined $header_size;   # blank or malformed index line
    next unless $id eq $seq_id;

    seek($fasta_fh,$offset,0) or die "did not find $id in $args{FASTA}\n";
    my $header;
    defined(read($fasta_fh,$header,$header_size))
      or die "Can't read from the file: $args{FASTA}. $!\n";
    chomp $header;
    $self->header($header);

    # The indexed size covers the header line as well; the body is the rest.
    my $body_size=$record_size-$header_size;
    my $skip=0;
    my $length=$body_size;
    if ($have_range){
      $skip=$begin;
      $length=$end-$begin;
      # Never read past the end of this record into the next one.
      my $available=$body_size-$begin;
      $available=$body_size if $available>$body_size;
      $length=$available if $length>$available;
    }
    $length=0 if $length<0;

    # Position on the first requested byte of the body. The whole-record
    # case must start after the header line, not at the record offset,
    # otherwise the header would be returned as part of the sequence.
    seek($fasta_fh,$offset+$header_size+$skip,0) or die "did not find $id in $args{FASTA}\n";
    my $sequence;
    defined(read($fasta_fh,$sequence,$length))
      or die "Can't read from the file: $args{FASTA}. $!\n";
    chomp $sequence;
    $self->sequence($sequence);
    last;
  }

  close($fasta_fh) or die "Couldn't close the file: $args{FASTA}. $!\n";
  close($index_fh) or die "Couldn't close the file: $index_name. $!\n";
  return 1;
}

# Derive the default index file name for a FASTA file.
#
# Takes the FASTA path and returns it with everything from the first dot of
# the file name replaced by ".index" (seq.fasta -> seq.index,
# seq.tar.gz -> seq.index). Dots in directory components are left alone, so
# "./data/seq.fasta" gives "./data/seq.index" and a name without an
# extension simply gets ".index" appended.
sub get_default_index{
  my $self=shift;
  my $index_name=shift;
  # Only look at the last path component: the match may not cross a
  # directory separator ("/" or "\").
  $index_name=~s{\.[^/\\]*$}{};
  $index_name.='.index';
  return $index_name;
}

# Accessor for the stored header line.
# With an argument, stores it under the HEADER key first; always returns
# the value currently stored there.
sub header{
  my ($self,@new_value)=@_;
  $self->{HEADER}=$new_value[0] if @new_value;
  return $self->{HEADER};
}
# Accessor for the stored sequence.
# With an argument, upper-cases it and stores it under the SEQUENCE key
# first; always returns the value currently stored there.
sub sequence{
  my ($self,@new_value)=@_;
  $self->{SEQUENCE}=uc($new_value[0]) if @new_value;
  return $self->{SEQUENCE};
}

# Build the index file for a FASTA file.
#
# Named arguments:
#   FASTA - path of the FASTA file to scan.
#   INDEX - path of the index file to write; defaults to
#           get_default_index(FASTA). An existing file is overwritten so a
#           rebuild never leaves duplicate or stale entries behind.
#
# One tab-separated line is written per record:
#   accession, byte offset of the header line, size in bytes of the whole
#   record (header line included), size in bytes of the header line.
#
# The accession is taken from the header line, trying in order:
#   1. the text between the last two "|" that are followed by more text,
#   2. the text after the first "|" up to the next whitespace or "|",
#   3. the whole header line after ">".
# The first ".<digits>" (the version suffix) is then removed.
# Records whose accession ends up empty (or "0") are not indexed.
#
# Dies when a file cannot be opened or closed. Returns 1.
sub build_index_file{
  my $self=shift;
  my %args=(
	    FASTA=>'',
	    INDEX=>'',
	    @_,
	   );

  open(my $fasta_fh,'<',$args{FASTA}) or die "Can't open the file: $args{FASTA}. $!\n";
  my $index_name=$args{INDEX};
  $index_name=$self->get_default_index($args{FASTA}) unless $index_name;
  open(my $index_fh,'>',$index_name) or die "Can't open the file: $index_name. $!\n";

  my ($accession,$offset,$header_size);
  # Running byte count of the current record; only the length is needed,
  # so the sequence itself is never kept in memory.
  my $size=0;

  while (my $line=<$fasta_fh>) {
    if ($line=~m/^>/){
      # A new header closes the previous record.
      print {$index_fh} "$accession\t$offset\t$size\t$header_size\n" if $accession;
      $size=0;
      $header_size=length $line;
      #get the pure Accession, without the version number.
      ($accession)=($line=~m/^>.+\|(.+?)\|.+$/);
      ($accession)=($line=~m/^>.+?\|([^\s|]+)/) unless $accession;
      ($accession)=($line=~m/^>(.*)$/) unless $accession;
      $accession=~s/\.\d+//;
      # tell() is already past the header line, so step back over it.
      $offset=tell($fasta_fh)-$header_size;
    }
    # The header line counts towards the record size; readers subtract the
    # header size again to get the body size.
    $size+=length $line;
  }
  print {$index_fh} "$accession\t$offset\t$size\t$header_size\n" if $accession;

  close($fasta_fh) or die "Couldn't close the file: $args{FASTA}. $!\n";
  close($index_fh) or die "Couldn't close the file: $index_name. $!\n";
  return 1;
}
