# POD documentation - main docs before the code

=head1 NAME

Bio::SeqIO::ncbitrace - Creates SeqIO stream from 'trace' files

=head1 SYNOPSIS

Do not use this module directly.  Use it via the Bio::SeqIO class.

=head1 SYNOPSYS

  use Bio::SeqIO;
  use Bio::FeatureIO;

  #...EITHER...
  my $fasta_io = Bio::SeqIO->new( -format => 'fasta',
                                  -file   => 'fastainputfile' );
  #...OR...
  my $scf_io   = Bio::SeqIO->new( -format => 'scf',
                                  -file   => 'scfinputfile' );
  #...OR...
  my $qual_io  = Bio::SeqIO->new( -format => 'qual' 
                                  -file   => 'qualinputfile' );

  # ...AND EITHER...
  my $info_io = Bio::FeatureIO->new( -format => 'traceinfo',
                                     -file   => 'infoinputfile' );
  # ...OR...
  my $info_io = Bio::FeatureIO->new( -format => 'tracexml',
                                     -file   => 'xmlinputfile' );

  # ...THEN...
  my $trace_io = Bio::SeqIO->new( -format    => 'ncbitrace',
                                  -fasta     => $fasta_io, #OR
                                  -scf       => $scf_io,   #OR
                                  -qual      => $qual_io,  
                                  -traceinfo => $info_io,  #OR
                                  -tracexml  => $xml_io );

  while( my $seq = $trace_io->next_seq{
    my $feature = $seq->
  }

=head1 DESCRIPTION

This object can create Bio::Seq objects to and from files
downloaded from the NCBI Trace Archive.

=head1 AUTHORS - William Spooner

Email: whs@ebi.ac.uk

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::SeqIO::ncbitrace;
use vars qw(@ISA);
use strict;
use Bio::SeqIO;
use Bio::FeatureIO;
use Bio::Species;

# Object preamble - inherits from Bio::SeqIO

@ISA = qw(Bio::SeqIO);

# Chained constructor

sub _initialize {
  my($self,@args) = @_;

  $self->SUPER::_initialize(@args);

  my %params = @args;
  @params{ map { lc $_ } keys %params } = values %params; # lowercase keys

  my @streams = qw( scf fasta qual tracexml traceinfo );

  # Validate streams
  my %io_streams;
  foreach my $stream( @streams ){
    my $io = $params{"-".$stream} || next;
    unless( UNIVERSAL::isa($io, "Bio::Root::IO") ){
      $self->throw( "-$stream must be Bio::Root::IO compliant" );
    }
    $io_streams{$stream} = $io;
  }
  $self->{_streams} = {%io_streams};
  # SEQUENCE:   fasta and quality OR scf
  # ANNOTATION: xml OR info
}


=head2 next_seq

 Title   : next_seq
 Usage   : $seq = $stream->next_seq()
 Function: returns the next sequence in the stream
 Returns : Bio::Seq object
 Args    : NONE

=cut

sub next_seq {
  my $self = shift;

  # The seq is either from the scf stream, or by combining the fasta and
  # qual streams.
  my $seq;
  if( $self->has_stream('scf') ){ 
    $seq = $self->next_obj('scf');
  }
  else{
    if( $self->has_stream('qual') ){ 
      $seq = $self->next_obj('qual');
    }
    if( $self->has_stream('fasta') ){
      if( $seq ){ 
        my $fseq = $self->next_obj('fasta');
        $seq->primary_seq( $fseq->primary_seq );
      }
      else{ $seq = $self->next_obj('fasta') }
    }
  }
  
  # The feature is either from the xml stream or the info stream
  my $feat;
  if( $self->has_stream('tracexml') ){ 
    $feat = $self->next_obj('tracexml');
  }
  elsif( $self->has_stream('traceinfo') ){
    $feat = $self->next_obj('traceinfo');
  }
  
  # Can we get the trace name from the header?
  if( $seq ){
    my $desc = $seq->description;
    if( $desc =~ /name:(\w+)/io ){
      $seq->accession_number( $1 );
    }
  }

  # Update the seq with the feat
  if( $seq and $feat ){

    # Ensure feature corresponds to sequence.
    # Seq display_name can be either the ncbi_ti (e.g. gnl|ti|5728633)
    # or the trace_name (e.g. jeb67f06.b1).
    my $name_type = 'trace_name';
    my $seq_name = $seq->display_name;
    if( $seq_name =~ m/ti\|(\d+)/ ){
      $name_type = 'ti';
      $seq_name = $1;
    }
    my( $feat_name ) = $feat->get_tag_values($name_type);
    unless( $seq_name eq $seq_name ){
      $self->throw( "Seq $seq_name differs from feature $feat_name!" );
    }
    
    # Use trace_name as accession, and ti as primary_id
    if( my ($n) = $feat->get_tag_values('trace_name') ){
      $seq->accession_number( $n );
    }
    if( my ($n) = $feat->get_tag_values('ti') ){
      $seq->primary_id( $n );
    }

    # Can we get the mate_pair from the description?
    if( $seq->description =~ /mate_pair:(\d+)/io ){
      my $mp = $1;
      unless( $feat->get_tag_values('mate_pair') ){
        $feat->add_tag_value( 'mate_pair',$mp );
      }
    }

    $seq->add_SeqFeature( $feat );

    # Process Species
    my $species;
    if( my($code) = $feat->get_tag_values('species_code') ){
      my($taxid) = $feat->get_tag_values('taxid');
      my @bits = split /\s+/, $code;
      $bits[0] = ucfirst( lc( $bits[0] ) );
      $bits[1] = lc( $bits[1] );
      $species = Bio::Species->new
          (
           -classification=> [ reverse @bits[0..1] ],
           $taxid ? ( -ncbi_taxid => $taxid ) : (), 
           );
      if( @bits > 2 ){ $species->sub_species( join( ' ', @bits[2..$#bits] ) ) }
      $seq->species($species);
    }
  }

  return $seq;
}


=head2 write_seq

 Title   : write_seq
 Usage   : $stream->write_seq(@seq)
 Function: Simply throws - ncbitrace is a read-only stream
 Returns : N/A
 Args    : None


=cut

sub write_seq {
  my $self = shift;
  $self->throw("ncbitrace is a read-only stream");
}


=head2 has_stream

 Title   : next_seq
 Usage   : if( $stream->has_stream( 'fasta' ) ){...}
 Function: returns whether the named stream is registered
 Returns : 1/0
 Args    : string

=cut

sub has_stream{
  my $self = shift;
  my $name = shift || $self->throw( "Need a stream name" );
  return $self->{_streams}->{$name} ? 1 : 0;
}


=head2 get_stream

 Title   : get_stream
 Usage   : my $stream = $stream->get_stream('fasta')
 Function: returns the IO stream for the named stream
 Returns : Bio::Root::IO object
 Args    : string

=cut

sub get_stream{
  my $self = shift;
  my $name = shift || $self->throw( "Need a stream name" );
  return $self->{_streams}->{$name};
}


=head2 next_obj

 Title   : next_obj
 Usage   : $seq = $stream->next_obj()
 Function: returns the next object in the stream
 Returns : Bio::SeqI or Bio::FeatureI object, depending on stream
 Args    : NONE

=cut

sub next_obj{
  my $self = shift;
  my $name = shift || $self->throw( "Need a stream name" );
  my $io = $self->get_stream($name) 
      || $self->throw( "Stream $name is unknown" );
  
  if( $io->can('next_seq'    ) ){ return( $io->next_seq ) }
  if( $io->can('next_feature') ){ return( $io->next_feature ) }
  $self->throw("Dunno what next_* call to use for $io!");
}



1;
