package LookAlign::Interface::Flatfile;

our $VERSION = '0.01';

# $Id: Flatfile.pm,v 1.1.2.1 2007/06/14 19:03:07 kclark Exp $

=head1 NAME

LookAlign::Interface::Flatfile

=head1 DESCRIPTION

Flatfile Interface module for alignemnt viewer. Documentation is added to describe each section/subroutine.

=cut

use warnings;
use strict;

use base qw(LookAlign::Interface::Root);

use Carp;
use CGI;
use File::Temp qw(tempfile tempdir);
use Time::Format qw(%time);

use LookAlign::Alignment::Container;
use LookAlign::Alignment::Container::Sq;
use LookAlign::Alignment::Renderer;

###########
# METHODS #
###########

# Function  : Directs which result page to display based on search type.
# Arguments : None
# Returns   : 1
# Notes     : None specified.

sub make_interface_result_page {
    my ($self) = @_;

    my $search_type = $self->cgi_param('search_type');

    if    ($search_type eq 'locus')     { $self->make_detailed_search_page; }
    elsif ($search_type eq 'pz_number') { $self->make_detailed_search_page; }
    else { croak("Internal error: Unknown search type ($search_type)"); }

    # Might need repair
    return 1;
}

# Function  : Generates a list of available assays.
# Arguments : None
# Returns   : 1
# Notes     : None specified.

sub display_list {
    my ($self) = @_;

    my @list = @{$self->get_list};

    my $root_url         = $self->root_url;
    my $session_id       = $self->session_id;
    my $new_window_links = $self->cfg('NEW_WINDOW_LINKS');

    my $target = $new_window_links ? qq[target="_blank"] : " ";

    # Write HTML
    print qq[<p>\n];
    print qq[<h2>List of Available Assays</h2><p>\n];
    print qq[<table border="0">\n];

    print qq[<tr class="highlight"><td>Assay Id</td><td>Info</td></TR>\n];

    foreach my $ref_row (@list) {
        my ($assay_id, $info) = @{$ref_row};
        print qq[<tr>\n];
        my $assay_id_url =
          qq[<a href="$root_url?session_id=$session_id&assay_id=$assay_id&page=overview" $target>$assay_id</a>];
        print qq[<td>$assay_id_url</td><td>$info</td>\n];
        print qq[</TR>\n];
    }

    print qq[</table>\n];
}

# Function  : Generates content for list of available assays.
# Arguments : None
# Returns   : \@list
# Notes     : None specified.

sub get_list {
    my ($self) = @_;

    my $dir = $self->cfg('FLATFILE_REPOSITORY');

    my @sequence_files = glob("$dir/*.fasta");

    my @list;
    foreach my $sequence_file (@sequence_files) {
        my ($assay_id) = $sequence_file =~ /\/(\d+)\.fasta$/;

        croak(
            "Invalid file name ($sequence_file) in repository, name must be <number>.fasta"
        ) unless $assay_id;

        my $info      = '-- no info --';
        my $info_file = "$dir/$assay_id.info";
        $info = $self->_get_info($info_file) if -e $info_file;

        push @list, [$assay_id, $info];
    }

    return (\@list);
}

# Function  : Retrieves assays by assay id.
# Arguments : $assay_id, $out
# Returns   : 1
# Notes     : None specified.

sub get_by_assay_id {
    my ($self, $assay_id, $out) = @_;

    my $dir = $self->cfg('FLATFILE_REPOSITORY');

    my $fasta_file   = "$dir/$assay_id.fasta";
    my $quality_file = "$dir/$assay_id.quality";
    my $info_file    = "$dir/$assay_id.info";

    unless (-e $fasta_file) {
        croak("This assay_id ($assay_id) cannot be found");
    }
    my ($ref_seq_accession, $ref_sequence) =
      $self->_parse_fasta($fasta_file, '');

    my ($ref_qua_accession, $ref_quality);
    if (-e $quality_file) {
        ($ref_qua_accession, $ref_quality) =
          $self->_parse_fasta($quality_file, ' ');

        # Check sequence-quality accession consistency
        my @seq_accession = @$ref_seq_accession;
        my @qua_accession = @$ref_qua_accession;

        for (0 .. $#seq_accession) {
            croak(
                "Sequence - Quality score labels do not match (SEQ:$seq_accession[$_] vs. QUA:$qua_accession[$_])"
            ) unless $seq_accession[$_] eq $qua_accession[$_];
        }
    }

    my $info = -e $info_file
        ? $self->_get_info($info_file)
        : '';

    my @accession = @$ref_seq_accession;
    my @sequence  = @$ref_sequence;
    my @quality   = @$ref_quality if $ref_quality;

    open(OUT, ">$out") or croak("Cannot open file ($out): $!");

    print OUT "<data>\n";

    my $unique_id = 0;

    for my $i (0 .. $#accession) {
        my $accession = $accession[$i];
        my $sequence  = $sequence[$i];
        my $quality   = $quality[$i];

        $unique_id++;

        # -- Cleaning --

        # Remove preceding and leading spaces
        # $quality =~ s/^\s+//; $quality =~ s/\s+$//;
        # $sequence =~ s/^\s+//; $sequence =~ s/\s+$//;

        # Uppercase sequence
        $sequence = uc($sequence);

        # Replace space (delimiter) with comma in quality score
        $quality =~ tr/ /,/ if $quality;

        # Discard quality score if sizes do not match (if quality file exists)
        my @quality = split(',', $quality) if $quality;
        croak(
            "Sequence and Quality score lengths do not match (Assay_id: $assay_id - Accession: $accession)"
          )
          if (-e $quality_file and length($sequence) != scalar @quality);

        # Remove comma from info line
        $info =~ s/,/_/g if $info;

        # -- End Cleaning --

        # -- Checking --

        croak(
            "This assay id ($assay_id) contains a sequence ($accession) containing invalid sequence characters, only A, T, C, G, N, dash are valid"
        ) if ($sequence =~ /[^ATCGN\-]/);
        croak(
            "This assay id ($assay_id) contains a sequence ($accession) containing invalid quality score characters, only comma, digits, dash are valid: $quality"
        ) if ($quality && $quality =~ /[^,\d\-]/);

        # -- End Checking --

        print OUT qq[<_sequence>\n];
        print OUT qq[\t<sequence label="$accession">$sequence</sequence>\n];
        print OUT qq[\t<attribute label="quality">$quality</attribute>\n]
          if $quality;
        print OUT qq[\t<attribute label="unique_id">$unique_id</attribute>\n];
        print OUT qq[</_sequence>\n];
    }

    print OUT qq[<_offset>0</_offset>\n];
    print OUT qq[<_info>assay_id=$assay_id,other=$info</_info>\n];
    print OUT qq[</data>\n];

    close OUT;

    return 1;
}

# Function  : Parse fasta file (sequence and quality score).
#             Used by get_assay_id.
# Arguments : $file, $separator
# Returns   : \@accession, \@sequence
# Notes     : This is a private method.

sub _parse_fasta {
    my ($self, $file, $separator) = @_;

    my @accessions;
    my @sequences;

    open(IN, "<$file") or croak("Cannot read file ($file)");

    my $current_accession;
    my $current_sequence;

    while (my $line = <IN>) {
        chomp $line;
        if ($line =~ /^>/) {
            if ($current_accession) {
                push @accessions, $current_accession;
                push @sequences,  $current_sequence;
            }

            $line =~ s/^>//;
            $line =~ s/^\s+//;
            $line =~ s/\s+$//;

            ($current_accession) = $line =~ /^(\S+)/;
            $current_sequence = '';
        }

        else {
            $line =~ s/^\s+//;
            $line =~ s/\s+$//;
            $line =~ s/\s+/ /g;

            $current_sequence .=
                $current_sequence
              ? $separator . $line
              : $line;
        }
    }

    if ($current_accession) {
        push @accessions, $current_accession;
        push @sequences,  $current_sequence;
    }

    return (\@accessions, \@sequences);
}

# Function  : Gets and HTML'izes info file contents.
#             Used by get_assay_id and get_list.
# Arguments : $file
# Returns   : $html
# Notes     : This is a private method.

sub _get_info {
    my ($self, $file) = @_;

    my @info;

    open(IN, "<$file") or croak("Cannot read file ($file)");
    while (<IN>) { chomp; push @info, $_; }

    my $info = join('<br>', @info);

    croak("Empty info file ($file)") unless defined $info;

    return $info;
}

# Function  : Generates available search boxes.
# Arguments : None
# Returns   : $html_code
# Notes     : None specified.

sub get_search_forms {
    my ($self) = @_;

    my $root_url   = $self->root_url;
    my $session_id = $self->session_id;

    my $search_forms = <<FORMS;
<tr>
<td class="box">
<form method=get action="$root_url">
     <h3>Enter an assay id to retrieve its alignment:</h3>
     Assay id (must be a number):&nbsp;<input type="text" name="assay_id" SIZE="10" MAXLENGTH="40">
     &nbsp;&nbsp;&nbsp;
     <input type="hidden" name="page" value="search_result">
     <input type="hidden" name="search_type" value="assay_id">
     <input type="hidden" name="session_id" value="$session_id">
     <input type="submit" value="submit">
     <p>
     <p>
</form>
</td>
</TR>

FORMS

    return $search_forms;
}

# Function  : Returns overview info based on the alignment container object.
# Arguments : al, $aln_length, $type
# Returns   : $html_code
# Notes     : None specified.

sub get_overview_info {
    my ($self, $al, $aln_length, $type) = @_;

    my $info = $al->info;

    # Parse info line
    my ($assay_id) = $info =~ /assay_id=([^,]+)/;
    my ($other)    = $info =~ /other=([^,]+)/;

    # Populate missing info
    $assay_id = defined $assay_id ? $assay_id : 'Unknown';
    $other    = defined $other    ? $other    : 'Unknown';

    # Get sequence number related information
    my @sequences           = $al->valid_sequences;
    my $number_of_sequences = @sequences;

    # Get start and end positions
    my $start_pos = $al->offset + 1;
    my $end_pos   = $start_pos + $al->longest_seq_len - 1;

    my $overview_info;

    $overview_info .= qq[<h2>Assay id: $assay_id ($other)</h2><p>\n];
    if ($type eq 'overview') {
        $overview_info .=
          qq[<h3>Alignment Overview (total alignment length $aln_length)</h3>\n];
    }
    elsif ($type eq 'alignment') {
        $overview_info .=
          qq[<h3>Alignment between position $start_pos and $end_pos (total alignment length $aln_length)</h3>\n];
    }
    else { croak("Internal error: Invalid get_overview_info type ($type)"); }

    return $overview_info;
}

# Function  : Returns title info for text dumps.
# Arguments : al
# Returns   : $text
# Notes     : None specified.

sub get_text_dump_info {
    my ($self, $al) = @_;

    my $info = $al->info;

    my ($assay_id) = $info =~ /assay_id=([^,]+)/;
    my ($other)    = $info =~ /other=([^,]+)/;

    # Populate missing info
    $assay_id = defined $assay_id ? $assay_id : 'Unknown';
    $other    = defined $other    ? $other    : 'Unknown';

    my $title = "Assay id: $assay_id ($other)";

    return $title;
}

# Function  : Returns interface specific modules
#             to be displayed in debug message.
# Arguments : None
# Returns   : @critical_modules
# Notes     : None specified.

sub get_critical_modules {
    my ($self) = @_;

    return ($INC{'LookAlign/Interface/Flatfile.pm'});
}

# Function  : Returns interface specific infomation
#             to be displayed in debug message.
# Arguments : None
# Returns   : $html_code
# Notes     : None specified.

sub get_critical_content {
    my ($self) = @_;

    my ($repository) = $self->cfg('FLATFILE_REPOSITORY') =~ /([^\/]+)\/*$/;

    return qq[<b>Repositoy</b>: $repository &nbsp;];
}

# Function  : Returns interface specific datasource infomation
#             to be included in cache index.
# Arguments : None
# Returns   : $string
# Notes     : None specified.

sub get_datasource {
    my ($self) = @_;

    return $self->cfg('FLATFILE_REPOSITORY');
}

=head1 AUTHOR

Payan Canaran <canaran@cshl.edu>

=head1 BUGS

=head1 VERSION

Version 0.01

=head1 ACKNOWLEDGEMENTS

=head1 COPYRIGHT & LICENSE

Copyright (c) 2004-2007 Cold Spring Harbor Laboratory

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself. See DISCLAIMER.txt for
disclaimers of warranty.

=cut

1;
