package Panzea::FormModifier;

our $VERSION = '0.02';

# $Id: FormModifier.pm,v 1.6.2.1 2007/06/14 19:03:22 kclark Exp $

=head1 NAME

Panzea::FormModifier - Panzea form modifier used by HTML::SearchPage

=head1 SYNOPSIS

 - used by HTML::SearchPage

=head1 DESCRIPTION

Panzea::FormModifier is used by HTML::SearchPage to format
raw data retrieved by HTML::SearchPage.

=cut

use warnings;
use strict;

use Carp;
use CGI;
use DBI;
use LWP::Simple;
use Panzea::WebFormUtils;
use Time::Format qw(%time);
use Data::Dumper;

=head1 METHODS

=head3 Constructor

 $obj = Panzea::FormModifier->new(%params)

 Function  : Constructor.
 Arguments : none
 Returns   : $formmodifier_object
 Notes     :

=cut

sub new {
    my ($class) = @_;

    my $self = bless {}, $class;

    return $self;
}

=head3 _url_templates

 Function  : Returns internal URL templates.
 Arguments : none
 Returns   : $hashref
 Notes     : This is a private method

=cut

sub _url_templates {
    my ($self) = @_;

    my $cgi      = CGI->new();
    my $database = $cgi->param("database");
    $database    = $database ? qq[database=$database&] : "";

    my $root_url =  $ENV{REQUEST_URI};
    $root_url    =~ s/\?.+//;
    $root_url    =~ s/[^\/]+$//;
    $root_url    =~ s/\/$//;
    
    my %urls = (
        source =>
          qq[$root_url/display_source_info?${database}identifier=__QUERY__],
        locality =>
          qq[$root_url/display_locality_info?${database}identifier=__QUERY__],
        trait =>
          qq[$root_url/display_trait_info?${database}identifier=__QUERY__],
        ref_seq =>
          qq[$root_url/display_ref_seq?${database}identifier=__QUERY__],
        gene_locus =>
          qq[$root_url/genelocus_search?${database}gene_locus_operator=%3D&gene_locus=__QUERY__&gene_locus=&locus_type_operator=%3D&locus_type=all&marker_type_operator=like_c&marker_type=all&ibm2_2005_chr_operator=%3D&ibm2_2005_chr=all&position_from_operator=%3E%3D&position_from=&position_to_operator=%3C%3D&position_to=&order_by=asc&order_by=ami.sort_ibm2_2005_chr&order_by=asc&order_by=ami.sort_ibm2_2005_position&order_by=asc&order_by=not_selected&output_format=html&submit=Submit#results],
        accession =>
          qq[$root_url/germplasm_search?${database}germplasm_operator=%3D&germplasm=__QUERY__&source_operator=%3D&source=&sampstat_operator=%3D&sampstat=all&genus_operator=%3D&genus=all&species_operator=%3D&species=all&subspecies_operator=%3D&subspecies=all&racename_operator=%3D&racename=&country_operator=%3D&country=all&state_province_operator=%3D&state_province=&collector_operator=%3D&collector=&collnumb_operator=%3D&collnumb=&order_by=asc&order_by=dp.accename&order_by=asc&order_by=dp.source&order_by=asc&order_by=not_selected&output_format=html&submit=Submit#results],
        marker_name => {
            SSR =>
              qq[$root_url/display_genotype_by_marker?${database}marker_name_operator=%3D&marker_name=__QUERY__&marker_type_operator=%3D&marker_type=1&assay_id_operator=%3D&assay_id=all&sampstat_operator=%3D&sampstat=all&order_by=asc&order_by=dp.accename&order_by=asc&order_by=aftt.div_obs_unit_id&output_format=html&submit=Submit#results],
            SNP =>
              qq[$root_url/display_genotype_by_marker?${database}marker_name_operator=%3D&marker_name=__QUERY__&marker_type_operator=%3D&marker_type=2&assay_id_operator=%3D&assay_id=all&sampstat_operator=%3D&sampstat=all&order_by=asc&order_by=dp.accename&order_by=asc&order_by=aftt.div_obs_unit_id&output_format=html&submit=Submit#results],
            Sequencing =>
              qq[/db/snp_alignment/alignment_viewer?${database}pz_number=__QUERY__&page=search_result&search_type=pz_number],
        },
        assay_id => {
            Sequencing =>
              qq[/db/snp_alignment/alignment_viewer?${database}assay_id=__QUERY__&page=overview],
        },
        gmap =>
          qq[$root_url/genotypes_in_map?${database}marker_name=__QUERY__;marker_type=__QUERY__;format=js],
        cmap_gene_locus =>
          qq[/db/cmap/feature_search?features=__QUERY__&search_field=feature_name&order_by=&data_source=Panzea&submit=Submit],
        fpcctg =>
          qq[http://www.genome.arizona.edu/cgi-bin/gbrowse/gbrowse?source=maizefpcctg;name=__QUERY__],
        fpcctg_conflict =>
          qq[$root_url/display_fpcctg_info?${database}identifier=__QUERY__],
        fpcchr =>
          qq[http://www.genome.arizona.edu/cgi-bin/gbrowse/gbrowse?source=maizefpcchr;name=__QUERY__],
        maizegdb_chr_view =>
          qq[http://www.maizegdb.org/cgi-bin/bin_viewer.cgi?bin=__QUERY__&sub=__QUERY__],
        maizegdb_ibm2n =>
          qq[http://www.maizegdb.org/cgi-bin/displaymaprecord.cgi?id=__QUERY__],
        maizegdb_ibm2n_conflict =>
          qq[$root_url/display_ibm2chr_info?${database}identifier=__QUERY__],
        gbrowse =>
          qq[/db/gbrowse/gbrowse/panzea_internal/?name=__QUERY__;search=Search],
        polymorphic_between_accessions =>
          qq[$root_url/polymorphic_between_accessions_step2?${database}marker_type=__QUERY__&accession1=__QUERY__],
    );

    return \%urls;
}

=head3 align_numbers

 Function  : Aligns decimal point in columns, formats as preformatted text.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected

=cut

sub align_numbers {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};

    # This modification applies only to html output
    return 1 unless $output_format eq 'html';

    exists $params{-column}
      or croak("A column number is required to align_decimal");
    my $column = $params{-column};

    # Find max lengths for int and decimal segments
    my $max_int;
    my $max_decimal;
    foreach my $row (0 .. $#{@$data}) {
        my ($int, $decimal) = $self->_parse_number($data->[$row]->[$column]);

        $max_int = length($int) if length($int) > $max_int;
        $max_decimal = length($decimal)
          if defined $decimal && length($decimal) > $max_decimal;
    }

    # Now align them
    foreach my $row (0 .. $#{@$data}) {
        my ($int, $decimal) = $self->_parse_number($data->[$row]->[$column]);

        my $formatted;

        if ($data->[$row]->[$column] =~ /\./) {
            $formatted =
              sprintf("%${max_int}s.%-${max_decimal}s", $int, $decimal);
        }

        else {
            $formatted =
              sprintf("%${max_int}s %-${max_decimal}s", $int, $decimal);
        }

        $formatted =~ s/\s/&nbsp;/g;
        $formatted = qq[<center><pre>$formatted</pre></center>];

        $data->[$row]->[$column] = $formatted;
    }

    return 1;
}

=head3 _parse_number

 Function  : parses a number into int (actually non-decimal) and decimal segments
 Arguments : $number
 Returns   : ($int, $number)
 Notes     : This is a private method.

=cut

sub _parse_number {
    my ($self, $number) = @_;

    return ('', '') unless defined $number;

    my ($int, $decimal);
    my @number = split(/\./, $number);
    if ($number =~ /\./) { $decimal = pop @number; }
    $int = join('.', @number);

    return ($int, $decimal);
}

=head3 add_link

 Function  : Converts into an HTML link.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected
             -type: Type of link to be generated.
                    'source'
                    'gene_locus'
                    'germplasm'
                    'assay_id'
                    'phenotype'
                    'locality'
                    'trait'
                    'fpcchr'
                    'fpcctg'
                    'maizegdb_chr_view'
                    'maizegdb_ibm2n'
                    'cmap_gene_locus'

=cut

sub add_link {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};
    my $dbh           = $params{-dbh};

    # This modification applies only to html output
    return 1 unless $output_format eq 'html';

    exists $params{-column}
      or croak("A column number is required to add_link");
    my $column = $params{-column};

    my $type = $params{-type} or croak("A type is required to add_link");

    if (   $type eq 'gene_locus'
        or $type eq 'accession'
        or $type eq 'source'
        or $type eq 'locality'
        or $type eq 'trait') {

        my %count_sqls = (
            gene_locus =>
              qq[SELECT count(name) FROM cdv_map_feature WHERE name = ?],
            accession =>
              qq[SELECT count(accename) FROM div_passport WHERE accename = ?],
            source =>
              qq[SELECT count(source) FROM cdv_source WHERE source = ?],
            locality =>
              qq[SELECT count(locality_name) FROM div_locality WHERE locality_name = ?],
            trait =>
              qq[SELECT count(local_trait_name) FROM div_trait_uom WHERE local_trait_name = ?],
            gbrowse =>
              qq[SELECT count(genelocus) FROM aux_seq_annotations_supp WHERE genelocus = ?],
        );

        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            my $count = 1;
            if (my $statement = $count_sqls{$type}) {
                my $sth = $dbh->prepare($statement)
                  or croak("Cannot prepare statement ($statement)");
                $sth->bind_param(1, $value);
                $sth->execute()
                  or croak("Cannot execute statement ($statement)");

                ($count) = $sth->fetchrow_array;
            }

            my $url = $self->_url_templates->{$type};
            $url =~ s/__QUERY__/$value/;

            $data->[$row]->[$column] = $count
              ? qq[<A href="$url">$value</A>]
              : qq[$value];
        }
    }

    elsif ($type eq 'assay_id') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            my ($assay_id, $pz_number) = $value =~ /^([^:]+):(.*)$/;
            my ($scoring_tech_group, $data_count) =
              $self->_get_scoring_tech_info($dbh, $assay_id);

            my $link;

            if ($data_count) {
                my $url =
                  $self->_url_templates->{assay_id}{$scoring_tech_group}
                  or croak(
                    "Unrecognized scoring_tech_group ($scoring_tech_group)!");
                $url =~ s/__QUERY__/$assay_id/;
                $link = qq[<b>$pz_number</b> <A href="$url">\[display\]</A>];
            }
            else {
                $link = qq[<b>$pz_number</b> \[no data\]];
            }

            $data->[$row]->[$column] = $link;
        }
    }

    elsif ($type eq 'fpcchr') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            my $url = $self->_url_templates->{$type};
            $url =~ s/__QUERY__/$value/;

            $data->[$row]->[$column] =
              qq[<A target="_blank" href="$url">$value</A>];
        }
    }

    elsif ($type eq 'fpcctg') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');
            
            my ($aux_map_info_id, $ctg) = split(':', $value);

            my $url;
            my $link;

            if (!defined $ctg or $ctg eq '') {
                next;
            }

            elsif ($ctg eq 'conflict') {
                $url = $self->_url_templates->{fpcctg_conflict};
                $url =~ s/__QUERY__/$aux_map_info_id/;
                $link = qq[<a href="$url">conflict</a>];
            }

            else {
                $url = $self->_url_templates->{$type};
                $url =~ s/__QUERY__/$ctg/;
                $link = qq[<a target="_blank" href="$url">$ctg</a>];
            }

            $data->[$row]->[$column] = $link;
        }
    }

    elsif ($type eq 'maizegdb_chr_view') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            my @chr_bins = split(',', $value);

            my @links;
            foreach my $chr_bin (@chr_bins) {
                my ($chr, $bin) = split(/\./, $chr_bin);

                $bin = '' unless defined $bin;

                $bin =~ s/^0+$/0/;
                $bin =~ s/^0+([^0])/$1/;

                my $url = $self->_url_templates->{$type};
                $url =~ s/__QUERY__/$chr/;
                $url =~ s/__QUERY__/$bin/;

                push @links, qq[<A target="_blank" href="$url">$chr_bin</A>];
            }
            $data->[$row]->[$column] = join(qq[, ], @links);
        }
    }

    elsif ($type eq 'maizegdb_ibm2n') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');
            
            my ($aux_map_info_id, $chr) = split(':', $value);

            my $url;
            my $link;

            if (!defined $chr or $chr eq '') {
                next;
            }

            elsif ($chr eq 'conflict') {
                $url = $self->_url_templates->{maizegdb_ibm2n_conflict};
                $url =~ s/__QUERY__/$aux_map_info_id/;
                $link = qq[<a href="$url">conflict</a>];
            }

            else {

                # very specific maizegdb url; adapted initially from http://www.maizegdb.org/map.php (Ctrl+F for "IBM2 2004 Neighbors"),
                # later from communication from Wei Zhao (for IBM 2005)
                my $modified_chr = $chr <= 8 ? 978376 + $chr : 978377 + $chr;

                $url = $self->_url_templates->{$type};
                $url =~ s/__QUERY__/$modified_chr/;
                $link = qq[<a target="_blank" href="$url">$chr</a>];
            }

            $data->[$row]->[$column] = $link;
        }
    }

    elsif ($type eq 'cmap_gene_locus') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            my ($exists_in_map, $gene_locus) = $value =~ /^([^:]+):(.*)$/;

            my $url = $self->_url_templates->{$type};
            $url =~ s/__QUERY__/$gene_locus/;

            $data->[$row]->[$column] =
              $exists_in_map
              ? qq[<A href="$url">$gene_locus</A>]
              : qq[$gene_locus];
        }
    }

    elsif ($type eq 'polymorphic_between_accessions') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            my ($marker_type, $accession1) = $value =~ /^([^:]+):(.*)$/;

            my $url = $self->_url_templates->{$type};
            $url =~ s/__QUERY__/$marker_type/;
            $url =~ s/__QUERY__/$accession1/;

            $data->[$row]->[$column] = qq[<A href="$url">$accession1</A>];
        }
    }

    elsif ($type eq 'gmap') {
        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            my ($marker_name, $marker_type) = $value =~ /^(.+):(SNP|SSR)$/;

            my $url = $self->_url_templates->{$type};

            $url =~ s/__QUERY__/$marker_name/;
            $url =~ s/__QUERY__/$marker_type/;

            $data->[$row]->[$column] = qq[<A href="$url">Geo. Map</A>];
        }
    }

    else { croak("Unrecognized add_link type ($type)!"); }

    return 1;
}

=head3 get_gbrowse_link

 Function  : Gets and formats gbrowse link
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected

=cut

sub get_gbrowse_link {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};
    my $dbh           = $params{-dbh};

    exists $params{-column}
      or croak("A column number is required to get_gbrowse_link");
    my $column = $params{-column};

    my $statement =
      qq[SELECT count(genelocus) FROM aux_seq_annotations_supp WHERE genelocus = ?];

    foreach my $row (0 .. $#{@$data}) {
        my $value = $data->[$row]->[$column];
        next if (!defined $value or $value eq '');

        my $sth = $dbh->prepare($statement)
          or croak("Cannot prepare statement ($statement)");
        $sth->bind_param(1, $value);
        $sth->execute() or croak("Cannot execute statement ($statement)");
        my ($count) = $sth->fetchrow_array;

        my $url = $self->_url_templates->{"gbrowse"};
        $url =~ s/__QUERY__/$value/;

        if ($output_format eq "html") {
            $data->[$row]->[$column] = $count
              ? qq[<A href="$url">[View]</A>]
              : qq[N/A];
        }

        else {
            $data->[$row]->[$column] = qq[N/A];
        }
    }
}

=head3 get_assays

 Function  : Gets and formats assays by marker id and gene/locus id
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected

=cut

sub get_assays {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};
    my $dbh           = $params{-dbh};

    exists $params{-column}
      or croak("A column number is required to get_assays");
    my $column = $params{-column};

    exists $params{-type}
      or croak(
        "A type (gene_locus|marker) specification is required to get_assays");
    my $type = $params{-type};

    if ($type ne 'gene_locus' and $type ne 'marker') {
        croak("Type must be gene_locus or marker!");
    }

    my $where_clause_param =
      $type eq 'gene_locus' ? 'cmf.cdv_map_feature_id' : 'cm.cdv_marker_id';

    my $statement =
      qq[SELECT aftt.div_allele_assay_id, dstt.scoring_tech_group, cm.name, count(*)
         FROM  aux_feature_to_taxonomy aftt
         LEFT  JOIN div_scoring_tech_type dstt ON (dstt.div_scoring_tech_type_id = aftt.div_scoring_tech_type_id)
         LEFT  JOIN cdv_marker cm ON (cm.cdv_marker_id = aftt.cdv_marker_id)
         LEFT  JOIN cdv_map_feature cmf ON (cmf.cdv_map_feature_id = aftt.cdv_map_feature_id)
         WHERE $where_clause_param = ?
         GROUP BY aftt.div_allele_assay_id
         ORDER BY dstt.scoring_tech_group, aftt.div_allele_assay_id];

    my $sth = $dbh->prepare($statement)
      or croak("Cannot prepare statement ($statement)");

    my $gmap_statement =
      qq[SELECT count_valid FROM aux_assay_plant_genotype_count
                            WHERE marker_name = ? AND marker_type = ?];

    my $gmap_sth = $dbh->prepare($gmap_statement)
      or croak("Cannot prepare statement ($gmap_statement)");

    foreach my $row (0 .. $#{@$data}) {
        my $value = $data->[$row]->[$column];

        next if (!defined $value or $value eq '');

        $sth->bind_param(1, $value);
        $sth->execute() or croak("Cannot execute statement ($statement)");

        my %assays_by_marker;

        while (
            my ($assay_id, $scoring_tech_group, $marker_name, $data_count) =
            $sth->fetchrow_array) {
            $assay_id           = '' unless defined $assay_id;
            $scoring_tech_group = '' unless defined $scoring_tech_group;
            $assays_by_marker{$marker_name}{$scoring_tech_group}{$assay_id} =
              $data_count;
        }

        my @links;

        foreach my $marker_name (sort keys %assays_by_marker) {

            foreach my $scoring_tech_group (
                sort keys %{$assays_by_marker{$marker_name}}) {

                my $marker_data_count = 0;
                foreach my $assay_id (
                    keys
                    %{$assays_by_marker{$marker_name}{$scoring_tech_group}}) {
                    $marker_data_count +=
                      ($assays_by_marker{$marker_name}{$scoring_tech_group}
                          {$assay_id} - 1);
                }

                my $link;

                # Rule 0
                if (!$marker_name and !$marker_data_count) {
                    $link = '';
                }

                # Rule 1
                elsif ($scoring_tech_group eq 'SNP'
                    or $scoring_tech_group eq 'SSR') {

                    # Section for GMap View
                    my $gmap_url;

                    $gmap_sth->bind_param(1, $marker_name);
                    $gmap_sth->bind_param(2, $scoring_tech_group);
                    $gmap_sth->execute()
                      or croak("Cannot execute statement ($statement)");
                    my ($count_valid) = $gmap_sth->fetchrow_array;
                    if ($count_valid && $count_valid > 3) {
                        $gmap_url = $self->_url_templates->{gmap};
                        $gmap_url =~ s/__QUERY__/$marker_name/;
                        $gmap_url =~ s/__QUERY__/$scoring_tech_group/;
                    }

                    my $url =
                        $marker_data_count
                      ? $self->_url_templates->{marker_name}
                      {$scoring_tech_group}
                      : $self->_url_templates->{ref_seq};
                    $url =~ s/__QUERY__/$marker_name/;

                    if ($type eq 'gene_locus') {
                        if ($output_format eq 'html') {
                            $link =
                              qq[$marker_name<A href="$url">($scoring_tech_group)</A>];
                            if ($gmap_url) {
                                $link =~ s!\)</A>$!</A>!;
                                $link .=
                                  qq[-<A href="$gmap_url">Geo.Map</A>)];
                            }
                        }
                        else {
                            $link = qq[$marker_name($scoring_tech_group)];
                            $link .= qq[-Geo.Map)] if $gmap_url;
                        }
                    }
                    else {
                        if ($output_format eq 'html') {
                            $link =
                              $marker_data_count
                              ? qq[<A href="$url">$scoring_tech_group</A>]
                              : qq[];
                            $link .= qq[-<A href="$gmap_url">Geo.Map</A>]
                              if $gmap_url;
                        }
                        else {
                            $link =
                              $marker_data_count
                              ? qq[$scoring_tech_group]
                              : qq[];
                            $link .= qq[-Geo.Map] if $gmap_url;
                        }
                    }
                }

                # Rule 2
                elsif ($scoring_tech_group eq 'Sequencing') {
                    my @assay_ids = keys
                      %{$assays_by_marker{$marker_name}{$scoring_tech_group}};
                    if ($output_format eq 'html') {
                        my $url;
                        if (@assay_ids == 1 and $marker_data_count) {
                            my $assay_id = $assay_ids[0];
                            $url =
                              $self->_url_templates->{assay_id}
                              {$scoring_tech_group};
                            $url =~ s/__QUERY__/$assay_id/;
                        }
                        elsif ($marker_data_count) {
                            $url =
                              $self->_url_templates->{marker_name}
                              {$scoring_tech_group};
                            $url =~ s/__QUERY__/$marker_name/;
                        }
                        else {
                            $url = $self->_url_templates->{ref_seq};
                            $url =~ s/__QUERY__/$marker_name/;
                        }

                        $link =
                          $type eq 'gene_locus'
                          ? qq[$marker_name<A href="$url">($scoring_tech_group)</A>]
                          : $marker_data_count
                          ? qq[<A href="$url">$scoring_tech_group</A>]
                          : qq[];
                    }

                    else {
                        $link =
                          $type eq 'gene_locus'
                          ? qq[$marker_name($scoring_tech_group)]
                          : $marker_data_count ? qq[$scoring_tech_group]
                          :                      qq[];
                    }
                }

                # Rule 3
                else {
                    my $url = $self->_url_templates->{ref_seq};
                    $url =~ s/__QUERY__/$marker_name/;

                    $scoring_tech_group =
                      $scoring_tech_group ? $scoring_tech_group : 'no_assays';

                    if ($type eq 'gene_locus') {
                        if ($output_format eq 'html') {
                            $link =
                              qq[$marker_name<A href="$url">($scoring_tech_group)</A>];
                        }
                        else {
                            $link = qq[$marker_name($scoring_tech_group)];
                        }
                    }
                    else {
                        if ($output_format eq 'html') {
                            $link =
                              $marker_data_count
                              ? qq[<A href="$url">$scoring_tech_group:$marker_data_count</A>]
                              : qq[];
                        }
                        else {
                            $link =
                              $marker_data_count
                              ? qq[$scoring_tech_group]
                              : qq[];
                        }
                    }

                }

                push(@links, $link);
            }
        }

        # Join all links
        my $links;

        if ($output_format eq 'html') {
            if (@links) { $links = join(qq[<BR>], @links); }
            else { $links = '&nbsp;'; }
        }

        else {
            $links = join(qq[;], @links);
        }

        $data->[$row]->[$column] = $links;
    }

    return 1;
}

=head3 _get_scoring_tech_info

 Function  : Retrieves scoring_tech_group and allele count for a given assay id.
 Arguments : ($dbh, $assay_id)
 Returns   : ($scoring_tech_group, $data_count)
 Notes     : This is a private method.

=cut

sub _get_scoring_tech_info {
    my ($self, $dbh, $value) = @_;

    my $statement =
      qq[SELECT dstt.scoring_tech_group, count(dstt.scoring_tech_group)
         FROM div_scoring_tech_type dstt
         JOIN div_allele_assay daa USING (div_scoring_tech_type_id)
         JOIN div_allele da USING (div_allele_assay_id)
         WHERE daa.div_allele_assay_id = ?
         GROUP BY dstt.scoring_tech_group
         ];

    my $sth = $dbh->prepare($statement)
      or croak("Cannot prepare statement ($statement)");

    $sth->bind_param(1, $value);
    $sth->execute() or croak("Cannot execute statement ($statement)");

    my ($scoring_tech_group, $data_count) = $sth->fetchrow_array;

    return ($scoring_tech_group, $data_count);
}

=head3 get_marker_annotations

 Function  : gets and formats assay annotations for a given ***markername***.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected
             -anno_types: Arrayref of annotation types to be retrieved.

=cut

sub get_marker_annotations {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};
    my $dbh           = $params{-dbh};

    exists $params{-column}
      or croak("A column number is required to get_assay_annotations");
    my $column = $params{-column};

    exists $params{-profile}
      or croak("A profile is required to get_assay_annotations");
    my $profile = $params{-profile};

    my $profile_columns;
    if ($profile eq 'moldiversity_search') {
        $profile_columns = qq[aaa.primer1, aaa.primer2, ama.allele_repeat];
    }
    else { croak("Unknown profile ($profile)"); }

    my $statement = qq[SELECT distinct $profile_columns
                       FROM aux_marker_annotations ama
                       RIGHT JOIN cdv_marker cm USING (cdv_marker_id)
                       LEFT JOIN div_allele_assay daa USING (cdv_marker_id)
                       LEFT JOIN aux_assay_annotations aaa USING (div_allele_assay_id)
                       LEFT JOIN div_scoring_tech_type dstt ON (dstt.div_scoring_tech_type_id = daa.div_scoring_tech_type_id)
                       WHERE cm.name = ?
                       ];

    my $sth = $dbh->prepare($statement)
      or croak("Cannot prepare statement ($statement)");

    # Additional statement for ref seq
    my $rs_statement =
      qq[SELECT ref_seq FROM cdv_marker cm WHERE cm.name = ?];
    my $rs_sth = $dbh->prepare($rs_statement)
      or croak("Cannot prepare statement ($rs_statement)");

    # Additional statement for gbrowse (functional_annotation_view)
    my $gb_statement =
      qq[SELECT query_accession FROM aux_seq_annotations_supp asas
                          WHERE asas.query_accession IN (?, ?)];
    my $gb_sth = $dbh->prepare($gb_statement)
      or croak("Cannot prepare statement ($gb_statement)");

    foreach my $row (0 .. $#{@$data}) {
        my $value = $data->[$row]->[$column];

        my $marker = $value;

        next if (!defined $value or $value eq '');

        $sth->bind_param(1, $marker);
        $sth->execute() or croak("Cannot execute statement ($statement)");

        $rs_sth->bind_param(1, $marker);
        $rs_sth->execute() or croak("Cannot execute statement ($statement)");

        my @annotation;

        while (my ($primer1, $primer2, $allele_repeat) = $sth->fetchrow_array)
        {
            my $separator = $output_format eq 'html' ? '<BR>' : ';';

            my @anno_value_string;
            push(@anno_value_string, qq[PRIMER1=$primer1]) if $primer1;
            push(@anno_value_string, qq[PRIMER2=$primer2]) if $primer2;
            push(@anno_value_string, qq[ALLELE_REPEAT=$allele_repeat])
              if $allele_repeat;

            my $anno_value_string = join($separator, @anno_value_string);

            if ($primer1 or $primer2 or $allele_repeat) {
                push(
                    @annotation,
                    $output_format eq 'html'
                    ? "<i>$anno_value_string</i>"
                    : "$anno_value_string"
                );
            }
        }

        while (my ($ref_seq) = $rs_sth->fetchrow_array) {
            my $url = $self->_url_templates->{ref_seq};
            $url =~ s/__QUERY__/$marker/;

            if ($ref_seq) {
                push(
                    @annotation,
                    $output_format eq 'html'
                    ? qq[<i>CONTEXT_SEQUENCE=<a href="$url">[Available]</a></i>]
                    : "CONTEXT_SEQUENCE=$ref_seq"
                );
            }
        }

        my $separator = $output_format eq 'html' ? '<BR>' : ';';
        my $annotation = join($separator, @annotation);

        $data->[$row]->[$column] = $annotation;
    }

    return 1;
}

=head3 translate

 Function  : Translates a record into another based on its type.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected
             -type: Type of translation.
                    'code2sampstat'

=cut

sub translate {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};

    #    my $dbh = $params{-dbh};

    exists $params{-column}
      or croak("A column number is required to align_decimal");
    my $column = $params{-column};

    my $type = $params{-type} or croak("A type is required to translate");

    if ($type eq 'code2sampstat') {
        my $code2sampstat = Panzea::WebFormUtils->code2sampstat();

        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');
            $data->[$row]->[$column] =
              $code2sampstat->{$value} ? $code2sampstat->{$value} : $value;
        }
    }

    else { croak("Unrecognized translate type ($type)!"); }

    return 1;
}

=head3 format_all_fpc_positions

 Function  : Formats bar '|' separated all_fpc_positions column.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected

=cut

sub format_all_fpc_positions {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};

    exists $params{-column}
      or croak("A column number is required to format_all_fpc_positions");
    my $column = $params{-column};

    foreach my $row (0 .. $#{@$data}) {
        my $value = $data->[$row]->[$column];

        next if (!defined $value or $value eq '');

        my @values = split(/,/, $value);

        my @all_fpc_positions;
        foreach (@values) {
            my ($fpc_ctg, $fpc_chr, $fpc_start, $fpc_end) = split(/\|/, $_);
            push(@all_fpc_positions, $fpc_ctg);
        }

        my @fpc_position_links;
        foreach my $fpc_position (@all_fpc_positions) {
            my $url = $self->_url_templates->{'fpcctg'};
            $url =~ s/__QUERY__/$fpc_position/;
            push(
                @fpc_position_links,
                qq[<a target="_blank" href="$url">$fpc_position</a>]
            );
        }

        my $all_fpc_positions =
          $output_format eq 'html'
          ? join(qq[, ], @fpc_position_links)
          : join(qq[;],  @all_fpc_positions);
        unless (@all_fpc_positions) {
            $all_fpc_positions = $output_format eq 'html' ? '&nbsp;' : '';
        }

        $data->[$row]->[$column] = $all_fpc_positions;
    }

    return 1;
}

=head3 format_all_ibm2_2005_positions

 Function  : Formats bar ',' separated all_ibm2_2005_positions column.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected

=cut

sub format_all_ibm2_2005_positions {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};

    exists $params{-column}
      or
      croak("A column number is required to format_all_ibm2_2005_positions");
    my $column = $params{-column};

    foreach my $row (0 .. $#{@$data}) {
        my $value = $data->[$row]->[$column];

        next if (!defined $value or $value eq '');

        my @all_ibm2_2005_positions = split(',', $value);

        my @ibm2_2005_position_texts;
        my @ibm2_2005_position_links;

        foreach my $ibm2_2005_position (@all_ibm2_2005_positions) {
            my ($ibm2_2005_chr, $ibm2_2005_position) =
              split(/\|/, $ibm2_2005_position);
            my $modified_ibm2_2005_chr =
              $ibm2_2005_chr <= 8
              ? 978376 + $ibm2_2005_chr
              : 978377 + $ibm2_2005_chr;

            # very specific maizegdb url; adapted initially from http://www.maizegdb.org/map.php (Ctrl+F for "IBM2 2004 Neighbors"),
            # later from communication from Wei Zhao (for IBM 2005)

            my $url = $self->_url_templates->{'maizegdb_ibm2n'};
            $url =~ s/__QUERY__/$modified_ibm2_2005_chr/;
            push(
                @ibm2_2005_position_links,
                qq[<a target="_blank" href="$url">$ibm2_2005_chr</a>($ibm2_2005_position)]
            );
            push(
                @ibm2_2005_position_texts,
                qq[$ibm2_2005_chr($ibm2_2005_position)]
            );
        }

        my $all_positions =
          $output_format eq 'html'
          ? join(qq[, ], @ibm2_2005_position_links)
          : join(qq[;],  @ibm2_2005_position_texts);
        unless (@all_ibm2_2005_positions) {
            $all_positions = $output_format eq 'html' ? '&nbsp;' : '';
        }

        $data->[$row]->[$column] = $all_positions;
    }

    return 1;
}

=head3 get_genotype

 Function  : Retrieves genotype based on div_allele_assay_id and div_obs_unit_id.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected

             Notes:

             The column for which genotype is retrieved
             is provided as div_allele_assay_id:div_obs_unit_id.
             In MySQL, this is:
             CONCAT(da.div_allele_assay_id, ':', dou.div_obs_unit_id)

=cut

sub get_genotype {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};
    my $dbh           = $params{-dbh};

    exists $params{-column}
      or croak("A column number is required to align_decimal");
    my $column = $params{-column};

    my $statement = qq[SELECT da.value
                       FROM div_obs_unit dou
                       RIGHT JOIN div_obs_unit_sample dous USING (div_obs_unit_id)
                       RIGHT JOIN div_allele da USING (div_obs_unit_sample_id)
                       WHERE da.div_allele_assay_id =? AND dou.div_obs_unit_id = ?
				       ORDER BY da.value
                       ];

    my $sth = $dbh->prepare($statement)
      or croak("Cannot prepare statement ($statement)");

    foreach my $row (0 .. $#{@$data}) {
        my $value = $data->[$row]->[$column];

        my ($div_allele_assay_id, $div_obs_unit_id) =
          $value =~ /^([^:]+):(.*)$/;

        next if (!defined $div_allele_assay_id or !defined $div_obs_unit_id);

        $sth->bind_param(1, $div_allele_assay_id);
        $sth->bind_param(2, $div_obs_unit_id);

        $sth->execute() or croak("Cannot execute statement ($statement)");

        my @genotype;
        while (my ($genotype) = $sth->fetchrow_array) {
            push @genotype, $genotype;
        }

        $data->[$row]->[$column] = join(', ', @genotype);
    }

    return 1;
}

=head3 remove_columns

 Function  : Removes.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected.
                      Can be a scalar containing a number or an
                      arrayref containing multiple numbers.
            -affected_format: output_format that will be affected.

=cut

sub remove_columns {
    my ($self, %params) = @_;

    my $page_obj =
      $params{-page_obj}; # temporary hack to be able to modify SearchPage obj
    my $data          = $params{-data};
    my $output_format = $params{-output_format};

    exists $params{-affected_format}
      or croak("An affected_format is required to remove columns");
    my $affected_format = $params{-affected_format};

    # This modification applies only to a single output format
    return 1 unless $output_format eq $affected_format;

    exists $params{-column}
      or croak("A column number(s) is required to remove columns");
    my @columns =
      ref $params{-column} ? @{$params{-column}} : ($params{-column});
    my %columns_to_remove = map { $_ => 1 } @columns;

    # Identify columns to keep
    my %columns_to_keep;
    my $row =
      $data->[0];    # Identify data row size, if no rows try it with headers
    unless ($row) { $row = $page_obj->base_output_headers; }
    foreach my $i (0 .. $#{@$row}) {
        $columns_to_keep{$i} = 1 unless $columns_to_remove{$i};
    }
    my @columns_to_keep = sort { $a <=> $b } keys %columns_to_keep;

    # Now format the rows
    foreach my $row (0 .. $#{@$data}) {
        my @formatted = @{$data->[$row]}[@columns_to_keep];

        $data->[$row] = \@formatted;
    }

    # Remove corresponding headers as well
    my @base_output_headers_formatted =
      @{$page_obj->base_output_headers}[@columns_to_keep];
    $page_obj->base_output_headers(\@base_output_headers_formatted);
    $page_obj->_calculate_super_headers();

    return 1;
}

=head3 get_phenotype_values

 Function  : gets and formats phenotype values for a given div_obs_unit_id.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected

=cut

sub get_phenotype_values {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};
    my $dbh           = $params{-dbh};

    exists $params{-column}
      or croak("A column number is required to get_assay_annotations");
    my $column = $params{-column};

    my $statement = qq[SELECT dst.stat_type, dt.value
                       FROM div_trait dt LEFT JOIN div_statistic_type dst USING(div_statistic_type_id)
                       WHERE dt.div_obs_unit_id = ? AND dt.div_trait_uom_id = ?
                       ORDER BY dst.stat_type];

    my $sth = $dbh->prepare($statement)
      or croak("Cannot prepare statement ($statement)");

    foreach my $row (0 .. $#{@$data}) {
        my $value = $data->[$row]->[$column];

        next if (!defined $value or $value eq '');

        my ($div_obs_unit_id, $div_trait_uom_id) = $value =~ /^([^:]+):(.*)$/;

        $sth->bind_param(1, $div_obs_unit_id);
        $sth->bind_param(2, $div_trait_uom_id);
        $sth->execute() or croak("Cannot execute statement ($statement)");

        my @phenotype_values;

        while (my ($stat_type, $phenotype_value) = $sth->fetchrow_array) {
            $stat_type = 'UNKNOWN' unless $stat_type;
            $stat_type = 'std_dev'
              if $stat_type eq 'standard deviation';    # Temporary formatting
            push(@phenotype_values, $stat_type . '=' . $phenotype_value);
        }

        my $phenotype_values;

        if ($output_format eq 'html') {
            if (@phenotype_values) {
                $phenotype_values =
                  qq[<i>] . join(qq[<BR>], @phenotype_values) . qq[</i>];
            }
            else {
                $phenotype_values = "&nbsp;";
            }
        }

        else {
            $phenotype_values = join(qq[;], @phenotype_values);
        }

        $data->[$row]->[$column] = $phenotype_values;
    }

    return 1;
}

=head3 format_sequence

 Function  : Formats a sequence.
 Arguments : See notes
 Returns   : 1
 Notes     :
             Parameters:

             -column: Column number that will be affected
             -type: Type of link to be generated.
                    'display_ref_seq'

=cut

sub format_sequence {
    my ($self, %params) = @_;

    my $data          = $params{-data};
    my $output_format = $params{-output_format};
    my $dbh           = $params{-dbh};

    # This modification applies only to html output
    return 1 unless $output_format eq 'html';

    exists $params{-column}
      or croak("A column number is required to format_sequence");
    my $column = $params{-column};

    my $type = $params{-type}
      or croak("A type is required to format_sequence");

    if ($type eq 'display_ref_seq') {

        foreach my $row (0 .. $#{@$data}) {
            my $value = $data->[$row]->[$column];
            next if (!defined $value or $value eq '');

            $value =~ s/(.{60})/$1<br>/g;

            $value =~ s/(\{)/<b><font color="blue">$1/g;
            $value =~ s/(\})/$1<\/b><\/font>/g;

            $value =~ s/(\[)/<b><font color="red">$1/g;
            $value =~ s/(\])/$1<\/b><\/font>/g;

            $value = qq[<pre>$value</pre>];

            $data->[$row]->[$column] = $value;
        }
    }

    else { croak("Unrecognized format_sequence type ($type)!"); }

    return 1;
}

=head1 AUTHOR

Payan Canaran <canaran@cshl.edu>

=head1 BUGS

=head1 VERSION

Version 0.02

=head1 ACKNOWLEDGEMENTS

=head1 COPYRIGHT & LICENSE

Copyright (c) 2005-2007 Cold Spring Harbor Laboratory

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself. See DISCLAIMER.txt for
disclaimers of warranty.

=cut

1;
