#!/usr/local/bin/perl -w

=head1 NAME

SeqTable

=head1 DESCRIPTION


This is the new version of the SeqTable CGI script, completely re-written by Wei Zhao in Oct. 2003.

This CGI script retrieves genome sequence information by chromosome from the database and lists them in a table. It provides several search forms at the top of the page, and links to an EnsEMBL-based genome browser, GenBank, and CMap. Data displayed in the table can be sorted by clicking on the column headers.

The previous version of this script was originally written by Xiaokang Pan in Nov. 2001, and modified by Steven Schmidt, Mark-Jason Dominus, and Wei Zhao at various times. For the complete coding history, check the CVS log.

Changes:
columns -> [ superctg name, super pos on chr, ] bac name, genbank accession, gb version, [ bac coords on superctg,] bac coords on chr, 
marker, jrgp map, cornell rflp map, irmi ssr map
sort by:
superctg name, bac name, GENBANK ACC, bac cooords on chr
=cut

# ------------------------------------------------------------------------------

#use lib "/usr/local/ensembl-30/ensembl/modules/";
#use lib "/usr/local/ensembl-30/ensembl-external/modules/";
use strict;
use CGI qw/:standard :netscape/;
use DBI qw(:sql_types);
use Apache;
use EnsWeb;
use EnsEMBL::DB::Core;
use Gramene::Page;
use Gramene::Config;
use Gramene::Utils 'pager';
use SiteDefs;

use constant PAGE_SIZE          => 25;
use constant MAX_PAGES          => 10;

use constant ORDER_BY_TO_REAL   => {
    clone_co  => 'least(clone_chr_start,clone_chr_end)',
    clone      => 'bacname',
    len        => 'super_chr_end',
    acc        => 'embl_acc',
    chromosome => 'super_name',
};
use constant DEFAULT_ORDER_BY => { 'clones' => 'clone_co' ,
				   'super-contigs' => 'clone_co' ,
				   'orphans' => 'chromosome' };

use constant TESTING => 1;

my $data_source="data_source=".Gramene::Config->new->get('ensembl')->{cmap_datasource};

#some parameters that should come from a per-species config file 
my %chr_to_component = ( 0  => 'orphans',
			 1  => 'clones',
			 2  => 'clones',
			 3  => 'clones',
			 4  => 'clones',
			 5  => 'clones',
			 6  => 'clones',
			 7  => 'clones',
			 8  => 'clones',
			 9  => 'clones',
			 10 => 'clones',
			 11 => 'clones',
			 12 => 'clones',
                 chloroplas => 'clones',
                 mitochondr => 'clones',
			 # else => 'super-contigs'
		       );
#really should have more cases: is clone table real clones? , are supercontigs real?
# and even if clone table is not real clones, have mapped clones that are genbank


my $chr = param('chr');
($chr) = ( $chr =~ /([+-]*\d+)/g) if $chr;  #only the valid numeric part
$chr=1 unless defined($chr) and $chr ne '';

# The clone_co position is the default sorting option

my $default_order_by= DEFAULT_ORDER_BY->{$chr_to_component{$chr}};
my $sorting = param('sorting') || $default_order_by.':1';  
my $page_no = param('page_no') || 1;


my ( $order_by, $order_by_direction ) = split(/:/, $sorting);
$order_by and ORDER_BY_TO_REAL->{$order_by} or $order_by = $default_order_by;


######################################################

my $page_title = "Rice Genome Browser";
my $q = CGI->new();

  my $r = Apache->request;
  foreach my $key( keys %SiteDefs::PerlSetVar ){
    $r->dir_config->add($key,$SiteDefs::PerlSetVar{$key});
  }
 
my $page = Gramene::Page->new($r);

print $q->header;
print $page->start_html( -title=>$page_title,
		    );
print $page->start_body();


my $databases = &EnsEMBL::DB::Core::get_databases( "core");
print STDERR "$databases: $DBI::errstr\n" unless ref $databases;
print STDERR "st_test connect ",$databases->{'error'},"\n" if($databases->{'error'});
my $dbh =  $databases->{'core'}->db_handle;

my @values;
my %labels;
my $chrnum;
my %valid_chrs;

my $sth0 = $dbh->prepare( "select distinct chromosome from seqtable_helper order by chromosome" )
                 or print STDERR "seqtable failed to prepare select chromosome: $DBI::errstr\n";
$sth0->execute() or print STDERR "execute:",$sth0->errstr,"\n"; ;

$sth0->bind_col( 1, \$chrnum );

while( $sth0->fetch ) {
    push( @values, $chrnum );
    $labels{ $chrnum } = "Chromosome ".$chrnum unless $chrnum == 0;
    $labels{ $chrnum } = "Unassigned" if $chrnum == 0;
    $valid_chrs{ $chrnum } = 1;
}
$sth0->finish();    

########################################################################
# Header 
########################################################################
print <<HEADER;
<table align="CENTER" cellspacing="0" cellpadding="5" border="0" width="98%">
  <tr>
    <th align="CENTER" colspan="2" class="searchtitle">Rice BAC/PAC Sequence Information</th>
  </tr>
  <tr valign="bottom">
    <td align="CENTER" class="searchbody"><br>
    <span class="alert">Select a chromosome</span>
    </td>
    <td align="CENTER" class="searchbody"><br>
    <span class="alert">BAC/PAC Name or Accession&nbsp;&nbsp;</span>[e.g.&nbsp;<a href="/$ENV{'ENSEMBL_SPECIES'}/contigview?clone=AP002540">P0434B04</a>,&nbsp;&nbsp;<a href="/$ENV{'ENSEMBL_SPECIES'}/contigview?clone=AP002540">AP002540</a>]
    </td>
  </tr>
HEADER

print qq{<tr valign="top">
         <td align="CENTER" nowrap class="searchbody">\n};
print start_form(-method=>'POST', 
                 -action=>"/$ENV{'ENSEMBL_SPECIES'}/SeqTable");

print popup_menu(-name=>"chr",
                 -Values=>\@values,
                 -default=>"3",
                 -labels=>\%labels);

print submit('Go');
print end_form();
print qq( </td>\n );

print qq{<td align="CENTER" nowrap class="searchbody">\n};
print start_form(-method=>'GET',
                 -action=>'/perl/unisearch');
print hidden(-name=>'type',
             -value=>'All'); #changed from 'clone' to 'All'
print "<INPUT TYPE=\"TEXT\" NAME=\"q\" SIZE=\"20\">\n";
print submit('Display');         	    
print end_form();
print "</td></tr></table>\n";
#print "</tr>\n";
#print "</table>\n";
#print "</center>\n";

print qq( <center><h2>To view the features of a sequence, please select a BAC/PAC.</h2></center> );
print $q->p( {-class=>'datawarning', -align=>'left'}, 
		$chr ? EnsWeb::species_defs->ASSEMBLY_DISCLAIMER
		     : qq{Clones not used in the assembly which could not be mapped to it.} );

print qq( <br><table align="CENTER" width="80%"><tr><td align="CENTER"> );
print EnsWeb::print_form( '',"all" );
print qq( </td></tr></table><br> );


if( $valid_chrs{ $chr } ) {

    my $sth1 = $dbh->prepare( "select count(distinct bacname) from seqtable_helper where chromosome = ?" ); #??SCS: bacname or embl_acc or ?? here
    $sth1->bind_param( 1, $chr );
    $sth1->execute();

    my $clone_total;
    $sth1->bind_col( 1, \$clone_total );
    $sth1->fetch;
    $sth1->finish;

    print "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<strong>$labels{$chr}</strong>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;";
    print "( To sort the $clone_total $chr_to_component{ $chr }, click on column headers. )"; 
#    print qq( <p>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[Note: Data in the "IRGSP Order" column is based on a snapshot of the <a href="http://rgp.dna.affrc.go.jp/IRGSP/status.html">IRGSP</a> data taken on June 17, 2003.]</p> );
} else {

    print $q->p( "There is no information available for chromosome $chr." );

}
    

########################################################################
    


my %header_to_text = (
		      chromosome=> ["Nominal","chromosome"],
		      super_name=> ["SuperContig"],
		      super_chr => ["SuperContig Coordinates"," on chromosome"],
		      clone   => ["BAC/PAC", " (contig viewer)"],
		      clone_co  => ["BAC/PAC Coordinates", " on chromosome"],
		      len       => ["DNA Length", " (FASTA)"],
		      acc       => ["GenBank Accession",".version"],
#		      contig    => ["Contigs Mapped"],
		      super_pos => ["BAC/PAC Coordinates", " on super-contig"],
		      mn        => ["Rice Marker"],
		      mt        => ["Marker Type"],
		      jrgp_rflp => ["JRGP RFLP", "2000"],
		      cor_rflp  => ["Cornell RFLP", "2001"],
		      irmi_ssr  => ["IRMI SSR", "2003"],
		     );

my %order = ( 
#	'phase' => -1,
	      'acc' => 1,
	      'clone' => 1,
	      'super_name' => 1,
#	      'site' => 1,
	      'len' => -1,
	      'clone_co' => 1,
	      'chromosome' => 1,
	    );
$order{ $order_by } = -$order_by_direction;  #if it is currently sorted by this column, reverse


my $use_super= $chr_to_component{ $chr } eq  'super-contigs';
my $orphans= $chr_to_component{ $chr } eq  'orphans';

print qq( <table align="CENTER" width="96%" border="1"><tr> );

my @headers;

if( $use_super   ) {
    @headers = qw( super_name super_chr clone acc super_pos clone_co mn );
} elsif($orphans) {
    @headers = qw( chromosome clone acc len );
} else {
    @headers = qw( clone acc clone_co mn );
}
 
for my $col ( @headers ) {
    if ( $order{$col} ) {
        print "<td align=\"CENTER\" rowspan=\"2\">";
	my $sort = "$col:$order{$col}";
	my @text = @{$header_to_text{$col}};
	print "<a href=\"/$ENV{'ENSEMBL_SPECIES'}/SeqTable?chr=$chr&sorting=$sort\">";
	print "$text[0]<br>$text[1]</td>";
    } else {
        print "<td align=\"CENTER\" rowspan=\"2\">$header_to_text{$col}[0]<br>$header_to_text{$col}[1]</td>";
    }
}

if($orphans) {
    print qq(</tr><tr></tr>);
} else {
    print qq( <td align="center" colspan="3">Marker Positions (cM)</td></tr> );
    print qq( <tr><td align="center">JRGP RFLP<br>2000</td><td align="center">Cornell RFLP<br>2001</td><td align="center">IRMI<br>2003</td></tr> );
}

#print "</tr>"; 

#####################################################################


my $query = "SELECT super_name, super_chr_start, super_chr_end
    ,bacname, embl_acc, embl_version, htg_phase, clone_chr_start
    ,clone_chr_end, clone_super_start, clone_super_end, marker_name, marker_type
    ,jrgp_rflp_cm ,jrgp_rflp_map ,jrgp_rflp_feature 
    ,cornell_rflp_cm ,cornell_rflp_map ,cornell_rflp_feature
    ,irmi_ssr_cm ,irmi_ssr_map ,irmi_ssr_feature 
FROM seqtable_helper WHERE chromosome = ?";


$query .= " order by ". ORDER_BY_TO_REAL->{ $order_by }; #should already be taken care of: ||  ORDER_BY_TO_REAL->{$default_order_by};


if( $order_by_direction && ( $order_by_direction eq "-1" ) ) {
    $query .= " DESC";
}

#not really necessary now since all orderings we use produce
# a unique order
if( $order_by and  $order_by ne $default_order_by ) {
    $query .= ", ".ORDER_BY_TO_REAL->{$default_order_by};
}


my $sth = $dbh->prepare( $query )
          or print STDERR "SeqTable prepare($query) $DBI::errstr\n";

$sth->bind_param( 1, $chr );
$sth->execute() or print STDERR "execute:",$sth->errstr,"\n"; 

my %classes = ( 1  => 'resultsbody',
                0  => '',
	      );

my $previous_bacname;

my $color = 1;

while ( my ( $super_name, $super_chr_start, $super_chr_end, $bacname
	   , $embl_acc, $embl_version, $htg_phase, $clone_chr_start
	   , $clone_chr_end, $super_start, $super_end, $mn, $mt
	   , $jrgp_cm, $jrgp_map,$jrgp_feature
	   , $cor_rflp_cm, $cor_rflp_map, $cor_rflp_feature
	   , $irmi_cm , $irmi_map , $irmi_feature
	   ) = $sth->fetchrow_array ) {

    if($use_super) {
	$super_start = add_comma( $super_start );
	$super_end = add_comma( $super_end );
	$super_chr_start = add_comma( $super_chr_start );
	$super_chr_end = add_comma( $super_chr_end );
    }
    $super_end = add_comma( $super_end ) if $orphans;

    print "<tr>";

    if( $previous_bacname && ( $previous_bacname ne $bacname ) ) {
        $color = 1 - $color;
    }

    my $class = $classes{ $color };
    $previous_bacname = $bacname;
    

#        if ( !$rgp_order ) {
#	    $rgp_order = "NA";
#	}

#	print $q->td( {-align=>'CENTER', -class=>$class}, $rgp_order );  

     if($use_super) {
	  print $q->td( {-align=>'CENTER', -class=>$class}, $super_name)
	      , $q->td( {-align=>'CENTER', -class=>$class}, $super_chr_start." - ". $super_chr_end);
     }
     if($orphans) {
	  print $q->td( {-align=>'CENTER', -class=>$class}, $super_name=~/R([-0-9]+)_/?$1:'&nbsp;' );
     }

    my $clone_link;
    if( $htg_phase == 2 || $htg_phase == 3 ) {
	if($orphans) {
	$clone_link = qq{<a href="/$ENV{'ENSEMBL_SPECIES'}/contigview?clone=$embl_acc">$bacname</a>};
	} else {  #above needs to be fixed because Clone_start_end is in db.
	    ($clone_chr_start,$clone_chr_end)= ($clone_chr_end, $clone_chr_start) if $clone_chr_start > $clone_chr_end;
	    $clone_link = qq( <a href="/$ENV{'ENSEMBL_SPECIES'}/contigview?chr=$chr&vc_start=$clone_chr_start&vc_end=$clone_chr_end&highlight=$embl_acc">$bacname</a> );
	}
    } else {
	$clone_link = $bacname;
    }
    print $q->td( {-align=>'CENTER', -class=>$class}, $clone_link );  

    

    unless( 1==0 ) { #i.e. if these 'clones' are really clones.

	my $genbank_link = qq{ <a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=Search&term=$embl_acc&doptcmdl=GenBank">$embl_acc.$embl_version</a> };
	print $q->td( {-align=>'CENTER', -class=>$class}, $genbank_link );
    }

    if( $use_super ) {
	print $q->td( {-align=>'CENTER', -class=>$class}, $super_start, " - ", $super_end );
    }

    unless($orphans) {
	my $temp = add_comma($clone_chr_start)." - ".add_comma($clone_chr_end);
	($clone_chr_start,$clone_chr_end)= ($clone_chr_end, $clone_chr_start) if $clone_chr_start > $clone_chr_end;
	my $contig_view = qq( <a href="/$ENV{'ENSEMBL_SPECIES'}/contigview?chr=$chr&vc_start=$clone_chr_start&vc_end=$clone_chr_end&highlight=$embl_acc">$temp</a> );
	print $q->td( {-align=>'CENTER', -class=>$class}, $contig_view );
    }

    if($orphans) {
        my $ncbi= qq{<a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=Search&term=$embl_acc&doptcmdl=FASTA">$super_chr_end</a>\n};
	print $q->td( {-align=>'CENTER', -class=>$class}, $ncbi );
    }

    unless($orphans) {

    my $marker_link = $mn;
    $marker_link = qq( <a href="/db/markers/marker_view?marker_name=$mn">$mn</a> ) if $mn;
    print $q->td( {-align=>'CENTER', -class=>$class}, $marker_link );  #new

    my $jrgp= defined($jrgp_cm) ? $q->a( { href=>"/db/cmap/map_details?ref_map_set_aid=jrgp-rflp-2000;ref_map_aids=$jrgp_map;highlight=$jrgp_feature;$data_source"},$jrgp_cm )
				: "&nbsp;";
    my $cor_rflp= defined($cor_rflp_cm) ? $q->a( { href=>"/db/cmap/map_details?ref_map_set_aid=cu-sl-2001;ref_map_aids=$cor_rflp_map;highlight=$cor_rflp_feature;$data_source"},$cor_rflp_cm )
				: "&nbsp;";
    my $irmi_ssr= defined($irmi_cm) ? $q->a( { href=>"/db/cmap/map_details?ref_map_set_aid=irmi-2003;ref_map_aids=$irmi_map;highlight=$irmi_feature;$data_source"},$irmi_cm )
				: "&nbsp;";

    print $q->td( {-align=>'CENTER', -class=>$class}, $jrgp );  
    print $q->td( {-align=>'CENTER', -class=>$class}, $cor_rflp );
    print $q->td( {-align=>'CENTER', -class=>$class}, $irmi_ssr );

    }

    print "</tr>";

}


print "</table>";

print $page->end_body;


# Subroutines

sub add_comma {

    my $number = reverse $_[0];
    $number =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
    return scalar reverse $number;
}

# ----------------------------------------------------------------

=pod

=head1 SEE ALSO

DBI, Gramene::Page, Apache::Request.

=head1 AUTHOR

Wei Zhao E<lt>zhaow@cshl.eduE<gt>,

=cut
