#!/usr/local/bin/perl

# vim: tw=78: sw=4: ts=4: et: 

# $Id: export-gene-to-gene-model-assoc.pl,v 1.3 2007/06/05 20:20:04 kclark Exp $

use strict;
use warnings;
use English qw( -no_match_vars );
use File::Basename;
use IO::Prompt;
use Getopt::Long;
use Gramene::CDBI::Genes;
use Gramene::Marker::DB;
use Pod::Usage;
use Readonly;

Readonly my $ANALYSIS             => 'GENES_DB';
Readonly my $ANALYTICAL_CORR_TYPE => 'IAGP';
Readonly my $EMPTY_STR            => q{};
Readonly my $NL                   => qq{\n};
Readonly my $TAB                  => qq{\t};
Readonly my $VERSION 
    => sprintf '%d.%02d', qq$Revision: 1.3 $ =~ /(\d+)\.(\d+)/;
Readonly my $MARKER_CORRESPONDENCE_IMPORT_SCRIPT 
    => '/usr/local/gramene/scripts/markers/load-marker-correspondences.pl';

my $out_file = $EMPTY_STR;
my ( $help, $man_page, $show_version );
GetOptions(
    'o|out=s' => \$out_file,
    'help'    => \$help,
    'man'     => \$man_page,
    'version' => \$show_version,
) or pod2usage(2);

if ( $help || $man_page ) {
    pod2usage({
        -exitval => 0,
        -verbose => $man_page ? 2 : 1
    });
}; 

if ( $show_version ) {
    my $prog = basename( $PROGRAM_NAME );
    print "$prog v$VERSION\n";
    exit 0;
}

if ( !$out_file ) {
    pod2usage('No output file specified');
}

if ( -e $out_file ) {
    my $ok = prompt -yn, "OK to overwrite '$out_file'? [yn] ";    
    if ( !$ok ) {
        print "Not OK, exiting.\n";
        exit 0;
    }
}

my $mdb  = Gramene::Marker::DB->new;
my $db   = Gramene::CDBI::Genes->db_Main;
my $data = $db->selectall_arrayref(
    q[
        select g.accession as gene_acc, 
               ox.dbxref_value as gene_prediction_name
        from   gene_gene g, gene_dbxref dx, gene_dbxref_to_object ox 
        where  g.gene_id=ox.record_id 
        and    dx.dbxref_id=ox.dbxref_id 
        and    ox.table_name='gene' 
        and    dx.dbxref_name='Rice Ensembl Gene'
    ],
    { Columns => {} }
);

if ( !@$data ) {
    print "No associations to export\n";
    exit 0;
}

open my $out_fh, '>', $out_file or die "Can't write '$out_file': $!\n";

print $out_fh join( $TAB, qw[ 
    from_marker_id to_marker_id analysis_name analytical_correspondence_type
] ), $NL;

my $num_corr = 0;
for my $rec ( @$data ) {
    my @genes = $mdb->marker_search(
        marker_name              => $rec->{'gene_acc'},
        marker_type              => 'Gene',
        search_only_primary_name => 1,
    ) or next;

    my @gene_predictions = $mdb->marker_search(
        marker_name => $rec->{'gene_prediction_name'},
        marker_type => 'Gene Prediction'
    ) or next;

    if ( scalar @genes > 1 ) {
        die "Too many genes match '$rec->{gene_acc}'\n";
    }

    if ( scalar @gene_predictions > 1 ) {
        die "Too many gene predictions match '$rec->{gene_prediction_name}'\n";
    }

    $num_corr++;

    print $out_fh join( $TAB,
        $gene_predictions[0]->{'marker_id'},
        $genes[0]->{'marker_id'},
        $ANALYSIS,
        $ANALYTICAL_CORR_TYPE

    ), $NL;
}

close $out_fh;

print join($NL,
    "Done, exported $num_corr correspondences to '$out_file'.",
    'Now do this:',
    "$MARKER_CORRESPONDENCE_IMPORT_SCRIPT $out_file",
    $EMPTY_STR,
);

__END__

# ----------------------------------------------------

=pod

=head1 NAME

export-gene-to-gene-prediction-assoc.pl - export associations b/w genes and predictions

=head1 VERSION

This documentation refers to version $Revision: 1.3 $

=head1 SYNOPSIS

  export-gene-to-gene-prediction-assoc.pl -o gene-to-gene-prediction.tab

Then following the instructions.

Options:

  -o|--out=foo  Name of the output file.
  --help        Show brief help and exit
  --man         Show full documentation
  --version     Show version and exit

=head1 DESCRIPTION

This script creates a tab-delimited file showing the gene-to-gene prediction
correspondences suitable for importing into the markers db.

=head1 SEE ALSO

Markers DB.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2007 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
