#!/usr/local/bin/perl

# vim: tw=78: sw=4: ts=4: et: 

# $Id: export-ontology-assoc.pl,v 1.1 2006/11/01 20:23:49 kclark Exp $

use strict;
use warnings;
use DateTime;
use English qw( -no_match_vars );
use File::Basename;
use File::Path;
use File::Spec::Functions;
use Getopt::Long;
use Gramene::CDBI::Markers;
use Gramene::Marker::DB;
use IO::Prompt;
use Pod::Usage;
use Readonly;

# Separator characters used when assembling output.
Readonly my $COMMA   => q{,};
Readonly my $PIPE    => q{|};
Readonly my $TAB     => qq{\t};
Readonly my $NEWLINE => qq{\n};

# Column order shared by every association file: the header row and each
# data row are printed in exactly this sequence.
Readonly my @OUT_FIELDS => qw[
    DB
    DB_Object_ID
    DB_Object_Symbol
    Qualifier
    GOid
    DB:Reference
    Evidence
    With(or)From
    Aspect
    DB_Object_Name
    DB_Object_Synonym
    DB_Object_Type
    Taxon
    Date
    Assigned_by
];

# Program version built from the revision keyword: the major number as-is,
# the minor number zero-padded to two digits.
Readonly my $VERSION => do {
    my ( $major, $minor ) = qq$Revision: 1.1 $ =~ /(\d+)\.(\d+)/;
    sprintf '%d.%02d', $major, $minor;
};

# Command-line state.  $out_dir is read again by the export sections that
# follow; the three flags are only consulted here.
my $out_dir = q{};
my $help;
my $man_page;
my $show_version;

GetOptions(
    'd|dir=s' => \$out_dir,
    'help'    => \$help,
    'man'     => \$man_page,
    'version' => \$show_version,
) or pod2usage(2);

# --man takes precedence over --help when both are given; either one exits
# successfully after printing the documentation.
if ( $man_page ) {
    pod2usage({ -exitval => 0, -verbose => 2 });
}
elsif ( $help ) {
    pod2usage({ -exitval => 0, -verbose => 1 });
}

if ( $show_version ) {
    printf "%s v%s\n", basename( $PROGRAM_NAME ), $VERSION;
    exit 0;
}

# The output directory is the one required argument.
pod2usage('No out directory specified') if !$out_dir;

# Offer to create a missing output directory rather than failing later when
# the first file is opened.
unless ( -d $out_dir ) {
    my $ok_to_create = prompt -yn,
        "The directory '$out_dir' does not exist.  OK to create? ";

    die "Not OK, exiting.\n" if !$ok_to_create;

    mkpath( $out_dir );
}

# Marker database object.
my $mdb = Gramene::Marker::DB->new;

# Current date as YYYYMMDD (no separators), written to the "Date" column.
my $dt   = DateTime->now;
my $date = $dt->ymd(q{});

# Running totals for the summary printed at the end of the run.
my @out_files;
my $num_records = 0;

# Libraries
#
# For every xref type whose name ends in the word "ontology" (case
# insensitive), write a tab-delimited file "marker-library-<type>-assoc.tab"
# to the output directory, with one row per library xref of that type.
# Each written file is appended to @out_files and each row is counted in
# $num_records.
my @xref_types = Gramene::CDBI::Markers::XrefType->retrieve_all;
XREF_TYPE:
for my $xref_type ( @xref_types ) {
    my $ontology = $xref_type->xref_type;
    next XREF_TYPE if $ontology !~ /\bontology$/i;

    my @library_xrefs = Gramene::CDBI::Markers::Xref->search(
        table_name   => 'library',
        xref_type_id => $xref_type->id,
    );

    # The file name uses the lower-cased type name with runs of whitespace
    # turned into underscores.
    ( my $ont = lc $ontology ) =~ s/\s+/_/g;
    my $file = catfile $out_dir, "marker-library-${ont}-assoc.tab";
    open my $fh, '>', $file or die "Can't write $file: $!\n";

    print STDERR "Processing $ontology terms to $file\n";
    print {$fh} join( $TAB, @OUT_FIELDS ), $NEWLINE;

    XREF:
    for my $xref ( @library_xrefs ) {
        my $ontology_id = $xref->xref_value;
        my $library     = Gramene::CDBI::Markers::Library->retrieve(
            $xref->record_id
        );

        # An xref whose record_id matches no library row used to abort the
        # whole export with a method call on an undefined value.  Report it
        # and carry on with the remaining xrefs instead.
        if ( !$library ) {
            warn "Skipping $ontology xref '$ontology_id': no library with id '",
                $xref->record_id, "'\n";
            next XREF;
        }

        # Fall back to the placeholder taxon when the library has no species
        # object as well as when the species has no taxonomy id.
        my $species     = $library->species;
        my $species_tax = ref $species
            ? ( $species->gramene_taxonomy_id || 'GR_tax:0' )
            : 'GR_tax:0';

        # Columns not assigned below (Qualifier, With(or)From,
        # DB_Object_Synonym, DB:Reference) are left blank.
        my %rec = map { $_ => q{} } @OUT_FIELDS;

        $rec{'DB'}               = 'GR_markers';
        $rec{'DB_Object_Type'}   = 'Markers library';
        $rec{'Evidence'}         = 'SM';
        $rec{'Aspect'}           = 'Tx';
        $rec{'Assigned_by'}      = 'GR';
        $rec{'DB_Object_ID'}     = $library->id;
        $rec{'DB_Object_Symbol'} = $library->library_name;
        $rec{'DB_Object_Name'}   = $library->library_name;
        $rec{'GOid'}             = $ontology_id;
        $rec{'Taxon'}            = $species_tax;
        $rec{'Date'}             = $date;

        print {$fh} join( $TAB, @rec{@OUT_FIELDS} ), $NEWLINE;
        $num_records++;
    }

    # Buffered write errors only surface at close, so check it.
    close $fh or die "Can't close $file: $!\n";
    push @out_files, $file;
}

# Taxonomy
#
# Write one association row for every species that has a Gramene taxonomy id
# to "marker-species-assoc.tab" in the output directory.  The file is
# appended to @out_files and each row is counted in $num_records.
my $species_file = catfile $out_dir, 'marker-species-assoc.tab';
open my $species_fh, '>', $species_file
    or die "Can't write $species_file: $!\n";
print {$species_fh} join( $TAB, @OUT_FIELDS ), $NEWLINE;

# DISABLED: setup for the per-marker export that is commented out inside the
# loop below.  It used to run on every invocation -- fetching every marker
# xref and opening a second database handle -- although no active code read
# the results.  Restore it together with the per-marker block if that export
# is ever re-enabled.
# NOTE(review): Gramene::DB is not loaded by a "use" line in this script.
#
#print STDERR "Caching xrefs\n";
#my @xrefs = 
#    grep { $_->xref_type->xref_type eq 'Gramene Literature' }
#    Gramene::CDBI::Markers::Xref->search( table_name => 'marker' );
#
#my %xrefs;
#for my $xref ( @xrefs ) {
#    push @{ $xrefs{ $xref->record_id } }, $xref->xref_value;
#}
#
#my $db2 = Gramene::DB->new('markers');
#$db2->{'mysql_use_result'} = 1;

print STDERR "Processing taxonomy terms to $species_file\n";
my @species = Gramene::CDBI::Markers::Species->retrieve_all;
for my $species ( @species ) {
    # Species without a taxonomy id have nothing to associate.
    my $ontology_id = $species->gramene_taxonomy_id or next;

    print STDERR "++ doing species '", $species->species, "'\n";

    # Columns not assigned below (DB_Object_Symbol, Qualifier, With(or)From,
    # DB:Reference) are left blank.
    my %rec                   = map { $_ => q{} } @OUT_FIELDS;
    $rec{'DB'}                = 'GR_markers';
    $rec{'DB_Object_Type'}    = 'Marker Species';
    $rec{'Evidence'}          = 'SM';
    $rec{'Aspect'}            = 'Tx';
    $rec{'Assigned_by'}       = 'GR';
    $rec{'DB_Object_ID'}      = $species->id;
    $rec{'DB_Object_Name'}    = $species->species;
    $rec{'DB_Object_Synonym'} = $species->common_name || q{};
    # The taxonomy id serves as both the associated term and the taxon.
    $rec{'GOid'}              = $ontology_id;
    $rec{'Taxon'}             = $ontology_id;
    $rec{'Date'}              = $date;

    print {$species_fh} join( $TAB, @rec{@OUT_FIELDS} ), $NEWLINE;
    $num_records++;

# DISABLED: per-marker export.  Needs the disabled setup above.
# NOTE(review): this prints to $tax_fh, which is not declared anywhere in
# this script -- decide which handle it should write to before re-enabling.
#
#    my $marker_sth = $db2->prepare(
#        q[
#            select m.marker_id, s.marker_name
#            from   marker m, marker_synonym s
#            where  m.source_species_id=?
#            and    m.display_synonym_id=s.marker_synonym_id
#        ]
#    );
#    $marker_sth->execute( $species->id );
#
#    while ( my $marker = $marker_sth->fetchrow_hashref ) {
#        my @synonyms = map { $_->{'marker_name'} }
#            $mdb->get_marker_synonyms( marker_id => $marker->{'marker_id'} );
#        my $db_refs  = join $PIPE, map { "GR_REF:${_}" }
#            @{ $xrefs{ $marker->{'marker_id'} } || [] };
#
#        my %rec                   = map { $_, '' } @OUT_FIELDS;
#        $rec{'DB'}                = 'GR_markers';
#        $rec{'DB_Object_Type'}    = 'Markers';
#        $rec{'Evidence'}          = 'SM';
#        $rec{'Aspect'}            = 'Tx';
#        $rec{'Assigned_by'}       = 'GR';
#        $rec{'DB_Object_ID'}      = $marker->{'marker_id'};
#        $rec{'DB_Object_Symbol'}  = $marker->{'marker_name'};
#        $rec{'DB_Object_Name'}    = $marker->{'marker_name'};
#        $rec{'DB_Object_Synonym'} = join($PIPE, @synonyms);
#        $rec{'GOid'}              = $ontology_id;
#        $rec{'Taxon'}             = $ontology_id;
#        $rec{'DB:Reference'}      = $db_refs;
#        $rec{'Date'}              = $date;
#
#        print $tax_fh join( $TAB, map { $rec{ $_ } } @OUT_FIELDS ), 
#            $NEWLINE;
#        $num_records++;
#    }
}

# Buffered write errors only surface at close, so check it.
close $species_fh or die "Can't close $species_file: $!\n";
push @out_files, $species_file;

# Final report on STDERR: a summary line followed by each file written, one
# per line.
my $file_total = scalar @out_files;
print STDERR map { $_ . $NEWLINE }
    "Done, processed $num_records into $file_total files:",
    @out_files;

__END__

# ----------------------------------------------------

=pod

=head1 NAME

export-ontology-assoc.pl - export marker ontology associations

=head1 VERSION

This documentation refers to export-ontology-assoc.pl version $Revision: 1.1 $

=head1 SYNOPSIS

  export-ontology-assoc.pl -d /path/to/output/dir

Required Arguments:

  -d|--dir      The directory where to write the output files

Options:

  --help        Show brief help and exit
  --man         Show full documentation
  --version     Show version and exit

=head1 DESCRIPTION

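This script exports the marker libraries and species associated with
ontology terms as tab-delimited association files: one file per library
ontology type and one file for species taxonomy.  The export of
individual markers is currently disabled in the code.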

=head1 SEE ALSO

Gramene::Marker::DB.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2006 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
