#!/usr/local/bin/perl

# vim: tw=78: sw=4: ts=4: et: 

# $Id: export-genes-to-mdb.pl,v 1.6 2007/06/05 20:20:04 kclark Exp $

use strict;
use warnings;
use English qw( -no_match_vars );
use File::Basename;
use File::Path qw( mkpath );
use File::Spec::Functions;
use Getopt::Long;
use Gramene::Marker::DB;
use Gramene::CDBI::Genes;
use Gramene::CDBI::Markers;
use List::MoreUtils qw( uniq );
use IO::Prompt;
use Pod::Usage;
use Readonly;

# Separators and literal strings used when building output rows and messages.
Readonly my $COMMA           => q{,};
Readonly my $EMPTY_STR       => q{};
# Marker type written for every exported row and used for obsolete genes.
Readonly my $GENE            => 'Gene';
# Gene names/synonyms longer than this are left out of the synonym column.
Readonly my $MAX_NAME_LENGTH => 50;
Readonly my $NL              => qq{\n};
Readonly my $SPACE           => q{ };
Readonly my $TAB             => qq{\t};
# Command (with its options) printed at the end of the run, followed by the
# output file name, for the operator to execute.
Readonly my $MARKER_IMPORT_SCRIPT 
    => '/usr/local/gramene/scripts/markers/load-markers.pl -v -p -a GENES_DB';
# Command printed once for each obsolete gene that was found as a marker.
Readonly my $DELETE_MARKER_SCRIPT 
    => '/usr/local/gramene/scripts/markers/delete-marker.pl';
# "major.minor" taken from the Revision keyword string below; the minor part
# is zero-padded to two digits.  The keyword text is presumably maintained by
# the version control system -- do not edit it by hand.
Readonly my $VERSION 
    => sprintf '%d.%02d', qq$Revision: 1.6 $ =~ /(\d+)\.(\d+)/;

# Command-line handling.  $out_file is the only option consulted later in the
# script; the remaining flags just select an informational exit.
my $out_file = $EMPTY_STR;
my ( $want_help, $want_man, $want_version );

GetOptions(
    'o|out=s' => \$out_file,
    'help'    => \$want_help,
    'man'     => \$want_man,
    'version' => \$want_version,
) or pod2usage(2);

# --man asks Pod::Usage for verbosity 2, --help for verbosity 1; both exit
# with status 0.
if ( $want_man ) {
    pod2usage({ -exitval => 0, -verbose => 2 });
}
elsif ( $want_help ) {
    pod2usage({ -exitval => 0, -verbose => 1 });
}

if ( $want_version ) {
    print basename( $PROGRAM_NAME ), " v$VERSION\n";
    exit 0;
}

# An output file is mandatory for everything that follows.
pod2usage('No output file specified') if !$out_file;
   
# ----------------------------------------------------
# Export: write one tab-delimited row per non-obsolete gene to $out_file and
# remember obsolete genes that are present in the markers db so that delete
# commands can be printed for them afterwards.
# ----------------------------------------------------
open my $fh, '>', $out_file or die "Can't write $out_file: $!\n";

my $mdb = Gramene::Marker::DB->new;

my $genes_iter = Gramene::CDBI::Genes::GeneGene->retrieve_all;

# Header row: the column names, in the same order as the fields written for
# each gene below.
print $fh join($TAB, qw[
    marker_name
    marker_synonyms
    marker_species
    marker_type
    chromosome
    description
]), $NL;

my $num_genes = 0;
my @obsolete;
GENE:
while ( my $gene = $genes_iter->next ) {
    # "Genus species" is needed both for the marker lookup of obsolete genes
    # and for the exported row, so build it once per gene.
    my $species = join( $SPACE, 
        $gene->species->genus, $gene->species->species 
    );

    if ( $gene->is_obsolete ) {
        # Obsolete genes are never exported.  A non-empty search result is
        # taken to mean the gene exists as a marker and should be deleted.
        my @markers = $mdb->marker_search( 
            marker_name => $gene->accession,
            marker_type => $GENE,
            species     => $species,
        );

        if ( @markers ) {
            push @obsolete, {
                marker_name => $gene->accession,
                marker_type => $GENE,
                species     => $species,
            };
        }

        next GENE;
    }

    # Collect the gene name and its synonyms, de-duplicated through the hash.
    my %synonyms;
    NAME:
    for my $candidate ( 
        $gene->name, 
        map { $_->synonym_name } $gene->gene_gene_synonyms 
    ) {
        # Work on a copy so the quoting below never writes through the loop
        # alias into the value list.
        my $name = $candidate;

        next NAME if !$name;
        next NAME if $name =~ /'/;
        next NAME if length($name) > $MAX_NAME_LENGTH;

        # The synonym column is comma-separated, so a name that itself
        # contains a comma is wrapped in double quotes.
        if ( $name =~ /$COMMA/ ) {
            $name = qq["$name"];
        }

        $synonyms{ $name } = 1;
    }

    # An undefined symbol is written as an empty string (same output as
    # before, without the "uninitialized" warning).
    my $symbol = $gene->symbol;
    if ( !defined $symbol ) {
        $symbol = $EMPTY_STR;
    }

    my @fields = (
        sprintf('%s [[synonym_type=GRAMENE_GENE]]', $gene->accession),
        join( $COMMA, 
            sprintf('%s [[synonym_type=GENE_SYMBOL]]', $symbol),
            # Hash keys are already unique; sorting makes the output
            # reproducible from run to run.
            sort keys %synonyms
        ),
        $species,
        $GENE,
        $gene->chromosome,
        $gene->description,
    );

    # Sanitize copies of the values: escape single quotes, and replace line
    # breaks, bare carriage returns and tabs with a space so that a value can
    # never split the row or shift its columns.
    my @clean;
    for my $field ( @fields ) {
        my $value = defined $field ? $field : $EMPTY_STR;
        $value =~ s/'/\\'/g;
        $value =~ s/\r?\n|[\r\t]/ /g;
        push @clean, $value;
    }

    print $fh join( $TAB, @clean ), $NL;

    $num_genes++;
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so check it before telling the user the export succeeded.
close $fh or die "Can't close $out_file: $!\n";


# ----------------------------------------------------
# Report what was exported and print the shell commands the operator should
# run next: one import command for the output file, then one delete command
# per obsolete gene collected in @obsolete.
# ----------------------------------------------------

# Leave ordinary file names exactly as typed; quote anything that contains
# characters the shell could interpret (spaces, quotes, globs, ...).
my $out_file_arg = $out_file =~ m{\A[\w./-]+\z}
    ? $out_file
    : _shell_quote( $out_file );

print join($NL, 
    "Finished exporting $num_genes genes.",
    'Now do this:',
    "$MARKER_IMPORT_SCRIPT $out_file_arg",
    $EMPTY_STR
);

for my $o ( @obsolete ) {
    print join( $SPACE,
        $DELETE_MARKER_SCRIPT,
        '-n', _shell_quote( $o->{'marker_name'} ),
        '-t', _shell_quote( $o->{'marker_type'} ),
        '-s', _shell_quote( $o->{'species'} ),
    ), $NL;
}

# Return $text wrapped in single quotes for use as one shell argument.  An
# embedded single quote is written as '\'' (close the quote, emit an escaped
# quote, reopen), so values taken from the database cannot break out of the
# argument.  An undefined value becomes the empty argument ''.
sub _shell_quote {
    my ($text) = @_;

    if ( !defined $text ) {
        $text = $EMPTY_STR;
    }

    $text =~ s/'/'\\''/g;

    return qq{'$text'};
}

__END__

# ----------------------------------------------------

=pod

=head1 NAME

export-genes-to-mdb.pl - export gene mapping data to tab file for mappings db 

=head1 VERSION

This documentation refers to version $Revision: 1.6 $

=head1 SYNOPSIS

  export-genes-to-mdb.pl -o OUT_FILE

Then note the errors and follow the directions.

Options:

  -o|--out=FILE  File to write data
  --help         Show brief help and exit
  --man          Show full documentation
  --version      Show version and exit

=head1 DESCRIPTION

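This script exports every non-obsolete gene in the genes db to a single
tab-delimited file, one gene per line, in the column layout of a marker
import file.  When the export has finished, the script prints the command to
run to import that file, followed by a delete command for each obsolete gene
that is still present in the markers db.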

=head1 SEE ALSO

Gramene::CDBI::Genes, Gramene::CDBI::Markers.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2006 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
