#!/usr/local/bin/perl

use strict;
use warnings;
use Bio::GMOD::CMap;
use Data::Dumper;
use English qw( -no_match_vars );
use File::Basename;
use File::Path qw( mkpath );
use File::Spec::Functions;
use Getopt::Long;
use Gramene::CDBI::Genes;
use Gramene::CDBI::Markers;
use Gramene::DB;
use Gramene::Marker::DB;
use Gramene::QTL::DB;
use IO::Prompt;
use Pod::Usage;
use Readonly;

Readonly my $VERSION 
    => sprintf '%d.%02d', qq$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
Readonly my $QTL       => 'QTL';
Readonly my $COMMA     => q{,};
Readonly my $EMPTY_STR => q{};
Readonly my $NL        => qq{\n};
Readonly my $SPACE     => q{ };
Readonly my $MARKER_IMPORT_SCRIPT 
    => q[/usr/local/gramene/scripts/markers/load-mappings.pl ]
    .  q[-p --no-prompt --analysis-name='QTL_DB'];
Readonly my @OUT_FIELDS => qw(
    marker_name
    marker_synonyms
    marker_species
    marker_type
    library_name
    feature_acc
    feature_name
    map_acc
    map_name
    marker_start
    marker_stop
    description
    chromosome
);

my $out_dir  = $EMPTY_STR;
my $qtl_accs = $EMPTY_STR;
my ( $help, $man_page, $show_version );
GetOptions(
    'o|out=s'        => \$out_dir,
    'a|accessions:s' => \$qtl_accs,
    'help'           => \$help,
    'man'            => \$man_page,
    'version'        => \$show_version,
) or pod2usage(2);

if ( $help || $man_page ) {
    pod2usage({
        -exitval => 0,
        -verbose => $man_page ? 2 : 1
    });
}; 

if ( $show_version ) {
    my $prog = basename( $PROGRAM_NAME );
    print "$prog v$VERSION\n";
    exit 0;
}

if ( !$out_dir ) {
    pod2usage('No out directory specified');
}
elsif ( !File::Spec->file_name_is_absolute( $out_dir ) ) {
    $out_dir = File::Spec->rel2abs( $out_dir );
}

if ( !-d $out_dir ) {
    my $make_dir = prompt -yn,
        "The directory '$out_dir' does not exist.  OK to create? ";

    if ( $make_dir ) {
        mkpath( $out_dir );
    }
    else {
        die "Not OK, exiting.\n";
    }
}

my %restrict_export = map { $_, 1 } split /,\s*/, $qtl_accs;

my $qdb = Gramene::QTL::DB->new;
my $db  = $qdb->db;
my $sth = $db->prepare(
    q[
        select q.qtl_id,
               q.qtl_accession_id,
               t.trait_symbol, 
               q.published_symbol,
               t.to_accession as to_term,
               s.species,
               q.cmap_map_accession,
               q.linkage_group,
               q.start_position,
               q.stop_position,
               q.comments,
               q.chromosome
        from   qtl q, 
               qtl_trait t,
               species s
        where  q.qtl_trait_id=t.qtl_trait_id
        and    q.species_id=s.species_id
    ]
);
$sth->execute;

my %species_id = map { $_->species, 1 } 
    Gramene::CDBI::Markers::Species->retrieve_all;

my ( %data, %map_set_acc, @errors );
QTL:
while ( my $qtl = $sth->fetchrow_hashref ) {
    my $species    = $qtl->{'species'};
    my $species_id = $species_id{ $species };

    if ( %restrict_export ) {
        next if !defined $restrict_export{ $qtl->{'qtl_accession_id'} };
    }

    if ( !$species_id ) {
        push @errors, "Can't find species '$species' in mappings db "
            . "for $qtl->{'qtl_accession_id'}.";
        next QTL;
    }

    my $map_acc = $qtl->{'cmap_map_accession'};
    if ( !$map_set_acc{ $map_acc } ) {
        my ($map) = Gramene::CDBI::Markers::Map->search(
            cmap_map_accession => $map_acc,
        );

        if ( !$map ) {
            push @errors, 
                "Can't find map '$map_acc' in mappings db "
                . "for QTL '$qtl->{qtl_accession_id}'";
            next QTL;
        }

        $map_set_acc{ $map_acc } = $map->map_set->cmap_map_set_accession;
    }

    my $map_set_acc = $map_set_acc{ $map_acc } 
        or die "No map set for map '$map_acc'\n";;

    my @synonyms;
    for my $fld ( qw[ to_term trait_symbol published_symbol ] ) {
        my $val = $qtl->{ $fld } or next;
        push @synonyms, sprintf('%s [[synonym_type=%s]]', $val, uc $fld);
    }

    my $lit_id  = $qdb->get_qtl_lit_id( qtl_id => $qtl->{'qtl_id'} );
    my $library = $lit_id ? "Gramene Literature $lit_id" : $EMPTY_STR;

    push @{ $data{ $map_set_acc } }, {
        marker_name      
            => sprintf('%s [[synonym_type=QTL_ACCESSION]]', 
               $qtl->{'qtl_accession_id'}),
        marker_synonyms  => join( $COMMA, @synonyms ),
        marker_species   => $species,
        marker_type      => $QTL,
        library_name     => $library,
        feature_acc      => $qtl->{'qtl_accession_id'},
        feature_name     => $qtl->{'trait_symbol'},
        map_acc          => $map_acc,
        map_name         => $qtl->{'linkage_group'},
        marker_start     => $qtl->{'start_position'},
        marker_stop      => $qtl->{'stop_position'},
        description      => $qtl->{'comments'},
        chromosome       => $qtl->{'chromosome'},
    }
}

if ( @errors ) {
    print join($NL, 
        'Found ' . scalar @errors . ' errors:',
        @errors,
        '',
    );
}
else {
    print "Found no errors\n";
}

if ( %data ) {
    my $shell_file = catfile( $out_dir, 'marker-import.sh' );
    open my $shell_fh, '>', $shell_file 
        or die "Can't write '$shell_file': $!\n";

    my $i;
    my ( $num_mappings, $num_map_sets );
    for my $map_set_acc ( keys %data ) {
        $i++;
        my $file = catfile( $out_dir, "${map_set_acc}.tab" );

        open my $fh, '>', $file or die "Can't write $file: $!\n";

        print $fh join("\t", @OUT_FIELDS), "\n";

        for my $qtl ( @{ $data{ $map_set_acc } } ) {
            $num_mappings++;
            no warnings 'uninitialized';
            print $fh join("\t", 
                map { s/'/\\\'/g; $_ }
                ( map { $qtl->{ $_ } } @OUT_FIELDS )
            ), "\n";
        }
        $num_map_sets++;

        my $redir = $i == 1 ? '>' : '>>';

        print $shell_fh 
            "$MARKER_IMPORT_SCRIPT -m $map_set_acc $file 2${redir}err\n";
    }

    print $shell_fh join( $NL, 
        'if [ -s err ]; then', 
        '  echo "There were errors:";', 
        '  cat err;', 
        'fi' 
    );

    close $shell_fh;
    print join($NL, 
        "Finished exporting $num_mappings mappings for $num_map_sets map sets.",
        'Now do this:',
        "sh $shell_file",
        $EMPTY_STR
);

}
else {
    print "No data generated.\n";
}

__END__

# ----------------------------------------------------

=pod

=head1 NAME

export-qtl-to-mdb.pl - export QTL mapping data to tab file for mappings db 

=head1 VERSION

This documentation refers to export-qtl-to-mdb.pl version $Revision: 1.9 $

=head1 SYNOPSIS

  export-qtl-to-mdb.pl [options] -o OUT_DIR

Then note the errors and follow the directions.

Options:

  -o|--out=DIR        Diretory to write output files
  -a|--accession=...  Comma-separated list of QTL accessions
  --help              Show brief help and exit
  --man               Show full documentation
  --version           Show version and exit

=head1 DESCRIPTION

This script exports the QTL db's map position data to tab-delimited files.
Each file is named for the CMap map set accession.  A shell script is created
that should be run to import the data when this script has finished.

To export only a defined set of QTLs, list them by their accession, 
separated by commas, using the "-a" flag.

=head1 SEE ALSO

Gramene::CDBI::Markers.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2006 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
