#!/usr/local/bin/perl

# $Id: export-markers.pl,v 1.11 2007/02/26 20:59:01 kclark Exp $

use strict;
use File::Basename;
use File::Path qw( mkpath );
use File::Spec::Functions;
use Getopt::Long;
use Gramene::Marker::DB;
use Gramene::Utils qw( commify );
use Pod::Usage;
use Readonly;

# Columns exported when no usable --fields list is supplied.
Readonly my @DEFAULT_FIELDS => (
    'marker_id',   'marker_name', 'marker_synonyms',
    'marker_type', 'species',     'seq',
);

# Literal strings used when assembling the output.
Readonly my $COMMA     => ',';
Readonly my $EMPTY_STR => '';
Readonly my $NL        => "\n";
Readonly my $TAB       => "\t";

# Script version ("major.minor"), taken from the revision-control keyword.
Readonly my $VERSION => sprintf(
    '%d.%02d', q$Revision: 1.11 $ =~ m{(\d+)[.](\d+)}
);

# Command-line option values.  The three search filters default to the
# empty string; $no_details is a simple on/off flag.
my $out_dir     = $EMPTY_STR;
my $marker_name = $EMPTY_STR;
my $marker_type = $EMPTY_STR;
my $species     = $EMPTY_STR;
my $no_details  = 0;

my ( $help, $fields, $show_version );

# GetOptions returns false when it encounters an unknown or malformed
# option; show the usage and stop rather than run with a partial setup.
GetOptions(
    'no-details'  => \$no_details,
    'o|out-dir=s' => \$out_dir,
    'f|fields:s'  => \$fields,
    'n|name:s'    => \$marker_name,
    't|type:s'    => \$marker_type,
    's|species:s' => \$species,
    'h|help'      => \$help,
    'v|version'   => \$show_version,
) or pod2usage(2);
pod2usage(2) if $help;

if ( $show_version ) {
    my $prog = basename( $0 );
    print "$prog v$VERSION\n";
    exit(0);
}

# The output directory is a required argument.  Without this check an
# empty $out_dir would be passed to catfile(), which produces paths rooted
# at "/" on Unix instead of failing.
if ( !defined $out_dir || $out_dir eq $EMPTY_STR ) {
    pod2usage(
        -message => 'No output directory specified (use -o|--out-dir)',
        -exitval => 2,
    );
}

# Only the default field names may be requested with --fields.
my %valid_field = map { $_ => 1 } @DEFAULT_FIELDS;

# Parse the comma-separated --fields value: trim surrounding whitespace,
# skip empty entries, warn about unrecognized names (rather than dropping
# them silently) and keep only the first occurrence of each name so that
# no column is written twice.
my @fields;
my %requested;
for my $raw ( split /,/, defined $fields ? $fields : $EMPTY_STR ) {
    ( my $name = $raw ) =~ s/^\s+|\s+$//g;
    next if $name eq $EMPTY_STR;

    if ( !$valid_field{ $name } ) {
        warn "Ignoring unknown field '$name'\n";
        next;
    }

    push @fields, $name if !$requested{ $name }++;
}

# Fall back to the complete default list when nothing usable was requested.
if ( !@fields ) {
    @fields = @DEFAULT_FIELDS;
}

# Lookup table of the fields that will be printed.
my %print_field = map { $_ => 1 } @fields;

# Make sure the output directory exists before any file is opened in it.
mkpath( $out_dir ) unless -d $out_dir;

# Search the marker db using the filters given on the command line.
my @search_args = (
    marker_type => $marker_type,
    species     => $species,
    marker_name => $marker_name,
);
my $mdb     = Gramene::Marker::DB->new;
my @markers = $mdb->marker_search( @search_args );

# Export loop: writes one tab-delimited file per marker type into $out_dir
# (or a single "marker.tab" when --no-details is in effect).
#
#   %fh         marker type => open output filehandle
#   %file_for   marker type => path of that file (used in error messages)
#   @out_files  paths of the files created, in creation order
my %fh;
my %file_for;
my @out_files;
my $num_markers = 0;

# Extra columns written for "overgo" markers.  The header line is printed
# only once per file while the column list is rebuilt for every marker, so
# the column order must be fixed; taking it from "keys" of a hash is not
# guaranteed to give the same order from one marker to the next.
my @overgo_name_types = qw( contained_in_sequence related_probe );
my @overgo_seq_types  = qw( overgo_primer_right overgo_primer_left );
my %take     = map { $_ => 1 } @overgo_name_types;
my %sequence = map { $_ => 1 } @overgo_seq_types;

if ( @markers ) {
    for my $m ( @markers ) {
        # With --no-details every marker is written to the same file.
        my $marker_type = $no_details ? 'marker' : $m->{'marker_type'};

        if ( $print_field{'marker_synonyms'} ) {
            $m->{'marker_synonyms'} = join( $COMMA,
                map { $_->{'marker_name'} }
                @{ $mdb->get_marker_synonyms( marker_id => $m->{'marker_id'} ) }
            );
        }

        # Columns for this marker: the requested fields first, then any
        # detail fields, then (for overgos) the correspondence columns.
        my @print = @fields;
        if ( !$no_details ) {
            my $details = $mdb->get_marker_details(
                marker_id => $m->{'marker_id'}
            );

            for my $fld ( @{ $details->{'ordered_field_names'} } ) {
                next if $fld eq 'marker_id';
                $m->{ $fld } = $details->{ $fld };
                push @print, $fld;
            }

            $m->{'seq'} = $mdb->get_marker_sequence(
                marker_id => $m->{'marker_id'}
            );
        }

        if ( lc $marker_type eq 'overgo' ) {
            # Start every overgo column out empty so that each row has the
            # same number of columns even when a correspondence is missing.
            for my $type ( @overgo_name_types, @overgo_seq_types ) {
                $m->{ $type } = $EMPTY_STR;
                push @print, $type;
            }

            my @corr = $mdb->get_marker_correspondences(
                marker_id => $m->{'marker_id'}
            );

            CORRESPONDENCE:
            for my $c ( @corr ) {
                # Pick the side of the correspondence that is NOT this
                # marker.
                my $dir = $c->{'to_marker_id'} eq $m->{'marker_id'}
                          ? 'from' : 'to';
                my $val = $EMPTY_STR;

                if ( $take{ $c->{'type'} } ) {
                    # Report the name of the other marker.
                    $val = $c->{ "${dir}_marker_name" };
                }
                elsif ( $sequence{ $c->{'type'} } ) {
                    # Report the sequence of the other marker.
                    my $other_marker_id = $c->{ "${dir}_marker_id" };

                    $val = $mdb->get_marker_sequence(
                        marker_id => $other_marker_id
                    );
                }
                else {
                    next CORRESPONDENCE;
                }

                $m->{ $c->{'type'} } = $val;
            }
        }

        # Open the output file for this type on first use and write the
        # header.  NOTE(review): the header reflects the columns of the
        # first marker seen for a type; this assumes all markers of one
        # type yield the same detail fields -- confirm.
        if ( !defined $fh{ $marker_type } ) {
            ( my $type = lc $marker_type ) =~ s/\s+/_/g;
            my $out_file = catfile( $out_dir, "${type}.tab" );
            open my $fh, '>', $out_file or die "Can't write '$out_file': $!\n";
            print {$fh} join( $TAB, @print ), $NL
                or die "Can't write to '$out_file': $!\n";
            $fh{ $marker_type }       = $fh;
            $file_for{ $marker_type } = $out_file;
            push @out_files, $out_file;
        }

        # Undefined values are written as empty columns.
        my $fh   = $fh{ $marker_type };
        my $file = $file_for{ $marker_type };
        print {$fh} join( $TAB,
            map { defined $m->{ $_ } ? $m->{ $_ } : $EMPTY_STR } @print
        ), $NL or die "Can't write to '$file': $!\n";
        $num_markers++;
    }

    # Buffered write errors only surface on close, so check it.
    for my $type ( keys %fh ) {
        my $file = $file_for{ $type };
        close $fh{ $type } or die "Can't close '$file': $!\n";
    }

    print "Done, exported ", commify($num_markers), " to '$out_dir'.\n";
    for my $file ( @out_files ) {
        print "  $file\n";
    }
}
else {
    print "No data returned.\n";
}

__END__

# ----------------------------------------------------

=pod

=head1 NAME

export-markers.pl - export marker data from the marker db

=head1 SYNOPSIS

  export-markers.pl -o OUT_DIR [options]

Required Arguments:

  -o|--out-dir=/tmp  The directory in which to write the files

Options:

  --no-details       Don't print any details fields
  -f|--fields        A comma-separated list of fields
  -n|--name          Limit markers by name (wildcards OK)
  -t|--type          Limit markers by type
  -s|--species       Limit markers by species
  -h|--help          Show brief help and exit
  -v|--version       Show version and exit

=head1 DESCRIPTION

Exports data from the marker db as tab-delimited files into the specified
output directory.  One file will be created for each marker type exported.

If you specify "no-details" then all the markers will go to a file called
"marker.tab".

The following fields may be requested with the "fields" argument:

=over 4

=item * marker_id

The marker's primary key

=item * species

The marker's species (e.g., "rice")

=item * marker_type

The marker's type (e.g., "RFLP")

=item * marker_name

The marker's primary name

=item * marker_synonyms

A comma-separated list of the marker's synonyms

=item * seq

The marker's sequence

=back

If no "fields" argument is supplied, then all of the above fields will be
exported.  Unless "no-details" is specified, the fields from the details
table are appended to the selected fields.

=cut

=head1 SEE ALSO

Gramene::Marker::DB.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.orgE<gt>.

=cut
