#!/usr/local/bin/perl

# vim: tw=78: sw=4: ts=4: et: 

# $Id: infer-qtl-genome-mapping.pl,v 1.3 2007/05/29 14:00:30 kclark Exp $

use strict;
use warnings;
use Data::Dumper;
use English qw( -no_match_vars );
use File::Basename;
use Getopt::Long;
use Gramene::CDBI::Markers;
use Gramene::CDBI::Qtl;
use Gramene::Marker::DB;
use List::MoreUtils qw( uniq );
use Pod::Usage;
use Readonly;

Readonly my $COLOC_ANALYSIS    => 'Gramene_inferred_QTL';
Readonly my $NEIGHBOR_ANALYSIS => 'Gramene_estimated_QTL';
Readonly my $EMPTY_STR         => q{};
Readonly my $NL                => qq{\n};
Readonly my $TAB               => qq{\t};
Readonly my $VERSION 
    => sprintf '%d.%02d', qq$Revision: 1.3 $ =~ /(\d+)\.(\d+)/;

my $genome_map_set_accession = $EMPTY_STR;
my $out_file                 = $EMPTY_STR;
my ( $help, $man_page, $show_version );

GetOptions(
    'm|map-set-acc=s' => \$genome_map_set_accession,
    'o|out-file=s'    => \$out_file,
    'help'            => \$help,
    'man'             => \$man_page,
    'version'         => \$show_version,
) or pod2usage(2);

if ( $help || $man_page ) {
    pod2usage({
        -exitval => 0,
        -verbose => $man_page ? 2 : 1
    });
}; 

if ( $show_version ) {
    my $prog = basename( $PROGRAM_NAME );
    print "$prog v$VERSION\n";
    exit 0;
}

my $MapSet;
if ( $genome_map_set_accession ) {
    ($MapSet) = Gramene::CDBI::Markers::MapSet->search(
        cmap_map_set_accession => $genome_map_set_accession,
    );

    if ( !$MapSet ) {
        pod2usage(
            "Invalid genome map set accession ($genome_map_set_accession)\n"
        );
    }
}
else {
    pod2usage('No genome map set accession');
}

my $mdb  = Gramene::Marker::DB->new;
my @qtls = $mdb->marker_search( 
    marker_type => 'QTL',
    species_id  => $MapSet->species->id,
);

print join( $TAB, qw[ 
    marker_id 
    marker_type 
    marker_species 
    marker_name 
    feature_acc 
    feature_name 
    map_acc 
    map_name 
    marker_start 
    marker_end 
    analysis
] ), $NL;

my %feature_acc;

QTL:
for my $qtl ( @qtls ) {
    my @related_markers = $mdb->get_marker_correspondences(
        marker_id => $qtl->{'marker_id'},
    ) or next QTL;

    my ( @colocalized, @neighboring );
    for my $marker ( @related_markers ) {
        if ( $marker->{'analysis_name'} =~ /^colocalized/ ) {
            push @colocalized, $marker;
        }
        else {
            push @neighboring, $marker;
        }
    }

    my ( $analysis, @related );
    if ( scalar @colocalized == 0 ) {
        @related = @neighboring;
        $analysis = $NEIGHBOR_ANALYSIS;
    }
    else {
        @related = @colocalized;
        $analysis = $COLOC_ANALYSIS;
    }

    if ( !@related ) {
        next QTL;
    }

    my @marker_ids;
    for my $marker ( @related ) {
        for my $dir ( qw[ from to ] ) {
            my $fld = "${dir}_marker_id";

            if ( $qtl->{'marker_id'} != $marker->{ $fld } ) {
                push @marker_ids, $marker->{ $fld };
            }
        }
    }

    my %pos_by_map;
    for my $marker_id ( uniq( @marker_ids ) ) {
        my $positions = $mdb->db->selectall_arrayref(
            q[
                select map.cmap_map_accession, map.map_name,
                       mp.marker_id, mp.start, mp.end
                from   mapping mp, map
                where  mp.marker_id=?
                and    mp.cmap_feature_accession is not null
                and    mp.map_id=map.map_id
                and    map.cmap_map_accession is not null
                and    map.map_set_id=?
            ],
            { Columns => {} },
            ( $marker_id, $MapSet->id )
        );

        for my $pos ( @$positions ) {
            push @{ $pos_by_map{ $pos->{'cmap_map_accession'} } }, $pos;
        }
    }

    next QTL if !%pos_by_map;

    MAP:
    for my $map_acc ( keys %pos_by_map ) {
        my $map_name = $pos_by_map{ $map_acc }->[0]{'map_name'};
        my $qtl_acc  = $qtl->{'marker_name'};
        my ($Qtl)    = Gramene::CDBI::Qtl::Qtl->search(
            qtl_accession_id => $qtl_acc
        ) or die "Bad QTL accession ($qtl_acc)\n";
        my $qtl_chr  = $Qtl->chromosome or next MAP;

        if ( $map_name =~ /(\d+)$/ ) {
            my $chromosome = $1;
            next MAP if $qtl_chr != $chromosome;
        }
        else {
            print STDERR "Can't deduce chromosome for QTL '$qtl_acc' ",
                "from map name '$map_name'\n";
            next MAP;
        }

        my @pos = uniq(
            sort { $a <=> $b }
            map  { defined $_ ? $_ : () }
            map  { $_->{'start'}, $_->{'end'} }
            @{ $pos_by_map{ $map_acc } }
        );

        my $feature_name = join('-', $qtl_acc, $Qtl->qtl_trait->trait_symbol);
        my $feature_acc  = join('-', 
            $map_acc,
            $qtl_acc,
            ++$feature_acc{ $map_acc }{ $qtl_acc }
        );

        print join( $TAB,
            $qtl->{'marker_id'},
            'QTL',
            $qtl->{'species'},
            $qtl_acc,
            $feature_acc,
            $feature_name,
            $map_acc,
            $map_name,
            $pos[0],
            $pos[-1],
            $analysis,
        ), $NL;
    }
}

__END__

# ----------------------------------------------------

=pod

=head1 NAME

infer-qtl-genome-mapping.pl - infer QTL genome positions

=head1 VERSION

This documentation refers to version $Revision: 1.3 $

=head1 SYNOPSIS

  infer-qtl-genome-mapping.pl -m foobar

Required Arguments:

  -m|--map-set-acc  The CMap map set accession of the genome

Options:

  --help        Show brief help and exit
  --man         Show full documentation
  --version     Show version and exit

=head1 DESCRIPTION

This script selects all the QTLs with the same species as the
reference genome map and then uses the markers from the genome which
are associated to the QTLs to determine the longest span that the QTLs
might occupy on the genome.  Only markers on the same chromosome as
the QTL (from the QTL db) are considered.  Creates a tab-delimited
file suitable for use by "load-mappings.pl."

=head1 SEE ALSO

Markers, QTLs.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2007 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
