#!/usr/local/bin/perl

# $Id: qtl_import.pl,v 1.32 2007/02/26 21:01:56 kclark Exp $

use strict;
use warnings;
use English qw( -no_match_vars );
use File::Basename;
use Getopt::Long;
use Gramene::CDBI::Qtl;
use Gramene::QTL::DB;
use Pod::Usage;
use Readonly;
use Scalar::Util qw( looks_like_number );
use Text::ParseWords 'quotewords';
use Text::RecordParser::Tab;

# Punctuation and empty-value constants shared by the script.
Readonly my $COMMA        => q{,};
Readonly my $COMMA_SPACE  => q{, };
Readonly my $EMPTY_STR    => q{};

# Xref type assigned to a dbxref value that has no "type:" prefix.
Readonly my $DEFAULT_XREF => q{Gramene Literature};

# Program version, taken from the major/minor numbers of the CVS
# revision keyword.
Readonly my $VERSION      => sprintf(
    '%d.%02d', qq$Revision: 1.32 $ =~ /(\d+)\.(\d+)/
);

# QTL column name => alternate input header names accepted for that
# column.  The column's own name is always tried before its alternates.
Readonly my %QTL_FIELD    => (
    cmap_map_accession => [ 'map_accession_id', 'map_acc' ],
    published_symbol   => [],
    linkage_group      => [ 'map_name' ],
    chromosome         => [],
    start_position     => [ 'feature_start', 'marker_start' ],
    stop_position      => [ 'feature_stop', 'marker_stop' ],
    comments           => [],
);

# Command-line switches; each one is a simple boolean flag.
my ( $help, $show_version, $man_page );

# GetOptions returns false when it meets an unknown or malformed option.
# Stop with a usage message in that case rather than going on to modify
# the database after a mistyped switch.
GetOptions(
    'help'       => \$help,
    'man'        => \$man_page,
    'version'    => \$show_version,
) or pod2usage(2);

# --help prints the synopsis and options; --man prints the full POD.
if ( $help || $man_page ) {
    pod2usage({
        -exitval => 0,
        -verbose => $man_page ? 2 : 1
    });
}

# --version prints the program name and version, then exits.
if ( $show_version ) {
    my $prog = basename( $PROGRAM_NAME );
    print "$prog v$VERSION\n";
    exit 0;
}

# Exactly one import file is expected on the command line.
die "Too many files\n" if scalar @ARGV > 1;

my $file = shift @ARGV or pod2usage('No import file');
my $p    = Text::RecordParser::Tab->new( $file );

# Header names: collapse each run of whitespace to an underscore and
# lowercase the result.  A lexical is used so the global $_ is left alone.
$p->header_filter( sub {
    my $name = shift;
    $name =~ s/\s+/_/g;
    return lc $name;
} );

# Field values: strip leading and trailing whitespace.  Undefined values
# are passed through untouched to avoid "uninitialized" warnings.
$p->field_filter( sub {
    my $value = shift;
    return $value if !defined $value;
    $value =~ s/^\s+|\s+$//g;
    return $value;
} );

# Splits a comma-separated (optionally quoted) field into an array
# reference of trimmed entries.  quotewords() can yield undef (after a
# trailing comma) or empty strings (between adjacent commas); such entries
# are dropped so they never reach the database as blank rows.
# NOTE(review): quotewords() returns an empty list when the text contains
# an unbalanced quote character, so such a field yields no entries at all.
my $subref = sub {
    my $raw = shift;
    return [] if !defined $raw;

    my @entries;
    for my $word ( quotewords( $COMMA, 0, $raw ) ) {
        next if !defined $word;
        ( my $entry = $word ) =~ s/^\s+|\s+$//g;
        next if $entry eq $EMPTY_STR;
        push @entries, $entry;
    }

    return \@entries;
};
$p->field_compute( 'trait_synonyms', $subref );
$p->field_compute( 'dbxrefs', $subref );
$p->bind_header;

# Open the QTL database with admin rights; stop if no handle is returned.
my $qdb = Gramene::QTL::DB->new( admin => 1 );
my $db  = $qdb->db or die $qdb->error;

# Main import loop.  Every record supplies (or creates) a trait; records
# that also carry a QTL accession go on to create or update a QTL and its
# cross-references.
#
# $line_num counts the data records returned by the parser, so the number
# shown in messages is a record number, not a physical line of the file.
my $line_num = 0;
RECORD:
while ( my $r = $p->fetchrow_hashref ) {
    $line_num++;

    # 
    # Check trait
    # 
    my $trait_symbol = $r->{'trait_symbol'} 
        or die "Line $line_num: No trait symbol\n";

    my ($Trait) = Gramene::CDBI::Qtl::QtlTrait->search(
        trait_symbol => $trait_symbol
    );
    print "$line_num: Trait '$trait_symbol'\n";

    # An unknown trait can only be created when a category is supplied.
    if ( !$Trait ) {
        my $category = $r->{'trait_category'} 
            or die "Can't create trait '$trait_symbol' w/o a category.\n";

        my $TraitCategory 
            = Gramene::CDBI::Qtl::QtlTraitCategory->find_or_create(
                { trait_category => $category }
            );
    
        $Trait = Gramene::CDBI::Qtl::QtlTrait->find_or_create({
            qtl_trait_category_id => $TraitCategory->id,
            trait_symbol          => $trait_symbol,
        });
    }

    # Gather the optional trait attributes and write them with a single
    # update instead of one update per attribute.
    my $trait_changed = 0;

    if ( my $to_accession = $r->{'to_id'} || $r->{'to_accession'} ) {
        $Trait->to_accession( $to_accession );
        $trait_changed = 1;
    }

    if ( my $trait_name = $r->{'trait_name'} ) {
        $Trait->trait_name( $trait_name );
        $trait_changed = 1;
    }

    $Trait->update if $trait_changed;

    # Blank entries (e.g. from "a,,b" or a trailing comma) are skipped so
    # no empty synonym rows are created.
    SYNONYM:
    for my $synonym ( @{ $r->{'trait_synonyms'} || [] } ) {
        next SYNONYM if !defined $synonym || $synonym eq $EMPTY_STR;

        Gramene::CDBI::Qtl::QtlTraitSynonym->find_or_create({
            qtl_trait_id  => $Trait->id,
            trait_synonym => $synonym,
        });
    }

    # Records without a QTL accession describe a trait only.
    my $qtl_acc = $r->{'qtl_accession_id'} || $r->{'feature_acc'};
    next RECORD if !$qtl_acc;
    print "$line_num: QTL '$qtl_acc'\n";

    my $species = $r->{'species'} or die "Line $line_num: No species.\n";
    my ($Species) = Gramene::CDBI::Qtl::Species->find_or_create({
        species   => $species,
    });

    my ($Qtl) = Gramene::CDBI::Qtl::Qtl->search(
        qtl_accession_id => $qtl_acc,
    );

    if ( !$Qtl ) {
        $Qtl = Gramene::CDBI::Qtl::Qtl->insert({
            qtl_accession_id => $qtl_acc,
            species_id       => $Species->id,
            qtl_trait_id     => $Trait->id,
        });
    }

    # An existing QTL is re-pointed at the species and trait of this record.
    $Qtl->species_id( $Species->id );
    $Qtl->qtl_trait_id( $Trait->id );

    # Normalize the feature coordinates: a stop that is missing or holds
    # no digit defaults to the start, and the pair is swapped when it is
    # reversed.  The comparison runs only when both values are numeric,
    # which avoids "uninitialized"/"isn't numeric" warnings and swaps
    # based on garbage input.
    # NOTE(review): only the feature_start/feature_stop columns are
    # normalized; the other headers accepted for the start and stop
    # positions are stored as given -- confirm that is intended.
    my ( $start, $stop ) = @{ $r }{ qw( feature_start feature_stop ) };

    if ( !defined $stop || $stop !~ /\d+/ ) {
        $stop = $start;
    }

    if (
        looks_like_number( $start )
        && looks_like_number( $stop )
        && $start > $stop
    ) {
        ( $start, $stop ) = ( $stop, $start );
    }

    @{ $r }{ qw( feature_start feature_stop ) } = ( $start, $stop );

    # For each QTL column take the first non-empty value found under the
    # column's own name or one of its alternate header names.
    for my $col_name ( keys %QTL_FIELD ) {
        FIELD:
        for my $fld ( $col_name, @{ $QTL_FIELD{ $col_name } } ) {
            my $val = $r->{ $fld };
            next FIELD if !defined $val || $val eq $EMPTY_STR;

            $Qtl->$col_name( $val );
            last FIELD;
        }
    }
    $Qtl->update;

    # Each dbxref is either "type: value" or a bare value, which is filed
    # under the default xref type.  Blank entries are skipped.
    DBXREF:
    for my $dbxref ( @{ $r->{'dbxrefs'} || [] } ) {
        next DBXREF if !defined $dbxref || $dbxref eq $EMPTY_STR;

        my ( $type, $value );
        if ( $dbxref =~ /([^:]+):\s*(.+)/ ) {
            ( $type, $value ) = ( $1, $2 );
        }
        else {
            $type  = $DEFAULT_XREF;
            $value = $dbxref;
        }

        for my $part ( $type, $value ) {
            $part =~ s/^\s+|\s+$//g;
        }

        my $XrefType = Gramene::CDBI::Qtl::XrefType->find_or_create({
            xref_type => $type
        });

        Gramene::CDBI::Qtl::Xref->find_or_create({
            table_name   => 'qtl',
            record_id    => $Qtl->id,
            xref_type_id => $XrefType->id,
            xref_value   => $value
        });
    }
}

print "Done, processed $line_num records.\n";

__END__

# ----------------------------------------------------------------

=head1 NAME

qtl_import.pl

=head1 SYNOPSIS

  qtl_import.pl file [options]

Options:

  --help        Show brief help and exit
  --man         Show full documentation
  --version     Show version and exit

=head1 DESCRIPTION

Imports tab-delimited data (traits or QTLs) into the Gramene QTL schema.
The file type is automatically detected from the header line.  The
column names in the header line may contain spaces and mixed case, as
spaces will be converted to underscores and letters will be lowercased
(e.g., "Feature Name" => "feature_name").

For more documentation, see "qtl-trait-import.txt" and "qtl-import.txt"
under "gramene/docs/qtl".

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.orgE<gt>.

=cut
