#!/usr/bin/perl

use strict;
use warnings;
#use Bio::GMOD::CMap;
use BerkeleyDB;
use DBI;
use File::Path;
use File::Spec::Functions;
use Getopt::Long;
use Pod::Usage;
use Readonly;

# Single-character constants shared by the rest of the script.
Readonly my $DASH      => q{-};
Readonly my $EMPTY_STR => q{};
Readonly my $NL        => qq{\n};
Readonly my $TAB       => qq{\t};

# Command line: "-o DIR" names the output directory; every remaining
# argument is a feature correspondence file to read.
my $out_dir  = $EMPTY_STR;
GetOptions(
    'o=s' => \$out_dir,
) or pod2usage(2);    # GetOptions returns false on unknown/malformed options

if ( !$out_dir ) {
    pod2usage('No out directory');
}

# Create the output directory (and any missing parents) on first use.
if ( !-d $out_dir ) {
    mkpath $out_dir;
}

if ( !@ARGV ) {
    pod2usage('No feature correspondence file(s)');
}
my @corr_files = @ARGV;

# Build a BerkeleyDB B-tree keyed by feature_acc.  Each value is the rest of
# the cmap_feature row (feature_id, map_id, feature_start, feature_stop,
# feature_type_acc) joined with tabs.
#
# NOTE(review): the cache path and the database credentials are hard-coded
# in this file; consider moving them to options or an uncommitted config.
my $cache_file = '/home/kclark/cache.db';
my $bdb = BerkeleyDB::Btree->new(
    -Filename => $cache_file,
    -Flags    => DB_CREATE,
) or die "Can't open BDB cache '$cache_file': $! $BerkeleyDB::Error\n";

my $cdb = DBI->connect(
    'dbi:mysql:cmap24', 'kclark', 'g0p3rl!', { RaiseError => 1 }
);

my $sth = $cdb->prepare(
    q[
        select feature_acc, feature_id, map_id, feature_start, 
               feature_stop, feature_type_acc
        from   cmap_feature
    ]
);
$sth->execute;

print STDERR "Loading BDB/CMap cache.\n";
while ( my @f = $sth->fetchrow_array ) {
    my $key = shift @f;

    # NULL columns arrive as undef; store them as empty fields without
    # triggering "uninitialized value" warnings in join.
    my $val = join( $TAB, map { defined $_ ? $_ : $EMPTY_STR } @f );

    # db_put returns 0 on success; a failed write would otherwise silently
    # drop the feature from every later lookup.
    my $status = $bdb->db_put( $key, $val );
    if ( $status != 0 ) {
        die "Can't store '$key' in BDB cache: $BerkeleyDB::Error\n";
    }
}

# Connect to the scratch database and rebuild the two lookup tables from
# nothing: corr_id maps a correspondence key to its generated id, and
# corr_evidence records which key/evidence combinations were already seen.
print STDERR "Initializing tmp db tables.\n";
my $db = DBI->connect(
    "dbi:mysql:kyctmp", 'kclark', 'g0p3rl!', { RaiseError => 1 }
);

my @scratch_tables = qw( corr_id corr_evidence );

# Drops come first for both tables, then the creates, in that order.
my @ddl = (
    ( map { "drop table IF EXISTS $_" } @scratch_tables ),
    q[
      create table corr_id (
        lookup varchar(100),
        corr_id integer,
        unique (lookup)
      )
    ],
    q[
      create table corr_evidence (  
        lookup varchar(100),
        unique (lookup)
      )
    ],
);

$db->do( $_ ) for @ddl;

#my $cmap = Bio::GMOD::CMap->new;
#$cmap->data_source('Build24');
#my $cdb  = $cmap->db or die $cmap->error;
#
#my $f_sth = $cdb->prepare( q[
#    select feature_id, map_id, feature_start, feature_stop, feature_type_acc
#    from   cmap_feature 
#    where  feature_acc=?
#] );

# Open the three tab-delimited output files inside $out_dir.  Each failure
# message names the path so it is clear which file could not be written.
my $fc_path = catfile( $out_dir, 'cmap_feature_correspondence.txt' );
my $cl_path = catfile( $out_dir, 'cmap_correspondence_lookup.txt' );
my $ce_path = catfile( $out_dir, 'cmap_correspondence_evidence.txt' );

open my $fc_fh, '>', $fc_path or die "Can't write '$fc_path': $!\n";
open my $cl_fh, '>', $cl_path or die "Can't write '$cl_path': $!\n";
open my $ce_fh, '>', $ce_path or die "Can't write '$ce_path': $!\n";

# Running ids for generated correspondence and evidence rows; the first row
# written for each gets id 1.
my ( $fc_id, $ce_id ) = ( 0, 0 );

my $num_corr  = 0;
my $num_files = 0;

# Prepare the scratch-table statements once rather than once per input line.
my $find_corr_sth = $db->prepare(
    'select corr_id from corr_id where lookup=?'
);
my $add_corr_sth = $db->prepare(
    'insert into corr_id values (?, ?)'
);
my $find_evidence_sth = $db->prepare(
    'select count(*) from corr_evidence where lookup=?'
);
my $add_evidence_sth = $db->prepare(
    'insert into corr_evidence values (?)'
);

# Read every correspondence file.  Each data line is
# "feature_acc1 <TAB> feature_acc2 <TAB> evidence"; the first line of each
# file is skipped as a header.
for my $corr_file ( @corr_files ) {
    $num_files++;
    print "$num_files: $corr_file\n";

    open my $input, '<', $corr_file or die "Can't read '$corr_file': $!\n";

    my $header = <$input>;    # read and discard the header line
    LINE: while ( my $line = <$input> ) {
        $num_corr++;
        chomp $line;
        my ( $facc1, $facc2, $evidence ) = split /$TAB/, $line;

        # Blank or truncated lines cannot name two features; skip them
        # instead of pushing undef through the lookups below.
        next LINE if !defined $facc1 || !defined $facc2;
        $evidence = $EMPTY_STR if !defined $evidence;

        # Order-independent key so A/B and B/A are the same correspondence.
        my $corr_key = join( $DASH, sort { $a cmp $b } $facc1, $facc2 );

        my %feature;
        my @fids;
        FEATURE: for my $facc ( $facc1, $facc2 ) {
            # db_get returns 0 only when the accession is in the cache.
            my $data;
            my $status = $bdb->db_get( $facc, $data );
            next FEATURE if $status != 0 || !defined $data;

            my ( $feature_id, $map_id, $feature_start,
               $feature_stop, $feature_type_acc ) = split /$TAB/, $data;

            next FEATURE unless $feature_id && $feature_id > 0;

            $feature{ $facc } = {
                feature_acc      => $facc,
                feature_id       => $feature_id,
                map_id           => $map_id,
                feature_start    => $feature_start,
                feature_stop     => $feature_stop,
                feature_type_acc => $feature_type_acc,
            };

            push @fids, $feature_id;
        }

        # Both features must be known, and they must be different features.
        next LINE unless scalar @fids == 2;
        next LINE if $fids[0] == $fids[1];

        # Correspondence: reuse the id if this pair was seen before,
        # otherwise allocate one and write the correspondence/lookup rows.
        my $corr_id = $db->selectrow_array(
            $find_corr_sth, {}, ( $corr_key )
        );

        if ( !$corr_id ) {
            $corr_id = ++$fc_id;
            $add_corr_sth->execute( $corr_key, $corr_id );

            # The id is written twice, then both feature ids and a literal 1.
            # NOTE(review): presumably id, accession, feature_id1,
            # feature_id2, is_enabled -- confirm against the CMap schema.
            print $fc_fh join( $TAB, $corr_id, $corr_id, @fids, '1' ), $NL;

            # Lookup: one row per direction of the pair.
            for my $pair ( [ $facc1, $facc2 ], [ $facc2, $facc1 ] ) {
                my $f1 = $feature{ $pair->[0] };
                my $f2 = $feature{ $pair->[1] };

                print $cl_fh join( $TAB,
                    $f1->{'feature_id'},
                    $f2->{'feature_id'},
                    $corr_id,
                    $f1->{'feature_start'},
                    $f2->{'feature_start'},
                    $f1->{'feature_stop'} || $EMPTY_STR,
                    $f2->{'feature_stop'} || $EMPTY_STR,
                    $f1->{'map_id'},
                    $f2->{'map_id'},
                    $f1->{'feature_type_acc'},
                    $f2->{'feature_type_acc'},
                ), $NL;
            }
        }

        # Evidence: write each (correspondence, evidence) combination once.
        my $evidence_key = join( $DASH, $corr_key, $evidence );
        my $has_evidence = $db->selectrow_array(
            $find_evidence_sth, {}, $evidence_key
        );

        if ( !$has_evidence ) {
            $ce_id++;
            print $ce_fh join( $TAB,
                $ce_id, $ce_id, $corr_id, $evidence, $EMPTY_STR, $EMPTY_STR
            ), $NL;

            $add_evidence_sth->execute( $evidence_key );
        }
    }

    close $input or die "Can't close '$corr_file': $!\n";
}

# Buffered write errors (e.g. a full disk) only surface at close, so check
# each output handle before reporting success.
for my $out (
    [ $fc_fh, 'cmap_feature_correspondence.txt' ],
    [ $cl_fh, 'cmap_correspondence_lookup.txt' ],
    [ $ce_fh, 'cmap_correspondence_evidence.txt' ],
) {
    my ( $fh, $name ) = @{ $out };
    close $fh or die "Can't close '$name': $!\n";
}

printf "Done, processed %s record%s in %s file%s.\n",
    $num_corr,
    $num_corr == 1 ? $EMPTY_STR : 's',
    $num_files,
    $num_files == 1 ? $EMPTY_STR : 's',
;

__END__

=head1 NAME

mk-mysql-insert-cmap-correspondences.pl -
    makes a MySQL tab-delimited insert file for CMap correspondences

=head1 SYNOPSIS

  ./mk-mysql-insert-cmap-correspondences.pl \
    -o /tmp/cmap-corr/ feature_correspondences.tab [corr.tab ...]

Options:

  -o The directory to write the output files

=head1 DESCRIPTION

This file takes the standard CMap feature correspondence loading file(s)
and makes three tab-delimited files for importing the data into 

 - cmap_feature_correspondence
 - cmap_correspondence_lookup
 - cmap_correspondence_evidence

The CMap import process simply takes too long, so I had to write this.

The problem is that this assumes an empty database, so you have to 
run it on all the correspondences and replace the data entirely.  The
upside is that this takes a fraction of the time that importing them
through CMap takes.

This script requires a temp db to write the feature correspondence ids.
I tried keeping these in a hash, but I kept running out of memory.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2006 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
