#!/usr/local/bin/perl -w


=head1 NAME

import_species.pl


=head1 DESCRIPTION

Imports tab-delimited data (Species) into the Gramene Mutant schema.
The file type is automatically detected from the header line.  The
column names in the header line may contain spaces and mixed case, as
spaces will be converted to underscores and letters will be lowercased
(e.g., "Common Name" => "common_name").


* The required field in the header line is:
  common_name

* The optional fields in the header line are:
  ncbi_taxa_id, gramene_tax_id, species, genus


=head1 SYNOPSIS

import_species.pl [options] <species file>

 Options:
    --help              help message
    --man               full documentation
                                                                                


=head1 OPTIONS
                                                                                
=over 4
                                                                                
=item B<--help>
                                                                                
print a help message and exit
                                                                                
=item B<--man>
                                                                                
print documentation and exit
                                                                                
                                                                                
=back
                                                                                
=head1 ARGUMENTS

species file                                                                                

=cut




use lib '/usr/local/gramene/lib/perl/';

use strict; 

use Text::RecordParser; 
use File::Temp qw/ tempfile/;

use Pod::Usage;
use Getopt::Long;
 
use Gramene::DB; 


# Main script: parse the command line, pre-process the input file, bind the
# header row, connect to the database and hand over to import_species().

# Turn off runtime warnings to silence the empty-string warnings raised in
# Text::RecordParser.
local $^W = 0;


{   # Argument processing: --help / --man, plus one mandatory file argument.
    my $help = 0;
    my $man  = 0;
    GetOptions( "help|?" => \$help, "man" => \$man )
        or pod2usage(2);
    pod2usage( -verbose => 2 ) if $man;
    pod2usage(1) if $help;
    pod2usage('No import file') if ( scalar(@ARGV) < 1 );
}


my $file = $ARGV[0];


# The Text::ParseWords module used in Text::RecordParser removes quotes, so
# the quotes are backslash-escaped before parsing.  The processed file is a
# temporary file; it is removed when the program exits.
my $processed_file = process_quotes($file);


my $parser = Text::RecordParser->new(
    field_separator => qr/\t/,            # fields are separated by tabs
    filename        => $processed_file,
);


# Header filter: convert whitespace to "_" and lowercase the column name.
$parser->header_filter( sub { $_ = shift; s/\s+/_/g; lc $_ } );


# Take the fields from the next row under the cursor (the header line) and
# use them as the field names.
$parser->bind_header;


# The bound field names, as a lookup hash.
my %fields = map { $_, 1 } $parser->field_list;


my $db;
eval {
    # Guard against a constructor that returns undef instead of dying:
    # without this check the hash assignment below would autovivify $db
    # into a plain hash ref and the failed connection would go unnoticed
    # here.
    $db = Gramene::DB->new('genes_edit')
        or die "no database handle returned\n";
    $db->{AutoCommit} = 0;    # set transaction control
};

if ($@) {
    die "DB connection failed: $@\n";
}


# NOTE: a header with only a "species" column is accepted here, but
# import_species() skips every record that has no common_name.
if ( $fields{'common_name'} || $fields{'species'} ) {
    import_species( $parser, $db );
}
else {
    die "Can't determine file import type!\n";
}



# import_species($parser, $db)
#
# Reads every remaining record from $parser and saves it to the gene_species
# table through $db.
#
#   $parser - parser object with the header already bound; the methods used
#             here are field_list, field_filter and fetchrow_hashref.
#   $db     - database handle; the methods used here are selectrow_array,
#             do, commit and rollback.
#
# For each record:
#   * leading and trailing whitespace is trimmed from every field;
#   * a record without a (true) common_name is skipped with a warning;
#   * an existing row is looked up by upper-cased common_name; if found it
#     is updated, otherwise a new row is inserted with an id from next_id();
#   * missing or undefined field values are stored as the empty string.
#
# Header fields outside the accepted list only trigger a warning.  All
# changes are committed after the last record.  If anything inside the loop
# dies, a warning is printed and the transaction is rolled back.
# NOTE(review): database errors only reach that handler if $db raises
# exceptions on failure - confirm the handle's error mode.
sub import_species {

    my ( $parser, $db ) = @_;
    my @fields = qw[ncbi_taxa_id gramene_tax_id common_name genus species];

    my %acceptable = map { $_, 1 } @fields;

    my ( $no_imported, $no_updated, $no_processed ) = ( 0, 0, 0 );

    print "Importing Gene species\n\n";

    for my $field ( $parser->field_list ) {
        next if $acceptable{$field};
        warn "Unknown field: $field!\n";
    }

    # Trim surrounding whitespace from every field value.
    $parser->field_filter( sub { $_ = shift; s/^\s+|\s+$//g; $_ } );

    eval {
        while ( my $record = $parser->fetchrow_hashref ) {
            $no_processed++;

            unless ( $record->{'common_name'} ) {
                warn "Skipping Line $no_processed, no common name or species name !\n";
                next;
            }

            # Case-insensitive lookup of an existing species row.
            my ($species_id) = $db->selectrow_array(
                q[
                            SELECT species_id
                            FROM   gene_species
                            WHERE  UPPER(common_name) =? 
                          ],
                {},
                uc( $record->{'common_name'} )
            );

            # Column values in @fields order; absent values become ''.
            my @values =
                map { defined $record->{$_} ? $record->{$_} : '' } @fields;

            my $verb;
            if ($species_id) {

                # The SET clause lists the columns in @fields order.
                $db->do(
                    q[
                         UPDATE gene_species
                         SET    ncbi_taxa_id =?,
				gramene_taxa_id = ?,
                                common_name=?,
                                genus=?,
                                species=?
                          WHERE  species_id=?
		       ],
                    {}, ( @values, $species_id )
                );

                $verb = 'Updated';
                $no_updated++;
            }
            else {
                $species_id = next_id( $db, 'gene_species', 'species_id' );

                # Columns are named explicitly (same names as in the UPDATE
                # above) so the insert does not depend on the physical
                # column order of the table.
                $db->do(
                    q[
                        INSERT INTO gene_species
                               (species_id, ncbi_taxa_id, gramene_taxa_id,
                                common_name, genus, species)
                        VALUES (?,?,?,?,?,?)
                    ],
                    {}, ( $species_id, @values )
                );

                $verb = 'Inserted';
                $no_imported++;
            }

            print "$verb species [", join( ", ", $species_id, @values ),
                "]\n\n";
        }
        $db->commit;

        print "Done.\nProcessed $no_processed records, ",
            "imported $no_imported, updated $no_updated.\n";
    };

    # Capture $@ immediately; later calls could overwrite it.
    my $error = $@;
    if ($error) {
        warn "Unable to save to database: $error\n";
        $db->rollback();
    }

}



# next_id($db, $table_name, $field_name)
#
# Returns the next id to use for $table_name: the current maximum of
# $field_name plus one, or 1 when the query yields no value (for example an
# empty table, where max() is NULL).
#
# $table_name and $field_name are interpolated directly into the SQL text,
# so they must be trusted identifiers, never user input.
sub next_id {
    my ( $db, $table_name, $field_name ) = @_;

    # List-context call: take the first (and only) column of the row.
    my ($max_id) =
        $db->selectrow_array("select max($field_name) from $table_name");

    # Treat "no value" as 0 explicitly rather than doing arithmetic on undef.
    return ( defined $max_id ? $max_id : 0 ) + 1;
}


# process_quotes($file)
#
# Copies $file line by line into a new temporary file, putting a backslash
# in front of every single quote and every double quote, and returns the
# name of that temporary file.
#
# The temporary file is created by File::Temp's tempfile() from the
# template "tmpfileXXXXX" with UNLINK => 1.
#
# Dies if $file cannot be opened or if writing or closing the temporary
# file fails.
sub process_quotes {

    my $file = shift;
    my ( $out_fh, $temp_file ) = tempfile( "tmpfileXXXXX", UNLINK => 1 );

    # Three-argument open with a lexical handle: the file name is taken
    # literally, so leading/trailing whitespace or mode characters in it
    # cannot change how the file is opened.
    open( my $in_fh, '<', $file ) or die "can't open $file :$!";

    # A lexical line variable keeps the caller's $_ untouched.
    while ( my $line = <$in_fh> ) {
        $line =~ s/'/\\'/g;
        $line =~ s/"/\\"/g;
        print {$out_fh} $line or die "can't write to $temp_file :$!";
    }
    close($in_fh);

    # Buffered write errors only surface at close, so check it.
    close($out_fh) or die "can't close $temp_file :$!";

    return $temp_file;

}
