#!/usr/local/bin/perl

=head1 NAME

retrieve-datasets.pl - create gramene track sequence datasets from markers database for mapping

=head1 SYNOPSIS

  retrieve-datasets.pl [options]
  example: to reload the mappings for Millet_est
           load_mapping_2ensembl.pl -logic_name Millet_est -species rice -replace -mapset_acc 'gt0506' -no_prompt

Options:

  -h|--help         Show brief help and exit.
  -v|--verbose      Talk about what's happening.
  -c|--config_file  Path to DB config file, def $ENV{GrameneConfPath}.
  --dir             Directory to write output files into. Def $PWD
  --q               the query SQL
  --logic_name      track logic name
  --registry_file   contains the information of the ensembl database to load the mappings to
  --species         the species of the ensembl database
  --replace         replace the old mappings with these new ones in ensembl database
  --mapset_acc      the cmap map set accession of the target genomes these mapping are done against in the markers db
  --no_prompt       no prompt, for running in background
  --source          the source of the markers that needs reload, for example entrez

=head1 DESCRIPTION

Create fasta sequence files for mapping, each file corresponds to a distinct
track on the gramene genome browser 

The program retrieves the requested sequences using the markers database
query formulated for each gramene track and outputs the sequences into a
fasta file using the track's logic name as the file name. It also produces
a log file reporting how many sequences were retrieved for each dataset.

Format of config_file;

  <markers_admin>
      db_dsn   dbi:mysql:{dbname}:{myhost}:{myport}
      db_user  marker_rw_user
      db_pass  secret
  </markers_admin>


Everything printed to STDERR by the program will be logged to a
file. Lines start with '[INFO]', '[WARN]' or '[*DIE]' for ease of
grepping. The output dir can be specified as program
arguments. 


=head1 SEE ALSO

Gramene::Marker::DB, Text::RecordParser.

=head1 AUTHOR

Sharon Wei E<lt>weix@cshl.eduE<gt>.


=cut



# ----------------------------------------------------

# Give the Gramene install-location variables a default when the caller has
# left them unset or empty.  This is a BEGIN block so the values are already
# in place when the "use lib" lines that read them are compiled.
BEGIN {
    for my $env_name ( 'GrameneDir', 'GrameneEnsemblDir' ) {
        $ENV{$env_name} = '/usr/local/apache/' unless $ENV{$env_name};
    }
}

# The first shall be last...
use lib map { $ENV{'GrameneDir'}."/$_" } qw ( lib/perl );

use lib map { $ENV{'GrameneEnsemblDir'}."/$_" } 
        qw ( bioperl-live modules ensembl/modules conf
	     ensembl-external/modules ensembl-draw/modules
	     ensembl-compara/modules );

use lib map { $ENV{'GrameneEnsemblDir'}."/ensembl-live/$_" } 
        qw ( bioperl-live modules ensembl/modules ensembl-external/modules
             ensembl-draw/modules ensembl-compara/modules );

use lib "/home/weix/gramene/lib/perl";

use strict;
use Getopt::Long;
use Pod::Usage;


use Carp;
use IO::Prompt;
use FindBin qw( $Bin );
use File::Basename qw( dirname );

use Bio::SeqIO;
use Data::Dumper qw(Dumper);
use Date::Calc;
use List::MoreUtils qw(firstval);
use Text::RecordParser;
use Readonly;

use Gramene::Marker::DB;
use Gramene::Ontology::OntologyDB;
use Gramene::Config;
use DBI;
use DBI qw(:sql_types);

# Import EnsEMBL modules
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::DnaDnaAlignFeature;
use Bio::EnsEMBL::FeaturePair;
use Bio::EnsEMBL::Analysis;

#print (join "\n",   @INC);
#exit;
# Turn on autoflush for the currently selected output handle.
$|= 1;

# Coordinate system name handed to the slice adaptor when regions are fetched.
Readonly my $this_coord_system => 'chromosome';
# Strand given to every feature that is stored.
Readonly my $seq_region_strand => 1;
# Marker sources; entries of the track table are tagged with $source[0].
Readonly my @source => qw( entrez ncbitrace unigene tigr_gi pgdb_tug);


# Built-in track table: Ensembl logic name => { query, source }.
#
#   query  - markers-db SQL used to fetch the mappings for the track.  The
#            loader reads map_id, start, end, marker_start, marker_end,
#            marker_strand, display_synonym_id and cigar_line from each row,
#            so a usable query has to return the mapping columns (p.*).
#   source - marker source the track belongs to; --source selects every
#            entry whose source matches.  Entries without it can only be
#            requested one at a time through --logic_name.
Readonly my %LOGICNAME_TO_QUERY => (

    #------------------------------------------------------------------
    # ESTs
    Rice_est => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species like 'Oryza %' ",
        source => $source[0],
    },

    Barley_est => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species like 'Hordeum %' ",
        source => $source[0],
    },

    Maize_est => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species like 'Zea %' ",
        source => $source[0],
    },

    Millet_est => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species like 'Pennisetum %' ",
        source => $source[0],
    },

    Sorghum_est => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species like 'Sorghum %' ",
        source => $source[0],
    },

    Sugarcane_est => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species like 'Saccharum%' ",
        source => $source[0],
    },

    Wheat_est => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species like 'Triticum %' ",
        source => $source[0],
    },

    'Other-poaceae_est' => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'EST' and p.start != 0 and s.species not like 'Triticum %' and s.species not like 'Saccharum%' and s.species not like 'Sorghum %' and s.species not like 'Zea %' and s.species not like 'Hordeum %' and s.species not like 'Oryza %' and s.species not like 'Pennisetum %' ",
        source => $source[0],
    },

    #------------------------------------------------------------------
    # mRNAs
    'Rice_mRNA' => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'mRNA' and p.start != 0 and s.species like 'Oryza %' ",
        source => $source[0],
    },

    'Maize_mRNA' => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'mRNA' and p.start != 0 and s.species like 'Zea %' ",
        source => $source[0],
    },

    'Other-poaceae_mRNA' => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'mRNA' and p.start != 0 and s.species not like 'Oryza %' and s.species not like 'Zea %' ",
        source => $source[0],
    },

    'Rice_cDNA_KOME' => {
        query  => "select p.* from marker m, species s, marker_type t, library l, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and m.library_id=l.library_id and t.marker_type = 'mRNA' and p.start != 0 and s.species like 'Oryza %' and library_name like 'KOME%' ",
        source => $source[0],
    },

# Rice_CDS replaced by Rice_mRNA
#    Rice_CDS  => {
#		  query => "select m.marker_id from marker m, species s, marker_type t where m.source_species_id = s.species_id  and m.marker_type_id = t.marker_type_id and t.marker_type = 'mRNA' and s.species like 'Oryza %' ",
#		 },

    #------------------------------------------------------------------
    # BAC ends (GSS)
    Maize_BACend => {
        query  => "select p.* from marker m, species s , marker_type t, mapping p  where m.source_species_id = s.species_id  and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and s.species like 'Zea %' ",
        source => $source[0],
    },

    RiceAlta_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza alta' ",
        source => $source[0],
    },

    RiceAustraliensis_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza australiensis' ",
        source => $source[0],
    },

    RiceBrachyantha_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza brachyantha' ",
        source => $source[0],
    },

    RiceCoarctata_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza coarctata' ",
        source => $source[0],
    },

    RiceGlaberrima_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza glaberrima' ",
        source => $source[0],
    },

    RiceGranulata_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza granulata' ",
        source => $source[0],
    },

    RiceJaponica_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species like 'oryza sativa%' ",
        source => $source[0],
    },

    RiceMinuta_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza minuta' ",
        source => $source[0],
    },

    RiceNivara_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza nivara' ",
        source => $source[0],
    },

    RicePunctata_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza punctata' ",
        source => $source[0],
    },

    RiceRufipogon_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza rufipogon' ",
        source => $source[0],
    },

    RiceOfficinalis_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza officinalis' ",
        source => $source[0],
    },

    # Selects the mapping columns (p.*) like the other OMAP tracks; with only
    # m.marker_id the rows carry no map_id and every one of them is skipped.
    RiceRidleyi_BACend_OMAP => {
        query  => "select p.* from marker m, species s, marker_type t, mapping p where m.source_species_id = s.species_id and m.marker_type_id = t.marker_type_id and m.marker_id = p.marker_id and t.marker_type = 'GSS' and p.start != 0 and species = 'oryza ridleyi' ",
        source => $source[0],
    },

    #------------------------------------------------------------------
    # The following queries haven't been updated yet: they still return
    # marker_id only and have no source, so they are never picked up by
    # --source and yield no loadable mapping rows as written.
    #
    Rice_FstTransposon => { #4183 sequences
        query => "select m.marker_id
                                from marker m, marker_details_gss md, library l
                                where m.marker_id=md.marker_id
                                and m.library_id=l.library_id
                                and l.library_name IN (
                                'UCD RdSpm Rice Insertions',
                                'UCD RGT Rice Insertions',
                                'UCD RDs Rice Insertions',
                                'UCD RGdSpm Rice Insertions' )",
    },

    Rice_T_DNA_Insert => { #14533 sequences
        query => "select m.marker_id
                                 from marker m, marker_details_gss md, library l
                                 where m.marker_id=md.marker_id
                                 and m.library_id=l.library_id
                                 and l.library_name IN (
                                 'AS_TRIM_TDNA_B1',
                                 'Flanking Sequence Tag of Oryza sativa T-DNA insertion lines' )",
    },

    Rice_tos17_insert => { #32127 sequences,
        query => "select m.marker_id
                                from marker m, marker_details_gss md, library l
                                where m.marker_id=md.marker_id
                                and m.library_id=l.library_id
                                and l.library_name IN(
                                'PCR product directly amplified from rice genomic DNA' )",
    },

    # TIGR gene indices
    Barley_GI => {
        query => " select m.marker_id from marker m, analysis a, species s, marker_details_est_cluster d where m.analysis_id = a.analysis_id  and m.source_species_id = s.species_id and m.marker_id = d.marker_id and a.analysis_name = 'tigr_gene_index' and s.species = 'Hordeum vulgare' and d.version = 'HVGI release 9' ",
        #marker_type => '',
    },

    Maize_GI => {
        query => " select m.marker_id from marker m, analysis a, species s, marker_details_est_cluster d where m.analysis_id = a.analysis_id  and m.source_species_id = s.species_id and m.marker_id = d.marker_id and a.analysis_name = 'tigr_gene_index' and s.species = 'Zea mays' and d.version = 'ZMGI release 15' ",
        #marker_type => '',
    },

    Rice_GI => {
        query => " select m.marker_id from marker m, analysis a, species s, marker_details_est_cluster d where m.analysis_id = a.analysis_id  and m.source_species_id = s.species_id and m.marker_id = d.marker_id and a.analysis_name = 'tigr_gene_index' and s.species = 'Oryza sativa' and d.version = 'OGI release 16' ",
        #marker_type => '',
    },

    Sorghum_GI => {
        query => " select m.marker_id from marker m, analysis a, species s, marker_details_est_cluster d where m.analysis_id = a.analysis_id  and m.source_species_id = s.species_id and m.marker_id = d.marker_id and a.analysis_name = 'tigr_gene_index' and s.species = 'Sorghum bicolor' and d.version = 'SBGI release 1-1' ",
        #marker_type => '',
    },

    Wheat_GI => {
        query => " select m.marker_id from marker m, analysis a, species s, marker_details_est_cluster d where m.analysis_id = a.analysis_id  and m.source_species_id = s.species_id and m.marker_id = d.marker_id and a.analysis_name = 'tigr_gene_index' and s.species = 'Triticum aestivum' and d.version = 'TAGI release 10' ",
        #marker_type => '',
    },

    # Tracks selected purely by analysis name
    Barley_ESTCluster_PlantGDB => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Barley_ESTCluster_PlantGDB' ",
    },

    Maize_ESTCluster_PlantGDB => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Maize_ESTCluster_PlantGDB' ",
    },

    Rice_ESTCluster_PlantGDB => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Rice_ESTCluster_PlantGDB' ",
    },

    Sorghum_ESTCluster_PlantGDB => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Sorghum_ESTCluster_PlantGDB' ",
    },

    Wheat_ESTCluster_PlantGDB => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Wheat_ESTCluster_PlantGDB' ",
    },

    Sorghum_ESTCluster3P_LGBPratt => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Sorghum_ESTCluster3P_LGBPratt' ",
    },

    Rice_ind_cluster => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Rice_ind_cluster' ",
    },

    Rice_ind_est => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Rice_ind_est' ",
    },

    Maize_hi_cot_TIGR => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Maize_hi_cot_TIGR' ",
    },

    Maize_meth_filt_hi_cot_cluster => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Maize_meth_filt_hi_cot_cluster' ",
    },

    Maize_MAGI_ISU => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Maize_MAGI_ISU' ",
    },

    Ryegrass_Sequence => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Ryegrass_Sequence' ",
    },

    Ryegrass_Assembly => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Ryegrass_Assembly' ",
    },

    'Sorghum_gss-read_Klein' => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Sorghum_gss-read_Klein' ",
    },

    Sorghum_orion => {
        query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Sorghum_orion' ",
    },

# replaced by Other-poaceae_mRNA
#     Sorghum_CDNA => {
#		      query => "select m.marker_id from marker m, analysis a where m.analysis_id = a.analysis_id and a.analysis_name = 'Sorghum_CDNA' ",
#			      },
);



# Leave with status 0 when the run is interrupted by SIGINT.
$SIG{'INT'} = sub { exit(0) };

# Command-line settings; the POD at the top of the file describes each option.
my ( $help, $v, $conffile, $dir, $query, $logic_name, $registry_file, $species, $replace, $mapset_acc, $no_prompt, $src);

# Stop with the usage message on an unknown or malformed option.  GetOptions
# only warns and returns false in that case, so without this check a mistyped
# switch (for example --replace) would simply be dropped and the run would
# carry on in a mode the caller did not ask for.
GetOptions(
    'help'            => \$help,
    'config_file:s'   => \$conffile,
    'registry_file:s' => \$registry_file,
    'species:s'       => \$species,
    'logic_name:s'    => \$logic_name,
    'q:s'             => \$query,
    'verbose'         => \$v,
    'replace'         => \$replace,
    'dir:s'           => \$dir,
    'mapset_acc:s'    => \$mapset_acc,
    'no_prompt'       => \$no_prompt,
    'source:s'        => \$src,
#    'analysis:s'      => \@analyses,
) or pod2usage(2);

# --help prints the full manual page and exits.
pod2usage(-verbose => 2) if $help;

#----
# Validate params
# A config file named on the command line has to exist, be readable, be a
# plain file and be non-empty, checked in that order.  The first failing
# check is reported and pod2usage ends the run; a file that passes all four
# is published through $ENV{GrameneConfPath}.
if ( defined $conffile ) {

  my $problem =
      !( -e $conffile ) ? "File $conffile does not exist"
    : !( -r $conffile ) ? "Cannot read $conffile"
    : !( -f $conffile ) ? "File $conffile is not plain-text"
    : !( -s $conffile ) ? "File $conffile is empty"
    :                     undef;

  if ( defined $problem ) {
    warn( "\n[*DIE] $problem\n\n" );
    pod2usage;
  }

  $ENV{GrameneConfPath} = $conffile ;
}

# Default registry file sits in the conf directory of the Ensembl install.
$registry_file    ||= $ENV{GrameneEnsemblDir}.'/conf/ensembl.registry';

# The species picks the Ensembl core database the mappings are loaded into.
unless($species ){
  warn( "\n[*DIE] Need species of the ensembl database\n\n" );
  pod2usage;
}

#----
# output dir
# PWD is set by the shell and can be missing when the script is started by
# something else; fall back to the current directory so the log does not end
# up as "/<logfile>" in the filesystem root.
$dir ||= $ENV{PWD} || '.';

#---
# logfile: <yyyymmdd>.<pid>.<script name without extension>.log inside $dir
my( $file ) = ( $0 =~ m/([^\/]+)$/ );
$file =~ s/\.\w+$//;
my $date = sprintf('%4.4i%2.2i%2.2i',Date::Calc::Today());
my $logfile = join( ".", $date, $$, $file, 'log' );
$logfile    = "$dir/$logfile";

# Log to file.  Three-argument open keeps unusual characters in the path from
# being read as part of the open mode.  The LOG bareword handle is kept
# because the rest of the script prints to it by name.  From here on anything
# written to STDERR (warn/die included) lands in the log file.
open( LOG, '>', $logfile )
  or die( "Cannot open log file $logfile for writing: $!\n" );
open( STDERR, '>&', \*LOG )
  or die( "Cannot redirect STDERR to $logfile: $!\n" );

my %logic2query; #Repo for all the tracks need loading

if( $src ){
  # --source mode: pick every built-in track tagged with that source.
  # Entries without a source tag are left out explicitly.
  $src = lc ($src);
  %logic2query = map { $_ => $LOGICNAME_TO_QUERY{$_}->{query} }
    grep { defined $LOGICNAME_TO_QUERY{$_}->{source}
             && $LOGICNAME_TO_QUERY{$_}->{source} eq $src }
      keys %LOGICNAME_TO_QUERY;

  my $track_cnt = scalar keys %logic2query;
  print LOG "There are $track_cnt tracks needs update\n"
}else{
  # Single-track mode: needs a logic name plus either a built-in query for
  # it or one supplied with --q (which takes precedence).

  unless($logic_name){
    warn( "\n[*DIE] Need logic name\n\n" );
    pod2usage;
  }

  unless($LOGICNAME_TO_QUERY{$logic_name} || $query){
    warn( "\n[*DIE] Need query to get mappings for $logic_name\n\n" );
    pod2usage;
  }

  $logic2query{$logic_name} = $query || $LOGICNAME_TO_QUERY{$logic_name}->{query};

  # Show the query that will actually run.  $query is empty whenever the
  # built-in query is used, so printing it would leave the user confirming
  # (or the log recording) a blank query.
  my $effective_query = $logic2query{$logic_name};
  my $shown_conffile  = defined $conffile   ? $conffile   : '';
  my $shown_mapset    = defined $mapset_acc ? $mapset_acc : '';

  my $summary = join( "\n",
    "Using the following parameters and markers query to transfer dna_align_feature $logic_name into gramene ensembl $species database? ",
    "config file = $shown_conffile (default /usr/local/gramene/conf/gramene.conf)",
    "registry file = $registry_file",
    "output dir  = $dir",
    "log file    = $logfile",
    "cmap map set accession = $shown_mapset",
    "ensembl db coord system = $this_coord_system",
    "markers db query = $effective_query",
    '',
  );

  if($no_prompt){
    # Unattended run: just record the settings.
    print LOG $summary;
  }
  else{
    # Interactive run: ask for a yes/no confirmation before touching any db.
    my $run = prompt -yn, $summary . "[y/n]\n  ";

    exit unless ( $run );
  }
}



# preparations at Markers db

# The map set accession identifies the target genome in the markers db.
# Check it before it is used as a search criterion, so that a forgotten
# -mapset_acc is reported as such instead of as a puzzling map set count.
unless( defined $mapset_acc && length $mapset_acc ){
    die "\n[*DIE] Need -mapset_acc, the cmap map set accession of the target genome\n\n";
}

my $MDB = Gramene::Marker::DB->new ||
    die "\n[*DIE] " . Gramene::Marker::DB->error . "\n\n";

# Exactly one map set must carry the accession.
my @map_set_objs = $MDB->search_MapSet(cmap_map_set_accession => $mapset_acc);
my $map_set_cnt  = scalar @map_set_objs;

if( $map_set_cnt > 1 ){
    die "\n[*DIE] There are more than 1 map sets with cmap_map_set_accession = $mapset_acc\n\n";
}
if( $map_set_cnt < 1 ){
    die "\n[*DIE] There are no map sets with cmap_map_set_accession = $mapset_acc\n\n";
}

my $map_set_id = $map_set_objs[0]->map_set_id();

# All maps (chromosomes etc.) belonging to that map set.
my @maps = $MDB->search_Map(map_set_id => $map_set_id);

# Turn a markers-db map name into the Ensembl chromosome name by removing the
# first "chr." (any case) together with the whitespace that follows it,
# e.g. "Chr. 10" -> "10".  A name without that prefix is returned unchanged.
#
# The work is done on a lexical copy rather than on $_: this helper is called
# from inside map blocks, where $_ is an alias for the caller's list element,
# and assigning to $_ there would overwrite that element.
sub get_ensembl_chr {
    my $chr_name = shift;
    $chr_name =~ s/chr\.\s+//i;
    return $chr_name;
}

# map_id => Ensembl chromosome name for every map of the selected map set,
# leaving out maps whose name contains "NONE" (any case).
#
# A named loop variable is used instead of map's $_: inside a map block $_ is
# an alias for the current @maps element, so a callee that assigns to $_
# would replace the map object in @maps with whatever it assigned.
my %map_hash;
for my $map_obj ( grep { $_->map_name() !~ /NONE/i } @maps ) {
    $map_hash{ $map_obj->map_id() } = get_ensembl_chr( $map_obj->map_name() );
}
                       


#preparations in the ensembl database

# Read the registry, then ask it for the core database of the requested
# species.  Without one there is nowhere to load into: say so and hand over
# to pod2usage.
Bio::EnsEMBL::Registry->load_all( $registry_file );
my $ENS_DBA = Bio::EnsEMBL::Registry->get_DBAdaptor( $species, 'core' );
unless( $ENS_DBA ){
  warn( "No core DB for $species set in $registry_file\n" );
  pod2usage(1);
}

# Adaptors used by the loading code: region lookup, analysis lookup and
# feature storage.
my $slice_adaptor    = $ENS_DBA->get_adaptor( 'Slice' );
my $analysis_adaptor = $ENS_DBA->get_adaptor( 'Analysis' );
my $feature_adaptor  = $ENS_DBA->get_adaptor( 'DnaAlignFeature' );


#unless ($coord_system) {
#    my $coord_system_adaptor=  $ENS_DBA->get_adaptor('CoordSystem');
#    $coord_system=$coord_system_adaptor->fetch_sequence_level->name;
#    warn("$coord_system coordinates\n");
#}


# get mappings from markers db
# result in @mappings_2load;

# Logic names whose old features were already deleted during this run.
my %replaced;

# Columns a mapping row must hold a true value for before it is loaded.
my @essentail_fields = (
  'start',
  'end',
  'marker_start',
  'marker_end',
  'marker_strand',
  'display_synonym_id',
  'cigar_line',
);

# Database handle of the markers db, used to run the track queries.
my $dbh = $MDB->db();

# Load every selected track in turn.
foreach my $track ( keys %logic2query ) {
  my $track_query = $logic2query{$track};
  load_one_track( $track, $track_query );
}

#======================================================================
# Look up the Analysis object for a logic name, building a new one when the
# adaptor finds none.
#
# Arguments: the logic name, then the AnalysisAdaptor to search with; dies
#            when either is missing or false.
# Returns:   the Analysis returned by the adaptor, or else a fresh
#            Bio::EnsEMBL::Analysis that carries only the logic name.  The
#            fresh object is not stored by this sub; a note about its
#            creation is written with warn.
sub fetch_analysis{
  my $logic_name = shift;
  $logic_name or die("\n[*DIE] Need a logic_name to fetch analysis\n\n" );

  my $adaptor = shift;
  $adaptor or die ("\n[*DIE] Need a AnalysisAdaptor to fetch analysis\n\n");

  # Already known to the database: hand it back untouched.
  my $existing = $adaptor->fetch_by_logic_name($logic_name);
  return $existing if $existing;

  # Unknown logic name: create the object from scratch.
  my $created = Bio::EnsEMBL::Analysis->new(-logic_name=>$logic_name);
  warn("created analysis for $logic_name.\n");
  return $created;
}


#==================================================================
# Load mapping for one ensembl analysis/LOGIC_NAME
#
# Arguments:
#   $logic_name - logic name of the analysis the features are stored under
#   $query      - markers-db SQL that returns the mapping rows for the track
#
# File-level state used: $dbh, $MDB, %map_hash, @essentail_fields, %replaced,
# $replace, $v, $this_coord_system, $seq_region_strand, the slice/analysis/
# feature adaptors and the LOG handle.
#
# Steps:
#   1. run the query and keep the rows whose map_id is in %map_hash,
#      replacing map_id by the chromosome name and display_synonym_id by the
#      marker name; rows lacking a marker name or an essential field are
#      counted as errors and skipped
#   2. with --replace, delete the existing dna_align_feature rows of the
#      analysis (once per logic name per run)
#   3. store one DnaDnaAlignFeature per remaining row
#   4. print the totals to LOG
#
# Returns nothing.  Comes back early, after a message on STDERR, when the
# query cannot be prepared or executed; dies when the delete fails.
sub load_one_track{

  my $logic_name = shift;
  my $query      = shift;

  my ( $num_2load, $num_loaded, $num_err ) = ( 0, 0, 0 );
  my @mappings_2load;

  my $sth = $dbh->prepare($query) ||
    do { print STDERR "\n[*ERR] Cannot prepare query for $logic_name\n$query\n\n"; return;};

  $sth->execute() ||
    do { print STDERR "\n[*ERR] Cannot execute query for $logic_name\n$query\n\n"; return;};


  # screen out the mappings to other maps
  MAPPING: while (my $a_mapping_hashref=$sth->fetchrow_hashref){

    my $mapping_map_id = $a_mapping_hashref->{map_id};
    my $map_name       = $map_hash{$mapping_map_id};

    #print "mapping_map_id=$mapping_map_id\tmap_name=$map_name\n";
    next MAPPING unless $map_name;
    ++$num_2load;

    # replace map_id value with map_name which will be used by ensembl loading code
    $a_mapping_hashref->{map_id} = $map_name;

    # replace display_synonym_id with real marker name; a synonym that
    # cannot be retrieved is handled like one without a name instead of
    # failing on a method call on an undefined value
    my $display_synonym_obj = $MDB->retrieve_MarkerSynonym($a_mapping_hashref->{display_synonym_id});
    my $display_synonym     = $display_synonym_obj ? $display_synonym_obj->marker_name() : undef;
    unless($display_synonym){
      print STDERR "\n[*ERR] No display_synonym for mapping $a_mapping_hashref->{mapping_id}, marker $a_mapping_hashref->{marker_id}, display_synonym_id $a_mapping_hashref->{display_synonym_id}\n\n";
      ++$num_err;
      next MAPPING;
    }
    $a_mapping_hashref->{display_synonym_id} = $display_synonym;


    # Create a cigar_line if no value in it: one ungapped match covering the
    # whole mapped span.  The span is |end - start| + 1 bases; the +1 has to
    # stay outside abs() or the length comes out two short when end < start.
    my $cigar_line = $a_mapping_hashref->{'cigar_line'};
    unless($cigar_line){
      print STDERR "\n[*ERR] No cigar_line for mapping $a_mapping_hashref->{mapping_id}, marker $a_mapping_hashref->{marker_id}, display_synonym_id $a_mapping_hashref->{display_synonym_id}, create one\n\n";
      $a_mapping_hashref->{'cigar_line'} =
        ( abs($a_mapping_hashref->{end} - $a_mapping_hashref->{start}) + 1 ) . 'M';
    }

    # every essential column must hold a true value
    for my $essentail_field(@essentail_fields){

      unless ($a_mapping_hashref->{$essentail_field}){
        print STDERR "\n[*ERR] No $essentail_field for mapping $a_mapping_hashref->{mapping_id}, marker $a_mapping_hashref->{marker_id}, display_synonym_id $a_mapping_hashref->{display_synonym_id}, skip\n\n";
        ++$num_err;
        next MAPPING;
      }
    }

    push @mappings_2load, $a_mapping_hashref;
  }


  # Ensembl db, fetch analysis and delete old mappings if neccessary and
  # load the new mappings

  my $analysis = fetch_analysis($logic_name, $analysis_adaptor);

  # Only an analysis that already has a dbID can own old features, and they
  # are removed at most once per logic name.
  if($replace && !$replaced{$logic_name}++ && $analysis->dbID) {
    $feature_adaptor->db->dbc->do(
      "delete from dna_align_feature where analysis_id=".$analysis->dbID)
      or die("\n[*DIE] delete $logic_name = ".$analysis->dbID.": $DBI::errstr");
  }

  for my $a_mapping_2load(@mappings_2load){

    my $chr   = $a_mapping_2load->{map_id};

    my $slice = $slice_adaptor->fetch_by_region($this_coord_system,$chr);
    unless($slice){
      warn( "  Cannot fetch $this_coord_system $chr - skipping\n" );
      ++$num_err;
      next;
    }

    my $feature = Bio::EnsEMBL::DnaDnaAlignFeature->new
      (
       -start        => $a_mapping_2load->{start},
       -end          => $a_mapping_2load->{end},
       -strand       => $seq_region_strand,
       -hstart       => $a_mapping_2load->{marker_start},
       -hend         => $a_mapping_2load->{marker_end},
       -hstrand      => $a_mapping_2load->{marker_strand},
       -hseqname     => $a_mapping_2load->{display_synonym_id},
       -analysis     => $analysis,
       -slice        => $slice,
       -cigar_string => $a_mapping_2load->{cigar_line},
       -p_value      => $a_mapping_2load->{evalue},
       -score        => $a_mapping_2load->{score},
       -percent_id   => $a_mapping_2load->{percent_identity},
      );

    $feature_adaptor->store($feature);
    warn( "    Created feature $a_mapping_2load->{display_synonym_id} at ",
          $feature->feature_Slice->name , "\n") if $v;

    ++$num_loaded;
  }

  print LOG "\n\n
$logic_name: Total number of mappings to load = $num_2load
$logic_name: Total number of mappings loaded  = $num_loaded
$logic_name: Total number of errors           = $num_err
";

}

__END__


#======================================================================


