#
# package Bio::EnsEMBL::Analysis::Config::Exonerate2Genes
# 
# Cared for by EnsEMBL (ensembl-dev@ebi.ac.uk)
#
# Copyright GRL & EBI
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::EnsEMBL::Analysis::Config::Exonerate2Genes

=head1 SYNOPSIS

    use Bio::EnsEMBL::Analysis::Config::Exonerate2Genes;

=head1 DESCRIPTION

This module imports and sets a number of standard global variables in
the calling package, which are used in many scripts in the human
sequence analysis system.  The variables are first declared using
"use vars", so that they can be used when "use strict" is in use in the
calling script.  Without arguments all the standard variables are set,
and with a list, only those variables whose names are provided are set.
The module will die if a variable which doesn't appear in its
C<%Config> hash is asked to be set.

Since the RunnableDB that this config controls can be used for
inferring transcript structures from (different sets of) ESTs, cDNAs
and proteins, and several uses may be required in the same pipeline,
this Config contains one primary config variable, EXONERATE_CONFIG_BY_LOGIC.
This is a hash keyed off logic name, each entry of which is a hash
containing the variables that affect the behaviour of the RunnableDB.
When the RunnableDB instance is created, the correct entry is identified
by logic name and the values for a corresponding set of local variables
are set.

=head1 CONTACT

=cut


package Bio::EnsEMBL::Analysis::Config::Exonerate2Genes;

use strict;
use vars qw( %Config );

# Hash containing config info.
#
# %Config holds a single top-level key, EXONERATE_CONFIG_BY_LOGIC, whose
# value is a hash keyed by analysis logic name.  The DEFAULT entry lists
# the settings together with their default values; every other entry
# supplies the values for one logic name.
#
# NOTE(review): the OUTDB entries below carry database user names and
# passwords in plain text -- confirm this file is only readable by the
# people who should see them.
%Config = (
           EXONERATE_CONFIG_BY_LOGIC => {
             DEFAULT => {
               # The GENOMICSEQS can be a file (string, A.), a directory with files (string, B.)
               # or an anonymous array containing the names of multiple directories (array of strings, C.).
               # In the latter case the order of appearance will determine the usage of directories and files;
               # new versions should therefore be listed in the first directory.
               # format: A. '/your/soft/masked/genome.fa'
#                      or B. '/your/soft/masked/genomeDIR'
#                      or C. ['/your/soft/masked/genomeDIR_1', '/your/soft/masked/genomeDIR_2']

               GENOMICSEQS         => '/data/stein/liang/genomes/maize-agpv1-wbmips.fa',

               QUERYTYPE           => undef,
               QUERYSEQS           => undef,
	       BIOTYPE             => undef,
               IIDREGEXP           => undef,
               OUTDB               => undef,
               FILTER              => undef,
               COVERAGE_BY_ALIGNED => undef,
               OPTIONS             => undef,
               # Set to undef if non-reference regions should NOT be fetched (e.g. DR52 in human)
               NONREF_REGIONS      => 1,
             },

             protein_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'protein',
	       BIOTYPE             => 'protein',
               QUERYSEQS  => '/datafc/stein/liang/data/gene-protein',
               # Input id is a chunk id and chunk total
               IIDREGEXP => '(\d+):(\d+)',
               OUTDB => { -dbname => 'maize_agpv1_mapping2',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },
               # FILTER: the filter class (OBJECT) and the PARAMETERS given for it
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 40,
                             -percent_id => 30,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               COVERAGE_BY_ALIGNED => 0,
               OPTIONS => "--model protein2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE --score 200 --percent 10 --maxintron 50000 --minintron 15 --saturatethreshold 20 --proteinwordlen 5 --wordjump 2 --intronpenalty -40 --refine region",
             },

             cdna_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'dna',
	       BIOTYPE    => 'cdna22',	       
               QUERYSEQS  => '/datafc/stein/liang/data/maize-cdna',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { 
			  -dbname => 'maize_agpv1_mapping2',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 30,
                             -percent_id => 90,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               COVERAGE_BY_ALIGNED => 0,               
               OPTIONS => "--model est2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE  --score 250 --percent 30 --maxintron 25000 --minintron 15 --saturatethreshold 20 --dnawordlen 11 --wordjump 1 --geneseed 250 --intronpenalty -40 --gapopen -16 --gapextend -8",
             },            

             mrna_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'dna',
	       BIOTYPE    => 'cdna10',
               QUERYSEQS  => '/datafc/stein/liang/data/maize-mrna',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { 
			  -dbname => 'maize_agpv1_mapping3',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 10,
                             -percent_id => 90,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               COVERAGE_BY_ALIGNED => 0,               
               OPTIONS => "--model est2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE  --score 250 --percent 20 --maxintron 50000 --minintron 15 --saturatethreshold 20 --dnawordlen 12 --wordjump 10 --intronpenalty -50  --gapopen -12 --gapextend -6",
             },            

             est_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'dna',
	       BIOTYPE    => 'est',	       
               QUERYSEQS  => '/datafc/stein/liang/data/maize-est-pioneer',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { -dbname => 'maize_agpv1_mapping2',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               COVERAGE_BY_ALIGNED => 0,               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 50,
                             -percent_id => 90,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               OPTIONS => "--model est2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE  --score 250 --percent 30 --maxintron 50000 --minintron 15 --saturatethreshold 20 --dnawordlen 10 --wordjump 10 --intronpenalty -40 --refine region",
             },            

             other_protein_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'protein',
	       BIOTYPE   => 'oprotein',
               QUERYSEQS  => '/datafc/stein/liang/data/gene-protein',
               # Input id is a chunk id and chunk total
               IIDREGEXP => '(\d+):(\d+)',
               OUTDB => { -dbname => 'maize_agpv1_mapping2',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },
               # FILTER (set below): the filter class (OBJECT) and the PARAMETERS given for it
               COVERAGE_BY_ALIGNED => 0,
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 30,
                             -percent_id => 30,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               OPTIONS => "--model protein2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE --score 200 --percent 10 --maxintron 50000 --minintron 15 --saturatethreshold 20 --proteinwordlen 5 --wordjump 2 --intronpenalty -40",
             },

             short_est_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'dna',
	       BIOTYPE => 'short',
               QUERYSEQS  => '/datafc/stein/liang/data/maize-RNAseq',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { -dbname => 'maize_agpv1_mapping',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },
               COVERAGE_BY_ALIGNED => 0,               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 80,
                             -percent_id => 90,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
			     -spliced_only => 1,
                           },              
                         },
               OPTIONS => "--model est2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE  --score 100 --percent 20 --maxintron 5000 --minintron 15 --saturatethreshold 20 --dnawordlen 11 --wordjump 1  --intronpenalty -30",
             },            

             similar_mrna_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               # No BIOTYPE is given for this entry, unlike the other
               # non-DEFAULT entries (DEFAULT has BIOTYPE => undef).
               QUERYTYPE => 'dna',
               QUERYSEQS  => '/home1/stein/liang/maize-genebuild/rice-mrna',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { -dbname => 'maize_071218_mapping',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               COVERAGE_BY_ALIGNED => 0,               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 30,
                             -percent_id => 40,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               OPTIONS => "--model coding2genome --forwardcoordinates FALSE --softmasktarget true --exhaustive FALSE  --score 200 --percent 20 --maxintron 20000 --minintron 10 --saturatethreshold 20 --dnawordlen 15 --wordjump 5  --intronpenalty -30 --refine region --refineboundary 1024 --geneseed 200",
             },            

             close_mrna_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               # NOTE(review): the logic name says "mrna" but BIOTYPE ('oest')
               # and the QUERYSEQS path ('...-est') both refer to ESTs -- confirm
               # this pairing is intentional.
               QUERYTYPE => 'dna',
	       BIOTYPE    => 'oest',	       
               QUERYSEQS  => '/datafc/stein/liang/data/monocot-nomaize-est',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { -dbname => 'maize_agpv1_mapping',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               COVERAGE_BY_ALIGNED => 0,               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 20,
                             -percent_id => 30,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               OPTIONS => "--model coding2genome --forwardcoordinates FALSE --softmasktarget true --exhaustive FALSE  --score 200 --percent 5 --maxintron 25000 --minintron 15 --saturatethreshold 20 --dnawordlen 11 --wordjump 5  --intronpenalty -40 --gapopen -12 --gapextend -6",
             },            

             other_mrna_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'dna',
	       BIOTYPE => 'ocdna',
               QUERYSEQS  => '/datafc/stein/liang/data/monocot-nomaize-cdna',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { -dbname => 'maize_agpv1_mapping',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 20,
                             -percent_id => 30,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               COVERAGE_BY_ALIGNED => 0,               
               OPTIONS => "--model coding2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE --score 250 --percent 1 --maxintron 25000 --minintron 15 --saturatethreshold 20 --dnawordlen 12 --wordjump 3 --intronpenalty -40 --gapopen -12 --gapextend -6 ",
             },            

             close_cdna_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'dna',
	       BIOTYPE    => 'ccdna',	       
               QUERYSEQS  => '/datafc/stein/liang/data/rice-mrna',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { -dbname => 'maize_agpv1_mapping2',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               COVERAGE_BY_ALIGNED => 0,               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 20,
                             -percent_id => 30,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               OPTIONS => "--model coding2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE  --score 200 --percent 3 --maxintron 25000 --minintron 15 --saturatethreshold 20 --dnawordlen 12 --wordjump 6  --intronpenalty -40 ",
             },            

             close_est_exonerate => {
               # GENOMICSEQS obtained from DEFAULT
               QUERYTYPE => 'dna',
	       BIOTYPE    => 'cest',	       
               QUERYSEQS  => '/datafc/stein/liang/data/rice-est',
               # IIDREGEXP not set; input ids are file names
               OUTDB => { -dbname => 'maize_agpv1_mapping2',
                          -host => 'bhsqldw1',
                          -port => '3306',
                          -user => 'liang',
                          -pass => 'liang',
                        },               
               COVERAGE_BY_ALIGNED => 0,               
               FILTER => { OBJECT     => 'Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter',
                           PARAMETERS => {
                             -coverage => 20,
                             -percent_id => 30,
                             -best_in_genome => 0,
                             -reject_processed_pseudos => 0,
                           },              
                         },
               OPTIONS => "--model coding2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE  --score 200 --percent 5 --maxintron 25000 --minintron 15 --saturatethreshold 20 --dnawordlen 12 --wordjump 6  --intronpenalty -40 ",
             },            

           }

           );

# Import hook, invoked when this module is loaded with "use".
#
# Aliases entries of %Config into the calling package as scalar
# variables of the same name (e.g. $EXONERATE_CONFIG_BY_LOGIC), after
# declaring them there with "use vars" so they are legal under
# "use strict".
#
# Arguments: the class name (discarded), followed by an optional list
#            of %Config keys.  With no list, every key of %Config is
#            exported.
# Dies:      "Error: Config: NAME not known" if a requested name has no
#            defined entry in %Config; or with the eval error if the
#            variables cannot be declared in the calling package.
sub import {
  my ($callpack) = caller(0); # Name of the calling package
  shift;                      # Need to move package off @_

  # Get list of variables supplied, or else everything
  my @vars = @_ ? @_ : keys( %Config );
  return unless @vars;

  # Validate every requested name BEFORE it is interpolated into the
  # string eval below.  Otherwise an unknown name would first be declared
  # in the caller's namespace (or trigger an unrelated vars.pm error)
  # and only afterwards be rejected.
  foreach my $name (@vars) {
    die "Error: Config: $name not known\n"
      unless defined $Config{$name};
  }

  # Predeclare global variables in calling package
  eval "package $callpack; use vars qw("
    . join(' ', map { '$'.$_ } @vars) . ")";
  die $@ if $@;

  foreach my $name (@vars) {
    no strict 'refs';
    # Exporter does a similar job to the following
    # statement, but for function names, not
    # scalar variables:
    *{"${callpack}::$name"} = \$Config{$name};
  }
}

1;
