#!/usr/local/bin/perl -w 

# vim: sw=2: ts=2:

=head1 NAME

load-fpc.pl - Populates the MarkersDB with FPC data

=head1 SYNOPSIS

perl load-fpc.pl [options] fpc_file

Options:
 [B<-h>|B<--help>]
 [B<-i>|B<--info>]
 [B<-g>|B<--gramene_conf> I<file>]
 [B<-b>|B<--basepairs_per_band> I<integer>]
 [B<-s>|B<--species> I<name>]
 [B<-m>|B<--map_set_name> I<name>]
 [B<-so>|B<--source> I<name>]
 [B<-l>|B<--library> I<name>]
 [B<-v>|B<--verbose>]
 [B<-n>|B<--no_insert>]

=head1 OPTIONS

Reads the B<fpc_file>, and uses its data to populate the MarkersDB

B<-h --help>
  Print a brief help message and exits.

B<-i --info>
  Print info/man page and exit

B<-g --gramene_conf>
  Use this Gramene configuration file for MarkersDB connection info.
  Default is ${GrameneConfPath}

B<-s --species>
  Use this species. Required.

B<-b --basepairs_per_band>
  Band-to-basepair scaling factor. If omitted will use the AvgBand 
  attribute in the fpc file header, or 5000 if this is missing.

B<--map_set_name> I<name>
  The value of map_set.map_set_name against which to load the mappings.
  If omitted the map_set_name will be generated from the 'fpc project'
  header of the fpc file.

B<-so --source> I<name>
  The source corresponding to the --map_set_name, e.g. AGI/OMAP. Required.

B<-l --library> I<name>
  The clone library prefix to use for the clone markers.

B<-v --verbose>
  Print verbose output

B<-n --no_insert>
  Do not make changes to the database. Useful for debug.


=head1 DESCRIPTION

B<This program> 

  Populates MarkersDB with FPC assembly data.

=head2 INTRO

This is a rough outline of the main entities in the MarkersDB schema;

=head3 map    

 - An entity describing a contiguous genomic/genetic region e.g. 
 - Chromosome,
 - FingerPrint Contig (FPC),
 - Clone.

=head3 map_set

 - A collection of maps, e.g.
 - All chromosomes in a genome,
 - All FPC in an Agarose map,
 - All accessioned clones at a given date.
   
=head3 marker

 - An entity that can be located on a map e.g.
 - Clone,
 - EST.
 
=head3 mapping

 - A positional association between a marker and a map, e.g.;
 - Maize overgo 'A' maps to FPC 'B' between bands 'X' and 'Y',
 - Maize clone 'A' maps to rice chr 'B' between basepairs 'X' and 
   'Y' with score 'Z'.

=head3 correspondence

 - A non-positional association between two markers, e.g.;
 - Clone end 'A' corresponds to Clone 'B'.


=head2 OBJECTIVES 

The following is a list of FPC-related data that this script loads
into the MarkersDB;

map_set 1  - Collection of all chromosomes in the fpc file,
map_set 2  - Collection of all fingerprint contigs in the fpc file,

maps 1     - Individual chromosome included in the fpc file,
maps 2     - Individual fingerprint contigs in the fpc file,
 
markers 1  - All FPCs from the FPC map,
markers 2  - All clones from the FPC map
markers 4  - All markers on the FPC map,

mapping 1  - Location of FPC markers on chromosome maps, basepair coords,
mapping 2  - Location of FPC markers on FPC maps, band coords (trivial),
mapping 3  - Location of clones on chromsome maps, basepair,
mapping 4  - Location of clones on FPC maps, band,
mapping 5  - Location of markers on chromosome maps, basepair,
mapping 6  - Location of markers on FPC maps, band,

corresp 1  - Associations between FPC markers and their constituent
             clones.
corresp 2  - Associations between map markers and the clones to which
             they hybridise.

  A good place to look for FPC maps is;
  /usr/local/data/fpc/<date><species><source>.fpc
  
Maintained by Will Spooner <whs@ebi.ac.uk>

=cut

use strict;
use warnings;
use Bio::MapIO;
use DBI;
use Data::Dumper qw(Dumper); # For debug 
use Date::Calc;
use File::Basename qw( dirname );
use FindBin qw( $Bin );
use Getopt::Long;
use Gramene::Marker::DB;
use Pod::Usage;
use Readonly;

Readonly my %DEFAULT => (
  avg_band           => 5000,
#  gramene_conf_path  => '/usr/local/gramene/conf/gramene.conf',
);

use vars qw( $MDB $FPC_MAP $SCALING_FACTOR $I $V
  $CHR_CTGS $CHR_LENGTHS $CTG_LENGTHS 
  $CLONES_BY_CTG $MARKERS_BY_CTG %SPECIES_IDS $CDBI_ANALYSIS
);

my $gconf = $ENV{'GrameneConfPath'} || $DEFAULT{'gramene_conf_path'};

my( $help, $man, $no_insert, $species, $map_set_name, $source, $library );
GetOptions(
  'help|?'               => \$help,
  'info'                 => \$man,
  'no_insert'            => \$no_insert,
  'verbose'              => \$V,
  'species|s:s'            => \$species,
  'gramene_conf:s'       => \$gconf,
  'map_set_name:s'       => \$map_set_name,
  'source:s'             => \$source,
  'library:s'            => \$library,
  'basepairs_per_band:s' => \$SCALING_FACTOR,
) or pod2usage();
pod2usage(-verbose => 2) if $man;
pod2usage(1) if $help;

my $fpc_file = shift @ARGV;
$fpc_file || ( prlog( "\n[*DIE] Need the path to an FPC file\n\n" ) 
               && pod2usage );

$ENV{'GrameneConfPath'} = $gconf;

for my $file ( $gconf, $fpc_file ) {
  next unless $file;
  -e $file || ( prlog( "File '$_' does not exist\n" )    && pod2usage );
  -r $file || ( prlog( "Cannot read '$_'\n" )            && pod2usage );
  -f $file || ( prlog( "File '$_' is not plain-text\n" ) && pod2usage );
  -s $file || ( prlog( "File '$_' is empty\n" )          && pod2usage );
}

$I = $no_insert ? 0 : 1;
unless( $I ){ prlog( "[INFO] Evaluation run - no db changes will be made")}

$species || ( prlog( "\n[*DIE] Need a --species\n\n" ) && pod2usage );
$source  || ( prlog( "\n[*DIE] Need a --source\n\n" ) && pod2usage );

# MarkersDB connections
$MDB = Gramene::Marker::DB->new( admin=>1 ) ||
    die "\n[*DIE] " . CSHL::Marker::DB->error . "\n\n";

# Process the FPC file header
open my $fh, '<', $fpc_file or die "Can't read FPC file '$fpc_file': $!\n";
my %header;
while( my $line = <$fh> ){
  if( $line =~ m|^//(.+)$| ){
    $line = $1;
    if( $line =~ /fpc project (\w+)/i ){
      $header{project} = $1;
    }
    elsif( $line =~ /Date:\s+\d+:\d+\s+\w+\s+(\d+)\s+(\w+)\s+(\d+)/ ){
      my( $dd, $mm, $yy ) = ($1, Date::Calc::Decode_Month($2), $3);
      $header{isodate} = sprintf('%4.4d-%2.2d-%2.2d', $yy, $mm, $dd );
    }
    elsif( $line =~ /AvgBand\s+(\d+)\s+AvgInsert\s+(\d+)/ ){
      $header{avg_band}   = $1;
      $header{avg_insert} = $2;
    }
    elsif( $line =~ /
           Contigs\s+(\d+)\s+
           Clones\s+(\d+)\s+
           Markers\s+(\d+)\s+
           Bands\s+(\d+)/ix ){
      $header{contig_count} = $1;
      $header{clone_count}  = $2;
      $header{marker_count} = $3;
      $header{band_count}   = $4;
    }
  } 
  else { last } # Header complete
}
close $fh;

# Create the template that we will use for the map-specific contig name
my $date = $header{isodate};
$date =~ s/-//g; # Strip '-'
my $CTG_NAME_TMPL .= "$header{project}.$date.%s%4.4d";
$library ||= $header{project}; # Works for OMAP. Need to test for maize/sorghum

# CB to BP scaling factor taken from fpc file header
$SCALING_FACTOR ||= ( $header{'avg_band'} || $DEFAULT{'avg_band'} );
#|| die( "\n[*DIE] No AvgBand data found in header for $fpc_file\n\n" );
$header{contig_count} ||= 'UNKNOWN';
$header{marker_count} ||= 'UNKNOWN';
$header{avg_insert}   ||= 'UNKNOWN';

# Print some debug
prlog( "[INFO][FPC] Filename; $fpc_file" );
prlog( "[INFO][FPC] Date: $header{isodate}" );
prlog( "[INFO][FPC] Ctgs: $header{contig_count}, ".
       "Clones: $header{clone_count}, ".
       "Markers: $header{marker_count}, ".
       "Bands: $header{band_count}" );
prlog( "[INFO][FPC] AvgBand: ${SCALING_FACTOR}bp, ".
       "AvgInsert: $header{avg_insert}bp" );

# Load the FPC file
prlog( "[INFO][FPC] Loading..." );
my $mapio  = Bio::MapIO->new(
  -format  => "fpc",
  -file    => "$fpc_file",
  -readcor => 0,
  -verbose => 0
);
$FPC_MAP = $mapio->next_map(); # Single map per FPC file
$FPC_MAP->{_header_annotation} = \%header;

# Populate the species
$species =~ s/_/ /g;
$FPC_MAP->species( $species );

# Get the cdbi_analysis
my $a_name = 'fpc_loader';
($CDBI_ANALYSIS) = $MDB->search_Analysis({analysis_name=>$a_name});
$CDBI_ANALYSIS || die( "MarkersDB analysis $a_name not found ");

# Get the cdbi_species
my $sp_name   = lc($FPC_MAP->species);
my $cdbi_species;
our %CDBI_SPECIES;
foreach my $obj( $MDB->retrieve_all_Species ){
  my $name = lc($obj->species);
  $CDBI_SPECIES{$name} = $obj;
  if( $sp_name eq $name ){ $cdbi_species = $obj }
  if( $sp_name eq lc($obj->common_name) ){ $cdbi_species ||= $obj }
}
$cdbi_species || die( "MarkersDB species $sp_name not found" );

# Need two mapsets - one for the chomosomes, and one for the FP contigs
my( $cdbi_mapset_ctg, $cdbi_mapset_chr ) 
    = &get_map_sets( $FPC_MAP, $cdbi_species, $map_set_name, $source );

# Create a list of cdbi_marker_types and cdbi_synonym_types and corr types
our %CDBI_MARKER_TYPES;
foreach my $obj( $MDB->retrieve_all_MarkerType ){
  my $name = lc($obj->marker_type);
  $name =~ s/\s+/_/g;
  $CDBI_MARKER_TYPES{$name} = $obj;
}
our %CDBI_SYNONYM_TYPES;
foreach my $obj( $MDB->retrieve_all_SynonymType ){
  my $name = lc($obj->synonym_type);
  $name =~ s/\s+/_/g;
  $CDBI_SYNONYM_TYPES{$name} = $obj;
}
our %CDBI_CORR_TYPES
    = ( map{ lc($_->type) => $_ } 
        $MDB->retrieve_all_AnalyticalCorrespondenceType );

# Loop through each chr and contig
foreach my $chr( &get_chromosome_names ){
  my $chr_length = &get_chromosome_length($chr);

  prlog( "[INFO] Processing Chr ${chr} ($chr_length->{bp}bp)" );

  # MARKERSDB


  # Find or create chromosome map
  my $cdbi_map_chr = &get_map( $cdbi_mapset_chr, $chr, 
                               1, $chr_length->{bp} ); 

  my( $chr_start, $chr_end ) =  (1,1);
  foreach my $ctg_name( &get_contig_names($chr) ){

    my $ctg_name_long = sprintf($CTG_NAME_TMPL, $ctg_name=~/(\D+)(\d+)/);

    my $ctg_length = &get_contig_length($ctg_name);
    $chr_end = $chr_start + $ctg_length->{bp} -1;
    my $ctg_start = 1;

    prlog( "[INFO] Processing Contig $ctg_name_long ($ctg_length->{bp}bp)" );

    # MARKERSDB
    my $cdbi_map_ctg = &get_map
        ( $cdbi_mapset_ctg, $ctg_name, 1, $ctg_length->{band} ); 

    my %cdbi_marker_fpc_args = 
        ( marker_name => { marker_name => $ctg_name_long },
          MarkerType  => $CDBI_MARKER_TYPES{fpc},
          Species     => $cdbi_species,
          Analysis    => $CDBI_ANALYSIS,
          details     => { length_band     => $ctg_length->{band},
                           length_basepair => $ctg_length->{bp},
                           clone_count     => &get_clone_count($ctg_name) } );
    my $cdbi_marker_fpc = &get_marker( %cdbi_marker_fpc_args );
    if( $I ){
      $MDB->add_synonyms_to_Marker # Add syn as an extra step
        ( Marker => $cdbi_marker_fpc,
          synonyms    => [ { marker_name => $ctg_name,
                             SynonymType => $CDBI_SYNONYM_TYPES{agi_fpc} } ] );
    }

    my %ctg_ctg_mapping_args = 
        ( Marker       => $cdbi_marker_fpc,
          Map          => $cdbi_map_ctg,
          Analysis     => $CDBI_ANALYSIS, 
          marker_name  => $ctg_name,
          start        => 1,
          end          => $ctg_length->{band},
          #marker_start => 1,
          #marker_end   => $ctg_length->{band}, 
          );

    my %ctg_chr_mapping_args = 
        ( Marker       => $cdbi_marker_fpc,
          Map          => $cdbi_map_chr,
          Analysis     => $CDBI_ANALYSIS, 
          marker_name  => $ctg_name,
          start        => $chr_start,
          end          => $chr_end,
          #marker_start => 1,
          #marker_end   => $ctg_length->{bp}, 
          );
    
    my $ctg_chr_mapping = &get_mapping(%ctg_chr_mapping_args);
    my $ctg_ctg_mapping = &get_mapping(%ctg_ctg_mapping_args);

    foreach my $cloneobj( &get_clones($ctg_name) ){
      my $clone_name = $cloneobj->name;
      my $clone_start_band  = $cloneobj->range->start;
      my $clone_end_band    = $cloneobj->range->end;
      my $clone_length_band = $clone_end_band - $clone_start_band; 
      my $clone_start_bp    = $clone_start_band * $SCALING_FACTOR;
      my $clone_end_bp      = $clone_end_band * $SCALING_FACTOR;
      my $clone_length_bp   = $clone_end_bp - $clone_start_bp + 1;

      my $cdbi_marker_clone = &get_clone_marker
          ( clone_name => $clone_name, 
            library    => $library, # Change this to be a cdbi_library
            Species    => $cdbi_species, 
            Analysis   => $CDBI_ANALYSIS,
            details     => { clone_insert_length => $clone_length_bp } );

      # Add correspondence between clone and fpc markers
      my $corr = &get_correspondence
          ( 'constituent_clone', $cdbi_marker_clone, $cdbi_marker_fpc );
 
      # Add mappings between the clone and the two maps
      my %clone_chr_mapping_args = 
          ( Marker       => $cdbi_marker_clone,
            Map          => $cdbi_map_chr,
            Analysis     => $CDBI_ANALYSIS, 
            marker_name  => $clone_name,
            start        => $chr_start + $clone_start_bp,
            end          => $chr_start + $clone_end_bp,
            #marker_start => 1,
            #marker_end   => $clone_length_bp 
            );
      my %clone_ctg_mapping_args = 
          ( Marker       => $cdbi_marker_clone,
            Map          => $cdbi_map_ctg,
            Analysis     => $CDBI_ANALYSIS, 
            marker_name  => $clone_name,
            start        => $clone_start_band,
            end          => $clone_end_band,
            #marker_start => 1,
            #marker_end   => $clone_length_band 
            );
      my $clone_chr_mapping = &get_mapping(%clone_chr_mapping_args);
      my $clone_ctg_mapping = &get_mapping(%clone_ctg_mapping_args);


      # Add correspondence between fpc markers and clone
      foreach my $id ( $cloneobj->each_markerid ){
        my $bio_map_marker = $FPC_MAP->get_markerobj( $id );
        my $cdbi_marker_marker = &get_marker_marker
            ( 'bio_map_marker' => $bio_map_marker,
              'Analysis'       => $CDBI_ANALYSIS, );
        my $corr = &get_correspondence
            ( 'hybridisation', $cdbi_marker_marker, $cdbi_marker_clone );
      }
    }

    foreach my $markerobj( &get_markers($ctg_name) ){
      my $name = $markerobj->name;

      # Get the marker
      my $cdbi_marker = &get_marker_marker
          ( 'bio_map_marker' => $markerobj,
            'Analysis'       => $CDBI_ANALYSIS );
                                          
      # Add mappings between the marker and the two maps
      my $ctgid = $ctg_name;
      $ctgid =~ s/^ctg//o;
      my $position_band = $markerobj->position($ctgid);
      my $position_bp   = $position_band * $SCALING_FACTOR + $chr_start;
      my %chr_mapping_args = 
          ( Marker      => $cdbi_marker,
            Map         => $cdbi_map_chr,
            Analysis    => $CDBI_ANALYSIS,
            marker_name => $name,
            start       => $position_bp, );
      my %ctg_mapping_args = 
          ( Marker      => $cdbi_marker,
            Map         => $cdbi_map_ctg,
            Analysis    => $CDBI_ANALYSIS,
            marker_name => $name,
            start       => $position_band, );
      my $chr_mapping = &get_mapping(%chr_mapping_args);
      my $ctg_mapping = &get_mapping(%ctg_mapping_args);

    }
    $chr_start = $chr_end + 1;
  }
}

#======================================================================
# Internal method to process FPC data into lists of clones grouped by
# chromosomes
sub _process_fpc_data{
  $CHR_CTGS    ||= {};
  $CHR_LENGTHS ||= {};
  $CTG_LENGTHS ||= {};
  my $scaling_factor = $SCALING_FACTOR
      || die( "No avg_band scaling factor" );
  foreach my $ctgid( $FPC_MAP->each_contigid ){
    $ctgid || next; # Skip unnamed contigs
    my $ctgobj   = $FPC_MAP->get_contigobj($ctgid);
    my $ctgname = "ctg${ctgid}";
    my $ctgpos   = $ctgobj->position     || 0;
    my $ctgstart = $ctgobj->range->start || 0;
    my $ctgend   = $ctgobj->range->end   || 0;
    my $ctgband  = $ctgend - $ctgstart;
    my $ctgbasepair = $ctgband * $scaling_factor;
    $ctgpos += 0; # Force into numerical context
    my $chr = $ctgobj->group || '';
    $chr ||= 'UNKNOWN';
    #unless( $chr =~ /^\d+/ ){ $chr="UNKNOWN" }

    $CHR_CTGS->{$chr} ||= [];
    $CHR_LENGTHS->{$chr} ||= 0;

    push @{$CHR_CTGS->{$chr}}, [$ctgname, $ctgpos];
    $CHR_LENGTHS->{$chr} ||= {};
    $CHR_LENGTHS->{$chr}->{band}    += $ctgband;
    $CHR_LENGTHS->{$chr}->{bp}      += $ctgbasepair;
    $CTG_LENGTHS->{$ctgname} = {};
    $CTG_LENGTHS->{$ctgname}->{band} = $ctgband;
    $CTG_LENGTHS->{$ctgname}->{bp}   = $ctgbasepair;

    $CLONES_BY_CTG->{$ctgname} = [];
    foreach my $id( $ctgobj->each_cloneid ){
      push @{$CLONES_BY_CTG->{$ctgname}}, $FPC_MAP->get_cloneobj( $id );
    }

    $MARKERS_BY_CTG->{$ctgname} = [];
    foreach my $id( $ctgobj->each_markerid ){
      push @{$MARKERS_BY_CTG->{$ctgname}}, $FPC_MAP->get_markerobj( $id );
    }
  }
  foreach my $chr( keys %$CHR_CTGS ){
    $CHR_CTGS->{$chr} = [ map{ $_->[0] }
                          sort{ ($a->[1] <=> $b->[1]) || 
                                ($a->[0] cmp $b->[0]) } @{$CHR_CTGS->{$chr}} ]
  }
  return $CHR_CTGS;
}

#----------------------------------------------------------------------
# Returns MarkerDB map_sets
sub get_map_sets{
  my $fpc_map = shift || die( "No FPC map" );
  my $cdbi_species = shift || die( "No CDBI Species" );
  my $map_set_name = shift || undef;
  my $source = shift || undef;
  my $project = $fpc_map->name;
  my $isodate    = $fpc_map->{_header_annotation}->{isodate};

  unless( $map_set_name ){
    my( $yy, $mm, $dd ) = split( "-", $isodate );
    $mm = Date::Calc::Month_to_Text( $mm );
    $mm = substr($mm,0,3);
    my $date = "$mm $yy";
    $map_set_name = "FPC $project $date";
  }

  my $fpc_map_set_name = $map_set_name;
  my $chr_map_set_name = $map_set_name;
  unless( $chr_map_set_name =~ s/FPC/FPC Chr/ ){
    $chr_map_set_name = $map_set_name . " (Chr)";
  }

  my $map_type_name = 'physical';
  my ($map_type) = $MDB->search_MapType({map_type=>$map_type_name});
  $map_type || die( "MarkersDB map_type $map_type_name not found" );

  my( $fpc_set, $chr_set );
  prlog("[INFO][MAP_SET] find_or_create_MapSet ".
        "$fpc_map_set_name, $chr_map_set_name" );
  if( $I ){
    $fpc_set = $MDB->find_or_create_MapSet
        ({map_set_name => $fpc_map_set_name,
          map_type_id  => $map_type->id,
          species_id   => $cdbi_species->id,
          project      => $project,
          published_on => $isodate,
          ( $source ? (source=>$source) : () ), });
    $chr_set = $MDB->find_or_create_MapSet
        ({map_set_name => $chr_map_set_name,
          map_type_id  => $map_type->id,
          species_id   => $cdbi_species->id,
          project      => $project,
          published_on => $isodate,
          ( $source ? (source=>$source) : () ), });
    my $band_length = $fpc_map->{_header_annotation}->{band_count};
    my $bp_length = $band_length * $SCALING_FACTOR;
    $fpc_set->total_length  or $fpc_set->total_length( $band_length );
    $chr_set->total_length  or $chr_set->total_length( $bp_length );
    $fpc_set->distance_unit or $fpc_set->distance_unit('band');
    $chr_set->distance_unit or $chr_set->distance_unit('basepair');
    $fpc_set->update;
    $chr_set->update;
  }
  else{
    ($fpc_set) = $MDB->search_MapSet({map_set_name => $fpc_map_set_name,
                                      map_type_id  => $map_type->id,
                                      species_id   => $cdbi_species->id });
    ($chr_set) = $MDB->search_MapSet({map_set_name => $chr_map_set_name,
                                      map_type_id  => $map_type->id,
                                      species_id   => $cdbi_species->id });
  }
  return( $fpc_set, $chr_set );
}

#----------------------------------------------------------------------
# Returns a cdbi_map object. 
# Args; CDBI::Markers::MapSet, map_name, start and end.
sub get_map{
  my $cdbi_mapset = shift;
  my $map_name    = shift;
  my $start       = shift;
  my $end         = shift;

  prlog( "[INFO][MAP] Get Map $map_name ($start..$end) on MapSet " .
        ($cdbi_mapset ? $cdbi_mapset->map_set_name : '???') ) if $V;
  
  my %cdbi_map_args = ( map_set_id => $cdbi_mapset, 
                        map_name   => $map_name );

  my( $cdbi_map ) = $MDB->search_Map({%cdbi_map_args});
  if( $cdbi_map and $V ){
    prlog( "[INFO][MAP] Found Map $map_name (dbID ", $cdbi_map->id, ")" );
  }
  unless( $cdbi_map ){
    $cdbi_map_args{start} = $start;
    $cdbi_map_args{end}   = $end;
    if( $I ){ $cdbi_map = $MDB->insert_Map({ %cdbi_map_args }) }
    prlog( "[INFO][MAP] Created Map $map_name" ) if $V;
  }
  return( $cdbi_map );
}

#----------------------------------------------------------------------
# Retrns a CDBI::Markers::Marker obj of marker_type clone
# It is important that existing clones in the DB are used rather than 
# creating new ones. Unfortunatly the clone name in the FPC file 
# often differs from that in GenBank. We therefore use a fuzzy matching 
# approach. We cant't wildcard-search each clone (takes too long), so we build
# a template library to represent previous wildcard hits, and try using these 
# before each wildcarded search.

our @CLONE_NAME_TEMPLATES;
sub get_clone_marker{
  my %args = @_;
  my $fpc_clone_name   = $args{clone_name} || die( "Need a clone name" );
  my $cdbi_species     = $args{Species}    || die( "Need a CDBI Species" );
  my $cdbi_analysis    = $args{Analysis}   || die( "Need a CDBI Analysis" );
  my $library          = $args{library};
  delete( $args{library} ); # TODO make library handling more sophisticated
  my $cdbi_marker_type = $CDBI_MARKER_TYPES{clone};
  
  # Build the argument array that we will send to get_marker
  delete $args{clone_name};
  $args{MarkerType} = $cdbi_marker_type;
  $args{synonyms} = [{marker_name => $fpc_clone_name,
                      SynonymType => $CDBI_SYNONYM_TYPES{agi_fpc_clone}},];

  # Do we already have the fpc clone name in the markers DB?
  #prlog( "[INFO][MARKER] Looking for $fpc_clone_name" ) if $V;
  my @clones = $MDB->search_marker_synonyms
      ( synonyms   => [ $library.$fpc_clone_name ],
        Species    => $cdbi_species,
        MarkerType => $cdbi_marker_type );
  if( @clones ){ # We have a hit!
    if( @clones > 1){ # Oops - we've got too many direct hits!
      prlog( "[WARN][MARKER] Clone $fpc_clone_name has > 1 direct hit: ", 
           join(', ', map{$_->{marker_name}} @clones ) );
    }
    my $cdbi_synonym = 
        $MDB->retrieve_MarkerSynonym($clones[0]->{display_synonym_id});
    my $clone_name = $cdbi_synonym->marker_name;
    unshift( @{$args{synonyms}}, { marker_name => $clone_name } );
    prlog( "[INFO][MARKER] Clone $fpc_clone_name => $clone_name (tmpl)") if $V;
    return &get_marker( %args );
  }
    
  if( $fpc_clone_name =~ /(\w+)sd\d$/ ){
    # Simulated digest of Genbank clone. Ends in sd1 or sd2                 
    prlog( "[INFO][MARKER] Clone name $fpc_clone_name => $1 (sd)" ) if $V;
    unshift( @{$args{synonyms}}, 
             { marker_name => $1,
               SynonymType => $CDBI_SYNONYM_TYPES{genbank_accession} } );
    return &get_marker( %args );
  }

  if( $fpc_clone_name =~ /^[a-z]{0,1}\d{1,4}[A-Z]\d{1,2}$/o ){
    # Looks like a plate location (e.g. a0043M17). 
    # These have a different prefix in the DB than in the map.

    # Look in clone name template library for matches clones
    if( @CLONE_NAME_TEMPLATES ){
      my @expanded_clone_names = ( map{ sprintf( $_, $fpc_clone_name ) } 
                                   @CLONE_NAME_TEMPLATES );
      my @clones = $MDB->search_marker_synonyms
          ( synonyms   => [ @expanded_clone_names ],
            Species    => $cdbi_species,
            MarkerType => $cdbi_marker_type );
      if( @clones ){ # We have a hit!
        if( @clones > 1){ # Oops - we've got too many template hit!
          prlog( "[WARN][MARKER] Clone $fpc_clone_name has >1 template hit: ", 
               join(', ', map{$_->{marker_name}} @clones ) );
        }
        my $cdbi_synonym = 
            $MDB->retrieve_MarkerSynonym($clones[0]->{display_synonym_id});
        my $clone_name = $cdbi_synonym->marker_name;
        prlog( "[INFO][MARKER] Clone $fpc_clone_name => $clone_name (tmpl)")
            if $V;
        unshift( @{$args{synonyms}}, { marker_name => $clone_name } ),
        return &get_marker( %args );
      }
    }
    
    # Still here? Nothing in the template library :(
    # Time for a wildcarded search
    my @cdbi_syns = ( map{ $_->{marker_name} }
                      $MDB->search_marker_synonyms
                      ( synonyms   => [ '%'.$fpc_clone_name.'%' ],
                        Species    => $cdbi_species,
                        MarkerType => $cdbi_marker_type ) );
    
    if( @cdbi_syns ){
      if( @cdbi_syns > 1){ # Oops - we've got too many clone hits!
        # Are they forward and reverse reads from the same clone?
        unless( @cdbi_syns==2 and ( substr( $cdbi_syns[0],0,-1 ) eq 
                                    substr( $cdbi_syns[1],0,-1 ) ) ){
          prlog("[WARN][MARKER] Clone $fpc_clone_name has >1 fuzzy hit: ", 
              join(', ', @cdbi_syns ) );
        }
      }
      # Found a match, use to create a template
      my $clone_name = $cdbi_syns[0];
      prlog( "[INFO][MARKER] Clone $fpc_clone_name => $clone_name (fuzzy)" )
          if $V;
      my $tmpl = $clone_name;
      $tmpl =~ s/$fpc_clone_name/%s/;
      push @CLONE_NAME_TEMPLATES, $tmpl;
      unshift( @{$args{synonyms}},
               { marker_name => $clone_name} );
      return &get_marker( %args );
    }
    # Still here? Looks like we cannot find a corresponding clone in the DB!
    prlog( "[WARN][MARKER] No existing clone found for $fpc_clone_name" );
    #die();
    return &get_marker( %args );
  }

  #Still here? Looks like we cannot recognise the clone name!
  prlog("[WARN][MARKER] Don't recognise format of clone name $fpc_clone_name");
  return &get_marker( %args );
}

#----------------------------------------------------------------------
# Returns a CDBI::Markers::Marker obj of type corresponding to the fpc 
# map marker. We do not know what the species is, as probes designed against
# various species may be located on this map.
our %MARKER_MARKERS;
sub get_marker_marker{
  my %args = @_;
  my $bio_map_marker = $args{bio_map_marker} || die("Need a Bio::Map::Marker");
  my $cdbi_analysis  = $args{Analysis}       || die("Need a CDBI Analysis");

  my $name = $bio_map_marker->name;
  if( my $m = $MARKER_MARKERS{$name} ){ return $m } # Cached
  
  # Determine the marker type
  my $cdbi_marker_type = $CDBI_MARKER_TYPES{lc($bio_map_marker->type)};
  my $cdbi_synonym_type = $CDBI_SYNONYM_TYPES{unknown};

  # See if we can make sense of the marker by parsing the name
  # ...Do we need to do this, or should we rely on the data being updated
  #    fom source?
  my $desc = '';
  if( $name eq 'pZmR1' ){
    $desc = 'Ribosomal probe';
  }
  elsif( $name eq '185bp' ){
    $desc = 'Knob heterochromatin probe';
  }
  elsif( $name eq 'HICF' ){
    $desc = 'HICF bridging clone';
  }
  elsif( $name =~ /^mito/i ){
    $desc = 'Mitochondrial DNA';
  }
  elsif( $name =~ /^chloro/i ){
    $desc = 'Chloroplast DNA';
  }
  elsif( $name =~ /^cent/i ){
    $desc = 'Centromere-related';
  }
  elsif( $name =~ /^pMTY/i ){
    $desc = 'Telomere-related';
  }
  elsif( $name =~ /^CL/ ){
    $desc = 'Cluster assembled from public EST sequences';
    $cdbi_marker_type = $CDBI_MARKER_TYPES{'est cluster'};
    if( $name =~ /_ov$/i ){
      $desc = "Overgo to ".$desc;
      $cdbi_marker_type  = $CDBI_MARKER_TYPES{'overgo'};
      $cdbi_synonym_type = $CDBI_SYNONYM_TYPES{'maizegdb_overgo'};
    }
  }
  elsif( $name =~ /^PCO*/ ){
    $desc = 'Cornsensus Unigene deposited in GenBank';
    $cdbi_marker_type = $CDBI_MARKER_TYPES{'est cluster'};
    if( $name =~ /_ov$/i ){
      $desc = "Overgo to ".$desc;
      $cdbi_marker_type  = $CDBI_MARKER_TYPES{'overgo'};
      $cdbi_synonym_type = $CDBI_SYNONYM_TYPES{'maizegdb_overgo'};
    }
  }
  elsif( $name =~ /^si/ ){
    $desc = 'Public singletons that did not cluster with DuPont sequences';
    $cdbi_marker_type = $CDBI_MARKER_TYPES{'est cluster'};
    if( $name =~ /_ov$/i ){
      $desc = "Overgo to ".$desc;
      $cdbi_marker_type  = $CDBI_MARKER_TYPES{'overgo'};
      $cdbi_synonym_type = $CDBI_SYNONYM_TYPES{'maizegdb_overgo'};
    }
  }
  elsif( $name =~ /_ov$/i ){
    $desc = 'Overgos to Cornsensus Unigenes (DuPont/MMP/Incyte)';
    $cdbi_marker_type  = $CDBI_MARKER_TYPES{'overgo'};
    $cdbi_synonym_type = $CDBI_SYNONYM_TYPES{maizegdb_overgo};
  }
  elsif( $name =~ /^dd/ ){
    $desc = 'Overgos to anonymous EST clusters (DuPont, seq unavailable)';
    $cdbi_marker_type  = $CDBI_MARKER_TYPES{'overgo'};
  }
  elsif( $name =~ /^(SOG)|(AOG)/ ){
    $desc = 'Overgos from the Paterson lab';
    $cdbi_synonym_type = $CDBI_SYNONYM_TYPES{maizegdb_overgo};
    $cdbi_marker_type  = $CDBI_MARKER_TYPES{'overgo'};
  }
  elsif( $name =~ /^(AEST)|(BCD)|(BNL)|(CDO)|(CDSB)|(SDSC)|(CDSR)|
         (CSU)|(HHU)|(PHY)|(RG)|(RZ)|(PRC)|(PSB)|(PSHR)/x ){
    $desc = 'Plasmids from the Paterson lab';
  }

  # Get the marker
  my %cdbi_marker_args = 
      ( MarkerType  => $cdbi_marker_type,
        Analysis    => $cdbi_analysis,
        ( $desc ? (description => $desc) : () ),
        synonyms    => [{ marker_name=>$name,
                          SynonymType=>$cdbi_synonym_type }]);
  my $cdbi_marker = &get_marker(%cdbi_marker_args);

  $MARKER_MARKERS{$name} = $cdbi_marker; # Update cache
  return $cdbi_marker;

}


#----------------------------------------------------------------------
# Returns a CDBI::Markers::Marker obj
# Takes the same args as Marker::DB->create_Marker
sub get_marker{
  my %args = @_;
  my $cdbi_species     = $args{Species}    || undef;
  my $cdbi_marker_type = $args{MarkerType} || undef;

  my @syns = map{$_->{marker_name}} ( $args{marker_name}||(), 
                                      @{$args{synonyms}||[]} );

  my @markers = $MDB->search_marker_synonyms
      ( synonyms   => [@syns ],
        ( $cdbi_species     ? ( Species    => $cdbi_species )     : () ),
        ( $cdbi_marker_type ? ( MarkerType => $cdbi_marker_type ) : () ), );

  my $marker_id;
  if( @markers ){
    my %seen;
    map{ $seen{$_->{marker_id}}++ } @markers;
    if( keys %seen > 1 ){
      prlog( "[WARN][MARKER] Synonyms "
             . join( ", ", @syns )
             . " returned "
             . scalar( @markers )
             . " markers for species "
             . ( $cdbi_species ? $cdbi_species->species : 'ALL' )
             . " and type "
             . ( $cdbi_marker_type ? $cdbi_marker_type->marker_type : 'ANY' )
             );
    }
    # Decide which marker to take. Use the one that matches the first synonym.
    # TODO examine synonyms as well
    my $marker;
    foreach my $syn( @syns ){
      foreach my $mrk( @markers ){
        if( $syn eq $mrk->{marker_name} ){
          $marker = $mrk;
          last;
        }
      }
      if( $marker ){ last }
    }
    $marker ||= $markers[0];
    $marker_id = $marker->{marker_id};
    my $species_id     = $marker->{source_species_id};
    my $marker_type_id = $marker->{marker_type_id};
    $cdbi_species     = $MDB->retrieve_Species( $species_id );
    $cdbi_marker_type = $MDB->retrieve_MarkerType( $marker_type_id );
  }

  my $marker;
  if( $marker_id ){
    prlog( "[INFO][MARKER] updating $syns[0] ("
           . $cdbi_marker_type->marker_type.")" ) if $V;
    if($I){ $marker = $MDB->update_Marker( marker_id => $marker_id, %args ) }
  }
  else{
    $cdbi_marker_type ||= $CDBI_MARKER_TYPES{undefined};
    $cdbi_species     ||= $CDBI_SPECIES{unknown};
    $args{MarkerType} ||= $cdbi_marker_type;
    $args{Species}    ||= $cdbi_species;
    unless( $args{marker_name} ){
      $args{marker_name} = shift @{$args{synonyms}};
    }
    prlog( "[INFO][MARKER] creating $syns[0] ("
          . $cdbi_marker_type->marker_type.")" ) if $V;
    if($I){ $marker = $MDB->create_Marker( %args ) }
  }
  return $marker;
}

#----------------------------------------------------------------------
# 
sub get_mapping{
  my %args = @_;
  # TODO: Should we remove all mappings for this marker_id to this map_id
  # for this analysis_id? This will make it easier to update the maps.
  prlog( "[INFO][MAPPING] Creating between Marker ".
        ( $args{Marker} ? $args{Marker}->display_synonym->marker_name : '???').
        " and Map " .
        ( $args{Map} ? $args{Map}->map_name : '???').
        " ($args{start}". ($args{end} ? "..$args{end}" : '') .")") if $V;

  if($I){ return $MDB->set_marker_mapping(%args) }
  return;
}

#----------------------------------------------------------------------
#
sub get_correspondence{
  # TODO: Whould we remove all corrs of this type between these markers using
  # this analysis? This will make it easier to update the maps.
  my $corrtype = shift || die( "Need a correspondence type" );
  my $cdbi_from_marker = shift;
  my $cdbi_to_marker = shift;
  my $cdbi_corrtype = $CDBI_CORR_TYPES{$corrtype}
    || die( "No analytical_correspondence_type of $corrtype in DB" );
  $V && &prlog( sprintf
          ( "[INFO][CORR] Creating (%s) from ID %s to %s",
            $cdbi_corrtype->type, 
            $cdbi_from_marker ? 
            $cdbi_from_marker->display_synonym->marker_name : '???',
            $cdbi_to_marker  ? 
            $cdbi_to_marker->display_synonym->marker_name  : '???' ));
  if( $I ){
    $MDB->set_correspondence
        ( from_Marker => $cdbi_from_marker,
          to_Marker   => $cdbi_to_marker,
          Analysis    => $CDBI_ANALYSIS,
          AnalyticalCorrespondenceType => $cdbi_corrtype,
          ) || die( $MDB->error );
  }
  return 1;
}

#----------------------------------------------------------------------
# Returns a list of all chromosomes in the FPC file
sub get_chromosome_names{
  $CHR_CTGS || &_process_fpc_data;
  return sort keys( %{$CHR_CTGS} );
}

#----------------------------------------------------------------------
# Returns the length in bp of a given chromosome
sub get_chromosome_length{
  $CHR_LENGTHS || &_process_fpc_data;
  my $chr = shift || die( "Need a chromosome name" );
  return $CHR_LENGTHS->{$chr};
}

#----------------------------------------------------------------------
# Returns a list of all contigs in the FPC file for a given chromosome,
# sorted in order of chromosome location
sub get_contig_names{
  my $chr = shift || die( "Need a chromosome name" );
  $CHR_CTGS || &_process_fpc_data;
  return @{ $CHR_CTGS->{$chr} || []};
}

#----------------------------------------------------------------------
# Returns the length in of a given contig
sub get_contig_length{
  $CTG_LENGTHS || &_process_fpc_data;
  my $ctg = shift || die( "Need a contig name" );
  return $CTG_LENGTHS->{$ctg};
}

#----------------------------------------------------------------------
# Returns the number of clones on a given contig
sub get_clone_count{
  $CLONES_BY_CTG || &_process_fpc_data;
  my $ctg = shift || die( "Need a contig name" );
  return scalar( @{$CLONES_BY_CTG->{$ctg}} );
}

#----------------------------------------------------------------------
# Returns the clones on a given contig
sub get_clones{
  $CLONES_BY_CTG || &_process_fpc_data;  
  my $ctg = shift || die( "Need a contig name" );
  return @{$CLONES_BY_CTG->{$ctg}};
}

#----------------------------------------------------------------------
# Returns the markers on a given contig
sub get_markers{
  $MARKERS_BY_CTG || &_process_fpc_data;
  my $ctg = shift || die( "Need a contig name" );
  return @{$MARKERS_BY_CTG->{$ctg}};
}

#----------------------------------------------------------------------
#
sub prlog{
  my $message = shift;
  warn( $message."\n" );
#  print LOG $message."\n";
}
