-- Denormalized Auxiliary Tables for Panzea Database
--
-- This SQL file generates auxiliary tables used by some
-- searches.
--
-- ********************************************************************
-- *** DO NOT SOURCE THIS FILE DIRECTLY AGAINST THE GDPDM DATABASE  ***
-- ***                                                              ***
-- *** There are scripted sections that need to be run in between,  ***
-- *** which are automated by the auxiliary_tables.pl script.       ***
-- ********************************************************************
--
-- $Id: auxiliary_tables.sql,v 1.1.2.1 2007/06/14 19:03:44 kclark Exp $
--
-- Author: Payan Canaran <canaran@cshl.edu>
-- Copyright 2006-2007 Cold Spring Harbor Laboratory

----------------------------------------------------------------------------
-- This table contains a row for each div_allele (an assay value).
-- Primary keys from cdv_map_feature to div_taxonomy are included
-- in the table. Additionally, div_scoring_tech_type_id is included
-- for easy access to div_scoring_tech_type table.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_feature_to_taxonomy so it can be rebuilt.
-- LABEL:drop_aux_feature_to_taxonomy
DROP TABLE IF EXISTS `aux_feature_to_taxonomy`;
-- END

-- Define aux_feature_to_taxonomy: an auto-increment surrogate key followed by
-- ten nullable id columns, each covered by its own single-column index.
-- LABEL:create_aux_feature_to_taxonomy
CREATE TABLE `aux_feature_to_taxonomy` (
    `aux_feature_to_taxonomy_id` INT(11) NOT NULL AUTO_INCREMENT,
    `cdv_map_feature_id` INT(11),
    `cdv_marker_id` INT(11),
    `div_allele_assay_id` INT(11),
    `div_scoring_tech_type_id` INT(11),
    `div_allele_id` INT(11),
    `div_obs_unit_sample_id` INT(11),
    `div_obs_unit_id` INT(11),
    `div_stock_id` INT(11),
    `div_passport_id` INT(11),
    `div_taxonomy_id` INT(11),
    PRIMARY KEY (`aux_feature_to_taxonomy_id`),
    KEY `aux_idx_cdv_map_feature_id` (`cdv_map_feature_id`),
    KEY `aux_idx_cdv_marker_id` (`cdv_marker_id`),
    KEY `aux_idx_div_allele_assay_id` (`div_allele_assay_id`),
    KEY `aux_idx_div_scoring_tech_type_id` (`div_scoring_tech_type_id`),
    KEY `aux_idx_div_allele_id` (`div_allele_id`),
    KEY `aux_idx_div_obs_unit_sample_id` (`div_obs_unit_sample_id`),
    KEY `aux_idx_div_obs_unit_id` (`div_obs_unit_id`),
    KEY `aux_div_stock_id` (`div_stock_id`), -- NOTE(review): name lacks the idx part used by the sibling indexes, kept unchanged
    KEY `aux_idx_div_passport_id` (`div_passport_id`),
    KEY `aux_idx_div_taxonomy_id` (`div_taxonomy_id`)
) ENGINE=MyISAM;
-- END

-- Fill aux_feature_to_taxonomy with the id columns gathered along the join
-- chain that starts at div_taxonomy and ends at cdv_map_feature.
-- Every link of that chain is a RIGHT JOIN, so the table named on the right of
-- each link is preserved and the ids of tables further left are NULL where no
-- match exists.
-- div_scoring_tech_type is joined last, but the stored div_scoring_tech_type_id
-- is read from div_allele_assay and no dstt column is selected.
-- LABEL:populate_aux_feature_to_taxonomy
INSERT INTO `aux_feature_to_taxonomy` (
    cdv_map_feature_id,
    cdv_marker_id,
    div_allele_assay_id,
    div_scoring_tech_type_id,
    div_allele_id,
    div_obs_unit_sample_id,
    div_obs_unit_id,
    div_stock_id,
    div_passport_id,
    div_taxonomy_id
)
SELECT
    cmf.cdv_map_feature_id,
    cm.cdv_marker_id,
    daa.div_allele_assay_id,
    daa.div_scoring_tech_type_id,
    da.div_allele_id,
    dous.div_obs_unit_sample_id,
    dou.div_obs_unit_id,
    ds.div_stock_id,
    dp.div_passport_id,
    dt.div_taxonomy_id
FROM div_taxonomy AS dt
    RIGHT JOIN div_passport AS dp USING (div_taxonomy_id)
    RIGHT JOIN div_stock AS ds USING (div_passport_id)
    RIGHT JOIN div_obs_unit AS dou USING (div_stock_id)
    RIGHT JOIN div_obs_unit_sample AS dous USING (div_obs_unit_id)
    RIGHT JOIN div_allele AS da USING (div_obs_unit_sample_id)
    RIGHT JOIN div_allele_assay AS daa USING (div_allele_assay_id)
    RIGHT JOIN cdv_marker AS cm USING (cdv_marker_id)
    RIGHT JOIN cdv_map_feature AS cmf USING (cdv_map_feature_id)
    LEFT JOIN div_scoring_tech_type AS dstt
        ON dstt.div_scoring_tech_type_id = daa.div_scoring_tech_type_id;
-- END

----------------------------------------------------------------------------
-- This table contains a row for each div_trait (a trait value).
-- Primary keys from div_trait_uom to div_passport are included
-- in the table. Additionally, div_locality_id is included
-- for easy access to div_locality table.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_uom_to_passport so it can be rebuilt.
-- LABEL:drop_aux_uom_to_passport
DROP TABLE IF EXISTS `aux_uom_to_passport`;
-- END

-- Define aux_uom_to_passport: an auto-increment surrogate key followed by six
-- nullable id columns, each covered by its own single-column index.
-- LABEL:create_aux_uom_to_passport
CREATE TABLE `aux_uom_to_passport` (
    `aux_uom_to_passport_id` INT(11) NOT NULL AUTO_INCREMENT,
    `div_trait_uom_id` INT(11),
    `div_trait_id` INT(11),
    `div_obs_unit_id` INT(11),
    `div_locality_id` INT(11),
    `div_stock_id` INT(11),
    `div_passport_id` INT(11),
    PRIMARY KEY (`aux_uom_to_passport_id`),
    KEY `aux_idx_div_trait_uom_id` (`div_trait_uom_id`),
    KEY `aux_idx_div_trait_id` (`div_trait_id`),
    KEY `aux_idx_div_obs_unit_id` (`div_obs_unit_id`),
    KEY `aux_idx_div_locality_id` (`div_locality_id`),
    KEY `aux_idxdiv_stock_id` (`div_stock_id`), -- NOTE(review): underscore after idx is missing in this name, kept unchanged
    KEY `aux_idx_div_passport_id` (`div_passport_id`)
) ENGINE=MyISAM;
-- END

-- Fill aux_uom_to_passport starting from div_trait.
-- The RIGHT JOIN keeps every div_trait row even without a div_trait_uom match,
-- and the LEFT JOINs that follow add obs unit, locality, stock and passport ids
-- where they exist (NULL otherwise).
-- LABEL:populate_aux_uom_to_passport
INSERT INTO `aux_uom_to_passport` (
    div_trait_uom_id,
    div_trait_id,
    div_obs_unit_id,
    div_locality_id,
    div_stock_id,
    div_passport_id
)
SELECT
    dtu.div_trait_uom_id,
    dt.div_trait_id,
    dou.div_obs_unit_id,
    dl.div_locality_id,
    ds.div_stock_id,
    dp.div_passport_id
FROM div_trait_uom AS dtu
    RIGHT JOIN div_trait AS dt USING (div_trait_uom_id)
    LEFT JOIN div_obs_unit AS dou USING (div_obs_unit_id)
    LEFT JOIN div_locality AS dl USING (div_locality_id)
    LEFT JOIN div_stock AS ds ON ds.div_stock_id = dou.div_stock_id
    LEFT JOIN div_passport AS dp USING (div_passport_id);
-- END

----------------------------------------------------------------------------
-- This table pivots the marker annotations that are displayed on the website.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_marker_annotations so it can be rebuilt.
-- LABEL:drop_aux_marker_annotations
DROP TABLE IF EXISTS `aux_marker_annotations`;
-- END

-- Define aux_marker_annotations: one column per pivoted annotation type
-- (marker_type, allele_repeat, allele_repeat_size) plus the owning
-- cdv_marker_id, which is indexed.
-- LABEL:create_aux_marker_annotations
CREATE TABLE `aux_marker_annotations` (
    `aux_marker_annotations_id` INT(11) NOT NULL AUTO_INCREMENT,
    `cdv_marker_id` INT(11),
    `marker_type` VARCHAR(30),
    `allele_repeat` TEXT,
    `allele_repeat_size` TEXT,
    PRIMARY KEY (`aux_marker_annotations_id`),
    KEY `aux_idx_cdv_marker_id` (`cdv_marker_id`)
) ENGINE=MyISAM;
-- END

-- Pivot cdv_marker_annotation into one row per cdv_marker_id.
-- The inner query spreads each annotation value into the column that matches
-- its anno_type (NULL in the other columns). The outer query then collapses
-- the rows of a marker with GROUP_CONCAT, which skips NULLs and joins several
-- values of the same type with the ::SEPARATOR:: token.
-- NOTE(review): GROUP_CONCAT output is limited by group_concat_max_len - confirm
-- the server setting is large enough for the text columns.
-- LABEL:populate_aux_marker_annotations
INSERT INTO `aux_marker_annotations` (
    cdv_marker_id,
    marker_type,
    allele_repeat,
    allele_repeat_size
)
SELECT
    pivot.cdv_marker_id,
    GROUP_CONCAT(pivot.marker_type SEPARATOR '::SEPARATOR::') AS marker_type,
    GROUP_CONCAT(pivot.allele_repeat SEPARATOR '::SEPARATOR::') AS allele_repeat,
    GROUP_CONCAT(pivot.allele_repeat_size SEPARATOR '::SEPARATOR::') AS allele_repeat_size
FROM (
    SELECT
        cma.cdv_marker_id AS cdv_marker_id,
        CASE WHEN cmat.anno_type = 'marker_type'
             THEN cma.annotation_value END AS marker_type,
        CASE WHEN cmat.anno_type = 'allele_repeat'
             THEN cma.annotation_value END AS allele_repeat,
        CASE WHEN cmat.anno_type = 'allele_repeat_size'
             THEN cma.annotation_value END AS allele_repeat_size
    FROM cdv_marker_annotation AS cma
        INNER JOIN cdv_marker_annotation_type AS cmat
            USING (cdv_marker_annotation_type_id)
) AS pivot
GROUP BY pivot.cdv_marker_id;
-- END

----------------------------------------------------------------------------
-- This table pivots the assay annotations that are displayed on the website.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_assay_annotations so it can be rebuilt.
-- LABEL:drop_aux_assay_annotations
DROP TABLE IF EXISTS `aux_assay_annotations`;
-- END

-- Define aux_assay_annotations: the pivoted primer1 and primer2 annotations
-- plus the owning div_allele_assay_id, which is indexed.
-- LABEL:create_aux_assay_annotations
CREATE TABLE `aux_assay_annotations` (
    `aux_assay_annotations_id` INT(11) NOT NULL AUTO_INCREMENT,
    `div_allele_assay_id` INT(11),
    `primer1` TEXT,
    `primer2` TEXT,
    PRIMARY KEY (`aux_assay_annotations_id`),
    KEY `aux_idx_div_allele_assay_id` (`div_allele_assay_id`)
) ENGINE=MyISAM;
-- END

-- Pivot div_aa_annotation into one row per div_allele_assay_id.
-- The inner query routes each annotation value into the primer1 or primer2
-- column according to its anno_type (NULL otherwise). The outer query collapses
-- the rows of an assay with GROUP_CONCAT, joining several values of the same
-- type with the ::SEPARATOR:: token.
-- LABEL:populate_aux_assay_annotations
INSERT INTO `aux_assay_annotations` (
    div_allele_assay_id,
    primer1,
    primer2
)
SELECT
    pivot.div_allele_assay_id,
    GROUP_CONCAT(pivot.primer1 SEPARATOR '::SEPARATOR::') AS primer1,
    GROUP_CONCAT(pivot.primer2 SEPARATOR '::SEPARATOR::') AS primer2
FROM (
    SELECT
        daaa.div_allele_assay_id AS div_allele_assay_id,
        CASE WHEN dat.anno_type = 'primer1'
             THEN daaa.annotation_value END AS primer1,
        CASE WHEN dat.anno_type = 'primer2'
             THEN daaa.annotation_value END AS primer2
    FROM div_aa_annotation AS daaa
        INNER JOIN div_annotation_type AS dat USING (div_annotation_type_id)
) AS pivot
GROUP BY pivot.div_allele_assay_id;
-- END

----------------------------------------------------------------------------
-- This table pivots the feature annotations that are displayed on the website.
-- It re-creates the previously used cdv_map_details table
----------------------------------------------------------------------------

-- mysql> desc cdv_map_details;
-- +--------------------+--------------+------+-----+---------+----------------+
-- | Field              | Type         | Null | Key | Default | Extra          |
-- +--------------------+--------------+------+-----+---------+----------------+
-- | cdv_map_details_id | int(11)      |      | PRI | NULL    | auto_increment |
-- | cdv_map_feature_id | int(11)      | YES  | MUL | NULL    |                |
-- | pioneer_chr        | int(11)      | YES  |     | NULL    |                |
-- | comments           | varchar(255) | YES  |     | NULL    |                |
-- | genetic_bin        | varchar(50)  | YES  |     | NULL    |                |
-- | pioneer_position   | double       | YES  |     | NULL    |                |
-- | locus_type         | varchar(50)  | YES  |     | NULL    |                |
-- | name               | varchar(50)  | YES  |     | NULL    |                |
-- | physical_position  | double       | YES  |     | NULL    |                |
-- | unigene_name       | varchar(50)  | YES  |     | NULL    |                |
-- | ibm2_chr           | int(2)       | YES  |     | NULL    |                |
-- | ibm2_position      | double       | YES  |     | NULL    |                |
-- | fpc_contig         | varchar(10)  | YES  |     | NULL    |                |
-- | fpc_chr            | int(2)       | YES  |     | NULL    |                |
-- | fpc_start          | double       | YES  |     | NULL    |                |
-- | fpc_stop           | double       | YES  |     | NULL    |                |
-- | all_fpc_positions  | text         | YES  |     | NULL    |                |
-- | candidate          | char(1)      | YES  |     | NULL    |                |
-- +--------------------+--------------+------+-----+---------+----------------+

-- mysql> select * from cdv_map_feature_annotation_type;
-- +------------------------------------+--------------------+
-- | cdv_map_feature_annotation_type_id | anno_type          |
-- +------------------------------------+--------------------+
-- |                                  1 | ref_seq_cDNA       |
-- |                                 12 | fpc_chr            |
-- |                                 11 | fpc_contig         |
-- |                                 10 | ibm2_position      |
-- |                                  9 | ibm2_chr           |
-- |                                  8 | unigene_name       |
-- |                                  7 | physical_position  |
-- |                                  6 | locus_type         |
-- |                                  5 | pioneer_position   |
-- |                                  4 | genetic_bin        |
-- |                                  3 | comments           |
-- |                                  2 | pioneer_chr        |
-- |                                 13 | fpc_start          |
-- |                                 14 | fpc_stop           |
-- |                                 15 | all_fpc_positions  |
-- |                                 16 | candidate          |
-- |                                 17 | ref_seq_genomic    |
-- |                                 18 | ibm2_2005_position |
-- |                                 19 | ibm2_2005_bin      |
-- +------------------------------------+--------------------+

-- Remove any earlier copy of aux_map_info so it can be rebuilt.
-- LABEL:drop_aux_map_info
DROP TABLE IF EXISTS `aux_map_info`;
-- END

-- Define aux_map_info: one column per pivoted map feature annotation, plus
-- derived ibm2_2005 columns and sort_ columns that the populate statement in
-- this file leaves empty (see the trailing remarks on those columns).
-- LABEL:create_aux_map_info
CREATE TABLE `aux_map_info` (
    `aux_map_info_id` INT(11) NOT NULL AUTO_INCREMENT,
    `cdv_map_feature_id` INT(11),
    `name_gene_locus` VARCHAR(50),
    `pioneer_chr` INT(11),
    `comments` VARCHAR(255),
    `ibm2_2005_bin` DOUBLE, -- will not be populated by SQL file
    `all_ibm2_2005_bins` TEXT, -- note the name change
    `pioneer_position` DOUBLE,
    `locus_type` VARCHAR(50),
    `name` VARCHAR(50),
    `physical_position` DOUBLE,
    `unigene_name` VARCHAR(50),
    `ibm2_2005_chr` VARCHAR(10), -- will not be populated by SQL file
    `ibm2_2005_position` DOUBLE, -- will not be populated by SQL file
    `all_ibm2_2005_positions` TEXT, -- note the name change
    `fpc_contig` VARCHAR(10),
    `fpc_chr` VARCHAR(10),
    `fpc_start` VARCHAR(10),
    `fpc_stop` VARCHAR(10),
    `all_fpc_positions` TEXT,
    `candidate` CHAR(1),
    `sort_ibm2_2005_bin` DOUBLE, -- (sort field) will not be populated by SQL file
    `sort_ibm2_2005_chr` DOUBLE, -- (sort field) will not be populated by SQL file
    `sort_ibm2_2005_position` DOUBLE, -- (sort field) will not be populated by SQL file
    `sort_fpc_contig` DOUBLE, -- (sort field) will not be populated by SQL file
    `sort_fpc_chr` DOUBLE, -- (sort field) will not be populated by SQL file
    `sort_fpc_start` DOUBLE, -- (sort field) will not be populated by SQL file
    `sort_fpc_stop` DOUBLE, -- (sort field) will not be populated by SQL file
    PRIMARY KEY (`aux_map_info_id`),
    KEY `aux_idx_cdv_map_feature_id` (`cdv_map_feature_id`),
    KEY `aux_idx_name_gene_locus` (`name_gene_locus`),
    KEY `aux_idx_sort_ibm2_2005_bin` (`sort_ibm2_2005_bin`),
    KEY `aux_idx_sort_ibm2_2005_chr` (`sort_ibm2_2005_chr`),
    KEY `aux_idx_sort_ibm2_2005_position` (`sort_ibm2_2005_position`),
    KEY `aux_idx_sort_fpc_contig` (`sort_fpc_contig`),
    KEY `aux_idx_sort_fpc_chr` (`sort_fpc_chr`),
    KEY `aux_idx_sort_fpc_start` (`sort_fpc_start`),
    KEY `aux_idx_sort_fpc_stop` (`sort_fpc_stop`)
) ENGINE=MyISAM;
-- END

-- Pivot cdv_map_feature_annotation into one row per cdv_map_feature_id.
-- The inner query starts from cdv_map_feature and LEFT JOINs the annotations,
-- so a feature without annotations still yields a row. Each annotation value is
-- routed into the column matching its anno_type (NULL in the other columns).
-- The outer query collapses the rows of a feature with GROUP_CONCAT, joining
-- several values of the same type with the ::SEPARATOR:: token.
-- The ibm2_2005_bin and ibm2_2005_position annotations are stored under the
-- names all_ibm2_2005_bins and all_ibm2_2005_positions with double quotes removed.
-- NOTE(review): concatenated text lands in typed columns such as pioneer_chr
-- INT(11) - confirm a feature never carries two values of such a type.
-- LABEL:populate_aux_map_info
INSERT INTO `aux_map_info` (
    cdv_map_feature_id,
    name_gene_locus,
    pioneer_chr,
    comments,
    all_ibm2_2005_bins, -- note the name change
    pioneer_position,
    locus_type,
    name,
    physical_position,
    unigene_name,
    all_ibm2_2005_positions, -- note the name change
    fpc_contig,
    fpc_chr,
    fpc_start,
    fpc_stop,
    all_fpc_positions,
    candidate
)
SELECT
    pivot.cdv_map_feature_id,
    pivot.name_gene_locus,
    GROUP_CONCAT(pivot.pioneer_chr SEPARATOR '::SEPARATOR::') AS pioneer_chr,
    GROUP_CONCAT(pivot.comments SEPARATOR '::SEPARATOR::') AS comments,
    REPLACE(
        GROUP_CONCAT(pivot.all_ibm2_2005_bins SEPARATOR '::SEPARATOR::'),
        '"', ''
    ) AS all_ibm2_2005_bins, -- note the name change, double quotes removed
    GROUP_CONCAT(pivot.pioneer_position SEPARATOR '::SEPARATOR::') AS pioneer_position,
    GROUP_CONCAT(pivot.locus_type SEPARATOR '::SEPARATOR::') AS locus_type,
    GROUP_CONCAT(pivot.name SEPARATOR '::SEPARATOR::') AS name,
    GROUP_CONCAT(pivot.physical_position SEPARATOR '::SEPARATOR::') AS physical_position,
    GROUP_CONCAT(pivot.unigene_name SEPARATOR '::SEPARATOR::') AS unigene_name,
    REPLACE(
        GROUP_CONCAT(pivot.all_ibm2_2005_positions SEPARATOR '::SEPARATOR::'),
        '"', ''
    ) AS all_ibm2_2005_positions, -- note the name change, double quotes removed
    GROUP_CONCAT(pivot.fpc_contig SEPARATOR '::SEPARATOR::') AS fpc_contig,
    GROUP_CONCAT(pivot.fpc_chr SEPARATOR '::SEPARATOR::') AS fpc_chr,
    GROUP_CONCAT(pivot.fpc_start SEPARATOR '::SEPARATOR::') AS fpc_start,
    GROUP_CONCAT(pivot.fpc_stop SEPARATOR '::SEPARATOR::') AS fpc_stop,
    GROUP_CONCAT(pivot.all_fpc_positions SEPARATOR '::SEPARATOR::') AS all_fpc_positions,
    GROUP_CONCAT(pivot.candidate SEPARATOR '::SEPARATOR::') AS candidate
FROM (
    SELECT
        cmf.cdv_map_feature_id AS cdv_map_feature_id,
        cmf.name AS name_gene_locus,
        CASE WHEN cmfat.anno_type = 'pioneer_chr'
             THEN cmfa.annotation_value END AS pioneer_chr,
        CASE WHEN cmfat.anno_type = 'comments'
             THEN cmfa.annotation_value END AS comments,
        CASE WHEN cmfat.anno_type = 'ibm2_2005_bin'
             THEN cmfa.annotation_value END AS all_ibm2_2005_bins, -- note the name change
        CASE WHEN cmfat.anno_type = 'pioneer_position'
             THEN cmfa.annotation_value END AS pioneer_position,
        CASE WHEN cmfat.anno_type = 'locus_type'
             THEN cmfa.annotation_value END AS locus_type,
        CASE WHEN cmfat.anno_type = 'name'
             THEN cmfa.annotation_value END AS name,
        CASE WHEN cmfat.anno_type = 'physical_position'
             THEN cmfa.annotation_value END AS physical_position,
        CASE WHEN cmfat.anno_type = 'unigene_name'
             THEN cmfa.annotation_value END AS unigene_name,
        CASE WHEN cmfat.anno_type = 'ibm2_2005_position'
             THEN cmfa.annotation_value END AS all_ibm2_2005_positions, -- note the name change
        CASE WHEN cmfat.anno_type = 'fpc_contig'
             THEN cmfa.annotation_value END AS fpc_contig,
        CASE WHEN cmfat.anno_type = 'fpc_chr'
             THEN cmfa.annotation_value END AS fpc_chr,
        CASE WHEN cmfat.anno_type = 'fpc_start'
             THEN cmfa.annotation_value END AS fpc_start,
        CASE WHEN cmfat.anno_type = 'fpc_stop'
             THEN cmfa.annotation_value END AS fpc_stop,
        CASE WHEN cmfat.anno_type = 'all_fpc_positions'
             THEN cmfa.annotation_value END AS all_fpc_positions,
        CASE WHEN cmfat.anno_type = 'candidate'
             THEN cmfa.annotation_value END AS candidate
    FROM cdv_map_feature AS cmf
        LEFT JOIN cdv_map_feature_annotation AS cmfa
            USING (cdv_map_feature_id)
        LEFT JOIN cdv_map_feature_annotation_type AS cmfat
            USING (cdv_map_feature_annotation_type_id)
) AS pivot
GROUP BY pivot.cdv_map_feature_id;
-- END

----------------------------------------------------------------------------
-- The following subroutines must be executed to format tables further before
-- proceeding. Please refer to documentation of auxiliary_tables.pl script.
----------------------------------------------------------------------------

-- LABEL:sub:process_map_info
-- END

----------------------------------------------------------------------------
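-- This table contains a row for each cdv_map_feature_id in cdv_map_feature.
-- For each cdv_map_feature_id, a string is provided that contains the distinct
-- marker_type values of its markers, read from aux_marker_annotations, sorted
-- and separated by colons. If the feature has no marker_type, a null value is
-- provided.
-- NOTE(review): this description used to say that marker_type comes from
-- aux_map_info, that a colon is appended to the beginning and end of the
-- string, and that only marker types with at least one available assay are
-- listed. The populate statement below reads aux_marker_annotations, adds no
-- leading or trailing colon and does not check for assays - confirm which
-- behavior the searches expect.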
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_feature_by_marker_type so it can be rebuilt.
-- LABEL:drop_aux_feature_by_marker_type
DROP TABLE IF EXISTS `aux_feature_by_marker_type`;
-- END

-- Define aux_feature_by_marker_type: a surrogate key, the cdv_map_feature_id
-- (indexed) and the concatenated marker type string for that feature.
-- NOTE(review): the key column carries the table name without the _id suffix
-- used by the other aux tables, kept unchanged for existing readers.
-- LABEL:create_aux_feature_by_marker_type
CREATE TABLE `aux_feature_by_marker_type` (
    `aux_feature_by_marker_type` INT(11) NOT NULL AUTO_INCREMENT,
    `cdv_map_feature_id` INT(11),
    `marker_type_string` VARCHAR(255),
    PRIMARY KEY (`aux_feature_by_marker_type`),
    KEY `aux_idx_cdv_map_feature_id` (`cdv_map_feature_id`)
) ENGINE=MyISAM;
-- END

-- Build one row per cdv_map_feature_id holding its distinct marker_type values
-- (taken from aux_marker_annotations through cdv_marker) sorted ascending and
-- joined with a single colon.
-- The RIGHT JOIN chain preserves every cdv_map_feature row, so a feature with
-- no marker_type gets a NULL string because GROUP_CONCAT skips NULLs.
-- No colon is added before the first or after the last value.
-- LABEL:populate_aux_feature_by_marker_type
INSERT INTO `aux_feature_by_marker_type` (
    cdv_map_feature_id,
    marker_type_string
)
SELECT
    feature_types.cdv_map_feature_id,
    GROUP_CONCAT(
        feature_types.marker_type
        ORDER BY feature_types.marker_type ASC
        SEPARATOR ':'
    )
FROM (
    SELECT DISTINCT
        cmf.cdv_map_feature_id AS cdv_map_feature_id,
        ama.marker_type AS marker_type
    FROM aux_marker_annotations AS ama
        RIGHT JOIN cdv_marker AS cm USING (cdv_marker_id)
        RIGHT JOIN cdv_map_feature AS cmf USING (cdv_map_feature_id)
) AS feature_types
GROUP BY feature_types.cdv_map_feature_id;
-- END

----------------------------------------------------------------------------
-- This table contains one row for each genotype value cluster (div_allele.value)
-- grouped by daa.div_allele_assay_id and dou.div_obs_unit_id. This is a rather
-- large table that is crossed by itself for the polymorphic_between_accessions
-- script, so all side information is packed in the table to eliminate additional
-- joins.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_genotype_by_accession so it can be rebuilt.
-- LABEL:drop_aux_genotype_by_accession
DROP TABLE IF EXISTS `aux_genotype_by_accession`;
-- END

-- Define aux_genotype_by_accession.
-- cdv_marker_id and cdv_map_feature_id are integer columns because they are
-- filled from integer ids and are compared against integer ids in joins, an
-- integer type avoids implicit string to number conversion on those joins.
-- The sort_ibm2_2005_* columns are DOUBLE to match the aux_map_info columns
-- they are copied from, an integer type would round fractional sort values.
-- sorted_genotype, formatted_genotype and resolved_genotype are not filled by
-- the populate statement of this file.
-- LABEL:create_aux_genotype_by_accession
CREATE TABLE `aux_genotype_by_accession` (
    `aux_genotype_by_accession_id` int(11) NOT NULL auto_increment,
    `cdv_marker_id`                int(11),
    `aux_map_info_id`              int(11),
    `marker_name`                  varchar(30),
    `cdv_map_feature_id`           int(11),
    `ibm2_2005_bin`                double,
    `ibm2_2005_chr`                varchar(10),
    `ibm2_2005_position`           double,
    `feature_name`                 varchar(30),
    `div_allele_assay_id`          int(11),
    `scoring_tech_group`           varchar(30),
    `marker_type`                  varchar(30),
    `genotype_string`              text,
    `sorted_genotype`              text,
    `formatted_genotype`           text,
    `resolved_genotype`            text,
    `allele_count`                 int(11),
    `div_obs_unit_id`              int(11),
    `accename`                     varchar(30),
    `source`                       varchar(30),
    `sampstat`                     varchar(30),
    `genus`                        varchar(30),
    `species`                      varchar(30),
    `subspecies`                   varchar(30),
    `sort_ibm2_2005_bin`           double,
    `sort_ibm2_2005_chr`           double,
    `sort_ibm2_2005_position`      double,
    PRIMARY KEY                 (`aux_genotype_by_accession_id`),
    KEY                         `aux_cdv_marker_id`           (`cdv_marker_id`),
    KEY                         `aux_cdv_marker_id_m1`        (`cdv_marker_id`,`accename`),
    KEY                         `aux_cdv_marker_id_m2`        (`cdv_marker_id`,`marker_type`),
    KEY                         `aux_idx_marker_name`         (`marker_name`),
    KEY                         `aux_cdv_map_feature_id`      (`cdv_map_feature_id`),
    KEY                         `aux_idx_ibm2_2005_chr`       (`ibm2_2005_chr`),
    KEY                         `aux_idx_ibm2_2005_position`  (`ibm2_2005_position`),
    KEY                         `aux_idx_feature_name`        (`feature_name`),
    KEY                         `aux_idx_div_allele_assay_id` (`div_allele_assay_id`),
    KEY                         `aux_idx_scoring_tech_group`  (`scoring_tech_group`),
    KEY                         `aux_idx_marker_type`         (`marker_type`),
    KEY                         `aux_idx_marker_type_m1`      (`marker_type`, `accename`),
    KEY                         `aux_idx_allele_count`        (`allele_count`),
    KEY                         `aux_idx_div_obs_unit_id`     (`div_obs_unit_id`),
    KEY                         `aux_div_accename`            (`accename`),
    KEY                         `aux_idx_sort_ibm2_2005_bin`      (`sort_ibm2_2005_bin`),
    KEY                         `aux_idx_sort_ibm2_2005_chr`      (`sort_ibm2_2005_chr`),
    KEY                         `aux_idx_sort_ibm2_2005_position` (`sort_ibm2_2005_position`)
    ) ENGINE=MyISAM;
-- END

-- Fill aux_genotype_by_accession with one row per (cdv_marker_id,
-- div_obs_unit_id) pair. genotype_string lists every allele of the group as
-- assay_id:value joined by commas, and allele_count is the row count of the group.
-- String literals use single quotes so the statement also parses when the
-- server runs with sql_mode ANSI_QUOTES, where double quotes mark identifiers.
-- The WHERE clause and the INNER JOIN on cdv_marker_annotation_type discard
-- rows whose assay, marker, scoring tech type or marker annotation is missing,
-- so the LEFT JOINs on those tables behave like inner joins here.
-- NOTE(review): several selected columns (for example daa.div_allele_assay_id
-- and the ami columns) are neither aggregated nor part of GROUP BY. MySQL picks
-- the value from an arbitrary row of the group, and may reject the statement
-- when ONLY_FULL_GROUP_BY is enabled.
-- NOTE(review): GROUP_CONCAT output is limited by group_concat_max_len - confirm
-- the server setting is large enough for genotype_string.
-- LABEL:populate_aux_genotype_by_accession
INSERT INTO `aux_genotype_by_accession`
(cdv_marker_id, aux_map_info_id, marker_name, cdv_map_feature_id, ibm2_2005_bin, ibm2_2005_chr, ibm2_2005_position, feature_name,
div_allele_assay_id, scoring_tech_group, marker_type, genotype_string, allele_count,
div_obs_unit_id, accename, source, sampstat, genus, species,
subspecies, sort_ibm2_2005_bin, sort_ibm2_2005_chr, sort_ibm2_2005_position)
SELECT
cm.cdv_marker_id, ami.aux_map_info_id, cm.name, cmf.cdv_map_feature_id, ami.ibm2_2005_bin, ami.ibm2_2005_chr, ami.ibm2_2005_position, cmf.name,
daa.div_allele_assay_id, dstt.scoring_tech_group, cma.annotation_value as marker_type,
GROUP_CONCAT(CONCAT(daa.div_allele_assay_id, ':', da.value) SEPARATOR ',') as genotype_string,
count(*) as allele_count, dou.div_obs_unit_id,
dp.accename, dp.source, dp.sampstat,
dt.genus, dt.species, dt.subspecies,
ami.sort_ibm2_2005_bin, ami.sort_ibm2_2005_chr, ami.sort_ibm2_2005_position
FROM
div_passport dp
RIGHT JOIN div_stock ds USING (div_passport_id)
RIGHT JOIN div_obs_unit dou USING (div_stock_id)
RIGHT JOIN div_obs_unit_sample dous USING (div_obs_unit_id)
RIGHT JOIN div_allele da USING (div_obs_unit_sample_id)
LEFT  JOIN div_allele_assay daa USING (div_allele_assay_id)
LEFT  JOIN cdv_marker cm USING (cdv_marker_id)
LEFT  JOIN div_scoring_tech_type dstt ON (daa.div_scoring_tech_type_id = dstt.div_scoring_tech_type_id)
LEFT  JOIN cdv_marker_annotation cma ON (cma.cdv_marker_id = cm.cdv_marker_id)
INNER JOIN cdv_marker_annotation_type cmat ON (cmat.cdv_marker_annotation_type_id = cma.cdv_marker_annotation_type_id)
LEFT  JOIN div_taxonomy dt ON (dt.div_taxonomy_id = dp.div_taxonomy_id)
LEFT  JOIN cdv_map_feature cmf ON (cmf.cdv_map_feature_id = cm.cdv_map_feature_id)
LEFT  JOIN aux_map_info ami ON (ami.cdv_map_feature_id = cmf.cdv_map_feature_id)
WHERE
dstt.scoring_tech_group IN ('SSR', 'SNP') AND
cmat.anno_type = 'marker_type'
GROUP BY cm.cdv_marker_id, dou.div_obs_unit_id;
-- END

----------------------------------------------------------------------------
-- The following subroutines must be executed to format tables further before
-- proceeding. Please refer to documentation of auxiliary_tables.pl script.
----------------------------------------------------------------------------

-- LABEL:sub:format_genotypes
-- END

----------------------------------------------------------------------------
-- This table contains counts of accession1, accession2, marker_type for step 2 of
-- Polymorphic Between Two Accessions search.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_genotype_by_accession_count so it can be rebuilt.
-- LABEL:drop_aux_genotype_by_accession_count
DROP TABLE IF EXISTS `aux_genotype_by_accession_count`;
-- END

-- Define aux_genotype_by_accession_count: a pair of accession names, a marker
-- type and a count, each covered by an unnamed single-column index.
-- No statement in this file fills the table.
-- LABEL:create_aux_genotype_by_accession_count
CREATE TABLE `aux_genotype_by_accession_count` (
    `aux_genotype_by_accession_count_id` INT(11) NOT NULL AUTO_INCREMENT,
    `accename1` VARCHAR(30),
    `accename2` VARCHAR(30),
    `marker_type` VARCHAR(30),
    `count_accename` INT(11),
    PRIMARY KEY (`aux_genotype_by_accession_count_id`),
    KEY (`accename1`),
    KEY (`accename2`),
    KEY (`marker_type`),
    KEY (`count_accename`)
) ENGINE=MyISAM;
-- END

-- *** Currently disabled ***
--INSERT INTO `aux_genotype_by_accession_count`
--(accename1, accename2, marker_type, count_accename)
--
--SELECT agba_a.accename, agba_b.accename, agba_a.marker_type, count(*)
--FROM
--aux_genotype_by_accession agba_a
--JOIN aux_genotype_by_accession agba_b ON (agba_b.cdv_marker_id = agba_a.cdv_marker_id)
--WHERE
--agba_a.accename != agba_b.accename
--GROUP by agba_a.accename, agba_b.accename, agba_a.marker_type;

----------------------------------------------------------------------------
-- The following subroutines must be executed to format tables further before
-- proceeding. Please refer to documentation of auxiliary_tables.pl script.
----------------------------------------------------------------------------

-- LABEL:sub:make_genotype_counts
-- END

----------------------------------------------------------------------------
-- This table contains a summary of aux_genotype_by_accession optimized for step 1 of
-- Polymorphic Between Two Accessions search.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_genotype_by_accession_summary so it can be rebuilt.
-- LABEL:drop_aux_genotype_by_accession_summary
DROP TABLE IF EXISTS `aux_genotype_by_accession_summary`;
-- END

-- Define aux_genotype_by_accession_summary: marker type plus the descriptive
-- accession columns, with indexes on marker_type and accename.
-- LABEL:create_aux_genotype_by_accession_summary
CREATE TABLE `aux_genotype_by_accession_summary` (
    `aux_genotype_by_accession_summary_id` INT(11) NOT NULL AUTO_INCREMENT,
    `marker_type` VARCHAR(30),
    `accename` VARCHAR(30),
    `source` VARCHAR(30),
    `sampstat` VARCHAR(30),
    `genus` VARCHAR(30),
    `species` VARCHAR(30),
    `subspecies` VARCHAR(30),
    PRIMARY KEY (`aux_genotype_by_accession_summary_id`),
    KEY `aux_idx_marker_type` (`marker_type`),
    KEY `aux_div_accename` (`accename`)
) ENGINE=MyISAM;
-- END

-- Copy each distinct combination of marker type and accession description
-- found in aux_genotype_by_accession into the summary table.
-- LABEL:populate_aux_genotype_by_accession_summary
INSERT INTO `aux_genotype_by_accession_summary` (
    marker_type,
    accename,
    source,
    sampstat,
    genus,
    species,
    subspecies
)
SELECT DISTINCT
    agba.marker_type,
    agba.accename,
    agba.source,
    agba.sampstat,
    agba.genus,
    agba.species,
    agba.subspecies
FROM aux_genotype_by_accession AS agba;
-- END

----------------------------------------------------------------------------
-- This table contains a row for each SSR/SNP marker_name.
-- For each row the locality information of the accession and genotype is provided.
--
--
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_assay_plant_genotype so it can be rebuilt.
-- LABEL:drop_aux_assay_plant_genotype
DROP TABLE IF EXISTS `aux_assay_plant_genotype`;
-- END

-- Define aux_assay_plant_genotype: marker and feature names, locality fields
-- with latitude and longitude, accession description and the allele value.
-- The search columns are covered by unnamed single-column indexes.
-- LABEL:create_aux_assay_plant_genotype
CREATE TABLE `aux_assay_plant_genotype` (
    `aux_assay_plant_genotype_id` INT(11) NOT NULL AUTO_INCREMENT,
    `marker_name` VARCHAR(30),
    `marker_type` VARCHAR(30),
    `feature_name` VARCHAR(30),
    `country` VARCHAR(50),
    `state_province` VARCHAR(50),
    `locality_name` VARCHAR(255),
    `latitude` DOUBLE,
    `longitude` DOUBLE,
    `div_allele_assay_id` INT(11),
    `div_obs_unit_id` INT(11),
    `accename` VARCHAR(30),
    `source` VARCHAR(30),
    `sampstat` VARCHAR(30),
    `germplasm_type` VARCHAR(200),
    `genus` VARCHAR(30),
    `species` VARCHAR(30),
    `subspecies` VARCHAR(30),
    `allele_value` VARCHAR(30),
    PRIMARY KEY (`aux_assay_plant_genotype_id`),
    KEY (`marker_name`),
    KEY (`marker_type`),
    KEY (`latitude`),
    KEY (`longitude`),
    KEY (`div_allele_assay_id`),
    KEY (`div_obs_unit_id`),
    KEY (`accename`),
    KEY (`allele_value`)
) ENGINE=MyISAM;
-- END

-- Fill aux_assay_plant_genotype with one row per div_allele row whose marker
-- has a marker_type of exactly SNP or SSR in aux_marker_annotations and whose
-- value is neither nd nor N (rows with a NULL value are dropped as well,
-- because the comparison is then unknown).
-- String literals use single quotes so the statement also parses when the
-- server runs with sql_mode ANSI_QUOTES, where double quotes mark identifiers.
-- germplasm_type is always stored as NULL by this statement.
-- The marker_type filter discards rows without assay, marker or annotation,
-- so the LEFT JOINs on those tables behave like inner joins here.
-- NOTE(review): a marker_type holding several values joined by ::SEPARATOR::
-- does not match the filter - confirm that is intended.
-- LABEL:populate_aux_assay_plant_genotype
INSERT INTO `aux_assay_plant_genotype`
(marker_name, marker_type, feature_name, country, state_province,
locality_name, latitude, longitude, div_allele_assay_id, div_obs_unit_id,
accename, source, sampstat, germplasm_type, genus, species, subspecies, allele_value)
SELECT
cm.name, ama.marker_type, cmf.name, dl.country, dl.state_province,
dl.locality_name, dl.latitude, dl.longitude, daa.div_allele_assay_id, dou.div_obs_unit_id,
dp.accename, cs.source, dp.sampstat, NULL, dt.genus, dt.species, dt.subspecies, da.value
FROM
div_locality dl
RIGHT JOIN div_accession_collecting dac USING (div_locality_id)
RIGHT JOIN div_passport dp USING (div_accession_collecting_id)
RIGHT JOIN div_stock ds USING (div_passport_id)
RIGHT JOIN div_obs_unit dou USING (div_stock_id)
RIGHT JOIN div_obs_unit_sample dous USING (div_obs_unit_id)
RIGHT JOIN div_allele da USING (div_obs_unit_sample_id)
LEFT  JOIN div_allele_assay daa USING (div_allele_assay_id)
LEFT  JOIN cdv_marker cm USING (cdv_marker_id)
LEFT  JOIN cdv_map_feature cmf USING (cdv_map_feature_id)
LEFT  JOIN aux_marker_annotations ama ON (ama.cdv_marker_id = cm.cdv_marker_id)
LEFT  JOIN div_taxonomy dt ON (dt.div_taxonomy_id = dp.div_taxonomy_id)
LEFT  JOIN cdv_source cs ON (cs.cdv_source_id = dp.cdv_source_id)
WHERE
ama.marker_type IN ('SNP', 'SSR')
AND da.value != 'nd'
AND da.value != 'N';
-- END

----------------------------------------------------------------------------
-- The following subroutines must be executed to format tables further before
-- proceeding. Please refer to documentation of auxiliary_tables.pl script.
----------------------------------------------------------------------------

-- LABEL:sub:process_genotypes_gmap
-- END

----------------------------------------------------------------------------
-- This table contains a row for each SSR/SNP marker_name.
-- For each row the number of rows in aux_assay_plant_genotype that have a non-null
-- latitude and longitude value & total number of rows is provided.
----------------------------------------------------------------------------

-- Remove any earlier copy of aux_assay_plant_genotype_count so it can be rebuilt.
-- LABEL:drop_aux_assay_plant_genotype_count
DROP TABLE IF EXISTS `aux_assay_plant_genotype_count`;
-- END

-- Define aux_assay_plant_genotype_count: marker and feature names plus five
-- integer counters, with unnamed indexes on marker_name and marker_type.
-- LABEL:create_aux_assay_plant_genotype_count
CREATE TABLE `aux_assay_plant_genotype_count` (
    `aux_assay_plant_genotype_count_id` INT(11) NOT NULL AUTO_INCREMENT,
    `marker_name` VARCHAR(30),
    `marker_type` VARCHAR(30),
    `feature_name` VARCHAR(30),
    `count_latitude` INT(11),
    `count_longitude` INT(11),
    `count_invalid` INT(11),
    `count_valid` INT(11),
    `count_all` INT(11),
    PRIMARY KEY (`aux_assay_plant_genotype_count_id`),
    KEY (`marker_name`),
    KEY (`marker_type`)
) ENGINE=MyISAM;
-- END

-- Fill aux_assay_plant_genotype_count with one row per (marker_name,
-- marker_type) pair of aux_assay_plant_genotype.
--   count_latitude  rows with a non-null latitude
--   count_longitude rows with a non-null longitude
--   count_valid     rows where latitude and longitude are both non-null
--   count_invalid   all remaining rows (count_all minus count_valid)
--   count_all       every row of the group
-- count_valid is evaluated per row, so a row holding only one of the two
-- coordinates is counted as invalid and count_valid + count_invalid always
-- equals count_all. When both coordinates are always NULL together the
-- results equal the earlier LEAST and GREATEST based figures.
-- feature_name is not part of the grouping, MIN() makes the stored value
-- deterministic and keeps the statement valid under ONLY_FULL_GROUP_BY.
-- LABEL:populate_aux_assay_plant_genotype_count
INSERT INTO `aux_assay_plant_genotype_count`
(marker_name, marker_type, feature_name, count_latitude, count_longitude,
count_invalid, count_valid, count_all)
SELECT
marker_name, marker_type,
MIN(feature_name) AS feature_name,
count(latitude) AS count_latitude,
count(longitude) AS count_longitude,
count(*) - count(CASE WHEN latitude IS NOT NULL AND longitude IS NOT NULL THEN 1 END) AS count_invalid,
count(CASE WHEN latitude IS NOT NULL AND longitude IS NOT NULL THEN 1 END) AS count_valid,
count(*) AS count_all
FROM
aux_assay_plant_genotype
GROUP BY marker_name, marker_type;
-- END
