#!/usr/local/bin/perl -w

=head1 NAME

stat_pathway.pl - tabulate gene, enzyme, reaction and pathway associations from Pathway Tools flat files

=cut

=head1 SYNOPSIS

stat_pathway.pl [options]

  Options:
    --data_path    directory holding the pathway flat files (*.dat)

=head1 OPTIONS

=over 4

=item B<--data_path>

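Directory that contains the flat files read by this script: pathways.dat,
reactions.dat, enzrxns.dat, proteins.dat and genes.dat. Example:
/usr/local/pathway-tools/aic-export/ecocyc/ricecyc/1.0/data/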

=back

=cut



use strict;

use Getopt::Long;
use Pod::Usage;

# Command-line handling.  --data_path is mandatory; --help/-? and --man
# print the POD usage text.  $path stays file-scoped because the input file
# names further down are built from it.
my ($path);
{
    my %show = ( help => 0, man => 0 );

    my $parsed_ok = GetOptions(
        'help|?'      => \$show{help},
        'man'         => \$show{man},
        'data_path=s' => \$path,
    );

    # Unknown or malformed options: show usage with exit value 1.
    pod2usage(1) unless $parsed_ok;

    # --man is checked first, so it wins when both flags are given.
    if ( $show{man} ) {
        pod2usage( -verbose => 2 );
    }
    if ( $show{help} ) {
        pod2usage(1);
    }

    # A missing (or false-valued) --data_path is treated as a usage error.
    pod2usage(1) unless $path;
}
# Input flat files, all expected directly inside the --data_path directory,
# e.g. /usr/local/pathway-tools/aic-export/ecocyc/ricecyc/1.0/data/
#
# The file names are attached by plain string concatenation, so make sure
# the directory ends with a separator first; otherwise
# "--data_path /some/dir" would look for "/some/dirpathways.dat".
my $data_dir = $path;
$data_dir .= '/' unless $data_dir =~ m{[/\\]\z};

my $pwy_file      = $data_dir . 'pathways.dat';
my $reaction_file = $data_dir . 'reactions.dat';
my $enzrxn_file   = $data_dir . 'enzrxns.dat';
my $protein_file  = $data_dir . 'proteins.dat';
my $gene_file     = $data_dir . 'genes.dat';

# %pwys maps a pathway UNIQUE-ID to
#   { name => COMMON-NAME, reactions => [ every REACTION-LIST value ] }
my %pwys;

{
    # Accumulators for the record currently being read.  They start as
    # empty strings so a record lacking a field stores '' rather than undef.
    my ( $pwy, $name, @rxns ) = ( '', '' );

    # Three-argument open with a lexical handle: the path comes from the
    # command line and must never be interpreted as an open mode.
    open( my $pwy_fh, '<', $pwy_file )
        or die "Cannot open $pwy_file: $!";

    while ( my $line = <$pwy_fh> ) {
        chomp $line;
        next if $line =~ /^#/;    # lines starting with '#' are skipped

        if ( $line =~ /^UNIQUE-ID - (.+)/ ) {
            $pwy = $1;
        }
        elsif ( $line =~ /^COMMON-NAME - (.+)/ ) {
            $name = $1;
        }
        elsif ( $line =~ /^REACTION-LIST - (.+)/ ) {
            push @rxns, $1;
        }
        elsif ( $line =~ m{^//} ) {

            # "//" ends a record.  Store it only when an id was seen, so a
            # malformed record cannot create an entry under an empty key.
            if ( $pwy ne '' ) {
                $pwys{$pwy} = {
                    'name'      => $name,
                    'reactions' => [@rxns],    # copy; @rxns is reused below
                };
            }
            $pwy  = '';
            $name = '';
            @rxns = ();
        }
    }
    close($pwy_fh);
}

# %rxns maps a reaction UNIQUE-ID to
#   { name => COMMON-NAME, ec => EC-NUMBER, enzymes => [ ENZYMATIC-REACTION values ] }
# When a record has several EC-NUMBER lines only the last one is kept.
my %rxns;

{
    # Accumulators for the record currently being read.
    my ( $rxn, $ec, $rxn_name, @enzs ) = ( '', '', '' );

    # Three-argument open with a lexical handle: the path comes from the
    # command line and must never be interpreted as an open mode.
    open( my $rxn_fh, '<', $reaction_file )
        or die "Cannot open $reaction_file: $!";

    while ( my $line = <$rxn_fh> ) {
        chomp $line;
        next if $line =~ /^#/;    # lines starting with '#' are skipped

        if ( $line =~ /^UNIQUE-ID - (.+)/ ) {
            $rxn = $1;
        }
        elsif ( $line =~ /^COMMON-NAME - (.+)/ ) {
            $rxn_name = $1;
        }
        elsif ( $line =~ /^EC-NUMBER - (.+)/ ) {
            $ec = $1;
        }
        elsif ( $line =~ /^ENZYMATIC-REACTION - (.+)/ ) {
            push @enzs, $1;
        }
        elsif ( $line =~ m{^//} ) {

            # "//" ends a record.  Store it only when an id was seen.
            if ( $rxn ne '' ) {
                $rxns{$rxn} = {
                    'name'    => $rxn_name,
                    'ec'      => $ec,
                    'enzymes' => [@enzs],    # copy; @enzs is reused below
                };
            }

            # Reset EVERY accumulator, including $ec: otherwise a reaction
            # without an EC-NUMBER line would inherit the EC number of the
            # previous record.
            $rxn      = '';
            $rxn_name = '';
            $ec       = '';
            @enzs     = ();
        }
    }
    close($rxn_fh);
}

# %enzrxns maps an enzymatic-reaction UNIQUE-ID to
#   { name => COMMON-NAME, enzyme => ENZYME }
# where ENZYME is the value later used as a key into the protein table.
my %enzrxns;

{
    # Accumulators for the record currently being read.
    my ( $enz, $enz_name, $protein_id ) = ( '', '', '' );

    # Three-argument open with a lexical handle: the path comes from the
    # command line and must never be interpreted as an open mode.
    open( my $enz_fh, '<', $enzrxn_file )
        or die "Cannot open $enzrxn_file: $!";

    while ( my $line = <$enz_fh> ) {
        chomp $line;
        next if $line =~ /^#/;    # lines starting with '#' are skipped

        if ( $line =~ /^UNIQUE-ID - (.+)/ ) {
            $enz = $1;
        }
        elsif ( $line =~ /^COMMON-NAME - (.+)/ ) {
            $enz_name = $1;
        }
        elsif ( $line =~ /^ENZYME - (.+)/ ) {
            $protein_id = $1;
        }
        elsif ( $line =~ m{^//} ) {

            # "//" ends a record.  Store it only when an id was seen, so a
            # malformed record cannot create an entry under an empty key.
            if ( $enz ne '' ) {
                $enzrxns{$enz} = {
                    'name'   => $enz_name,
                    'enzyme' => $protein_id,
                };
            }
            $enz        = '';
            $enz_name   = '';
            $protein_id = '';
        }
    }
    close($enz_fh);
}

# %proteins maps a protein UNIQUE-ID to
#   { gene => GENE, types => TYPES, components => [ COMPONENTS values ] }
# For GENE and TYPES only the last line seen in a record is kept.
my %proteins;

# Accumulators for the record currently being read.  They are declared at
# file scope (not inside a bare block) so that any later code sharing
# $gene / $type keeps compiling under "use strict".
my ( $protein, $type, $gene, @components ) = ( '', '', '' );

# Three-argument open with a lexical handle: the path comes from the
# command line and must never be interpreted as an open mode.
open( my $protein_fh, '<', $protein_file )
    or die "Cannot open $protein_file: $!";

while ( my $line = <$protein_fh> ) {
    chomp $line;
    next if $line =~ /^#/;    # lines starting with '#' are skipped

    if ( $line =~ /^UNIQUE-ID - (.+)/ ) {
        $protein = $1;
    }
    elsif ( $line =~ /^TYPES - (.+)/ ) {
        $type = $1;
    }
    elsif ( $line =~ /^COMPONENTS - (.+)/ ) {
        push @components, $1;
    }
    elsif ( $line =~ /^GENE - (.+)/ ) {
        $gene = $1;
    }
    elsif ( $line =~ m{^//} ) {

        # "//" ends a record.  Store it only when an id was seen, so a
        # malformed record cannot create an entry under an empty key.
        if ( $protein ne '' ) {
            $proteins{$protein} = {
                'gene'       => $gene,
                'types'      => $type,
                'components' => [@components],    # copy; array is reused
            };
        }
        $protein    = '';
        $type       = '';
        $gene       = '';
        @components = ();
    }
}
close($protein_fh);

# %genes is the set of gene UNIQUE-IDs found in genes.dat (id => 1).
my %genes;

{
    # Own accumulator, private to this loop, instead of borrowing a
    # variable left over from an earlier section of the script.
    my $gene_id = '';

    # Three-argument open with a lexical handle: the path comes from the
    # command line and must never be interpreted as an open mode.
    open( my $gene_fh, '<', $gene_file )
        or die "Cannot open $gene_file: $!";

    while ( my $line = <$gene_fh> ) {
        chomp $line;
        next if $line =~ /^#/;    # lines starting with '#' are skipped

        if ( $line =~ /^UNIQUE-ID - (.+)/ ) {
            $gene_id = $1;
        }
        elsif ( $line =~ m{^//} ) {

            # "//" ends a record.  Records without an id are ignored
            # rather than registered under an empty key.
            $genes{$gene_id} = 1 if $gene_id ne '';
            $gene_id = '';
        }
    }
    close($gene_fh);
}

# %gene_pwys is a four-level index
#   gene => enzymatic reaction => reaction => pathway => 1
# built by walking pathway -> reaction -> enzymatic reaction -> protein.
my %gene_pwys;

# Report file, created in the current working directory.  The bareword
# handle OUT is kept because the rest of the script prints to it.
my $out_file = 'pwy.txt';
open( OUT, '>', $out_file )
    or die "Cannot open $out_file for writing: $!";

foreach my $pwy ( keys %pwys ) {
    my $reaction_list = $pwys{$pwy}->{'reactions'} || [];

    foreach my $rxn (@$reaction_list) {

        # Skip REACTION-LIST entries that have no reaction record;
        # "exists" avoids autovivifying empty entries in %rxns.
        next unless exists $rxns{$rxn};
        my $rxnenzs = $rxns{$rxn}->{'enzymes'} || [];

        foreach my $rxnenz (@$rxnenzs) {
            my $enz =
                exists $enzrxns{$rxnenz}
              ? $enzrxns{$rxnenz}->{'enzyme'}
              : undef;

            # Without an enzyme id there is nothing to look up; say so
            # instead of using undef/'' as a key into %proteins.
            unless ( defined $enz && $enz ne '' ) {
                print "enzyme not found for $rxnenz\n";
                next;
            }

            my $gene =
                exists $proteins{$enz}
              ? $proteins{$enz}->{'gene'}
              : undef;

            if ($gene) {
                $gene_pwys{$gene}->{$rxnenz}->{$rxn}->{$pwy} = 1;
            }
            else {
                print "gene not found for $enz\n";
            }
        }
    }
}

# Header row of the tab-separated report, written to the OUT handle.
{
    my @column_titles = (
        'gene_name',
        'enzyme_name',
        'reaction_id',
        'reaction_name',
        'EC',
        'Pathway_id',
        'Pathway_name',
    );
    my $header_line = join( "\t", @column_titles );
    print OUT $header_line, "\n";
}

# Write one tab-separated row per (gene, enzymatic reaction, reaction,
# pathway) combination recorded in %gene_pwys.  Keys are sorted at every
# level so the report comes out in the same order on every run.
foreach my $gene ( sort keys %gene_pwys ) {
    my $rxnenzs2 = $gene_pwys{$gene};

    foreach my $rxnenz ( sort keys %$rxnenzs2 ) {
        my $rxns2    = $rxnenzs2->{$rxnenz};
        my $enz_name = $enzrxns{$rxnenz}->{'name'};

        foreach my $rxn ( sort keys %$rxns2 ) {
            my $pwys2    = $rxns2->{$rxn};
            my $rxn_name = $rxns{$rxn}->{'name'};
            my $ec       = $rxns{$rxn}->{'ec'};

            foreach my $pwy ( sort keys %$pwys2 ) {
                my $pwy_name = $pwys{$pwy}->{'name'};

                my @row = (
                    $gene, $enz_name, $rxn, $rxn_name,
                    $ec,   $pwy,      $pwy_name,
                );

                # Optional attributes (names, EC number) may be missing;
                # print them as empty fields instead of warning on undef.
                print OUT join( "\t", map { defined $_ ? $_ : '' } @row ),
                  "\n";
            }
        }
    }
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close(OUT) or die "Cannot close $out_file: $!";
    
__END__

=cut
