[Bio] / DeJonghStuff / parse_model.pl Repository:
ViewVC logotype

Diff of /DeJonghStuff/parse_model.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1.1.1, Wed Oct 26 18:48:35 2005 UTC revision 1.2, Thu Jan 12 19:53:18 2006 UTC
# Line 1  Line 1 
1  # -*- perl -*-  # -*- perl -*-
2    
3  ###########################################  ###########################################
4  # usage:  parse_model <genome_id> <model_id>  # usage:  parse_model <genome_id> <model_id> <kegg organism id>
5  # models for a given genome should be stored in Data/Global/Models/<genome_id>/  # models for a given genome should be stored in Data/Global/Models/<genome_id>/
6  # models should be prepended with the model id (e.g., iSB615).  Three files are expected:  # models should be prepended with the model id (e.g., iSB615).  Three files are expected:
7  # ${model_id}-metabolites.txt:  contains two-column, tab-separated file with compound ids  # ${model_id}-metabolites.txt:  contains two-column, tab-separated file with compound ids
# Line 15  Line 15 
15  use strict;  use strict;
16  use FIG;  use FIG;
17    
18  my ($genome_id, $model_id) = @ARGV;  my ($genome_id, $model_id, $kegg_org_id) = @ARGV;
19    
20    unless ($genome_id && $model_id && $kegg_org_id)
21    {
22        print "usage:  parse_model <genome_id> <model_id> <kegg organism id>\n";
23        exit(-1);
24    }
25    
26  my $fig = new FIG;  my $fig = new FIG;
27    
28  open(MODEL_METABOLITES, "<$FIG_Config::global/Models/$genome_id/${model_id}-metabolites.txt")  open(MODEL_METABOLITES, "<$FIG_Config::global/Models/$genome_id/${model_id}-metabolites.txt")
# Line 164  Line 171 
171  close(REACTION_TO_ROLE);  close(REACTION_TO_ROLE);
172  close(REACTION_TO_COMPOUND);  close(REACTION_TO_COMPOUND);
173    
174  # extra - use hash for tracking gene/peg correspondences since genes occur in multiple reactions  # use hash for tracking gene/peg and gene/reaction correspondences since genes occur in multiple reactions
175  my %gene_2_peg;  my (%gene_2_pegs, %gene_2_reactions);
176    
177  while (<MODEL_GRA>)  while (<MODEL_GRA>)
178  {  {
# Line 174  Line 181 
181    
182      if (defined $reaction_abbrev_2_role{$rabbrev})      if (defined $reaction_abbrev_2_role{$rabbrev})
183      {      {
184          while ($genes =~ /(SA\d\d\d\d)/g)          chomp $genes;
185          {          $genes =~ s/\(//g;
186              my $gene = $1;          $genes =~ s/\)//g;
187            $genes =~ s/and//gi;
188            $genes =~ s/or//gi;
189            $genes =~ s/^\s+//;
190            $genes =~ s/\s+$//;
191    
192              print GENE_TO_REACTION "$gene\t$rabbrev\n";          foreach my $gene (split /\s+/ , $genes)
193            {
194                print "Checking $gene ...\n";
195                push @{$gene_2_reactions{$gene}}, $rabbrev;
196              my $rname = $reaction_abbrev_2_role{$rabbrev};              my $rname = $reaction_abbrev_2_role{$rabbrev};
197              print GENE_TO_ROLE "$gene\t$rname\n"; # this table is redundant              print GENE_TO_ROLE "$gene\t$rname\n"; # this table is redundant
198    
199              # extra - look for pegs based on gene              if (! defined($gene_2_pegs{$gene}))
             if (! defined($gene_2_peg{$gene}))  
200              {              {
201                  my $peg_count = 0;                  $gene_2_pegs{$gene} = ();
202                  my ($peg_index_data,undef) = $fig->search_index($gene);                  # search on kegg gene identifiers because the seem to limit results to the correct peg
203                    my ($peg_index_data,undef) = $fig->search_index("$kegg_org_id:".$gene);
204    
205                  foreach my $peg (map { $_->[0] } @$peg_index_data)                  foreach my $peg (map { $_->[0] } @$peg_index_data)
206                  {                  {
207                      if ($peg =~ /$genome_id/ )                      if ($peg =~ /$genome_id/ )
208                      {                      {
209                          $peg_count++;                          push @{$gene_2_pegs{$gene}}, $peg;
                         print GENE_TO_PEG "$gene\t$peg\t$peg_count\n";  
210                      }                      }
211                  }                  }
   
                 if ($peg_count == 0)  
                 {  
                     print "Warning, no pegs found for gene $gene\n";  
                 }  
   
                 $gene_2_peg{$gene} = 1;  
212              }              }
213          }          }
214      }      }
# Line 212  Line 218 
218      }      }
219  }  }
220    
221    foreach my $gene (keys %gene_2_pegs)
222    {
223        if (defined $gene_2_pegs{$gene})
224        {
225            print GENE_TO_PEG "$gene\t@{$gene_2_pegs{$gene}}\n";
226        }
227        else
228        {
229            print "Can't find peg for $gene\n";
230        }
231    }
232    
233    foreach my $gene (keys %gene_2_reactions)
234    {
235        print GENE_TO_REACTION "$gene\t@{$gene_2_reactions{$gene}}\n";
236    }
237    
238  close(GENE_TO_REACTION);  close(GENE_TO_REACTION);
239  close(GENE_TO_ROLE);  close(GENE_TO_ROLE);
240  close(GENE_TO_PEG);  close(GENE_TO_PEG);

Legend:
Removed from v.1.1.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3