[Bio] / FigKernelPackages / Assignments.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/Assignments.pm

Parent Directory | Revision Log | View Patch

revision 1.1, Fri Jun 3 14:32:27 2005 UTC | revision 1.17, Wed Sep 10 20:09:55 2008 UTC
# Line 1  Line 1 
1    #
2    # Copyright (c) 2003-2006 University of Chicago and Fellowship
3    # for Interpretations of Genomes. All Rights Reserved.
4    #
5    # This file is part of the SEED Toolkit.
6    #
7    # The SEED Toolkit is free software. You can redistribute
8    # it and/or modify it under the terms of the SEED Toolkit
9    # Public License.
10    #
11    # You should have received a copy of the SEED Toolkit Public License
12    # along with this program; if not write to the University of Chicago
13    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14    # Genomes at veronika@thefig.info or download a copy from
15    # http://www.theseed.org/LICENSE.TXT.
16    #
17    
18  package Assignments;  package Assignments;
19    
20  use Carp;  use Carp;
# Line 8  Line 25 
25  sub default_parms {  sub default_parms {
26    
27      my $x = <<END      my $x = <<END
 genome  198214.1        4       Shigella flexneri 2a str. 301  
 genome  198215.1        4       Shigella flexneri 2a str. 2457T  
 genome  216598.1        4       Shigella dysenteriae M131649  
 genome  216599.1        4       Shigella sonnei 53G  
 genome  630.2   4       Yersinia enterocolitica 8081  
 genome  633.2   4       Yersinia pseudotuberculosis (Livermore)  
 genome  187410.1        4       Yersinia pestis KIM  
 genome  214092.1        4       Yersinia pestis CO92  
 genome  229193.1        4       Yersinia pestis biovar Medievalis str. 91001  
 genome  273123.1        4       Yersinia pseudotuberculosis IP 32953  
 genome  594.1   4       Salmonella enterica subsp. enterica serovar Gallinarum  
 genome  99287.1 4       Salmonella typhimurium LT2  
 genome  119912.1        4       Salmonella enterica serovar Choleraesuis SC-B67  
 genome  209261.1        4       Salmonella enterica subsp. enterica serovar Typhi Ty2  
 genome  220341.1        4       Salmonella enterica subsp. enterica serovar Typhi str. CT18  
 genome  83333.1 4       Escherichia coli K12  
 genome  83334.1 4       Escherichia coli O157:H7  
 genome  155864.1        4       Escherichia coli O157:H7 EDL933  
 genome  199310.1        4       Escherichia coli CFT073  
 genome  216592.1        4       Escherichia coli 042  
 genome  216593.1        4       Escherichia coli E2348/69  
 genome  192222.1        4       Campylobacter jejuni subsp. jejuni NCTC 11168  
 genome  224308.1        2       Bacillus subtilis subsp. subtilis str. 168  
28  external        sp      4  external        sp      4
29  external        uni     2  external        img     4
30    external        uni     1.3
31  external        kegg    1  external        kegg    1
32  subsystems      trusted 8  external        gi      1
 ABC_transporter_L-proline_glycine_betaine_(TC_3.A.1.12.1)       MattC  
 ABC_transporter_alkylphosphonate_(TC_3.A.1.9.1) MattC  
 ABC_transporter_arabinose_(TC_3.A.1.2.2)        MattC  
 ABC_transporter_branched-chain_amino_acid_(TC_3.A.1.4.1)        MattC  
 ABC_transporter_dipeptide_(TC_3.A.1.5.2)        MattC  
 ABC_transporter_ferric_enterobactin_(TC_3.A.1.14.2)     MattC  
 ABC_transporter_ferrichrome_(TC_3.A.1.14.3)     MattC  
 ABC_transporter_galactose_(TC_3.A.1.2.3)        MattC  
 ABC_transporter_glutamate_aspartate_(TC_3.A.1.3.4)      MattC  
 ABC_transporter_glutamine_(TC_3.A.1.3.2)        MattC  
 ABC_transporter_glycerol_(TC_3.A.1.1.3) MattC  
 ABC_transporter_heme_(TC3.A.1.107.1)    MattC  
 ABC_transporter_histidine_lysine_arginine_ornithine_(TC_3.A.1.3.1)      MattC  
 ABC_transporter_iron(III)_dicitrate_(TC_3.A.1.14.1)     MattC  
 ABC_transporter_macrolide       MattC  
 ABC_transporter_maltose MattC  
 ABC_transporter_molybdenum_(TC_3.A.1.8.1)       MattC  
 ABC_transporter_nickel_(TC_3.A.1.5.3)   MattC  
 ABC_transporter_oligopeptide_(TC_3.A.1.5.1)     MattC  
 ABC_transporter_peptide_(TC_3.A.1.5.5)  MattC  
 ABC_transporter_phosphate_(TC_3.A.1.7.1)        MattC  
 ABC_transporter_polyamine_putrescine_spermidine_(TC_3.A.1.11.1) MattC  
 ABC_transporter_putrescine_(TC_3.A.1.11.2)      MattC  
 ABC_transporter_ribose_(TC_3.A.1.2.1)   MattC  
 Adhesion_to_eukaryotic_cell     MikeK  
 Alanine_Biosynthesis    Straw  
 Allantoin_degradation   MattC  
 Ammonia_assimilation    EdF  
 Anaerobic_respiratory_reductases        OlgaV  
 Arginine_Biosynthesis   RickS  
 Arginine_Putrescine_and_4-aminobutyrate_degradation     MattC  
 Asp-Glu-tRNA(Asn-Gln)_transamidation    gjo  
 Bacterial_Cell_Division RickS  
 Betaine_biosynthesis    MattC  
 Bilin_Biosynthesis      OlgaZ  
 Biotin_biosynthesis     rodionov  
 Branched-Chain_Amino_Acid_Biosynthesis  RossO  
 CMP-N-acetylneuraminate_Biosynthesis    OlgaZ  
 Calvin-Benson_cycle     SvetaG  
 Carotenoids     OlgaV  
 Chlorophyll_Biosynthesis        VeronikaV  
 Chorismate_Synthesis    VeronikaV  
 Coenzyme_A_Biosynthesis AndreiO  
 Cyanobacterial_CO2_uptake       OlgaV  
 Cyanobacterial_Circadian_Clock  OlgaZ  
 Cyanophycin_metabolism  MikeR  
 Cytochrome_B6-F_complex SvetaG  
 Cytolethal_distending_toxin_of_Campylobacter_jejuni     OlgaZ  
 D-arabinose_degradation MattC  
 D-galactarate_degradation       MattC  
 D-galacturonate_degradation     MattC  
 DNA-replication RickS  
 DNA_Repair_Base_Excision        MikeK  
 De_Novo_Purine_Biosynthesis     RossO  
 De_Novo_Pyrimidine_Synthesis    RossO  
 Denitrification rodionov  
 Embden-Meyerhof_and_Gluconeogenesis     SvetaG  
 F0F1-type_ATP_synthase  RickS  
 FMN_and_FAD_biosynthesis        AndreiO  
 Fatty_Acid_Biosynthesis_FASII   AndreiO  
 Fe-S_cluster_assembly   rodionov  
 Flagellum       RickS  
 Folate_Biosynthesis     vcrecy  
 Fucose_and_rhamnose_degradation MattC  
 Galactitol_degradation  MattC  
 Galactose_degradation   MattC  
 General_secretory_pathway_(Sec-SRP)_complex_(TC_3.A.5.1.1)      MattC  
 Glutamate,_aspartate_and_asparagine_biosynthisis        MattC  
 Glutamate_biosynthesis  MattC  
 Glutathione_Redox_Metabolism    Neema_UCSD  
 Glycerol_Metabolism     MattC  
 Glycerolipid_and_glycerphospholipid_metabolism  VasiliyP  
 Glycine_synthesis       MikeK  
 Glyoxylate_Synthesis    RickS  
 GroEL_GroES     MikeK  
 HMG_CoA_Synthesis       Veronika  
 Hexitol_degradation     MattC  
 Histidine_Biosynthesis  RossO  
 Histidine_Degradation   RossO  
 Inorganic_Sulfur_Assimilation   ChristianR  
 Inositol_catabolism     VeronikaV  
 Isoprenoid_Biosynthesis OlgaZ  
 Ketogluconate_metabolism        MattC  
 L-ascorbate_degradation MattC  
 Lactose_degradation     MattC  
 Leucine_Degradation_and_HMG-CoA_Metabolism      VeronikaV  
 Lysine_Biosynthesis_DAP_Pathway AndreiO  
 Mannose-sensitive_hemagglutinin_type_4_pilus    RobE  
 Mannose_and_fructose_metabolism HanYuC_UCSD  
 Menaquinone_and_Phylloquinone_Biosynthesis      OlgaZ  
 Methanogenesis  gjo  
 Methionine_Biosynthesis rodionov  
 Methylcitrate_cycle     MattC  
 N-Acetyl-D-Glucosamine_Utilization      OlgaZ  
 N-linked_Glycosylation_in_Bacteria      OlgaZ  
 NAD_and_NADP_cofactor_biosynthesis_global       AndreiO  
 Na(+)-translocating_NADH-quinone_oxidoreductase_and_rnf-like_group_of_electron_transport_complexes      OlgaV  
 Nitrate_and_nitrite_ammonification      rodionov  
 Nitrosative_stress      rodionov  
 P-type_ATPase_transporter_potassium_(TC_3.A.3.7.1)      MattC  
 Pentose_phosphate_pathway       SvetaG  
 Peptidoglycan_Biosynthesis      RickS  
 Phenylalanine_synthesis MikeK  
 Photosystem_I   SvetaG  
 Photosystem_II  SvetaG  
 Phycobilisome   OlgaZ  
 Plastoquinone_Biosynthesis      OlgaZ  
 Polyamine_Metabolism    InesT_UCSD  
 Porphyrin,_Heme,_and_Siroheme_Biosynthesis      SvetaG  
 Proline_Synthesis       RickS  
 Proteasome_archaeal     gjo  
 Proteasome_eukaryotic   gjo  
 Pterin_biosynthesis     vcrecy  
 Purine_conversions      OlgaV  
 Pyruvate_Alanine_Serine_Interconversions        JasonS_UCSD  
 Queuosine-Archaeosine_Biosynthesis      vcrecy  
 RNA_polymerase_I        gjo  
 RNA_polymerase_II       gjo  
 RNA_polymerase_III      gjo  
 RNA_polymerase_II_initiation_factors    gjo  
 RNA_polymerase_archaeal gjo  
 RNA_polymerase_archaeal_initiation_factors      gjo  
 RNA_polymerase_bacterial        gjo  
 RNA_polymerase_chloroplast      gjo  
 Resistance_to_fluoroquinolones  MattC  
 Respiratory_Complex_I   OlgaV  
 Respiratory_dehydrogenases_1    OlgaV  
 Ribonucleotide_reduction        rodionov  
 Ribose_and_deoxyribose_phosphate_metabolism     MattC  
 Ribosome_LSU_bacterial  gjo  
 Ribosome_LSU_eukaryotic_and_archaeal    gjo  
 Ribosome_SSU_bacterial  gjo  
 Ribosome_SSU_chloroplast        gjo  
 Ribosome_SSU_eukaryotic_and_archaeal    gjo  
 Ribosome_biogenesis_bacterial   gjo  
 Serine_Biosynthesis     MikeK  
 Siderophore_Aerobactin_and_Ferrichrome_Biosynthesis     MattC  
 Soluble_cytochromes_and_functionally_related_electron_carriers  OlgaV  
 Succinate_dehydrogenase OlgaV  
 Sulfate_assimilation    MattC  
 Sulfur_Metabolism       RobE  
 TCA_Cycle       OlgaV  
 Terminal_cytochrome_C_oxidases  OlgaV  
 Terminal_cytochrome_oxidases    OlgaV  
 Thiamin_biosynthesis    rodionov  
 Threonine_synthesis     MikeK  
 Tocopherol_Biosynthesis OlgaZ  
 Transcription_factors_bacterial gjo  
 Translation_elongation_factors_eukaryotic_and_archaeal  gjo  
 Translation_factors_bacterial   gjo  
 Translation_initiation_factors_eukaryotic_and_archaeal  gjo  
 Transport_of_Nickel_and_Cobalt  rodionov  
 Trehalose_biosynthesis  MattC  
 Tricarballylate_Utilization     RossO  
 Tryptophan_synthesis    VeronikaV  
 Tyrosine_synthesis      MikeK  
 UDP-N-acetylmuramate_from_Fructose-6-phosphate_Biosynthesis     VasiliyP  
 Ubiquinone_Biosynthesis OlgaZ  
 Ubiquinone_Menaquinone-cytochrome_c_reductase_complexes OlgaV  
 Urea_decomposition      rodionov  
 V-Type_ATP_synthase     RickS  
 carnitine_metabolism    MattC  
 cysteine_biosynthesis   RobE  
 dTDP-rhamnose_synthesis MikeK  
 fatty_acid_metabolism   MattC  
 fatty_acid_oxidation_pathway    MattC  
 glyoxylate_degradation  MattC  
 mannose_and_GDP-mannose_metabolism      MattC  
 polyisoprenoid_biosynthesis     MattC  
 ppGpp_biosynthesis      MikeK  
 tRNA_aminoacylation     gjo  
 tRNA_processing gjo  
 tRNA_splicing   gjo  
 //  
33  END  END
34  ;  ;
35      return split(/\n/,$default_parms);  #
36    # TO PUT FIG ANNOTATIONS BACK IN ADD THE FOLLOWING LINE
37    #######################################################
38    #subsystems     trusted 20
39    #######################################################
40    # You may also improve things by adding lines like:
41    #
42    #genome 83333.1 15      Escherichia coli K12
43    #
44    #######################################################
45    
46        my @parms = split(/\n/,$x);
47        my $fig = new FIG;
48        my @trusted_subsystems = map { my $sub = $_; my $curr = $fig->subsystem_curator($sub);
49                                       "$sub\t$curr\n"
50                                     }
51                                 grep { $fig->usable_subsystem($_) }
52                                 $fig->all_subsystems;
53        push(@parms,@trusted_subsystems,"//\n");
54        return @parms;
55  }  }
56    
57    
58  sub choose_best_assignment {  sub choose_best_assignment {
59      my($fig,$parms,$pegs,$external_ids) = @_;      my($fig,$parms,$pegs,$external_ids,$ignore) = @_;
60      my($peg,$id);      my($peg,$id);
61    
62      my $functions = {};      my $functions = {};
# Line 223  Line 64 
64      {      {
65          &load_peg_function($fig,$parms,$peg,$functions);          &load_peg_function($fig,$parms,$peg,$functions);
66      }      }
67        my @tmp = keys(%$functions);
68        print STDERR &Dumper(['peg check',\@tmp,$functions]) if ($ENV{'DEBUG'} || $ENV{'VERBOSE'});
69    
70        if ((@tmp == 1) && (@$pegs >= 5)) { return $tmp[0] }
71    
72      foreach $id (@$external_ids)      foreach $id (@$external_ids)
73      {      {
# Line 239  Line 84 
84      if (! $func)                                           { return "hypothetical protein" }      if (! $func)                                           { return "hypothetical protein" }
85      if ($func =~ /^hypothetical (\S+ )?protein .*$/i)      { return "hypothetical protein" }      if ($func =~ /^hypothetical (\S+ )?protein .*$/i)      { return "hypothetical protein" }
86      if ($func =~ /^[a-zA-Z]{1,2}\d{2,5}( protein)?$/i)     { return "hypothetical protein" }      if ($func =~ /^[a-zA-Z]{1,2}\d{2,5}( protein)?$/i)     { return "hypothetical protein" }
87        if ($func =~ /^similar to ORF\d+$/)                    { return "hypothetical protein" }
88        if ($func =~ /^(Alr|As|All|Tlr|Tll|Glr|Blr|Slr|SEW|pANL)\d+( protein)?$/i) { return "hypothetical protein" }
89        if ($func =~ /^\d{5}/)                                 { return "hypothetical protein" }
90        if ($func =~ /unknown protein/)                        { return "hypothetical protein" }
91    
92      return $func;      return $func;
93  }  }
# Line 247  Line 96 
96      my($fig,$parms,$functions) = @_;      my($fig,$parms,$functions) = @_;
97      my($set,$score,$best_source,$poss_function);      my($set,$score,$best_source,$poss_function);
98      my(@scored);      my(@scored);
   
99      my @partitions = &SameFunc::group_funcs(keys(%$functions));      my @partitions = &SameFunc::group_funcs(keys(%$functions));
100      if ($ENV{'VERBOSE'}) {  print STDERR "partition: ",&Dumper(\@partitions,$functions); }      if ($ENV{'VERBOSE'}) {  print STDERR "partition: ",&Dumper(\@partitions,$functions); }
101    
102      foreach $set (@partitions)      foreach $set (@partitions)
103      {      {
104          $score = &score_set($set,$functions);          $score = &score_set($set,$functions);
105  #       print STDERR &Dumper([$score,$set]);          if ($ENV{'DEBUG'}) { print STDERR &Dumper([$score,$set]); }
106    
107  #       print STDERR "picking from set ",&Dumper($set);          if ($ENV{'DEBUG'}) { print STDERR "picking from set ",&Dumper($set); }
108          ($poss_function,$best_source) = &pick_specific($fig,$parms,$set,$functions);          ($poss_function,$best_source) = &pick_specific($fig,$parms,$set,$functions);
109  #       print STDERR "picked $best_function from $best_source\n";          if ($ENV{'DEBUG'}) { print STDERR "picked $poss_function from $best_source\n"; }
110          push(@scored,[$score,$poss_function,$best_source]);          push(@scored,[$score,$poss_function,$best_source]);
111      }      }
112      @scored = sort { $b->[0] <=> $a->[0] } @scored;      @scored = sort { $b->[0] <=> $a->[0] } @scored;
# Line 297  Line 145 
145      my($best_func,$best_score,$func,$x,$best_source);      my($best_func,$best_score,$func,$x,$best_source);
146    
147      $best_func  = "";      $best_func  = "";
148      $best_score = "";      $best_score = 0;
149      $best_source = "";      $best_source = "";
150    
151      foreach $func (@$set)      foreach $func (@$set)
# Line 307  Line 155 
155              my $incr = @$x;              my $incr = @$x;
156              foreach $_ (@$x)              foreach $_ (@$x)
157              {              {
158                  if (((100 * $_->[0]) + $incr) > $best_score)                  my($sc,$peg,$in_sub) = @$_;
159                    $sc += $in_sub ? 10000 : 0;
160    
161                    if (((100 * $sc) + $incr) > $best_score)
162                  {                  {
163                      $best_score = (100 * $_->[0]) + $incr;                      $best_score = (100 * $sc) + $incr;
164                      $best_func  = $func;                      $best_func  = $func;
165                      $best_source = $_->[1];                      $best_source = $peg;
166                  }                  }
167              }              }
168          }          }
# Line 344  Line 195 
195          {          {
196              $value += $_;              $value += $_;
197          }          }
   
198          my $subv = 0;          my $subv = 0;
199          my @subs = $fig->peg_to_subsystems($peg);          my @subs = ();
200            foreach my $sub ($fig->peg_to_subsystems($peg))
201            {
202                if (1) # (&solid_sub_assign($fig,$sub,$peg,$func))
203                {
204                    push(@subs,$sub);
205                }
206            }
207          my $sub;          my $sub;
208            my $in_sub = 0;
209          foreach $sub (@subs)          foreach $sub (@subs)
210          {          {
211              if (($_ = $parms->{'subsystems'}->{$sub}) && ($_ > $subv))              if ($_ = $parms->{'subsystems'}->{$sub})
212                {
213                    if ($_ > $subv)
214              {              {
215                  $subv = $_;                  $subv = $_;
216              }              }
217                    $in_sub = 1;
218                }
219          }          }
220          $value += $subv;          $value += $subv;
221            push(@{$functions->{$func}},[$value,$peg,$in_sub]);
222        }
223    }
224    
225          push(@{$functions->{$func}},[$value,$peg]);  sub solid_sub_assign {
226        my($fig,$sub,$peg,$func) = @_;
227    
228        my $curator = $fig->subsystem_curator($sub);
229        $curator =~ s/^master://;
230        return ($fig->usable_subsystem($sub) && &made_by_curator($fig,$peg,$func,$curator));
231      }      }
232    
233    sub made_by_curator {
234        my($fig,$peg,$func,$curator) = @_;
235    
236        my @ann = $fig->feature_annotations($peg,"rawtime");
237        my $i;
238        my $funcQ = quotemeta $func;
239        for ($i=$#ann;
240             ($i >= 0) && (($ann[$i]->[2] !~ /$curator/) || ($ann[$i]->[3] !~ /Set \S+ function to\n$funcQ/s));
241             $i--) {}
242        return ($i >= 0);
243  }  }
244    
245  sub equivalent_ids {  sub equivalent_ids {
# Line 378  Line 259 
259          }          }
260          foreach $tuple ($fig->mapped_prot_ids($peg))          foreach $tuple ($fig->mapped_prot_ids($peg))
261          {          {
262              if ($tuple->[0] =~ /^fig\|/)              if (($tuple->[0] =~ /^fig\|/) && $fig->is_real_feature($tuple->[0]))
263              {              {
264                  $pegs{$tuple->[0]} = 1;                  $pegs{$tuple->[0]} = 1;
265              }              }
# Line 405  Line 286 
286      {      {
287          @parmsS = &default_parms;          @parmsS = &default_parms;
288      }      }
   
289      while ($_ = shift @parmsS)      while ($_ = shift @parmsS)
290      {      {
291          chomp;          chomp;

Legend:
Removed from v.1.1  
Changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3