[Bio] / FigKernelPackages / Assignments.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/Assignments.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Fri Jun 3 14:32:27 2005 UTC revision 1.11, Wed May 3 13:18:22 2006 UTC
# Line 1  Line 1 
1    #
2    # Copyright (c) 2003-2006 University of Chicago and Fellowship
3    # for Interpretation of Genomes. All Rights Reserved.
4    #
5    # This file is part of the SEED Toolkit.
6    #
7    # The SEED Toolkit is free software. You can redistribute
8    # it and/or modify it under the terms of the SEED Toolkit
9    # Public License.
10    #
11    # You should have received a copy of the SEED Toolkit Public License
12    # along with this program; if not, write to the University of Chicago
13    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14    # Genomes at veronika@thefig.info or download a copy from
15    # http://www.theseed.org/LICENSE.TXT.
16    #
17    
18  package Assignments;  package Assignments;
19    
20  use Carp;  use Carp;
# Line 8  Line 25 
25  sub default_parms {  sub default_parms {
26    
27      my $x = <<END      my $x = <<END
28  genome  198214.1        4       Shigella flexneri 2a str. 301  genome  198214.1        5       Shigella flexneri 2a str. 301
29  genome  198215.1        4       Shigella flexneri 2a str. 2457T  genome  198215.1        5       Shigella flexneri 2a str. 2457T
30  genome  216598.1        4       Shigella dysenteriae M131649  genome  216598.1        5       Shigella dysenteriae M131649
31  genome  216599.1        4       Shigella sonnei 53G  genome  216599.1        5       Shigella sonnei 53G
32  genome  630.2   4       Yersinia enterocolitica 8081  genome  630.2   5       Yersinia enterocolitica 8081
33  genome  633.2   4       Yersinia pseudotuberculosis (Livermore)  genome  633.2   5       Yersinia pseudotuberculosis (Livermore)
34  genome  187410.1        4       Yersinia pestis KIM  genome  187410.1        5       Yersinia pestis KIM
35  genome  214092.1        4       Yersinia pestis CO92  genome  214092.1        5       Yersinia pestis CO92
36  genome  229193.1        4       Yersinia pestis biovar Medievalis str. 91001  genome  229193.1        5       Yersinia pestis biovar Medievalis str. 91001
37  genome  273123.1        4       Yersinia pseudotuberculosis IP 32953  genome  273123.1        5       Yersinia pseudotuberculosis IP 32953
38  genome  594.1   4       Salmonella enterica subsp. enterica serovar Gallinarum  genome  594.1   5       Salmonella enterica subsp. enterica serovar Gallinarum
39  genome  99287.1 4       Salmonella typhimurium LT2  genome  99287.1 5       Salmonella typhimurium LT2
40  genome  119912.1        4       Salmonella enterica serovar Choleraesuis SC-B67  genome  119912.1        5       Salmonella enterica serovar Choleraesuis SC-B67
41  genome  209261.1        4       Salmonella enterica subsp. enterica serovar Typhi Ty2  genome  209261.1        5       Salmonella enterica subsp. enterica serovar Typhi Ty2
42  genome  220341.1        4       Salmonella enterica subsp. enterica serovar Typhi str. CT18  genome  220341.1        5       Salmonella enterica subsp. enterica serovar Typhi str. CT18
43  genome  83333.1 4       Escherichia coli K12  genome  83333.1 5       Escherichia coli K12
44  genome  83334.1 4       Escherichia coli O157:H7  genome  83334.1 5       Escherichia coli O157:H7
45  genome  155864.1        4       Escherichia coli O157:H7 EDL933  genome  155864.1        5       Escherichia coli O157:H7 EDL933
46  genome  199310.1        4       Escherichia coli CFT073  genome  199310.1        5       Escherichia coli CFT073
47  genome  216592.1        4       Escherichia coli 042  genome  216592.1        5       Escherichia coli 042
48  genome  216593.1        4       Escherichia coli E2348/69  genome  216593.1        5       Escherichia coli E2348/69
49  genome  192222.1        4       Campylobacter jejuni subsp. jejuni NCTC 11168  genome  192222.1        5       Campylobacter jejuni subsp. jejuni NCTC 11168
50  genome  224308.1        2       Bacillus subtilis subsp. subtilis str. 168  genome  224308.1        5       Bacillus subtilis subsp. subtilis str. 168
51    genome  196600.1        5       Vibrio vulnificus YJ016
52    genome  216895.1        5       Vibrio vulnificus CMCP6
53    genome  223926.1        5       Vibrio parahaemolyticus RIMD 2210633
54    genome  243277.1        5       Vibrio cholerae O1 biovar eltor str. N16961
55    genome  312309.3        5       Vibrio fischeri ES114
56    genome  314290.3        5       Vibrio sp. MED222
57    genome  314291.3        5       Vibrio splendidus 12B01
58    genome  192222.1        5       Campylobacter jejuni subsp. jejuni NCTC 11168
59    genome  195099.3        5       Campylobacter jejuni RM1221
60    genome  306254.1        5       Campylobacter coli RM2228
61    genome  306263.1        5       Campylobacter lari RM2100
62    genome  306264.1        5       Campylobacter upsaliensis RM3195
63    genome  169963.1        5       Listeria monocytogenes EGD-e
64    genome  265669.1        5       Listeria monocytogenes str. 4b F2365
65    genome  267409.1        5       Listeria monocytogenes str. 1/2a F6854
66    genome  267410.1        5       Listeria monocytogenes str. 4b H7858
67    genome  272626.1        5       Listeria innocua Clip11262
68    genome  1314.1          5       Streptococcus pyogenes M5
69    genome  160490.1        5       Streptococcus pyogenes M1 GAS
70    genome  170187.1        5       Streptococcus pneumoniae TIGR4
71    genome  171101.1        5       Streptococcus pneumoniae R6
72    genome  186103.1        5       Streptococcus pyogenes MGAS8232
73    genome  193567.1        5       Streptococcus pyogenes SSI-1
74    genome  198466.1        5       Streptococcus pyogenes MGAS315
75    genome  205921.3        5       Streptococcus agalactiae A909
76    genome  208435.1        5       Streptococcus agalactiae 2603V/R
77    genome  210007.1        5       Streptococcus mutans UA159
78    genome  211110.1        5       Streptococcus agalactiae NEM316
79    genome  246201.1        5       Streptococcus mitis NCTC 12261
80    genome  264199.3        5       Streptococcus thermophilus LMG 18311
81    genome  286636.1        5       Streptococcus pyogenes MGAS10394
82    genome  293653.3        5       Streptococcus pyogenes MGAS5005
83    genome  299768.3        5       Streptococcus thermophilus CNRZ1066
84    genome  319701.3        5       Streptococcus pyogenes MGAS6180
85    genome  93062.4         5       Staphylococcus aureus subsp. aureus COL
86    genome  158878.1        5       Staphylococcus aureus subsp. aureus Mu50
87    genome  158879.1        5       Staphylococcus aureus subsp. aureus N315
88    genome  176279.3        5       Staphylococcus epidermidis RP62A
89    genome  176280.1        5       Staphylococcus epidermidis ATCC 12228
90    genome  196620.1        5       Staphylococcus aureus subsp. aureus MW2
91    genome  282458.1        5       Staphylococcus aureus subsp. aureus MRSA252
92    genome  282459.1        5       Staphylococcus aureus subsp. aureus MSSA476
93  external        sp      4  external        sp      4
94  external        uni     2  external        uni     1.3
95  external        kegg    1  external        kegg    1
96  subsystems      trusted 8  subsystems      trusted 20
 ABC_transporter_L-proline_glycine_betaine_(TC_3.A.1.12.1)       MattC  
 ABC_transporter_alkylphosphonate_(TC_3.A.1.9.1) MattC  
 ABC_transporter_arabinose_(TC_3.A.1.2.2)        MattC  
 ABC_transporter_branched-chain_amino_acid_(TC_3.A.1.4.1)        MattC  
 ABC_transporter_dipeptide_(TC_3.A.1.5.2)        MattC  
 ABC_transporter_ferric_enterobactin_(TC_3.A.1.14.2)     MattC  
 ABC_transporter_ferrichrome_(TC_3.A.1.14.3)     MattC  
 ABC_transporter_galactose_(TC_3.A.1.2.3)        MattC  
 ABC_transporter_glutamate_aspartate_(TC_3.A.1.3.4)      MattC  
 ABC_transporter_glutamine_(TC_3.A.1.3.2)        MattC  
 ABC_transporter_glycerol_(TC_3.A.1.1.3) MattC  
 ABC_transporter_heme_(TC3.A.1.107.1)    MattC  
 ABC_transporter_histidine_lysine_arginine_ornithine_(TC_3.A.1.3.1)      MattC  
 ABC_transporter_iron(III)_dicitrate_(TC_3.A.1.14.1)     MattC  
 ABC_transporter_macrolide       MattC  
 ABC_transporter_maltose MattC  
 ABC_transporter_molybdenum_(TC_3.A.1.8.1)       MattC  
 ABC_transporter_nickel_(TC_3.A.1.5.3)   MattC  
 ABC_transporter_oligopeptide_(TC_3.A.1.5.1)     MattC  
 ABC_transporter_peptide_(TC_3.A.1.5.5)  MattC  
 ABC_transporter_phosphate_(TC_3.A.1.7.1)        MattC  
 ABC_transporter_polyamine_putrescine_spermidine_(TC_3.A.1.11.1) MattC  
 ABC_transporter_putrescine_(TC_3.A.1.11.2)      MattC  
 ABC_transporter_ribose_(TC_3.A.1.2.1)   MattC  
 Adhesion_to_eukaryotic_cell     MikeK  
 Alanine_Biosynthesis    Straw  
 Allantoin_degradation   MattC  
 Ammonia_assimilation    EdF  
 Anaerobic_respiratory_reductases        OlgaV  
 Arginine_Biosynthesis   RickS  
 Arginine_Putrescine_and_4-aminobutyrate_degradation     MattC  
 Asp-Glu-tRNA(Asn-Gln)_transamidation    gjo  
 Bacterial_Cell_Division RickS  
 Betaine_biosynthesis    MattC  
 Bilin_Biosynthesis      OlgaZ  
 Biotin_biosynthesis     rodionov  
 Branched-Chain_Amino_Acid_Biosynthesis  RossO  
 CMP-N-acetylneuraminate_Biosynthesis    OlgaZ  
 Calvin-Benson_cycle     SvetaG  
 Carotenoids     OlgaV  
 Chlorophyll_Biosynthesis        VeronikaV  
 Chorismate_Synthesis    VeronikaV  
 Coenzyme_A_Biosynthesis AndreiO  
 Cyanobacterial_CO2_uptake       OlgaV  
 Cyanobacterial_Circadian_Clock  OlgaZ  
 Cyanophycin_metabolism  MikeR  
 Cytochrome_B6-F_complex SvetaG  
 Cytolethal_distending_toxin_of_Campylobacter_jejuni     OlgaZ  
 D-arabinose_degradation MattC  
 D-galactarate_degradation       MattC  
 D-galacturonate_degradation     MattC  
 DNA-replication RickS  
 DNA_Repair_Base_Excision        MikeK  
 De_Novo_Purine_Biosynthesis     RossO  
 De_Novo_Pyrimidine_Synthesis    RossO  
 Denitrification rodionov  
 Embden-Meyerhof_and_Gluconeogenesis     SvetaG  
 F0F1-type_ATP_synthase  RickS  
 FMN_and_FAD_biosynthesis        AndreiO  
 Fatty_Acid_Biosynthesis_FASII   AndreiO  
 Fe-S_cluster_assembly   rodionov  
 Flagellum       RickS  
 Folate_Biosynthesis     vcrecy  
 Fucose_and_rhamnose_degradation MattC  
 Galactitol_degradation  MattC  
 Galactose_degradation   MattC  
 General_secretory_pathway_(Sec-SRP)_complex_(TC_3.A.5.1.1)      MattC  
 Glutamate,_aspartate_and_asparagine_biosynthisis        MattC  
 Glutamate_biosynthesis  MattC  
 Glutathione_Redox_Metabolism    Neema_UCSD  
 Glycerol_Metabolism     MattC  
 Glycerolipid_and_glycerphospholipid_metabolism  VasiliyP  
 Glycine_synthesis       MikeK  
 Glyoxylate_Synthesis    RickS  
 GroEL_GroES     MikeK  
 HMG_CoA_Synthesis       Veronika  
 Hexitol_degradation     MattC  
 Histidine_Biosynthesis  RossO  
 Histidine_Degradation   RossO  
 Inorganic_Sulfur_Assimilation   ChristianR  
 Inositol_catabolism     VeronikaV  
 Isoprenoid_Biosynthesis OlgaZ  
 Ketogluconate_metabolism        MattC  
 L-ascorbate_degradation MattC  
 Lactose_degradation     MattC  
 Leucine_Degradation_and_HMG-CoA_Metabolism      VeronikaV  
 Lysine_Biosynthesis_DAP_Pathway AndreiO  
 Mannose-sensitive_hemagglutinin_type_4_pilus    RobE  
 Mannose_and_fructose_metabolism HanYuC_UCSD  
 Menaquinone_and_Phylloquinone_Biosynthesis      OlgaZ  
 Methanogenesis  gjo  
 Methionine_Biosynthesis rodionov  
 Methylcitrate_cycle     MattC  
 N-Acetyl-D-Glucosamine_Utilization      OlgaZ  
 N-linked_Glycosylation_in_Bacteria      OlgaZ  
 NAD_and_NADP_cofactor_biosynthesis_global       AndreiO  
 Na(+)-translocating_NADH-quinone_oxidoreductase_and_rnf-like_group_of_electron_transport_complexes      OlgaV  
 Nitrate_and_nitrite_ammonification      rodionov  
 Nitrosative_stress      rodionov  
 P-type_ATPase_transporter_potassium_(TC_3.A.3.7.1)      MattC  
 Pentose_phosphate_pathway       SvetaG  
 Peptidoglycan_Biosynthesis      RickS  
 Phenylalanine_synthesis MikeK  
 Photosystem_I   SvetaG  
 Photosystem_II  SvetaG  
 Phycobilisome   OlgaZ  
 Plastoquinone_Biosynthesis      OlgaZ  
 Polyamine_Metabolism    InesT_UCSD  
 Porphyrin,_Heme,_and_Siroheme_Biosynthesis      SvetaG  
 Proline_Synthesis       RickS  
 Proteasome_archaeal     gjo  
 Proteasome_eukaryotic   gjo  
 Pterin_biosynthesis     vcrecy  
 Purine_conversions      OlgaV  
 Pyruvate_Alanine_Serine_Interconversions        JasonS_UCSD  
 Queuosine-Archaeosine_Biosynthesis      vcrecy  
 RNA_polymerase_I        gjo  
 RNA_polymerase_II       gjo  
 RNA_polymerase_III      gjo  
 RNA_polymerase_II_initiation_factors    gjo  
 RNA_polymerase_archaeal gjo  
 RNA_polymerase_archaeal_initiation_factors      gjo  
 RNA_polymerase_bacterial        gjo  
 RNA_polymerase_chloroplast      gjo  
 Resistance_to_fluoroquinolones  MattC  
 Respiratory_Complex_I   OlgaV  
 Respiratory_dehydrogenases_1    OlgaV  
 Ribonucleotide_reduction        rodionov  
 Ribose_and_deoxyribose_phosphate_metabolism     MattC  
 Ribosome_LSU_bacterial  gjo  
 Ribosome_LSU_eukaryotic_and_archaeal    gjo  
 Ribosome_SSU_bacterial  gjo  
 Ribosome_SSU_chloroplast        gjo  
 Ribosome_SSU_eukaryotic_and_archaeal    gjo  
 Ribosome_biogenesis_bacterial   gjo  
 Serine_Biosynthesis     MikeK  
 Siderophore_Aerobactin_and_Ferrichrome_Biosynthesis     MattC  
 Soluble_cytochromes_and_functionally_related_electron_carriers  OlgaV  
 Succinate_dehydrogenase OlgaV  
 Sulfate_assimilation    MattC  
 Sulfur_Metabolism       RobE  
 TCA_Cycle       OlgaV  
 Terminal_cytochrome_C_oxidases  OlgaV  
 Terminal_cytochrome_oxidases    OlgaV  
 Thiamin_biosynthesis    rodionov  
 Threonine_synthesis     MikeK  
 Tocopherol_Biosynthesis OlgaZ  
 Transcription_factors_bacterial gjo  
 Translation_elongation_factors_eukaryotic_and_archaeal  gjo  
 Translation_factors_bacterial   gjo  
 Translation_initiation_factors_eukaryotic_and_archaeal  gjo  
 Transport_of_Nickel_and_Cobalt  rodionov  
 Trehalose_biosynthesis  MattC  
 Tricarballylate_Utilization     RossO  
 Tryptophan_synthesis    VeronikaV  
 Tyrosine_synthesis      MikeK  
 UDP-N-acetylmuramate_from_Fructose-6-phosphate_Biosynthesis     VasiliyP  
 Ubiquinone_Biosynthesis OlgaZ  
 Ubiquinone_Menaquinone-cytochrome_c_reductase_complexes OlgaV  
 Urea_decomposition      rodionov  
 V-Type_ATP_synthase     RickS  
 carnitine_metabolism    MattC  
 cysteine_biosynthesis   RobE  
 dTDP-rhamnose_synthesis MikeK  
 fatty_acid_metabolism   MattC  
 fatty_acid_oxidation_pathway    MattC  
 glyoxylate_degradation  MattC  
 mannose_and_GDP-mannose_metabolism      MattC  
 polyisoprenoid_biosynthesis     MattC  
 ppGpp_biosynthesis      MikeK  
 tRNA_aminoacylation     gjo  
 tRNA_processing gjo  
 tRNA_splicing   gjo  
 //  
97  END  END
98  ;  ;
99      return split(/\n/,$default_parms);      my @parms = split(/\n/,$x);
100        my $fig = new FIG;
101        my @trusted_subsystems = map { my $sub = $_; my $curr = $fig->subsystem_curator($sub);
102                                       "$sub\t$curr\n"
103                                     }
104                                 grep { $fig->usable_subsystem($_) }
105                                 $fig->all_subsystems;
106        push(@parms,@trusted_subsystems,"//\n");
107        return @parms;
108  }  }
109    
110    
111  sub choose_best_assignment {  sub choose_best_assignment {
112      my($fig,$parms,$pegs,$external_ids) = @_;      my($fig,$parms,$pegs,$external_ids,$ignore) = @_;
113      my($peg,$id);      my($peg,$id);
114    
115      my $functions = {};      my $functions = {};
# Line 223  Line 117 
117      {      {
118          &load_peg_function($fig,$parms,$peg,$functions);          &load_peg_function($fig,$parms,$peg,$functions);
119      }      }
120        my @tmp = keys(%$functions);
121    #   print &Dumper(['peg check',\@tmp,$functions]);
122    
123        if ((@tmp == 1) && (@$pegs >= 5)) { return $tmp[0] }
124    
125      foreach $id (@$external_ids)      foreach $id (@$external_ids)
126      {      {
# Line 239  Line 137 
137      if (! $func)                                           { return "hypothetical protein" }      if (! $func)                                           { return "hypothetical protein" }
138      if ($func =~ /^hypothetical (\S+ )?protein .*$/i)      { return "hypothetical protein" }      if ($func =~ /^hypothetical (\S+ )?protein .*$/i)      { return "hypothetical protein" }
139      if ($func =~ /^[a-zA-Z]{1,2}\d{2,5}( protein)?$/i)     { return "hypothetical protein" }      if ($func =~ /^[a-zA-Z]{1,2}\d{2,5}( protein)?$/i)     { return "hypothetical protein" }
140        if ($func =~ /^similar to ORF\d+$/)                    { return "hypothetical protein" }
141        if ($func =~ /^(Alr|As|All|Tlr|Tll|Glr|Blr|Slr|SEW|pANL)\d+( protein)?$/i) { return "hypothetical protein" }
142        if ($func =~ /^\d{5}/)                                 { return "hypothetical protein" }
143        if ($func =~ /unknown protein/)                        { return "hypothetical protein" }
144    
145      return $func;      return $func;
146  }  }
# Line 258  Line 160 
160    
161  #       print STDERR "picking from set ",&Dumper($set);  #       print STDERR "picking from set ",&Dumper($set);
162          ($poss_function,$best_source) = &pick_specific($fig,$parms,$set,$functions);          ($poss_function,$best_source) = &pick_specific($fig,$parms,$set,$functions);
163  #       print STDERR "picked $best_function from $best_source\n";  #       print STDERR "picked $poss_function from $best_source\n";
164          push(@scored,[$score,$poss_function,$best_source]);          push(@scored,[$score,$poss_function,$best_source]);
165      }      }
166      @scored = sort { $b->[0] <=> $a->[0] } @scored;      @scored = sort { $b->[0] <=> $a->[0] } @scored;
# Line 297  Line 199 
199      my($best_func,$best_score,$func,$x,$best_source);      my($best_func,$best_score,$func,$x,$best_source);
200    
201      $best_func  = "";      $best_func  = "";
202      $best_score = "";      $best_score = 0;
203      $best_source = "";      $best_source = "";
204    
205      foreach $func (@$set)      foreach $func (@$set)
# Line 307  Line 209 
209              my $incr = @$x;              my $incr = @$x;
210              foreach $_ (@$x)              foreach $_ (@$x)
211              {              {
212                  if (((100 * $_->[0]) + $incr) > $best_score)                  my($sc,$peg,$in_sub) = @$_;
213                    $sc += $in_sub ? 10000 : 0;
214    
215                    if (((100 * $sc) + $incr) > $best_score)
216                  {                  {
217                      $best_score = (100 * $_->[0]) + $incr;                      $best_score = (100 * $sc) + $incr;
218                      $best_func  = $func;                      $best_func  = $func;
219                      $best_source = $_->[1];                      $best_source = $peg;
220                  }                  }
221              }              }
222          }          }
# Line 348  Line 253 
253          my $subv = 0;          my $subv = 0;
254          my @subs = $fig->peg_to_subsystems($peg);          my @subs = $fig->peg_to_subsystems($peg);
255          my $sub;          my $sub;
256            my $in_sub = 0;
257          foreach $sub (@subs)          foreach $sub (@subs)
258          {          {
259              if (($_ = $parms->{'subsystems'}->{$sub}) && ($_ > $subv))              if ($_ = $parms->{'subsystems'}->{$sub})
260                {
261                    if ($_ > $subv)
262              {              {
263                  $subv = $_;                  $subv = $_;
264              }              }
265                    $in_sub = 1;
266                }
267          }          }
268          $value += $subv;          $value += $subv;
269            push(@{$functions->{$func}},[$value,$peg,$in_sub]);
         push(@{$functions->{$func}},[$value,$peg]);  
270      }      }
271  }  }
272    
# Line 378  Line 287 
287          }          }
288          foreach $tuple ($fig->mapped_prot_ids($peg))          foreach $tuple ($fig->mapped_prot_ids($peg))
289          {          {
290              if ($tuple->[0] =~ /^fig\|/)              if (($tuple->[0] =~ /^fig\|/) && $fig->is_real_feature($tuple->[0]))
291              {              {
292                  $pegs{$tuple->[0]} = 1;                  $pegs{$tuple->[0]} = 1;
293              }              }
# Line 405  Line 314 
314      {      {
315          @parmsS = &default_parms;          @parmsS = &default_parms;
316      }      }
   
317      while ($_ = shift @parmsS)      while ($_ = shift @parmsS)
318      {      {
319          chomp;          chomp;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3