[Bio] / FigKernelScripts / assign_to_close_strains.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/assign_to_close_strains.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Fri Jun 3 13:26:36 2005 UTC revision 1.6, Fri Jun 3 14:32:27 2005 UTC
# Line 1  Line 1 
1    
   
2  use strict;  use strict;
3    
4  use SameFunc;  use Assignments;
5    
6  use FIG;  use FIG;
7  my $fig = new FIG;  my $fig = new FIG;
# Line 19  Line 18 
18    
19  if (@ARGV > 0)  if (@ARGV > 0)
20  {  {
21      $parms = &load_parms($ARGV[0]);      $parms = &Assignments::load_parms($ARGV[0]);
22  }  }
23  else  else
24  {  {
25      $parms = &load_parms();      $parms = &Assignments::load_parms();
26  }  }
27  # &print_parms($parms);  # &print_parms($parms);
28    
# Line 33  Line 32 
32      chomp;      chomp;
33      @pegs_in_set = split(/\t/,$_);      @pegs_in_set = split(/\t/,$_);
34    
35      ($pegs,$external_ids) = &equivalent_ids($fig,$parms,\@pegs_in_set);      ($pegs,$external_ids) = &Assignments::equivalent_ids($fig,$parms,\@pegs_in_set);
36  #    print STDERR &Dumper(["equiv.ids",$pegs,$external_ids]);  #    print STDERR &Dumper(["equiv.ids",$pegs,$external_ids]);
37    
38      if ($best_function = &choose_best_assignment($fig,$pegs,$external_ids))      if ($best_function = &Assignments::choose_best_assignment($fig,$parms,$pegs,$external_ids))
39      {      {
40  #       print STDERR "best_function: $best_function\n";  #       print STDERR "best_function: $best_function\n";
41          foreach $peg (@$pegs)          foreach $peg (@$pegs)
# Line 50  Line 49 
49      }      }
50  }  }
51    
 sub choose_best_assignment {  
     my($fig,$pegs,$external_ids) = @_;  
     my($peg,$id);  
   
     my $functions = {};  
     foreach $peg (@$pegs)  
     {  
         &load_peg_function($fig,$parms,$peg,$functions);  
     }  
   
     foreach $id (@$external_ids)  
     {  
         &load_ext_function($fig,$parms,$id,$functions);  
     }  
   
     return &cleanup(&pick_function($fig,$parms,$functions));  
 }  
   
 ###### What follows is the core of the automated assignments #####  
   
 sub cleanup {  
     my($func) = @_;  
   
     if ($func =~ /^hypothetical (\S+ )?protein .*$/i)      { return "hypothetical protein" }  
     if ($func =~ /^[a-zA-Z]{1,2}\d{2,5}( protein)?$/i)     { return "hypothetical protein" }  
   
     return $func;  
 }  
   
 sub pick_function {  
     my($fig,$parms,$functions) = @_;  
     my($set,$score,$best_source,$poss_function);  
     my(@scored);  
   
     my @partitions = &SameFunc::group_funcs(keys(%$functions));  
     if ($ENV{'VERBOSE'}) {  print STDERR "partition: ",&Dumper(\@partitions,$functions); }  
   
     foreach $set (@partitions)  
     {  
         $score = &score_set($set,$functions);  
 #       print STDERR &Dumper([$score,$set]);  
   
 #       print STDERR "picking from set ",&Dumper($set);  
         ($poss_function,$best_source) = &pick_specific($fig,$parms,$set,$functions);  
 #       print STDERR "picked $best_function from $best_source\n";  
         push(@scored,[$score,$poss_function,$best_source]);  
     }  
     @scored = sort { $b->[0] <=> $a->[0] } @scored;  
   
     if ((@scored > 1) && $ENV{'VERBOSE'})  
     {  
         foreach $_ (@scored)  
         {  
             print STDERR join("\t",@$_),"\n";  
         }  
         print STDERR "//\n";  
     }  
     return (@scored > 0) ? $scored[0]->[1] : "";  
 }  
   
 sub score_set {  
     my($set,$functions) = @_;  
     my($func,$x);  
   
     my $score = 0;  
     foreach $func (@$set)  
     {  
         if ($x = $functions->{$func})  
         {  
             foreach $_ (@$x)  
             {  
                 $score += $_->[0];  
             }  
         }  
     }  
     return $score;  
 }  
   
 sub pick_specific {  
     my($fig,$parms,$set,$functions) = @_;  
     my($best_func,$best_score,$func,$x,$best_source);  
   
     $best_func  = "";  
     $best_score = "";  
     $best_source = "";  
   
     foreach $func (@$set)  
     {  
         if ($x = $functions->{$func})  
         {  
             foreach $_ (@$x)  
             {  
                 if ($_->[0] > $best_score)  
                 {  
                     $best_score = $_->[0];  
                     $best_func  = $func;  
                     $best_source = $_->[1];  
                 }  
             }  
         }  
     }  
     if ($ENV{'VERBOSE'}) { print STDERR &Dumper($set,$functions,$best_func,$best_source) }  
     return ($best_func,$best_source);  
 }  
   
 sub load_ext_function {  
     my($fig,$parms,$id,$functions) = @_;  
   
     my $func = $fig->function_of($id);  
     if ($func && # (! &FIG::hypo($func)) &&  
         ($id =~ /^([A-Za-z]{2,4})\|/) && ($_ = $parms->{'external'}->{$1}))  
     {  
         push(@{$functions->{$func}},[$_,$id]);  
     }  
 }  
   
 sub load_peg_function {  
     my($fig,$parms,$peg,$functions) = @_;  
   
     my $func = $fig->function_of($peg);  
     if ($func) # (! &FIG::hypo($func))  
     {  
         my $value = 1;  
   
         my $genome = &FIG::genome_of($peg);  
         if ($_ = $parms->{'genome'}->{$genome})  
         {  
             $value += $_;  
         }  
   
         my $subv = 0;  
         my @subs = $fig->peg_to_subsystems($peg);  
         my $sub;  
         foreach $sub (@subs)  
         {  
             if (($_ = $parms->{'subsystems'}->{$sub}) && ($_ > $subv))  
             {  
                 $subv = $_;  
             }  
         }  
         $value += $subv;  
   
         push(@{$functions->{$func}},[$value,$peg]);  
     }  
 }  
   
 sub equivalent_ids {  
     my($fig,$parms,$pegs) = @_;  
     my($peg,@aliases,$alias,%external_ids,%pegs,$tuple);  
   
     foreach $peg (@$pegs)  
     {  
         $pegs{$peg} = 1;  
         @aliases = $fig->feature_aliases($peg);  
         foreach $alias (@aliases)  
         {  
             if (($alias =~ /^([A-Za-z]{2,4})\|\S+$/) && $parms->{"external"}->{$1})  
             {  
                 $external_ids{$alias} = 1;  
             }  
         }  
         foreach $tuple ($fig->mapped_prot_ids($peg))  
         {  
             if ($tuple->[0] =~ /^fig\|/)  
             {  
                 $pegs{$tuple->[0]} = 1;  
             }  
             elsif (($tuple->[0] =~ /^([A-Za-z]{2,4})\|\S+$/) && $parms->{"external"}->{$1})  
             {  
                 $external_ids{$tuple->[0]} = 1;  
             }  
         }  
     }  
     return ([sort { &FIG::by_fig_id($a,$b) }  keys(%pegs)],[sort keys(%external_ids)]);  
 }  
   
 sub load_parms {  
     my($parmsF) = @_;  
   
     my $wts = {};  
   
     if ($parmsF)  
     {  
         open(PARMS,"<$parmsF") || die "could not open $parmsF";  
     }  
   
     while (defined($_ = $parmsF ? <PARMS> : <DATA>))  
     {  
         chop;  
         my($type,$data,$val) = split(/\t/,$_);  
         if ($type eq 'subsystems')  
         {  
             my $x;  
             while (defined($x = $parmsF ? <PARMS> : <DATA>) && ($x !~ /^\/\//))  
             {  
                 if ($x =~ /^(\S[^\t]+\S)/)  
                 {  
                     $wts->{$type}->{$1} = $val;  
                 }  
             }  
         }  
         else  
         {  
             $wts->{$type}->{$data} = $val;  
         }  
     }  
     return $wts;  
 }  
   
 sub print_parms {  
     my($parms) = @_;  
     my($type,$data,$val,$wt_by_type);  
   
     print STDERR "Parameters:\n";  
     foreach $type (sort keys(%$parms))  
     {  
         print STDERR "\n\t$type\n";  
         $wt_by_type = $parms->{$type};  
         foreach $data (sort keys(%$wt_by_type))  
         {  
             $val = $wt_by_type->{$data};  
             print STDERR "\t\t$data\t$val\n";  
         }  
     }  
     print STDERR "\n";  
 }  
 __END__  
 genome  198214.1        4       Shigella flexneri 2a str. 301  
 genome  198215.1        4       Shigella flexneri 2a str. 2457T  
 genome  216598.1        4       Shigella dysenteriae M131649  
 genome  216599.1        4       Shigella sonnei 53G  
 genome  630.2   4       Yersinia enterocolitica 8081  
 genome  633.2   4       Yersinia pseudotuberculosis (Livermore)  
 genome  187410.1        4       Yersinia pestis KIM  
 genome  214092.1        4       Yersinia pestis CO92  
 genome  229193.1        4       Yersinia pestis biovar Medievalis str. 91001  
 genome  273123.1        4       Yersinia pseudotuberculosis IP 32953  
 genome  594.1   4       Salmonella enterica subsp. enterica serovar Gallinarum  
 genome  99287.1 4       Salmonella typhimurium LT2  
 genome  119912.1        4       Salmonella enterica serovar Choleraesuis SC-B67  
 genome  209261.1        4       Salmonella enterica subsp. enterica serovar Typhi Ty2  
 genome  220341.1        4       Salmonella enterica subsp. enterica serovar Typhi str. CT18  
 genome  83333.1 4       Escherichia coli K12  
 genome  83334.1 4       Escherichia coli O157:H7  
 genome  155864.1        4       Escherichia coli O157:H7 EDL933  
 genome  199310.1        4       Escherichia coli CFT073  
 genome  216592.1        4       Escherichia coli 042  
 genome  216593.1        4       Escherichia coli E2348/69  
 genome  192222.1        4       Campylobacter jejuni subsp. jejuni NCTC 11168  
 genome  224308.1        2       Bacillus subtilis subsp. subtilis str. 168  
 external        sp      4  
 external        uni     2  
 external        kegg    1  
 subsystems      trusted 8  
 ABC_transporter_L-proline_glycine_betaine_(TC_3.A.1.12.1)       MattC  
 ABC_transporter_alkylphosphonate_(TC_3.A.1.9.1) MattC  
 ABC_transporter_arabinose_(TC_3.A.1.2.2)        MattC  
 ABC_transporter_branched-chain_amino_acid_(TC_3.A.1.4.1)        MattC  
 ABC_transporter_dipeptide_(TC_3.A.1.5.2)        MattC  
 ABC_transporter_ferric_enterobactin_(TC_3.A.1.14.2)     MattC  
 ABC_transporter_ferrichrome_(TC_3.A.1.14.3)     MattC  
 ABC_transporter_galactose_(TC_3.A.1.2.3)        MattC  
 ABC_transporter_glutamate_aspartate_(TC_3.A.1.3.4)      MattC  
 ABC_transporter_glutamine_(TC_3.A.1.3.2)        MattC  
 ABC_transporter_glycerol_(TC_3.A.1.1.3) MattC  
 ABC_transporter_heme_(TC3.A.1.107.1)    MattC  
 ABC_transporter_histidine_lysine_arginine_ornithine_(TC_3.A.1.3.1)      MattC  
 ABC_transporter_iron(III)_dicitrate_(TC_3.A.1.14.1)     MattC  
 ABC_transporter_macrolide       MattC  
 ABC_transporter_maltose MattC  
 ABC_transporter_molybdenum_(TC_3.A.1.8.1)       MattC  
 ABC_transporter_nickel_(TC_3.A.1.5.3)   MattC  
 ABC_transporter_oligopeptide_(TC_3.A.1.5.1)     MattC  
 ABC_transporter_peptide_(TC_3.A.1.5.5)  MattC  
 ABC_transporter_phosphate_(TC_3.A.1.7.1)        MattC  
 ABC_transporter_polyamine_putrescine_spermidine_(TC_3.A.1.11.1) MattC  
 ABC_transporter_putrescine_(TC_3.A.1.11.2)      MattC  
 ABC_transporter_ribose_(TC_3.A.1.2.1)   MattC  
 Adhesion_to_eukaryotic_cell     MikeK  
 Alanine_Biosynthesis    Straw  
 Allantoin_degradation   MattC  
 Ammonia_assimilation    EdF  
 Anaerobic_respiratory_reductases        OlgaV  
 Arginine_Biosynthesis   RickS  
 Arginine_Putrescine_and_4-aminobutyrate_degradation     MattC  
 Asp-Glu-tRNA(Asn-Gln)_transamidation    gjo  
 Bacterial_Cell_Division RickS  
 Betaine_biosynthesis    MattC  
 Bilin_Biosynthesis      OlgaZ  
 Biotin_biosynthesis     rodionov  
 Branched-Chain_Amino_Acid_Biosynthesis  RossO  
 CMP-N-acetylneuraminate_Biosynthesis    OlgaZ  
 Calvin-Benson_cycle     SvetaG  
 Carotenoids     OlgaV  
 Chlorophyll_Biosynthesis        VeronikaV  
 Chorismate_Synthesis    VeronikaV  
 Coenzyme_A_Biosynthesis AndreiO  
 Cyanobacterial_CO2_uptake       OlgaV  
 Cyanobacterial_Circadian_Clock  OlgaZ  
 Cyanophycin_metabolism  MikeR  
 Cytochrome_B6-F_complex SvetaG  
 Cytolethal_distending_toxin_of_Campylobacter_jejuni     OlgaZ  
 D-arabinose_degradation MattC  
 D-galactarate_degradation       MattC  
 D-galacturonate_degradation     MattC  
 DNA-replication RickS  
 DNA_Repair_Base_Excision        MikeK  
 De_Novo_Purine_Biosynthesis     RossO  
 De_Novo_Pyrimidine_Synthesis    RossO  
 Denitrification rodionov  
 Embden-Meyerhof_and_Gluconeogenesis     SvetaG  
 F0F1-type_ATP_synthase  RickS  
 FMN_and_FAD_biosynthesis        AndreiO  
 Fatty_Acid_Biosynthesis_FASII   AndreiO  
 Fe-S_cluster_assembly   rodionov  
 Flagellum       RickS  
 Folate_Biosynthesis     vcrecy  
 Fucose_and_rhamnose_degradation MattC  
 Galactitol_degradation  MattC  
 Galactose_degradation   MattC  
 General_secretory_pathway_(Sec-SRP)_complex_(TC_3.A.5.1.1)      MattC  
 Glutamate,_aspartate_and_asparagine_biosynthisis        MattC  
 Glutamate_biosynthesis  MattC  
 Glutathione_Redox_Metabolism    Neema_UCSD  
 Glycerol_Metabolism     MattC  
 Glycerolipid_and_glycerphospholipid_metabolism  VasiliyP  
 Glycine_synthesis       MikeK  
 Glyoxylate_Synthesis    RickS  
 GroEL_GroES     MikeK  
 HMG_CoA_Synthesis       Veronika  
 Hexitol_degradation     MattC  
 Histidine_Biosynthesis  RossO  
 Histidine_Degradation   RossO  
 Inorganic_Sulfur_Assimilation   ChristianR  
 Inositol_catabolism     VeronikaV  
 Isoprenoid_Biosynthesis OlgaZ  
 Ketogluconate_metabolism        MattC  
 L-ascorbate_degradation MattC  
 Lactose_degradation     MattC  
 Leucine_Degradation_and_HMG-CoA_Metabolism      VeronikaV  
 Lysine_Biosynthesis_DAP_Pathway AndreiO  
 Mannose-sensitive_hemagglutinin_type_4_pilus    RobE  
 Mannose_and_fructose_metabolism HanYuC_UCSD  
 Menaquinone_and_Phylloquinone_Biosynthesis      OlgaZ  
 Methanogenesis  gjo  
 Methionine_Biosynthesis rodionov  
 Methylcitrate_cycle     MattC  
 N-Acetyl-D-Glucosamine_Utilization      OlgaZ  
 N-linked_Glycosylation_in_Bacteria      OlgaZ  
 NAD_and_NADP_cofactor_biosynthesis_global       AndreiO  
 Na(+)-translocating_NADH-quinone_oxidoreductase_and_rnf-like_group_of_electron_transport_complexes      OlgaV  
 Nitrate_and_nitrite_ammonification      rodionov  
 Nitrosative_stress      rodionov  
 P-type_ATPase_transporter_potassium_(TC_3.A.3.7.1)      MattC  
 Pentose_phosphate_pathway       SvetaG  
 Peptidoglycan_Biosynthesis      RickS  
 Phenylalanine_synthesis MikeK  
 Photosystem_I   SvetaG  
 Photosystem_II  SvetaG  
 Phycobilisome   OlgaZ  
 Plastoquinone_Biosynthesis      OlgaZ  
 Polyamine_Metabolism    InesT_UCSD  
 Porphyrin,_Heme,_and_Siroheme_Biosynthesis      SvetaG  
 Proline_Synthesis       RickS  
 Proteasome_archaeal     gjo  
 Proteasome_eukaryotic   gjo  
 Pterin_biosynthesis     vcrecy  
 Purine_conversions      OlgaV  
 Pyruvate_Alanine_Serine_Interconversions        JasonS_UCSD  
 Queuosine-Archaeosine_Biosynthesis      vcrecy  
 RNA_polymerase_I        gjo  
 RNA_polymerase_II       gjo  
 RNA_polymerase_III      gjo  
 RNA_polymerase_II_initiation_factors    gjo  
 RNA_polymerase_archaeal gjo  
 RNA_polymerase_archaeal_initiation_factors      gjo  
 RNA_polymerase_bacterial        gjo  
 RNA_polymerase_chloroplast      gjo  
 Resistance_to_fluoroquinolones  MattC  
 Respiratory_Complex_I   OlgaV  
 Respiratory_dehydrogenases_1    OlgaV  
 Ribonucleotide_reduction        rodionov  
 Ribose_and_deoxyribose_phosphate_metabolism     MattC  
 Ribosome_LSU_bacterial  gjo  
 Ribosome_LSU_eukaryotic_and_archaeal    gjo  
 Ribosome_SSU_bacterial  gjo  
 Ribosome_SSU_chloroplast        gjo  
 Ribosome_SSU_eukaryotic_and_archaeal    gjo  
 Ribosome_biogenesis_bacterial   gjo  
 Serine_Biosynthesis     MikeK  
 Siderophore_Aerobactin_and_Ferrichrome_Biosynthesis     MattC  
 Soluble_cytochromes_and_functionally_related_electron_carriers  OlgaV  
 Succinate_dehydrogenase OlgaV  
 Sulfate_assimilation    MattC  
 Sulfur_Metabolism       RobE  
 TCA_Cycle       OlgaV  
 Terminal_cytochrome_C_oxidases  OlgaV  
 Terminal_cytochrome_oxidases    OlgaV  
 Thiamin_biosynthesis    rodionov  
 Threonine_synthesis     MikeK  
 Tocopherol_Biosynthesis OlgaZ  
 Transcription_factors_bacterial gjo  
 Translation_elongation_factors_eukaryotic_and_archaeal  gjo  
 Translation_factors_bacterial   gjo  
 Translation_initiation_factors_eukaryotic_and_archaeal  gjo  
 Transport_of_Nickel_and_Cobalt  rodionov  
 Trehalose_biosynthesis  MattC  
 Tricarballylate_Utilization     RossO  
 Tryptophan_synthesis    VeronikaV  
 Tyrosine_synthesis      MikeK  
 UDP-N-acetylmuramate_from_Fructose-6-phosphate_Biosynthesis     VasiliyP  
 Ubiquinone_Biosynthesis OlgaZ  
 Ubiquinone_Menaquinone-cytochrome_c_reductase_complexes OlgaV  
 Urea_decomposition      rodionov  
 V-Type_ATP_synthase     RickS  
 carnitine_metabolism    MattC  
 cysteine_biosynthesis   RobE  
 dTDP-rhamnose_synthesis MikeK  
 fatty_acid_metabolism   MattC  
 fatty_acid_oxidation_pathway    MattC  
 glyoxylate_degradation  MattC  
 mannose_and_GDP-mannose_metabolism      MattC  
 polyisoprenoid_biosynthesis     MattC  
 ppGpp_biosynthesis      MikeK  
 tRNA_aminoacylation     gjo  
 tRNA_processing gjo  
 tRNA_splicing   gjo  
 //  

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3