[Bio] / FigKernelScripts / salvage_subsystem_rows.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/salvage_subsystem_rows.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (view) (download) (as text)

1 : overbeek 1.4 ########################################################################
2 : olson 1.3 #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 : overbeek 1.1 use FIG;
20 :     my $fig = new FIG;
21 :    
22 : overbeek 1.4 my $usage = "usage: salvage_subsystem_rows From To Maps [ToData]";
23 : overbeek 1.1
24 :     # Foreach Subsystem in To (a directory of subsystems), if Subsystem is in From,
25 :     # then each row representing an organism in Mapping will be "salvaged" and added
26 : overbeek 1.4 # to Subsystem in To. If ToData is included, then any subsystems in From, but not in To
27 :     # will be copied to To (deleting rows for genomes not in ToData)
28 : overbeek 1.1 #
29 :    
30 :     (
31 :     ($from = shift @ARGV) &&
32 :     ($to = shift @ARGV) &&
33 :     ($mapping = shift @ARGV)
34 :     )
35 :     || die $usage;
36 :    
37 : overbeek 1.4 my $tmpF = "$FIG_Config::temp/extract_subsystems.$$";
38 :     if (@ARGV == 1)
39 :     {
40 :     open(TMP,">$tmpF")
41 :     || die "could not open $tmpF";
42 :     while (defined($_ = <DATA>))
43 :     {
44 :     print TMP $_;
45 :     }
46 :     close(TMP);
47 :     &copy_new($from,$to,$ARGV[0],$tmpF);
48 :     }
49 :    
50 : overbeek 1.2 foreach $_ (`cat $mapping/*`)
51 : overbeek 1.1 {
52 :     if ($_ =~ /^(fig\|(\d+\.\d+)\.peg\.\d+)\t(fig\|(\d+\.\d+)\.peg\.(\d+))$/)
53 :     {
54 :     $old{$2} = $4;
55 :     $map{$1} = $5;
56 :     }
57 :     }
58 :    
59 :     opendir(SUBS,$to) || die "could not open $to";
60 :     @subs = grep { $_ !~ /^\./ } readdir(SUBS);
61 :     closedir(SUBS);
62 :    
63 :     foreach $sub (@subs)
64 :     {
65 : overbeek 1.6 next if (! -s "$from/$sub/spreadsheet");
66 : overbeek 1.4 print STDERR "updating $sub\n";
67 : overbeek 1.1 undef %salvaged;
68 :     open(IN,"<$from/$sub/spreadsheet") || die "$from/$sub/spreadsheet";
69 :     while (defined($_ = <IN>))
70 :     {
71 :     if (($_ =~ /^(\d+\.\d+)\t(\S+)\t(.*)$/) && ($new = $old{$1}))
72 :     {
73 : overbeek 1.6 my $genomeOld = $1;
74 : overbeek 1.1 $var = $2;
75 :     @old_pegs = split(/\t/,$3);
76 :     @new_pegs = ();
77 : overbeek 1.6 foreach my $set (@old_pegs)
78 : overbeek 1.1 {
79 : overbeek 1.6 my @new_set = ();
80 :     foreach $peg (map { "fig|$genomeOld.peg." . $_ } split(/,/,$set))
81 : overbeek 1.1 {
82 : overbeek 1.6 if ($peg && ($pegN = $map{$peg}))
83 :     {
84 :     push(@new_set,$pegN);
85 :     }
86 :     }
87 :    
88 :     if (@new_set > 0)
89 :     {
90 :     push(@new_pegs,join(",",@new_set));
91 : overbeek 1.1 }
92 :     else
93 :     {
94 :     push(@new_pegs,"");
95 :     }
96 :     }
97 : overbeek 1.6 $salvaged{$new} = join("\t",($new,$var,@new_pegs));
98 : overbeek 1.1 }
99 :     }
100 :     close(IN);
101 :    
102 :     open(IN,"<$to/$sub/spreadsheet") || die "$to/$sub/spreadsheet";
103 :     $/ = "\n//\n";
104 :     @spreadsheet = <IN>;
105 :     close(IN);
106 :    
107 :     open(OUT,">$to/$sub/spreadsheet") || die "$to/$sub/spreadsheet";
108 :     for ($i=0; ($i < @spreadsheet); $i++)
109 :     {
110 :     if ($i != 2)
111 :     {
112 :     print OUT $spreadsheet[$i];
113 :     }
114 :     else
115 :     {
116 : overbeek 1.4 $_ = $spreadsheet[$i];
117 : overbeek 1.1 chomp;
118 :     @lines = split(/\n/,$_);
119 :     %in_already = map { $_ =~ /^(\d+\.\d+)/; $1 => 1 } @lines;
120 :     foreach $new (keys(%salvaged))
121 :     {
122 :     if (($new =~ /^(\d+\.\d+)/) && (! $in_already{$1}))
123 :     {
124 :     push(@lines,$salvaged{$new});
125 :     }
126 :     }
127 : overbeek 1.6 print OUT join("\n",@lines),"\n";
128 : overbeek 1.1 }
129 :     }
130 :     close(OUT);
131 :     $/ = "\n";
132 :     }
133 : overbeek 1.4 if (@ARGV == 1) { unlink($tmpF); }
134 :    
135 :     sub copy_new {
136 :     my($from,$to,$to_data,$tmpF) = @_;
137 :    
138 :     opendir(ORG,"$to_data/Organisms") || die "$to/Organisms does not exist";
139 :     my @orgs = grep { $_ =~ /^\d+\.\d+$/ } readdir(ORG);
140 :     closedir(ORG);
141 :    
142 :     opendir(FROM,$from) || die "could not open $from";
143 :     my @subsystems = grep { $_ !~ /^\./ } readdir(FROM);
144 :     closedir(FROM);
145 :    
146 :     foreach my $subsystem (@subsystems)
147 :     {
148 :     if (! -d "$to/$subsystem")
149 :     {
150 :     print STDERR "copying $subsystem\n";
151 :     &filter_genomes("$from/$subsystem","$to/$subsystem",\@orgs,$subsystem,$tmpF);
152 :     }
153 :     }
154 :     }
155 :    
156 :     sub filter_genomes {
157 :     my($from,$to,$orgs,$subsystem,$tmpF) = @_;
158 :    
159 :     my $genomesF = "$FIG_Config::temp/genomes.$$";
160 :     open(TMP,">$genomesF") || die "could not open $genomesF";
161 : overbeek 1.5 foreach $org (@$orgs)
162 : overbeek 1.4 {
163 :     print TMP "$org\n";
164 :     }
165 :     close(TMP);
166 :    
167 :     my $rc = system("extract_genomes",$genomesF,$from,$to,$tmpF);
168 :     if ($rc != 0)
169 :     {
170 :     print STDERR "failed to copy $from $to\n";
171 :     }
172 :     unlink($genomesF);
173 :     }
174 : overbeek 1.1
175 : overbeek 1.4 __DATA__
176 :     /^\S.*\S$/
177 :     Alignments,erase
178 :     Backup,erase
179 :     /^[CEV].*$/,copy
180 :     curation.log,copy
181 :     notes,copy
182 :     reactions,copy
183 :     spreadsheet,filter_rows
184 :     SubsystemDiagrams,copyR
185 :     diagrams,copyR
186 :     /^assignments*/,erase
187 :     /^rowss*/,erase
188 :     constructs,copyR
189 :     rows,erase
190 :     /\.log$/,copyR
191 :     MAP_SUPPORT,copyR
192 :     /^.*~$/,erase

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3