[Bio] / FigKernelScripts / salvage_subsystem_rows.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/salvage_subsystem_rows.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : overbeek 1.4 ########################################################################
2 : olson 1.3 #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 : overbeek 1.1 use FIG;
# Construct the FIG object up front, as the original script did.
my $fig = FIG->new;

my $usage = "usage: salvage_subsystem_rows From To Maps [ToData]";

# For each Subsystem in To (a directory of subsystems), if Subsystem is also
# in From, then each row representing an organism named in Maps is "salvaged"
# and added to Subsystem in To.  If ToData is given, any subsystem that is in
# From but not in To is first copied to To (deleting rows for genomes that
# are not in ToData).

# From, To and Maps are all required and must be non-empty.
my($from,$to,$mapping) = splice(@ARGV,0,3);
($from && $to && $mapping) || die "$usage\n";

# When ToData is supplied, the table stored after __DATA__ is written to a
# temporary file and handed to copy_new together with the ToData directory.
# The temporary file is removed at the end of the script.
my $tmpF = "$FIG_Config::temp/extract_subsystems.$$";
if (@ARGV == 1)
{
    open(my $rules_fh,'>',$tmpF)
        || die "could not open $tmpF: $!";
    while (defined(my $rule = <DATA>))
    {
        print $rules_fh $rule;
    }
    # Buffered write errors only surface at close, so check it.
    close($rules_fh)
        || die "could not write $tmpF: $!";
    copy_new($from,$to,$ARGV[0],$tmpF);
}
49 :    
# Load the peg correspondences from every non-hidden plain file in the Maps
# directory.  A usable line has the form "old-peg<TAB>new-peg", with both
# ids written as fig|GENOME.peg.N; any other line is ignored.
#
#   $old{old genome id} = new genome id
#   $map{old peg id}    = peg number within the new genome
#
# Files are read in sorted name order, so on conflicting entries the later
# file wins.
my(%old,%map);

opendir(my $maps_dh,$mapping) || die "could not open $mapping: $!";
my @map_files = sort grep { $_ !~ /^\./ } readdir($maps_dh);
closedir($maps_dh);

foreach my $map_file (@map_files)
{
    my $path = "$mapping/$map_file";
    next if (! -f $path);

    open(my $map_fh,'<',$path) || die "could not open $path: $!";
    while (defined(my $line = <$map_fh>))
    {
        if ($line =~ /^(fig\|(\d+\.\d+)\.peg\.\d+)\t(fig\|(\d+\.\d+)\.peg\.(\d+))$/)
        {
            $old{$2} = $4;
            $map{$1} = $5;
        }
    }
    close($map_fh);
}
58 :    
# Every non-hidden entry of To is treated as a subsystem directory.
opendir(my $to_dh,$to) || die "could not open $to: $!";
my @subs = grep { $_ !~ /^\./ } readdir($to_dh);
closedir($to_dh);

foreach my $sub (@subs)
{
    # Only subsystems that also have a spreadsheet in From can donate rows.
    # The spreadsheet is opened and read as a plain file below, so test for
    # a file rather than a directory.
    next if (! -f "$from/$sub/spreadsheet");
    print STDERR "updating $sub\n";

    # Rebuild every From row whose genome has a mapped successor.  The new
    # row is keyed by the new genome id; a cell with no mapped peg is left
    # empty so the columns stay aligned.
    my %salvaged;
    open(my $from_fh,'<',"$from/$sub/spreadsheet")
        || die "could not open $from/$sub/spreadsheet: $!";
    while (defined(my $row = <$from_fh>))
    {
        next unless ($row =~ /^(\d+\.\d+)\t(\S+)\t(.*)$/);
        my($old_genome,$var,$cells) = ($1,$2,$3);

        my $new = $old{$old_genome};
        next unless $new;

        my @new_pegs;
        foreach my $peg (split(/\t/,$cells))
        {
            my $pegN = $peg ? $map{$peg} : undef;
            push(@new_pegs,($pegN ? "fig|$new.peg.$pegN" : ""));
        }
        $salvaged{$new} = join("\t",($new,$var,@new_pegs));
    }
    close($from_fh);

    # From here to the end of this iteration, records are terminated by a
    # line holding only "//".  Using local restores the normal line
    # separator automatically before the next subsystem is processed.
    local $/ = "\n//\n";

    open(my $to_fh,'<',"$to/$sub/spreadsheet")
        || die "could not open $to/$sub/spreadsheet: $!";
    my @spreadsheet = <$to_fh>;
    close($to_fh);

    # Rewrite the To spreadsheet in place.  Only the third section (index 2),
    # whose lines begin with genome ids, is changed; all other sections are
    # copied through verbatim.
    open(my $out_fh,'>',"$to/$sub/spreadsheet")
        || die "could not open $to/$sub/spreadsheet for writing: $!";
    for (my $i=0; ($i < @spreadsheet); $i++)
    {
        if ($i != 2)
        {
            print $out_fh $spreadsheet[$i];
            next;
        }

        my $section = $spreadsheet[$i];
        chomp $section;                      # strips the trailing "\n//\n"
        my @lines = split(/\n/,$section);

        # Genomes that already have a row in To keep that row.
        my %in_already;
        foreach my $line (@lines)
        {
            $in_already{$1} = 1 if ($line =~ /^(\d+\.\d+)/);
        }

        # Sorted so that repeated runs produce the same file.
        foreach my $genome (sort keys(%salvaged))
        {
            if (($genome =~ /^(\d+\.\d+)/) && (! $in_already{$1}))
            {
                push(@lines,$salvaged{$genome});
            }
        }
        print $out_fh join("\n",@lines),$/;
    }
    close($out_fh)
        || die "could not write $to/$sub/spreadsheet: $!";
}
if (@ARGV == 1) { unlink($tmpF); }
124 :    
# copy_new($from,$to,$to_data,$tmpF)
#
# Copies into $to every subsystem that exists in $from but has no directory
# of the same name in $to.  Each copy is made by filter_genomes, which is
# given the list of genome ids found as directory names (digits.digits)
# under $to_data/Organisms.
#
#   $from    - directory of source subsystems
#   $to      - directory of destination subsystems
#   $to_data - data directory containing an Organisms subdirectory
#   $tmpF    - path of the temporary file holding the __DATA__ table;
#              passed through to filter_genomes unchanged
#
# Dies if $to_data/Organisms or $from cannot be opened.
sub copy_new {
    my($from,$to,$to_data,$tmpF) = @_;

    opendir(my $org_dh,"$to_data/Organisms")
        || die "could not open $to_data/Organisms: $!";
    my @orgs = grep { $_ =~ /^\d+\.\d+$/ } readdir($org_dh);
    closedir($org_dh);

    opendir(my $from_dh,$from) || die "could not open $from: $!";
    my @subsystems = grep { $_ !~ /^\./ } readdir($from_dh);
    closedir($from_dh);

    foreach my $subsystem (@subsystems)
    {
        # Subsystems already present in $to are left for the row-salvaging
        # pass; only missing ones are copied here.
        next if (-d "$to/$subsystem");

        print STDERR "copying $subsystem\n";
        filter_genomes("$from/$subsystem","$to/$subsystem",\@orgs,$subsystem,$tmpF);
    }
}
145 :    
# filter_genomes($from,$to,$orgs,$subsystem,$tmpF)
#
# Writes the genome ids in @$orgs, one per line, to a temporary file and
# runs the external "extract_genomes" command with that file, $from, $to
# and $tmpF as its arguments.  A non-zero exit status is reported on STDERR
# but is not fatal.  The temporary genome file is removed afterwards.
#
#   $from      - source subsystem directory
#   $to        - destination subsystem directory
#   $orgs      - reference to an array of genome ids
#   $subsystem - subsystem name; accepted for the caller's convenience but
#                not used here
#   $tmpF      - path of the temporary file holding the __DATA__ table
#
# Dies if the temporary genome file cannot be written.
sub filter_genomes {
    my($from,$to,$orgs,$subsystem,$tmpF) = @_;

    my $genomesF = "$FIG_Config::temp/genomes.$$";
    open(my $genomes_fh,'>',$genomesF) || die "could not open $genomesF: $!";
    # Iterate the list passed in by reference; a plain @orgs here would be
    # the (empty) package variable, not the caller's list.
    foreach my $org (@$orgs)
    {
        print $genomes_fh "$org\n";
    }
    close($genomes_fh) || die "could not write $genomesF: $!";

    # List form of system: no shell is involved, so the paths need no quoting.
    my $rc = system("extract_genomes",$genomesF,$from,$to,$tmpF);
    if ($rc != 0)
    {
        print STDERR "failed to copy $from $to\n";
    }
    unlink($genomesF);
}
164 : overbeek 1.1
165 : overbeek 1.4 __DATA__
166 :     /^\S.*\S$/
167 :     Alignments,erase
168 :     Backup,erase
169 :     /^[CEV].*$/,copy
170 :     curation.log,copy
171 :     notes,copy
172 :     reactions,copy
173 :     spreadsheet,filter_rows
174 :     SubsystemDiagrams,copyR
175 :     diagrams,copyR
176 :     /^assignments*/,erase
177 :     /^rowss*/,erase
178 :     constructs,copyR
179 :     rows,erase
180 :     /\.log$/,copyR
181 :     MAP_SUPPORT,copyR
182 :     /^.*~$/,erase

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3