Parent Directory
|
Revision Log
Revision 1.6 - (view) (download) (as text)
1 : | chenry | 1.1 | use strict; |
2 : | use FileHandle; | ||
3 : | |||
4 : | #--- This function just prints the given array reference to file with a different array element on each line ---# | ||
# Writes the elements of an array to a file, one element per line.
#   $Filename - path of the file to create or overwrite
#   $ArrayRef - reference to the array of strings to write
# Undefined and zero-length elements are skipped.
# Dies if the file cannot be opened or if closing it fails.
sub PrintArrayToFile {
    my ($Filename,$ArrayRef) = @_;

    #Three-argument open with a lexical handle: the filename is never parsed
    #for mode characters and the handle cannot collide with another OUTPUT
    open(my $Output, ">", $Filename) or die "Cannot open $Filename: $!";
    foreach my $Item (@{$ArrayRef}) {
        if (defined($Item) && length($Item) > 0) {
            print $Output $Item."\n";
        }
    }
    #Buffered write errors only surface when the handle is closed
    close($Output) or die "Cannot close $Filename: $!";
}
19 : | |||
# Writes a two-dimensional array to a file, one row per line.
#   $Filename  - path of the file to create or overwrite
#   $ArrayRef  - reference to an array of array references (the rows)
#   $Delimiter - string placed between the elements of a row
# Rows that are empty or are not array references are skipped.
# Dies if the file cannot be opened or if closing it fails.
sub PrintTwoDimensionalArrayToFile {
    my ($Filename,$ArrayRef,$Delimiter) = @_;

    open(my $Output, ">", $Filename) or die "Cannot open $Filename: $!";
    foreach my $Row (@{$ArrayRef}) {
        #Skipping anything that is not a non-empty row
        if (ref($Row) eq "ARRAY" && @{$Row} > 0) {
            print $Output join($Delimiter,@{$Row})."\n";
        }
    }
    close($Output) or die "Cannot close $Filename: $!";
}
34 : | |||
35 : | |||
36 : | #--- This function removes the specified line from the file with the input filename if the line exists in the file ---# | ||
# Rewrites a file without the lines whose first field equals $DelLine.
#   $Filename  - path of the file to filter in place
#   $DelLine   - value of the first field identifying the lines to remove
#   $Delimiter - regular expression used to split each line into fields
# The file is loaded with LoadSingleColumnFile and written back with
# PrintArrayToFile, so blank lines are not written back either.
sub RemoveSpecificLineFromFile {
    my ($Filename,$DelLine,$Delimiter) = @_;

    #No delimiter is passed to the loader so that every line is kept in full
    my $FileArray = LoadSingleColumnFile($Filename,"");

    #Collecting the surviving lines instead of deleting array elements,
    #which would leave undefined holes in the array
    my @KeptLines;
    foreach my $Item (@{$FileArray}) {
        my @Data = split(/$Delimiter/,$Item);
        #A blank line produces no fields; its first field is treated as empty
        my $FirstField = defined($Data[0]) ? $Data[0] : "";
        if ($FirstField ne $DelLine) {
            push(@KeptLines,$Item);
        }
    }
    PrintArrayToFile($Filename,\@KeptLines);
}
52 : | |||
53 : | #--- This function adds the input line to the file with the input filename if the line does not already exist in the file ---# | ||
# Appends $NewLine to a file unless an existing entry already equals it.
#   $Filename  - path of an existing file (LoadSingleColumnFile dies otherwise)
#   $NewLine   - text to append; a newline is added after it
#   $Delimiter - passed to LoadSingleColumnFile; when non-empty only the first
#                field of each existing line is compared with $NewLine
# Returns nothing. Dies if the file cannot be opened or closed.
sub AddLineToFileUnique {
    my ($Filename,$NewLine,$Delimiter) = @_;

    my $FileArray = LoadSingleColumnFile($Filename,$Delimiter);
    foreach my $Item (@{$FileArray}) {
        #Blank lines come back undefined when a delimiter is in use
        my $Existing = defined($Item) ? $Item : "";
        if ($Existing eq $NewLine) {
            return;
        }
    }

    #A separating newline is only needed when the file does not already end
    #with one. The final byte is inspected directly because the length of the
    #last loaded entry says nothing about how the file is terminated.
    my $NeedsNewline = 0;
    if (-s $Filename) {
        open(my $Check, "<", $Filename) or die "Cannot open $Filename: $!";
        my $LastChar = "";
        seek($Check,-1,2); #whence 2 = relative to the end of the file
        read($Check,$LastChar,1);
        close($Check);
        if ($LastChar ne "\n") {
            $NeedsNewline = 1;
        }
    }

    open(my $Output, ">>", $Filename) or die "Cannot open $Filename: $!";
    if ($NeedsNewline == 1) {
        print $Output "\n";
    }
    print $Output $NewLine."\n";
    close($Output) or die "Cannot close $Filename: $!";
    return;
}
74 : | |||
75 : | #--- This function saves the input hash back to a file where each data item is stored on a separate line with the file headings stored in the first column of data ---# | ||
# Saves a hash of array references as a "horizontal" data file: one line per
# key, the key label first and then the values, all joined by $Delimiter.
#   $Filename    - output path; an empty string writes to STDOUT
#   $Delimiter   - string placed between the fields of a line
#   $DataHashRef - hash reference; recognised bookkeeping entries:
#                    "orderedkeys"    - array ref giving the output order
#                    "keytranslation" - hash ref mapping a key to the label
#                                       written to the file
# When "orderedkeys" is missing it is created from the current keys and
# stored in the hash. Entries that are not non-empty array refs are skipped.
# Returns $DataHashRef. Dies if the output cannot be opened or closed.
sub SaveHashToHorizontalDataFile {
    my ($Filename,$Delimiter,$DataHashRef) = @_;

    if (!defined($DataHashRef->{"orderedkeys"})) {
        my @Keys = keys(%{$DataHashRef});
        push(@{$DataHashRef->{"orderedkeys"}},@Keys);
    }

    #Opening the output exactly once; STDOUT is duplicated so that closing
    #the handle below does not close STDOUT itself
    my $Output;
    if ($Filename eq "") {
        open($Output, ">&", \*STDOUT) or die "Cannot open STDOUT: $!";
    } else {
        open($Output, ">", $Filename) or die "Cannot open $Filename: $!";
    }

    my $Translation = $DataHashRef->{"keytranslation"};
    foreach my $Key (@{$DataHashRef->{"orderedkeys"}}) {
        my $Label = $Key;
        if (ref($Translation) eq "HASH" && defined($Translation->{$Key})) {
            $Label = $Translation->{$Key};
        }
        #The data is normally stored under the key itself and only the label
        #is translated; data stored under the label is accepted as a fallback
        my $Values = $DataHashRef->{$Key};
        if (ref($Values) ne "ARRAY") {
            $Values = $DataHashRef->{$Label};
        }
        if (ref($Values) eq "ARRAY" && @{$Values} > 0) {
            print $Output $Label.$Delimiter.join($Delimiter,@{$Values})."\n";
        }
    }
    close($Output) or die "Cannot close $Filename: $!";

    return $DataHashRef;
}
107 : | |||
108 : | #--- This function loads a file where each data item is stored on a separate line with the file headings stored in the first column of data ---# | ||
# Loads a "horizontal" data file in which each line holds a heading followed
# by that heading's values.
#   $Filename           - path of the file to read
#   $Delimiter          - regular expression separating the fields of a line
#   $HeadingTranslation - optional hash ref mapping a heading found in the
#                         file to the key used in the returned hash
# Returns a hash reference: each heading (after translation) maps to an array
# ref of its values, "orderedkeys" lists the headings that had at least one
# value in file order, and "keytranslation" maps each translated key back to
# the heading read from the file. Dies if the file cannot be opened.
sub LoadHorizontalDataFile {
    my ($Filename,$Delimiter,$HeadingTranslation) = @_;

    my $DataHashRef = {};

    open(my $Input, "<", $Filename) or die "Cannot open $Filename: $!";
    while (my $Line = <$Input>) {
        chomp($Line);
        my @Data = split(/$Delimiter/,$Line);
        #Blank lines carry no heading and nothing to store
        if (!defined($Data[0])) {
            next;
        }
        if (defined($HeadingTranslation) && defined($HeadingTranslation->{$Data[0]})) {
            $DataHashRef->{"keytranslation"}->{$HeadingTranslation->{$Data[0]}} = $Data[0];
            $Data[0] = $HeadingTranslation->{$Data[0]};
        }
        for (my $i=1; $i < @Data; $i++) {
            $DataHashRef->{$Data[0]}->[$i-1] = $Data[$i];
        }
        #Headings without any value are not recorded
        if (@Data > 1) {
            push(@{$DataHashRef->{"orderedkeys"}},$Data[0]);
        }
    }
    close($Input);

    return $DataHashRef;
}
136 : | |||
137 : | |||
138 : | #--- This function loads a file containing a simple list and returns a reference to an array containing that list ---# | ||
139 : | #--- Note that when a delimiter is supplied, each line in the file is broken up with the delimiter, and only the first element from each line is stored in the returned list ---# | ||
# Loads a file holding a simple list and returns a reference to an array with
# one entry per line (line terminators removed).
#   $Filename  - path of the file to read
#   $Delimiter - optional regular expression; when non-empty each line is
#                split with it and only the first field is kept (a blank
#                line then yields an undefined entry)
# Dies if the file cannot be opened.
sub LoadSingleColumnFile {
    my ($Filename,$Delimiter) = @_;

    my $UseDelimiter = (defined($Delimiter) && length($Delimiter) > 0) ? 1 : 0;
    my $DataArrayRef = [];

    #Lexical handle so nested loaders cannot clobber each other's INPUT
    open(my $Input, "<", $Filename) or die "Cannot open $Filename: $!";
    while (my $Line = <$Input>) {
        chomp($Line);
        if ($UseDelimiter == 1) {
            my @Data = split(/$Delimiter/,$Line);
            $Line = $Data[0];
        }
        push(@{$DataArrayRef},$Line);
    }
    close($Input);

    return $DataArrayRef;
}
161 : | |||
162 : | #--- This function loads a file containing multiple columns of data with no file headings ---# | ||
# Loads a file with several columns and no heading row.
#   $Filename  - path of the file to read
#   $Delimiter - regular expression separating the columns; when empty or
#                undefined each row holds the whole line as its only element
# Returns a reference to an array of array references, one per line.
# Dies if the file cannot be opened.
sub LoadMultipleColumnFile {
    my ($Filename,$Delimiter) = @_;

    my $UseDelimiter = (defined($Delimiter) && length($Delimiter) > 0) ? 1 : 0;
    my $DataArrayRefArrayRef = [];

    open(my $Input, "<", $Filename) or die "Cannot open $Filename: $!";
    while (my $Line = <$Input>) {
        chomp($Line);
        my $Data = [$Line];
        if ($UseDelimiter == 1) {
            #A blank line splits into an empty row
            @{$Data} = split(/$Delimiter/,$Line);
        }
        push(@{$DataArrayRefArrayRef},$Data);
    }
    close($Input);

    return $DataArrayRefArrayRef;
}
183 : | |||
184 : | #--- This function loads a file containing multiple columns of data with file headings at the top ---# | ||
# Loads a delimited file whose columns are labelled by a heading row.
#   $Filename         - path of the file to read
#   $ColumnDelimiter  - regular expression separating the columns; when empty
#                       or undefined nothing is loaded
#   $ItemDelimiter    - optional regular expression splitting a cell into
#                       several items
#   $HeadingRowNumber - zero-based index of the heading row (default 0)
# Returns a reference to an array with one entry per data row. Each entry is
# a hash ref mapping a heading to an array ref of the cell's items; empty
# cells are omitted and a row without any data yields an undefined entry.
# The first row additionally carries the heading list under "orderedkeys".
# Dies if the file cannot be opened.
sub LoadMultipleLabeledColumnFile {
    my ($Filename,$ColumnDelimiter,$ItemDelimiter,$HeadingRowNumber) = @_;
    if (!defined($HeadingRowNumber) || $HeadingRowNumber eq "") {
        $HeadingRowNumber = 0;
    }
    my $SplitItems = (defined($ItemDelimiter) && length($ItemDelimiter) > 0) ? 1 : 0;
    my $DataHashRefArrayRef = [];

    open(my $Input, "<", $Filename) or die "Cannot open $Filename: $!";

    #Skipping ahead to the heading row
    my $Line = <$Input>;
    for (my $i=0; $i < $HeadingRowNumber; $i++) {
        $Line = <$Input>;
    }

    #Nothing to load when the file ends before the heading row or when no
    #column delimiter was supplied
    if (!defined($Line) || !defined($ColumnDelimiter) || length($ColumnDelimiter) == 0) {
        close($Input);
        return $DataHashRefArrayRef;
    }

    chomp($Line);
    my @Headings = split(/$ColumnDelimiter/,$Line);
    my $First = 1;
    while (defined($Line = <$Input>)) {
        chomp($Line);
        my @Data = split(/$ColumnDelimiter/,$Line);
        my $ArrayRefHashRef;
        for (my $i=0; $i < @Headings; $i++) {
            if (defined($Data[$i]) && length($Data[$i]) > 0) {
                if ($SplitItems == 1) {
                    push(@{$ArrayRefHashRef->{$Headings[$i]}},split(/$ItemDelimiter/,$Data[$i]));
                } else {
                    $ArrayRefHashRef->{$Headings[$i]}->[0] = $Data[$i];
                }
            }
        }
        #Only the first data row records the column order
        if ($First == 1) {
            $First = 0;
            push(@{$ArrayRefHashRef->{"orderedkeys"}},@Headings);
        }
        push(@{$DataHashRefArrayRef},$ArrayRefHashRef);
    }
    close($Input);

    return $DataHashRefArrayRef;
}
229 : | |||
# Writes an array of row hashes to a file as a table. Columns are separated
# by ";" and the items inside a cell by "|".
#   $Filename     - path of the file to create or overwrite
#   $HashArrayRef - array ref of rows; each row maps a heading to an array
#                   ref of items
#   $HeaderRef    - optional array ref of headings giving the column order;
#                   when it is not an array ref the "orderedkeys" entry of
#                   the first row is used instead
#   $ExtraHeaders - optional text written verbatim before the heading line
# Returns without writing when no heading list is available. A failure to
# open the file is reported with a warning and nothing is written.
sub PrintHashArrayToFile {
    my ($Filename,$HashArrayRef,$HeaderRef,$ExtraHeaders) = @_;

    #Falling back to the column order stored in the first row
    if (ref($HeaderRef) ne "ARRAY") {
        if (!defined($HashArrayRef->[0]) || !defined($HashArrayRef->[0]->{"orderedkeys"})) {
            return;
        }
        $HeaderRef = $HashArrayRef->[0]->{"orderedkeys"};
    }

    my $Output;
    if (!open($Output, ">", $Filename)) {
        warn "Cannot open $Filename: $!";
        return;
    }

    if (defined($ExtraHeaders)) {
        print $Output $ExtraHeaders;
    }
    print $Output join(";",@{$HeaderRef})."\n";
    foreach my $Row (@{$HashArrayRef}) {
        my @Cells;
        foreach my $Heading (@{$HeaderRef}) {
            #Missing cells are written as empty columns
            if (defined($Row) && defined($Row->{$Heading})) {
                push(@Cells,join("|",@{$Row->{$Heading}}));
            } else {
                push(@Cells,"");
            }
        }
        print $Output join(";",@Cells)."\n";
    }
    close($Output);
    return;
}
260 : | |||
# Loads a delimited table file into a table structure.
#   $VariableHash  - accepted for interface compatibility; not used here
#   $Filename      - path of the table file
#   $Delimiter     - column delimiter, used as a regular expression
#                    (default ";"; a literal "|" is escaped automatically)
#   $ItemDelimiter - optional delimiter splitting a cell into items, used as
#                    a regular expression (a literal "|" is escaped)
#   $HeadingLine   - zero-based index of the heading line (default 0); the
#                    lines before it are kept as the file prefix
#   $HashColumns   - optional array ref of headings to index rows by
# Returns undef when the file does not exist or cannot be opened. Otherwise
# returns a hash ref with:
#   "array"            - array ref of rows (heading => array ref of items);
#                        a row without data is undefined
#   "file IO settings" - "filename", "delimiter", "item delimiter",
#                        "file prefix" (one-element array refs) and
#                        "orderedkeys" (the heading list)
#   "hash columns"     - heading => value => array ref of matching rows
#   <value>            - for every value found in a hash column, an array ref
#                        of the rows holding it, stored at the top level
sub LoadTable {
    my ($VariableHash,$Filename,$Delimiter,$ItemDelimiter,$HeadingLine,$HashColumns) = @_;

    #Checking that the table file exists
    if (!-e $Filename) {
        return undef;
    }

    #Sanity checking input values
    if (!defined($HeadingLine) || $HeadingLine eq "") {
        $HeadingLine = 0;
    }
    if (!defined($Delimiter) || $Delimiter eq "") {
        $Delimiter = ";";
    }
    if (!defined($ItemDelimiter)) {
        $ItemDelimiter = "";
    }
    #The delimiters are used as regular expressions, so a bare pipe is escaped
    my $DelimiterRegex = ($Delimiter eq "|") ? "\\|" : $Delimiter;
    my $ItemDelimiterRegex = ($ItemDelimiter eq "|") ? "\\|" : $ItemDelimiter;

    #Loading the data table
    my $Table;
    my $Prefix;
    open(my $TableInput, "<", $Filename) or return undef;
    my $Line = <$TableInput>;
    for (my $i=0; $i < $HeadingLine; $i++) {
        if (defined($Line)) {
            $Prefix .= $Line;
        }
        $Line = <$TableInput>;
    }
    #An empty or too-short file simply has no headings
    if (!defined($Line)) {
        $Line = "";
    }
    chomp($Line);

    my @Headings = split(/$DelimiterRegex/,$Line);
    while (defined($Line = <$TableInput>)) {
        chomp($Line);
        my @Data = split(/$DelimiterRegex/,$Line);
        my $ArrayRefHashRef;
        for (my $i=0; $i < @Headings; $i++) {
            if (defined($Data[$i]) && length($Data[$i]) > 0) {
                if (length($ItemDelimiterRegex) > 0) {
                    push(@{$ArrayRefHashRef->{$Headings[$i]}},split(/$ItemDelimiterRegex/,$Data[$i]));
                } else {
                    $ArrayRefHashRef->{$Headings[$i]}->[0] = $Data[$i];
                }
            }
        }
        push(@{$Table->{"array"}},$ArrayRefHashRef);
    }
    close($TableInput);

    #Loading file IO parameters (the delimiters are stored unescaped)
    $Table->{"file IO settings"}->{"filename"}->[0] = $Filename;
    $Table->{"file IO settings"}->{"delimiter"}->[0] = ($Delimiter eq "\\|") ? "|" : $Delimiter;
    $Table->{"file IO settings"}->{"item delimiter"}->[0] = ($ItemDelimiter eq "\\|") ? "|" : $ItemDelimiter;
    $Table->{"file IO settings"}->{"file prefix"}->[0] = $Prefix;
    push(@{$Table->{"file IO settings"}->{"orderedkeys"}},@Headings);

    #Loading hash with hash column keys
    if (ref($HashColumns) eq "ARRAY") {
        foreach my $ItemData (@{$Table->{"array"}}) {
            foreach my $Heading (@{$HashColumns}) {
                if (defined($ItemData->{$Heading})) {
                    foreach my $Value (@{$ItemData->{$Heading}}) {
                        push(@{$Table->{$Value}},$ItemData);
                        push(@{$Table->{"hash columns"}->{$Heading}->{$Value}},$ItemData);
                    }
                }
            }
        }
    }

    return $Table;
}
348 : | |||
# Writes a table structure (as built by LoadTable) back to its file.
#   $TableRef - hash ref holding "array" (the rows) and "file IO settings"
#               with "filename" and "orderedkeys"; optional settings are
#               "delimiter" (default ";"), "item delimiter" (default "|"),
#               "file prefix" (text written before the heading line) and
#               "append" (when defined the file is appended to)
# Returns -1 when a required entry is missing or the file cannot be opened
# or closed, and 1 on success.
sub SaveTable {
    my ($TableRef) = @_;

    #Checking that the rows, a filename, and the column order all exist
    my $Settings = $TableRef->{"file IO settings"};
    if (!defined($TableRef->{"array"}) || !defined($Settings->{"filename"}) || !defined($Settings->{"orderedkeys"})) {
        return -1;
    }

    my $Filename = $Settings->{"filename"}->[0];
    my $Delimiter = ";";
    my $ItemDelimiter = "|";
    my $Prefix = "";
    if (defined($Settings->{"delimiter"})) {
        $Delimiter = $Settings->{"delimiter"}->[0];
        #A regex-escaped pipe is written as a plain pipe
        if ($Delimiter eq "\\|") {
            $Delimiter = "|";
        }
    }
    if (defined($Settings->{"item delimiter"})) {
        $ItemDelimiter = $Settings->{"item delimiter"}->[0];
        if ($ItemDelimiter eq "\\|") {
            $ItemDelimiter = "|";
        }
    }
    if (defined($Settings->{"file prefix"})) {
        $Prefix = $Settings->{"file prefix"}->[0];
    }

    #Opening the file
    my $Mode = defined($Settings->{"append"}) ? ">>" : ">";
    my $Output;
    if (!open($Output, $Mode, $Filename)) {
        return -1;
    }

    #The stored prefix may be undefined when the table had no prefix lines
    if (defined($Prefix)) {
        print $Output $Prefix;
    }
    my @Headings = @{$Settings->{"orderedkeys"}};
    print $Output join($Delimiter,@Headings)."\n";
    foreach my $Row (@{$TableRef->{"array"}}) {
        my @Cells;
        foreach my $Heading (@Headings) {
            if (defined($Row) && defined($Row->{$Heading})) {
                push(@Cells,join($ItemDelimiter,@{$Row->{$Heading}}));
            } else {
                push(@Cells,"");
            }
        }
        print $Output join($Delimiter,@Cells)."\n";
    }
    if (!close($Output)) {
        return -1;
    }
    return 1;
}
405 : | |||
# Returns a copy of a table row: a new hash in which every heading maps to a
# new array holding the same items as the input row. The result is undefined
# when the input row has no headings.
sub copy_table_row {
    my ($InRow) = @_;

    my $RowCopy;
    foreach my $Column (keys(%{$InRow})) {
        my @Items = @{$InRow->{$Column}};
        $RowCopy->{$Column} = [@Items];
    }

    return $RowCopy;
}
417 : | |||
418 : | chenry | 1.1 | #--- This function loads a file with the following on each line: $A$Delimiter$B and maps $A to $B in the first returned hash reference and $B to $A in the second returned hash reference ---# |
# Loads a translation file holding "$A$Delimiter$B" on each line.
#   $Filename  - path of the file to read
#   $Delimiter - regular expression separating the two fields
# Returns two hash references: the first maps $A to $B, the second maps $B to
# $A. When a key occurs more than once the first occurrence wins in both
# directions. Lines with fewer than two fields are ignored. A file that
# cannot be opened yields two empty hashes.
sub LoadSeparateTranslationFiles {
    my ($Filename,$Delimiter) = @_;
    my $HashReferenceForward = {};
    my $HashReferenceReverse = {};

    if (open(my $Input, "<", $Filename)) {
        while (my $Line = <$Input>) {
            chomp($Line);
            my @Data = split(/$Delimiter/,$Line);
            if (@Data >= 2) {
                if (!defined($HashReferenceForward->{$Data[0]})) {
                    $HashReferenceForward->{$Data[0]} = $Data[1];
                }
                #The reverse map is guarded by the reverse hash itself
                if (!defined($HashReferenceReverse->{$Data[1]})) {
                    $HashReferenceReverse->{$Data[1]} = $Data[0];
                }
            }
        }
        close($Input);
    }

    return ($HashReferenceForward,$HashReferenceReverse);
}
442 : | |||
443 : | #--- This function breaks down the input filename into a directory, filename, and extension ---# | ||
# Breaks a path into its base name, directory, and extension.
#   $Filename - the path to parse
# Returns the list (name, directory, extension):
#   directory - everything up to and including the last "/" ("" when the
#               path has no directory part)
#   extension - the text after the last "." of the final path component
#               ("" when there is none)
#   name      - the final path component without the extension
# A leading dot alone does not start an extension, so ".profile" is a name.
sub ParseFilename {
    my ($Filename) = @_;

    my $Directory = "";
    my $Extension = "";

    #Splitting off the directory first keeps "/" out of the extension
    if ($Filename =~ m/^(.*\/)([^\/]*)$/s) {
        $Directory = $1;
        $Filename = $2;
    }
    if ($Filename =~ m/^(.+)\.([^\.]+)$/s) {
        $Filename = $1;
        $Extension = $2;
    }

    return ($Filename,$Directory,$Extension);
}
457 : | |||
458 : | #--- This function compares the files listed in two separate directories and returns the list of new files and updated files ---# | ||
# Compares the entries of two directories.
#   $NewDirectory   - directory holding the newer set of files
#   $OldDirectory   - directory holding the older set of files
#   $ComparisonType - "date" compares modification times, "size" compares
#                     sizes in bytes; with any other value entries present
#                     in both directories are never reported as updated
# Returns two array references: the names present only in $NewDirectory and
# the names present in both whose date or size differs. Both lists are
# sorted by name. Dies if either directory cannot be read.
# The directories are read directly rather than through a shell "ls", so no
# FileList.txt listing is written into them.
sub CompareDirectories {
    my ($NewDirectory,$OldDirectory,$ComparisonType) = @_;

    my $NewEntries = _DirectoryEntrySignatures($NewDirectory,$ComparisonType);
    my $OldEntries = _DirectoryEntrySignatures($OldDirectory,$ComparisonType);

    my $UpdatedFiles = [];
    my $NewFiles = [];
    foreach my $Name (sort(keys(%{$NewEntries}))) {
        if (!exists($OldEntries->{$Name})) {
            push(@{$NewFiles},$Name);
        } elsif ($OldEntries->{$Name} ne $NewEntries->{$Name}) {
            push(@{$UpdatedFiles},$Name);
        }
    }

    return ($NewFiles,$UpdatedFiles);
}

# Maps every entry of a directory (except "." and "..") to the value used for
# comparison: the modification time for "date", the size for "size", and an
# empty string otherwise or when the entry cannot be examined.
sub _DirectoryEntrySignatures {
    my ($Directory,$ComparisonType) = @_;

    if (!defined($ComparisonType)) {
        $ComparisonType = "";
    }

    my %Signatures;
    opendir(my $Handle,$Directory) or die "Cannot open $Directory: $!";
    foreach my $Name (readdir($Handle)) {
        if ($Name eq "." || $Name eq "..") {
            next;
        }
        #Directory arguments may or may not carry a trailing slash
        my $Path = ($Directory =~ m/\/$/) ? $Directory.$Name : $Directory."/".$Name;
        my @Stat = stat($Path);
        $Signatures{$Name} = "";
        if (@Stat > 0 && $ComparisonType eq "date") {
            $Signatures{$Name} = $Stat[9]; #modification time
        } elsif (@Stat > 0 && $ComparisonType eq "size") {
            $Signatures{$Name} = $Stat[7]; #size in bytes
        }
    }
    closedir($Handle);

    return \%Signatures;
}
496 : | |||
# Removes hydrogen from a chemical formula.
#   $Formula - formula string such as "C6H12O6"
# Returns the formula with every "H" and the count following it removed.
# An "H" followed by a lowercase letter starts a different element symbol
# (for example "Hg" or "He") and is left alone. An undefined formula gives an
# empty string.
sub RemoveHFromFormula {
    my ($Formula) = @_;

    if (!defined($Formula)) {
        return "";
    }

    #Every hydrogen term is removed, wherever it appears in the formula
    $Formula =~ s/H(?![a-z])\d*//g;

    return $Formula;
}
511 : | |||
# Compares two arrays of strings.
#   $ArrayOne - reference to the first array
#   $ArrayTwo - reference to the second array
# Returns three values: an array ref of the elements found only in
# $ArrayOne, an array ref of the elements found only in $ArrayTwo, and an
# array ref of the elements of $ArrayOne that also occur in $ArrayTwo. Each
# value is undefined (not an empty array) when it would have no elements.
# Element order follows the input arrays and duplicates are kept.
sub CompareArrays {
    my ($ArrayOne,$ArrayTwo) = @_;

    my $ArrayOneExtra;
    my $ArrayTwoExtra;
    my $ArrayOverlap;

    #Hash lookups replace a nested scan of the second array
    my %InArrayTwo;
    foreach my $Item (@{$ArrayTwo}) {
        $InArrayTwo{$Item} = 1;
    }

    my %Matched;
    foreach my $Item (@{$ArrayOne}) {
        if (exists($InArrayTwo{$Item})) {
            $Matched{$Item} = 1;
            push(@{$ArrayOverlap},$Item);
        } else {
            push(@{$ArrayOneExtra},$Item);
        }
    }
    foreach my $Item (@{$ArrayTwo}) {
        if (!exists($Matched{$Item})) {
            push(@{$ArrayTwoExtra},$Item);
        }
    }

    return ($ArrayOneExtra,$ArrayTwoExtra,$ArrayOverlap);
}
542 : | |||
# Replaces the occurrences of each key of a translation hash in a line with
# the key's value.
#   $Translation - hash ref mapping the text to find to its replacement
#   $Line        - the string to translate
# A key is replaced when it stands between two of the characters comma,
# whitespace, semicolon, plus, or "[", when it ends the line, or when it
# starts the line. Keys are matched literally and are applied in sorted
# order so the result does not depend on hash ordering.
# Returns the translated line.
sub ReplaceLineSubstringsFromHash {
    my ($Translation, $Line) = @_;

    foreach my $FindString (sort(keys(%{$Translation}))) {
        my $ReplaceString = $Translation->{$FindString};
        #\Q...\E keeps characters such as "+" or "[" in a key from acting as
        #regex syntax. The trailing delimiter is only looked at, not
        #consumed, so it can also serve as the leading delimiter of the next
        #occurrence.
        $Line =~ s/([\,\s\;\+\[])\Q$FindString\E(?=[\,\s\;\+\[])/$1$ReplaceString/g;
        $Line =~ s/\Q$FindString\E$/$ReplaceString/g;
        $Line =~ s/^\Q$FindString\E/$ReplaceString/g;
    }

    return $Line;
}
557 : | |||
# Looks for a string in an array.
#   $ArrayRef - reference to the array to search (may be undefined)
#   $Value    - the string to look for, compared with "eq"
# Returns the index of the first matching element, or -1 when the array
# reference is undefined or no element matches.
sub FindArrayElement {
    my ($ArrayRef,$Value) = @_;

    return -1 unless (defined($ArrayRef));

    my $Index = 0;
    foreach my $Element (@{$ArrayRef}) {
        return $Index if ($Element eq $Value);
        $Index++;
    }

    return -1;
}
573 : | |||
# Removes every element equal to $Value from an array, in place.
#   $ArrayRef - reference to the array to modify
#   $Value    - the string to remove, compared with "eq"
# Returns the same array reference that was passed in.
sub RemoveArrayElement {
    my ($ArrayRef,$Value) = @_;

    #Walking backwards so a removal never shifts an element yet to be checked
    my $Position = scalar(@{$ArrayRef}) - 1;
    while ($Position >= 0) {
        if ($ArrayRef->[$Position] eq $Value) {
            splice(@{$ArrayRef},$Position,1);
        }
        $Position--;
    }

    return $ArrayRef;
}
586 : | |||
# Rounds a number for display.
#   $OriginalNumber  - the number to format
#   $Digits          - number of digits kept after the decimal point (after
#                      the leading digit for magnitudes of one or less)
#   $ZeroEquivalence - magnitudes below this are reported as zero
#                      (default 0)
# Returns:
#   - "0." followed by $Digits zeros for zero and near-zero input
#   - the number rounded to $Digits decimals when its magnitude exceeds 1
#   - otherwise the number rounded to $Digits digits after its leading
#     digit, written in "e" notation when it is smaller than 0.001
sub FormatNumber {
    my ($OriginalNumber,$Digits,$ZeroEquivalence) = @_;

    if (!defined($ZeroEquivalence)) {
        $ZeroEquivalence = 0;
    }

    #An exact zero is handled here as well: it has no leading digit, so the
    #exponent search below would never find one
    if ($OriginalNumber == 0 || abs($OriginalNumber) < $ZeroEquivalence) {
        $OriginalNumber = "0.";
        for (my $i=0; $i < $Digits; $i++) {
            $OriginalNumber .= "0";
        }
        return $OriginalNumber;
    }

    if ($OriginalNumber > 1 || $OriginalNumber < -1) {
        $OriginalNumber = $OriginalNumber*(10**$Digits);
        #Rounding half away from zero
        $OriginalNumber = int($OriginalNumber + .5 * ($OriginalNumber <=> 0));
        $OriginalNumber = $OriginalNumber/(10**$Digits);
        return $OriginalNumber;
    }

    #Finding the power of ten of the leading digit
    my $Zeros = 0;
    while (abs($OriginalNumber) < 10**$Zeros) {
        $Zeros--;
    }

    #Scaling the leading digit to the units position before rounding
    $OriginalNumber = $OriginalNumber*10**-$Zeros;
    $OriginalNumber = $OriginalNumber*(10**$Digits);
    $OriginalNumber = int($OriginalNumber + .5 * ($OriginalNumber <=> 0));
    $OriginalNumber = $OriginalNumber/(10**$Digits);
    if ($Zeros > -4) {
        $OriginalNumber = $OriginalNumber/(10**-$Zeros);
    } else {
        $OriginalNumber .= "e".$Zeros;
    }

    return $OriginalNumber;
}
622 : | |||
# Converts a name into the simplified form(s) used for name matching.
#   $InName - the name to convert
# A name ending in "-" is returned unchanged. Otherwise the name is
# lowercased and stripped of whitespace, commas, dashes, underscores,
# parentheses, square brackets, colons, apostrophes, and semicolons.
# Returns a one-element list holding the simplified name, or a two-element
# list whose second element has every "icacid" replaced by "ate" when that
# replacement changes the name.
sub ConvertToSearchNames {
    my ($InName) = @_;

    return $InName if ($InName =~ m/-$/);

    #Lowercase and punctuation-free names are easier to match
    my $SearchName = lc($InName);
    $SearchName =~ s/[\s,\-_()\[\]:]//g;
    $SearchName =~ s/’//g;
    $SearchName =~ s/[';]//g;

    #Acid names are also offered in their "-ate" form
    my $AlternateName = $SearchName;
    $AlternateName =~ s/icacid/ate/g;

    return ($SearchName) if ($AlternateName eq $SearchName);
    return ($SearchName,$AlternateName);
}
654 : | chenry | 1.5 | |
# Adjusts the hydrogen count of a formula by a charge.
#   $NeutralFormula - formula string; undefined gives ""
#   $Charge         - charge to remove; undefined or "0" leaves the formula
#                     unchanged
# The formula "H" is always returned unchanged. Otherwise the current
# hydrogen count is read from the first "H<digits>" in the formula, or taken
# as 1 when an "H" is followed by a capital letter or ends the formula, or 0
# when neither is found. The new count is 0 when the charge is at least the
# current count and the current count minus the charge otherwise. The
# hydrogen term is then rewritten, or appended when the formula had none.
# Returns the adjusted formula.
sub ConvertToNeutralFormula {
    my ($NeutralFormula,$Charge) = @_;

    return "" if (!defined($NeutralFormula));
    return $NeutralFormula if ($NeutralFormula eq "H");
    return $NeutralFormula if (!defined($Charge) || $Charge eq "0");

    #Reading the current number of hydrogens
    my $CurrentH = 0;
    if ($NeutralFormula =~ m/H(\d+)/) {
        $CurrentH = $1;
    } elsif ($NeutralFormula =~ m/H[A-Z]/ || $NeutralFormula =~ m/H$/) {
        $CurrentH = 1;
    }

    #Working out the new hydrogen term
    my $NewH = ($Charge >= $CurrentH) ? 0 : $CurrentH - $Charge;
    my $Replace = ($NewH > 1) ? "H".$NewH : ($NewH == 0) ? "" : "H";

    if ($CurrentH == 0 && $NewH > 0) {
        #No hydrogen term to rewrite, so one is appended
        $NeutralFormula .= ($NewH > 1) ? "H".$NewH : "H";
    } elsif ($CurrentH == 1) {
        $NeutralFormula =~ s/H$/$Replace/;
        $NeutralFormula =~ s/H([A-Z])/$Replace$1/;
    } else {
        my $Match = "H".$CurrentH;
        $NeutralFormula =~ s/$Match/$Replace/;
    }

    return $NeutralFormula;
}
697 : | |||
# Counts the lines of a file.
#   $filename - path of the file to read
# Returns the number of lines. Dies if the file cannot be opened.
sub CountFileLines {
    my ($filename) = @_;
    my $lines = 0;
    #Explicit read mode: the filename can never be taken for a mode or a pipe
    open(my $File, "<", $filename) or die "Can't open `$filename': $!";
    while (<$File>) {
        $lines++;
    }
    close($File);
    return $lines;
}
708 : | |||
# Rewrites a chemical formula with its element symbols in sorted order.
#   $OriginalFormula - formula string such as "C6H12O6"
# A capital letter starts an element symbol, lowercase letters extend it and
# digits form its count; any other character ends the current symbol. A
# count of "1" is dropped when a symbol is ended by a capital letter or by
# the end of the formula. A symbol seen more than once keeps its last count.
# Returns two strings: the sorted formula, and the sorted formula without
# the element "H". Either is undefined when it would have no elements.
sub ManipulateFormula {
    my ($OriginalFormula) = @_;

    my %CountFor;
    my $Symbol = "";
    my $Count = "";
    foreach my $Char (split(//,$OriginalFormula)) {
        if ($Char =~ m/[A-Z]/) {
            #A capital letter closes the previous symbol and opens a new one
            if ($Symbol ne "") {
                $CountFor{$Symbol} = ($Count eq "1") ? "" : $Count;
            }
            $Symbol = $Char;
            $Count = "";
        } elsif ($Char =~ m/[a-z]/) {
            $Symbol .= $Char;
        } elsif ($Char =~ m/[\d]/) {
            $Count .= $Char;
        } else {
            #Any other character closes the symbol; the count is stored as
            #read and is not cleared here
            if ($Symbol ne "") {
                $CountFor{$Symbol} = $Count;
            }
            $Symbol = "";
        }
    }
    if ($Symbol ne "") {
        $CountFor{$Symbol} = ($Count eq "1") ? "" : $Count;
    }

    my $StandardFormula;
    my $CompareFormula;
    foreach my $Atom (sort(keys(%CountFor))) {
        my $Term = $Atom.$CountFor{$Atom};
        $StandardFormula .= $Term;
        if ($Atom ne "H") {
            $CompareFormula .= $Term;
        }
    }

    return ($StandardFormula,$CompareFormula);
}
758 : | |||
759 : | |||
760 : | |||
# Adds the reaction data of a tab-delimited GPR file to a reaction hash.
#   $Filename     - path of the file, loaded with LoadMultipleColumnFile
#   $ReactionData - hash ref keyed by reaction ID; updated in place
# Only lines with at least three columns are used. The third column is the
# reaction ID, a non-empty second column is stored as "EC", and every
# non-empty column from the fifth onward is appended to "GENE ID" with one
# trailing underscore removed.
# Returns $ReactionData.
sub ParseGPRFile {
    my ($Filename,$ReactionData) = @_;

    my $GPRData = LoadMultipleColumnFile($Filename,"\t");

    foreach my $Row (@{$GPRData}) {
        if (@{$Row} < 3) {
            next;
        }
        my $ReactionID = $Row->[2];
        if (!defined($ReactionData->{$ReactionID})) {
            $ReactionData->{$ReactionID}->{"ID"} = $ReactionID;
        }
        if (length($Row->[1]) > 0) {
            $ReactionData->{$ReactionID}->{"EC"} = $Row->[1];
        }
        for (my $j=4; $j < @{$Row}; $j++) {
            my $GeneID = $Row->[$j];
            if (length($GeneID) > 0) {
                #Dropping the trailing underscore while keeping the ID itself
                $GeneID =~ s/_$//;
                if (length($GeneID) > 0) {
                    push(@{$ReactionData->{$ReactionID}->{"GENE ID"}},$GeneID);
                }
            }
        }
    }

    return ($ReactionData);
}
787 : | |||
#--- This function parses a line-oriented SBML file and merges its species and reactions into the input hashes ---#
# Filename:     path passed to LoadSingleColumnFile
# ReactionData: hash reference keyed by reaction ID; receives "ID", "NAME", "EC", "CONFIDENCE",
#               "GENES" and "EQUATION" entries
# CompoundData: hash reference keyed by compound ID; receives "ID" and "NAME" entries
# Every tag is only recognized when it is the first thing on its line (all patterns are anchored with ^).
# Returns the list ($ReactionData,$CompoundData).
sub ParseSBMLFile {
	my ($Filename,$ReactionData,$CompoundData) = @_;

	my $SBMLData = &LoadSingleColumnFile($Filename,"");

	#Flags tracking which section of the file is currently being read
	my $HandlingSpecies = 0;
	my $HandlingReactions = 0;
	my $HandlingReactants = 0;
	my $HandlingProducts = 0;
	#Data accumulated for the reaction currently being read
	my $ReactionID = "";
	my $ReactionReactants = "";
	my $ReactionSign = "";
	my $ReactionProducts = "";
	foreach my $Line (@{$SBMLData}) {
		if ($Line =~ m/^<listOfSpecies>/) {
			$HandlingSpecies = 1;
		} elsif ($Line =~ m/^<\/listOfSpecies>/) {
			$HandlingSpecies = 0;
		} elsif ($Line =~ m/^<listOfReactions>/) {
			$HandlingReactions = 1;
		} elsif ($Line =~ m/^<\/listOfReactions>/) {
			$HandlingReactions = 0;
		} elsif ($HandlingSpecies == 1 && $Line =~ m/^<species/) {
			#Parsing out the compound ID
			if ($Line =~ m/id="([^"]+)"/) {
				my $ID = $1;
				#Dropping a leading underscore and a trailing "_x" compartment suffix
				$ID =~ s/^_//;
				if ($ID =~ m/_[a-z]$/) {
					$ID = substr($ID,0,-2);
				}
				if (length($ID) > 0) {
					if (!defined($CompoundData->{$ID})) {
						$CompoundData->{$ID}->{"ID"} = $ID;
					}
					#Parsing out the compound name
					if ($Line =~ m/name="([^"]+)"/) {
						my $Name = $1;
						$Name =~ s/^_//;
						$Name =~ s/_/ /g;
						if (length($Name) > 0 && (!defined($CompoundData->{$ID}->{"NAME"}) || &FindArrayElement($CompoundData->{$ID}->{"NAME"},$Name) == -1)) {
							push(@{$CompoundData->{$ID}->{"NAME"}},$Name);
						}
					}
				}
			}
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<reaction/) {
			#Reactions are reversible unless explicitly flagged otherwise
			$ReactionSign = " <=> ";
			if ($Line =~ m/reversible="false"/) {
				$ReactionSign = " => ";
			}
			if ($Line =~ m/id="([^"]+)"/) {
				$ReactionID = $1;
				if (length($ReactionID) > 0) {
					if (!defined($ReactionData->{$ReactionID})) {
						$ReactionData->{$ReactionID}->{"ID"} = $ReactionID;
					}
					if ($Line =~ m/name="([^"]+)"/) {
						my $Name = $1;
						$Name =~ s/^_//;
						$Name =~ s/_/ /g;
						if (length($Name) > 0 && (!defined($ReactionData->{$ReactionID}->{"NAME"}) || &FindArrayElement($ReactionData->{$ReactionID}->{"NAME"},$Name) == -1)) {
							push(@{$ReactionData->{$ReactionID}->{"NAME"}},$Name);
						}
					}
				}
			}
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<\/reaction>/) {
			#Clearing the per-reaction state so nothing leaks into the next reaction
			$ReactionID = "";
			$ReactionReactants = "";
			$ReactionSign = "";
			$ReactionProducts = "";
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<html:p>EC Number:\s([^<]+)/) {
			my $ECNumber = $1;
			#Only storing the annotation when a reaction ID is known, so that no reaction with an empty ID is created
			if (length($ReactionID) > 0 && length($ECNumber) > 3 && (!defined($ReactionData->{$ReactionID}->{"EC"}) || &FindArrayElement($ReactionData->{$ReactionID}->{"EC"},$ECNumber) == -1)) {
				push(@{$ReactionData->{$ReactionID}->{"EC"}},$ECNumber);
			}
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<html:p>Confidence Level:\s([^<]+)/) {
			my $Confidence = $1;
			if (length($ReactionID) > 0 && length($Confidence) > 0) {
				$ReactionData->{$ReactionID}->{"CONFIDENCE"} = $Confidence;
			}
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<html:p>LOCUS:/) {
			#Matching against the line directly so the caller's $_ is left untouched
			my @GeneArray = ($Line =~ /<html:p>LOCUS:([^\#]+)/g);
			if (length($ReactionID) > 0) {
				foreach my $Gene (@GeneArray) {
					if (length($Gene) > 0 && (!defined($ReactionData->{$ReactionID}->{"GENES"}) || &FindArrayElement($ReactionData->{$ReactionID}->{"GENES"},$Gene) == -1)) {
						push(@{$ReactionData->{$ReactionID}->{"GENES"}},$Gene);
					}
				}
			}
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<listOfReactants>/) {
			$HandlingReactants = 1;
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<\/listOfReactants>/) {
			$HandlingReactants = 0;
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<listOfProducts>/) {
			$HandlingProducts = 1;
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<\/listOfProducts>/) {
			$HandlingProducts = 0;
			#The equation is assembled when the product list closes, so a reaction without a product list gets no equation
			if (length($ReactionID) > 0 && defined($ReactionData->{$ReactionID})) {
				my $Equation = $ReactionReactants.$ReactionSign.$ReactionProducts;
				$ReactionData->{$ReactionID}->{"EQUATION"} = $Equation;
			}
		} elsif ($HandlingReactions == 1 && $Line =~ m/^<speciesReference/) {
			if ($Line =~ m/species="([^"]+)"/) {
				my $SpeciesID = $1;
				$SpeciesID =~ s/^_//;
				#Splitting a trailing "_x" suffix off the species ID as the compartment
				my $Compartment = "";
				if ($SpeciesID =~ m/_([a-z])$/) {
					$Compartment = $1;
					$SpeciesID = substr($SpeciesID,0,-2);
				}
				my $Stoichiometry = "";
				if ($Line =~ m/stoichiometry="([^"]+)"/) {
					$Stoichiometry = $1;
				}
				#NOTE(review): a species reference without a stoichiometry attribute is skipped entirely - confirm this is intended
				if (length($Stoichiometry) > 0 && length($SpeciesID) > 0) {
					#Building the term: the coefficient is omitted when it is "1" and the compartment is omitted when it is "c"
					my $SpeciesString = "";
					if ($Stoichiometry ne "1") {
						$SpeciesString .= "(".$Stoichiometry.") ";
					}
					$SpeciesString .= $SpeciesID;
					if (length($Compartment) > 0 && $Compartment ne "c") {
						$SpeciesString .= "[".$Compartment."]";
					}
					if ($HandlingReactants == 1) {
						if (length($ReactionReactants) > 0) {
							$ReactionReactants .= " + ";
						}
						$ReactionReactants .= $SpeciesString;
					} elsif ($HandlingProducts == 1) {
						if (length($ReactionProducts) > 0) {
							$ReactionProducts .= " + ";
						}
						$ReactionProducts .= $SpeciesString;
					}
				}
			}
		}
	}

	return ($ReactionData,$CompoundData);
}
940 : | |||
#--- This function returns the sorted, unique IDs in a translation file whose key matches the search text ---#
# Filename:   tab-delimited translation file; column 0 holds the ID and column 1 holds the key
# SearchText: text matched against each key; it is used as a regular expression
# Returns a reference to a sorted array of matching IDs. The array is empty (never undefined)
# when the search text is blank, the file does not exist, or nothing matches.
sub SearchTranslationDataForMatchingID {
	my ($Filename,$SearchText) = @_;

	#Declaring the reference to the array where the results will ultimately be stored
	my $MatchingIDs = [];

	#If the search text is blank or the input file does not exist, I return an empty array
	if (!defined($SearchText) || length($SearchText) == 0 || !defined($Filename) || !(-e $Filename)) {
		return $MatchingIDs;
	}

	#Loading the translation file and storing the ID of every row with a matching key in a hash
	my $TranslationData = &LoadMultipleColumnFile($Filename,"\t");
	my %IDHash;
	foreach my $Row (@{$TranslationData}) {
		if (@{$Row} >= 2 && $Row->[1] =~ m/$SearchText/) {
			$IDHash{$Row->[0]} = 1;
		}
	}

	#Putting the matching hash keys into an array and sorting it
	@{$MatchingIDs} = sort(keys(%IDHash));

	return $MatchingIDs;
}
979 : | |||
#--- This function moves the current file onto the backup filename, replacing any existing backup ---#
# Nothing happens when the current file does not exist.
# The results of unlink and rename are not checked.
sub BackupFile {
	my ($CurrentFilename,$BackupFilename) = @_;

	if (-e $CurrentFilename) {
		#Removing any previous backup before the rename
		unlink($BackupFilename) if (-e $BackupFilename);
		rename($CurrentFilename,$BackupFilename);
	}
}
990 : | |||
#--- This function formats a timestamp as month/day/year in local time, without zero padding ---#
# Time: epoch seconds; the current time is used when it is not supplied
sub Date {
	my ($Time) = @_;

	$Time = time() if (!defined($Time));
	#localtime returns a zero-based month and the year as an offset from 1900
	my ($Day,$Month,$Year) = (localtime($Time))[3,4,5];

	return join("/",$Month+1,$Day,$Year+1900);
}
1000 : | |||
#--- This function merges any number of array references into one array of unique elements ---#
# Elements keep the order in which they are first seen; undefined arguments are skipped.
# Returns an array reference, or undef when no element was found at all.
sub MergeArraysUnique {
	my @ArrayRefs = @_;

	my $Merged;
	my %Seen;
	foreach my $Ref (@ArrayRefs) {
		next if (!defined($Ref));
		foreach my $Entry (@{$Ref}) {
			next if (defined($Seen{$Entry}));
			push(@{$Merged},$Entry);
			$Seen{$Entry} = 1;
		}
	}

	return $Merged;
}
1019 : | |||
#--- This function opens every file matching the filename expression and replaces the objects found in each line according to the translation file ---#
# TranslationHash:    despite its name, this is the path of the translation file handed to LoadSeparateTranslationFiles
# FilenameExpression: glob expression; when Recursive is 1 and the expression ends in "/", the directory is searched recursively
# SearchExpression:   optional regular expression selecting the objects to translate; it is wrapped in a capture
#                     group when it does not already contain a pair of parentheses
# MakeUnique:         when 1, duplicate lines are removed before the file is saved
# SortLines:          when 1, the lines are sorted before the file is saved
# Each processed filename and its number of translated matches are printed to STDOUT, and the file is overwritten in place.
sub TranslateFileData {
	my ($TranslationHash,$FilenameExpression,$SearchExpression,$Recursive,$MakeUnique,$SortLines) = @_;

	#Checking the search expression if one was provided
	my $HaveSearchExpression = 0;
	if (defined($SearchExpression) && length($SearchExpression) > 0) {
		$HaveSearchExpression = 1;
		#A capture group is needed to pull out the objects, so one is added unless both parentheses are present
		if (index($SearchExpression,"(") == -1 || index($SearchExpression,")") == -1) {
			$SearchExpression = "(".$SearchExpression.")";
		}
	}

	#Finding all matching filenames
	my @MatchingFiles;
	if (defined($Recursive) && $Recursive == 1 && $FilenameExpression =~ m/\/$/) {
		@MatchingFiles = &RecursiveGlob($FilenameExpression);
	} else {
		@MatchingFiles = glob($FilenameExpression);
	}

	#Exiting if no matching filenames were found
	if (@MatchingFiles == 0) {
		print "No matching files!\n";
		return;
	}

	#Loading the translation file
	if (!(-e $TranslationHash)) {
		print "Could not find translation file: ".$TranslationHash."!\n";
		return;
	}
	my ($Ignore,$ReverseTranslation) = &LoadSeparateTranslationFiles($TranslationHash,"\t");
	my @TranslationKeys = keys(%{$ReverseTranslation});

	#Scanning through all matching filenames
	foreach my $CurrentFile (@MatchingFiles) {
		#Loading the file data into an array
		my $FileData = &LoadSingleColumnFile($CurrentFile,"");
		#Scanning through each fileline
		my $MatchCount = 0;
		for (my $j=0; $j < @{$FileData}; $j++) {
			if ($HaveSearchExpression == 1) {
				#This should be faster: only the objects captured by the search expression are looked up.
				#The match is run on the line directly so the caller's $_ is left untouched
				my @MatchingGroups = ($FileData->[$j] =~ /$SearchExpression/g);
				foreach my $Match (@MatchingGroups) {
					next if (!defined($Match));
					if (defined($ReverseTranslation->{$Match})) {
						$MatchCount++;
						my $Replacement = $ReverseTranslation->{$Match};
						#The matched text is literal, so it is quoted to keep characters like "." or "+" from acting as regex syntax
						$FileData->[$j] =~ s/\Q$Match\E/$Replacement/;
					}
				}
			} else {
				#This will be slower: every translation key is tried against every line
				foreach my $Key (@TranslationKeys) {
					my $Replacement = $ReverseTranslation->{$Key};
					$FileData->[$j] =~ s/\Q$Key\E/$Replacement/g;
				}
			}
		}
		#Reporting the file and the number of translated matches
		print $CurrentFile."\n";
		print $MatchCount."\n";
		#Making the array unique if requested
		if (defined($MakeUnique) && $MakeUnique == 1) {
			$FileData = &MergeArraysUnique($FileData);
		}
		#Sort file lines
		if (defined($SortLines) && $SortLines == 1) {
			@{$FileData} = sort(@{$FileData});
		}
		#Saving the modified file data back to the file
		&PrintArrayToFile($CurrentFile,$FileData);
	}
}
1097 : | |||
#--- This function returns every non-directory entry found beneath the input directory ---#
# path: directory to search; a trailing "/" is appended when it is missing
# Entries are found with glob($path.'*'), and each subdirectory is searched by a recursive call.
sub RecursiveGlob {
	my($path) = @_;

	## make sure the path ends in a /
	if ($path !~ /\/$/) {
		$path .= '/';
	}

	## directories are expanded by recursion, everything else is collected as is
	my @FileList;
	foreach my $Entry (glob($path.'*')) {
		push(@FileList,(-d $Entry) ? RecursiveGlob($Entry) : $Entry);
	}

	return @FileList;
}
1119 : | |||
#--- This function prints a hash of arrays, or a hash of hashes of arrays, as a semicolon-delimited table ---#
# HashRef:  each value is either an array reference (one column) or a hash reference whose values are
#           array references (one column per sub-key)
# Filename: output file; the table is printed to STDOUT when it is undefined or empty
# Layout: two heading rows, then a "Number of entries" row, then numbered data rows.
#         For a column with exactly one element, the "Number of entries" row shows that element itself
#         and the column stays empty in the data rows.
#         The data rows always end with one row in which every column is empty.
# Dies when the output cannot be opened.
sub PrintHashToFile {
	my($HashRef,$Filename) = @_;

	#The filename must be compared as a string: a numeric comparison treats every non-numeric filename as empty
	my $Output;
	if (!defined($Filename) || $Filename eq "") {
		open($Output, ">&", \*STDOUT) or die "Cannot open STDOUT: $!";
	} else {
		open($Output, ">", $Filename) or die "Cannot open $Filename: $!";
	}

	#Flattening the hash into parallel lists of headings and column data
	my @FirstHeadings;
	my @SecondHeadings;
	my @Columns;
	foreach my $Heading (keys(%{$HashRef})) {
		if (ref($HashRef->{$Heading}) eq "HASH") {
			foreach my $SubHeading (keys(%{$HashRef->{$Heading}})) {
				push(@FirstHeadings,$Heading);
				push(@SecondHeadings,$SubHeading);
				push(@Columns,$HashRef->{$Heading}->{$SubHeading});
			}
		} else {
			#Flat columns repeat their heading in both heading rows
			push(@FirstHeadings,$Heading);
			push(@SecondHeadings,$Heading);
			push(@Columns,$HashRef->{$Heading});
		}
	}

	#Printing headers
	print {$Output} "FIRST HEADING;".join(";",@FirstHeadings)."\n";
	print {$Output} "SECOND HEADING;".join(";",@SecondHeadings)."\n";

	#Printing the number of data entries
	my @EntryCounts;
	foreach my $Column (@Columns) {
		if (!defined($Column)) {
			push(@EntryCounts,0);
		} elsif (@{$Column} == 1) {
			push(@EntryCounts,$Column->[0]);
		} else {
			push(@EntryCounts,scalar(@{$Column}));
		}
	}
	print {$Output} "Number of entries;".join(";",@EntryCounts)."\n";

	#Printing data: rows are written until a row is reached in which no column has a value
	my $Continue = 1;
	my $Count = 0;
	while ($Continue == 1) {
		$Continue = 0;
		my $Row = ($Count+1).";";
		foreach my $Column (@Columns) {
			if (defined($Column) && @{$Column} > 1 && defined($Column->[$Count])) {
				$Row .= $Column->[$Count];
				$Continue = 1;
			}
			$Row .= ";";
		}
		print {$Output} $Row."\n";
		$Count++;
	}
	close($Output);
}
1208 : | |||
#--- This function counts how often each "|"-separated token occurs across the elements of the input array ---#
# ArrayRef: reference to an array of strings, each split on "|"
# Returns a hash reference mapping each token to a one-element array holding its count,
# or undef when no token was found.
sub CreateHistogramHash {
	my($ArrayRef) = @_;

	my $Histogram;
	my @Entries = @{$ArrayRef};
	foreach my $Entry (@Entries) {
		foreach my $Token (split(/\|/,$Entry)) {
			#The count lives in slot 0 of an array and is created on first use
			$Histogram->{$Token}->[0] += 1;
		}
	}

	return $Histogram;
}
1226 : | |||
#init_hoh will take a directory like peg or rxn and create a hash of hashes
#my $dir_all = '/disks/www/Network_Data/MinOrg';
#$dir_peg = "$dir_all/peg";
#%hoh_peg = &init_hoh($dir_peg);
# $hoh_peg{'peg.1234'}{REACTIONS} will be an @array of reactions associated with peg.1234
#
#Every regular file in the directory becomes a first-level key. Each line of a file is split on tabs:
#the first field is the second-level key and the remaining fields are stored as an array reference.
#Files ending in "~", subdirectories, blank lines and lines starting with "#" are skipped.
#Dies when the directory or one of its files cannot be opened.
sub init_hoh{
	my $dir = shift @_;
	my %hash;
	opendir(my $DH, $dir) or die "cannot open '$dir' $!";
	while (defined(my $file = readdir($DH))) {
		next if $file =~ /~$/;
		#readdir returns bare names, so the directory test needs the full path
		next if -d "$dir/$file";
		open(my $FH, "<", "$dir/$file") or die "Cannot open '$dir/$file' $!";
		while (my $line = <$FH>) {
			chomp $line;
			#The test has to be made on $line; a bare /^#/ would test $_ instead
			next if $line =~ /^#/ || !length($line);
			my ($key, @values) = split(/\t/, $line);
			#A line holding nothing but tabs yields no key at all
			next if !defined($key);
			$hash{ $file }{ $key } = \@values;
		}
		close $FH;
	}
	closedir($DH);
	return %hash;
}
1251 : | |||
#--- This function builds a duplicate-free copy of the input array and appends the new elements that are not yet in it ---#
# ArrayRef:    reference to the starting array (may be undefined)
# NewElements: candidate elements to append
# Returns the list ($NewArray,$NumberOfMatches). $NewArray is undef when nothing was collected.
# $NumberOfMatches counts the new elements that were not appended, either because they were
# already present or because they were empty strings.
sub AddElementsUnique {
	my ($ArrayRef,@NewElements) = @_;

	my %Seen;
	my $NewArray;
	#Copying the existing elements, keeping only the first occurrence of each
	if (defined($ArrayRef)) {
		foreach my $Existing (@{$ArrayRef}) {
			next if (exists($Seen{$Existing}));
			push(@{$NewArray},$Existing);
			$Seen{$Existing} = 1;
		}
	}

	#Appending the new elements and counting the ones that were rejected
	my $NumberOfMatches = 0;
	foreach my $Candidate (@NewElements) {
		if (length($Candidate) > 0 && !exists($Seen{$Candidate})) {
			push(@{$NewArray},$Candidate);
			$Seen{$Candidate} = 1;
		} else {
			$NumberOfMatches++;
		}
	}

	return ($NewArray,$NumberOfMatches);
}
1278 : | |||
#--- This function maps every element of the input list to its index in that list ---#
# When an element occurs more than once, the index of its last occurrence is kept.
# Returns a hash reference, or undef when the list is empty.
sub PutArrayInHash {
	my (@Elements) = @_;

	my $HashRef;
	my $Index = 0;
	foreach my $Element (@Elements) {
		$HashRef->{$Element} = $Index;
		$Index++;
	}

	return $HashRef;
}
1289 : | |||
#--- This function shortens an organism name: the first word is abbreviated and the rest is lowercased ---#
# Name: organism name split on whitespace. With two or more words, the first word is reduced to
#       its initial followed by ". " and the remaining words are lowercased.
# Every "str. " is then removed, as is "subsp. X" where X is the second word of the input
# (or the only word when the name has a single word).
# Returns the refined name; an undefined name is returned unchanged.
# NOTE(review): the "str. " removal also hits the tail of "substr. " - confirm whether that is intended.
sub RefineOrganismName {
	my ($Name) = @_;

	return $Name if (!defined($Name));

	my @Temp = split(/\s/,$Name);
	if (@Temp >= 2) {
		$Name = substr(shift(@Temp),0,1).". ".lc(join(" ",@Temp));
	}
	$Name =~ s/str\.\s//g;
	if (defined($Temp[0])) {
		#The text to remove is literal, so it is quoted: a word like "(sp" would otherwise be an invalid pattern
		my $Find = "subsp. ".$Temp[0];
		$Name =~ s/\Q$Find\E//g;
	}

	return $Name;
}
1303 : | |||
#--- This function returns the unique elements of the input list in sorted (string) order ---#
sub RemoveDuplicates {
	my (@OriginalArray) = @_;

	#A hash slice collapses repeated elements onto a single key each
	my %Seen;
	@Seen{@OriginalArray} = ();
	my @UniqueElements = sort(keys(%Seen));

	return @UniqueElements;
}
1315 : | |||
1316 : | chenry | 1.1 | 1; |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |