[Bio] / FigKernelScripts / process_new_sims.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/process_new_sims.pl

Parent Directory | Revision Log


Revision 1.5 - (view) (download) (as text)

1 : olson 1.4 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : efrank 1.1 use FIG;
19 :    
# Usage text reported when the positional arguments are missing or invalid.
$usage = "usage: process_new_sims [-timelogic] NR peg.synonyms SimsDir Prefix EnhancedSimsDir";

# Consume leading dash-prefixed switches. -timelogic is the only switch
# recognised; any other one is fatal.
my $timelogic;
while ($ARGV[0] =~ /^-/)
{
    $opt = shift;
    die "Invalid option $opt\n" unless $opt eq '-timelogic';
    $timelogic++;
}

# Positional arguments, taken strictly in order. NR and peg.synonyms must
# name non-empty files (-s); the remaining three only have to be true
# values. The first one that fails stops the script with the usage text.
$nr = shift @ARGV;
($nr && -s $nr) or die $usage;

$syn = shift @ARGV;
($syn && -s $syn) or die $usage;

($from_dir = shift @ARGV) or die $usage;
($prefix   = shift @ARGV) or die $usage;
($to_dir   = shift @ARGV) or die $usage;
42 : efrank 1.1
#
# If fromdir == "-", read a list of directories from stdin to process.
# Names are whitespace-separated and may be spread over any number of
# lines; every name must be an existing directory. Otherwise fromdir
# itself is the single directory to process.
#

my @dir_list;
if ($from_dir eq '-')
{
    # Read STDIN explicitly. The bare <> operator would instead open any
    # arguments still left in @ARGV as files, and only fall back to STDIN
    # when @ARGV happens to be empty.
    while (defined(my $line = <STDIN>))
    {
        # split ' ' ignores leading and trailing whitespace (newline
        # included) and yields nothing for a blank line.
        for my $d (split ' ', $line)
        {
            -d $d or die "Sims directory $d does not exist\n";
            push(@dir_list, $d);
        }
    }
}
else
{
    push(@dir_list, $from_dir);
}
68 :    
# Gather the files to process from every directory in @dir_list. The
# directories keep the order in which they were given; inside a directory
# the files are ordered by the number that ends their name (".N" or
# ".N.gz"). Each entry pushed onto @sim_files is "$dir/$name".
my @sim_files;
for my $dir (@dir_list)
{
    opendir(my $dh, $dir) || die "could not open $dir: $!";
    my @files = grep { $_ !~ /^\./ } readdir($dh);    # skip dot entries
    closedir($dh);

    # Extract each file's numeric suffix once, before sorting. Reading $1
    # inside the comparator is unsafe: after a failed match $1 still holds
    # the value from an earlier successful match, which makes the ordering
    # inconsistent. Names without a numeric suffix get key 0.
    my %seq_of;
    for my $name (@files)
    {
        $seq_of{$name} = ($name =~ /\.(\d+)(?:\.gz)?$/) ? $1 : 0;
    }

    # Numeric suffix first; fall back to the name so that ties come out in
    # a deterministic order regardless of what readdir returned.
    @files = sort { $seq_of{$a} <=> $seq_of{$b} or $a cmp $b } @files;

    push(@sim_files, map { "$dir/$_" } @files);
}
80 :    
@to_process = @sim_files;

printf "Processing %d files\n", scalar(@to_process);
(-d $to_dir) || mkdir($to_dir,0777) || die "could not make $to_dir: $!";

# Every input file is streamed, in order, into one shell pipeline:
#   [reformat_timelogic_sims |] reduce_sims | reformat_sims | split_sims
# The first stage is only present when -timelogic was given.
# NOTE(review): $syn, $nr, $to_dir and $prefix are interpolated into a shell
# command line unquoted, so they must not contain shell metacharacters.
my $pipeline = $timelogic ? "reformat_timelogic_sims | " : "";
$pipeline .= "reduce_sims $syn 300 | reformat_sims $nr | split_sims $to_dir $prefix";

open(my $out, '|-', $pipeline)
    || die "could not open output pipeline: $!";

foreach my $file (@to_process)
{
    print STDERR "processing $file\n";

    # $file already carries its directory, so it is reported as-is.
    my $in;
    if ($file =~ /\.gz$/)
    {
        # List-form pipe open: zcat receives the path as a single argument
        # and no shell is involved, so odd characters in it are harmless.
        open($in, '-|', 'zcat', $file) || die "Could not pipe-open $file: $!";
    }
    else
    {
        open($in, '<', $file) || die "Could not read-open $file: $!";
    }

    while (defined(my $entry = <$in>)) { print {$out} $entry; }

    # Closing a pipe waits for the child and returns false when it exited
    # non-zero ($! is then 0 and the status is in $?). Report the problem
    # but carry on with the remaining files.
    close($in)
        || warn($! ? "error closing $file: $!\n"
                   : "zcat $file exited with status " . ($? >> 8) . "\n");
}

# Flush the last of the data, wait for the pipeline to finish and fail
# loudly if it did not complete successfully.
close($out)
    || die($! ? "error closing output pipeline: $!\n"
              : "output pipeline exited with status " . ($? >> 8) . "\n");

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3