[Bio] / FigKernelScripts / process_new_sims.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/process_new_sims.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : olson 1.4 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretation of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
use strict;
use warnings;

# Not referenced by name anywhere in this script; kept because loading it
# may have side effects the script relies on -- TODO confirm before removing.
use FIG;

#
# process_new_sims NR peg.synonyms SimsDir Prefix EnhancedSimsDir
#
# Streams every similarity file found in SimsDir (or in each directory
# listed on stdin when SimsDir is "-") through the shell pipeline
#
#     reduce_sims peg.synonyms 300 | reformat_sims NR | split_sims EnhancedSimsDir Prefix
#
# Arguments:
#   NR               existing, non-empty file handed to reformat_sims
#   peg.synonyms     existing, non-empty file handed to reduce_sims
#   SimsDir          directory of input files, or "-" to read a list of
#                    directories (one per line) from stdin
#   Prefix           handed to split_sims (presumably the output file
#                    name prefix -- verify against split_sims)
#   EnhancedSimsDir  output directory; created if it does not exist
#
# Input files whose names end in ".gz" are decompressed with zcat; all
# others are read as-is.  Within a directory, files are processed in
# ascending order of the numeric suffix in their name (".NNN" or
# ".NNN.gz").
#

my $usage = "usage: process_new_sims NR peg.synonyms SimsDir Prefix EnhancedSimsDir";

my ($nr, $syn, $from_dir, $prefix, $to_dir);

(    ($nr       = shift @ARGV) && (-s $nr)
  && ($syn      = shift @ARGV) && (-s $syn)
  && ($from_dir = shift @ARGV)
  && ($prefix   = shift @ARGV)
  && ($to_dir   = shift @ARGV)
) || die $usage;

#
# If fromdir == "-", read a list of directories from stdin to process.
#

my @dir_list;
if ($from_dir eq '-')
{
    # Read STDIN explicitly: the bare <> operator would instead open any
    # leftover command-line arguments as files.
    while (defined(my $line = <STDIN>))
    {
        chomp $line;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        # A blank (e.g. trailing) line is not a directory name; skip it
        # rather than aborting the whole run.
        next if $line eq '';

        -d $line or die "Sims directory $line does not exist\n";
        push(@dir_list, $line);
    }
}
else
{
    push(@dir_list, $from_dir);
}

#
# Collect the input files, directory by directory, skipping dot-files.
#

my @sim_files;
for my $dir (@dir_list)
{
    opendir(my $dh, $dir) || die "could not open $dir";
    my @files = grep { $_ !~ /^\./ } readdir($dh);
    closedir($dh);

    # Order by the numeric suffix.  The key is extracted once per name and
    # falls back to 0 when a name has no such suffix, so a failed match can
    # never pick up a stale $1 left over from a previous comparison.
    my @ordered = map  { $_->[1] }
                  sort { $a->[0] <=> $b->[0] }
                  map  { [ (/\.(\d+)(?:\.gz)?$/ ? $1 : 0), $_ ] }
                  @files;

    push(@sim_files, map { "$dir/$_" } @ordered);
}

my @to_process = @sim_files;

printf "Processing %d files\n", scalar(@to_process);
(-d $to_dir) || mkdir($to_dir,0777) || die "could not make $to_dir";

# NOTE(review): the arguments below are interpolated into a shell command
# line unquoted, exactly as before; paths containing whitespace or shell
# metacharacters will break this pipeline.
open(my $out, '|-', "reduce_sims $syn 300 | reformat_sims $nr | split_sims $to_dir $prefix")
    || die "could not open output pipeline";

foreach my $file (@to_process)
{
    print STDERR "processing $file\n";

    my $in;
    if ($file =~ /\.gz$/)
    {
        # List-form pipe open: no shell is involved, so the file name
        # cannot be misparsed or abused as shell syntax.
        open($in, '-|', 'zcat', $file) || die "Could not pipe-open $file: $!";
    }
    else
    {
        open($in, '<', $file) || die "Could not read-open $file: $!";
    }

    while (defined(my $entry = <$in>)) { print $out $entry; }

    # For the zcat pipe, close() reports a non-zero exit status via $?.
    # Surface the problem but keep going, as the original never aborted here.
    close($in)
        || warn "problem closing input $file: " . ($! ? $! : "exit status $?") . "\n";
}

# Closing the pipe waits for the pipeline to finish and reports its
# exit status, so a failed run no longer exits as if it had succeeded.
close($out)
    || die "output pipeline failed: " . ($! ? $! : "exit status $?") . "\n";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3