[Bio] / Sprout / CorePegs.pl Repository:
ViewVC logotype

Annotation of /Sprout/CorePegs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

=head1 Core Peg List

This is a simple script that creates a tab-delimited list of all the
features for the selected NMPDR organisms. The single positional
parameter is the name of the output file.

Each output line contains the feature ID followed by the feature's
assignment. When the C<essential> filter is in effect, a third column
containing the feature's essentiality value is added.

The currently-supported command-line options are as follows.

=over 4

=item orgs

Organisms whose features are desired. If C<all>, then all
organisms will be listed. If C<nmpdr>, then all organisms in
NMPDR groups will be listed. If C<core>, then only the organisms
in the core NMPDR groups will be listed. The default is C<core>.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item phone

Phone number to message when the script is complete.

=item filter

Type of filtering to apply. If C<pegs>, only true PEGs will be included. If C<essential>,
only essential genes will be included. If omitted, all genes will be included. Any
other value is rejected as an error.

=back

=cut

use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Sprout;
use SFXlate;

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              orgs => ["core", "type of organisms (core, nmpdr, all)"],
                                              filter => ["", "filtering type: pegs or essential"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "<fileName>",
                                           @ARGV);
# Set a variable to contain return type information. It is filled in after
# the eval block and used in the optional phone message.
my $rtype;
# Ensure we catch errors.
eval {
    # Get the Sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Determine the genomes of interest. This is controlled by the
    # "orgs" option.
    my @genomes = $sprout->CoreGenomes($options->{orgs});
    # If no genomes are found, it's an error.
    my $genomes = scalar @genomes;
    if (! $genomes) {
        # Report the value of the option that was actually used for the lookup.
        Confess("No genomes found for orgs option \"$options->{orgs}\".");
    } else {
        Trace("$genomes genomes will be processed.") if T(2);
        # Check for a file name.
        if (! $parameters[0]) {
            Confess("No output file specified.");
        } else {
            # A file was specified, so we open it for output.
            my $oh = Open(undef, ">$parameters[0]");
            Trace("Output will be to $parameters[0].") if T(2);
            # We need to compute the filter clause, the parameters, and the
            # result columns. The base filter is by genome ID (which is the
            # first parameter). The base result column list is the
            # feature ID and assignment. Additional filtering and columns
            # could be required by the filter option.
            my $filter = "HasFeature(from-link) = ?";
            # The first parameter is a placeholder; it is overwritten with
            # the real genome ID on each pass through the genome loop.
            my @parms = ('genomeID');
            my @cols = ('Feature(id)', 'Feature(assignment)');
            if ($options->{filter} eq 'pegs') {
                # Here we filter by feature type to get PEGs only. The
                # filter clause uses SQL comparison syntax, so the
                # operator is "=", not Perl's "eq".
                $filter .= ' AND Feature(type) = ?';
                push @parms, 'peg';
                Trace("Filtering for PEGs.") if T(2);
            } elsif ($options->{filter} eq 'essential') {
                # Here we filter by the essentiality column, and add that
                # column to the output.
                $filter .= ' AND Feature(essential) IS NOT NULL';
                push @cols, 'Feature(essential)';
                Trace("Filtering for essential genes.") if T(2);
            } elsif ($options->{filter}) {
                # Here the filter type is invalid.
                Confess("Unknown filter type \"$options->{filter}\".");
            }
            # Set up a counter for the total number of features written.
            my $totalCount = 0;
            # Loop through the organisms in sorted order.
            for my $genome (sort @genomes) {
                Trace("Processing $genome.") if T(3);
                # Store the genome ID in the parms.
                $parms[0] = $genome;
                # Get this organism's features according to the filter.
                my $query = $sprout->Get(['HasFeature', 'Feature'], $filter, \@parms);
                # Set up a counter for this genome.
                my $genomeCount = 0;
                # Write them to the output file.
                while (my $result = $query->Fetch()) {
                    my @fields = $result->Values(\@cols);
                    Tracer::PutLine($oh, \@fields);
                    $genomeCount++;
                }
                # Update the counts.
                Trace("$genomeCount features found for $genome.") if T(3);
                $totalCount += $genomeCount;
            }
            Trace("$totalCount features output.") if T(2);
            # Close the output file. Buffered write errors only surface
            # here, so a failed close must be treated as a failed run.
            close($oh) or Confess("Could not close output file $parameters[0]: $!");
        }
    }
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Notify the user by phone if a number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "Core Peg List terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3