[Bio] / Sprout / UpdateSaplingAnnotations.pl Repository:
ViewVC logotype

Annotation of /Sprout/UpdateSaplingAnnotations.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     # -*- perl -*-
4 :     #
5 :     # Copyright (c) 2003-2011 University of Chicago and Fellowship
6 :     # for Interpretations of Genomes. All Rights Reserved.
7 :     #
8 :     # This file is part of the SEED Toolkit.
9 :     #
10 :     # The SEED Toolkit is free software. You can redistribute
11 :     # it and/or modify it under the terms of the SEED Toolkit
12 :     # Public License.
13 :     #
14 :     # You should have received a copy of the SEED Toolkit Public License
15 :     # along with this program; if not write to the University of Chicago
16 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
17 :     # Genomes at veronika@thefig.info or download a copy from
18 :     # http://www.theseed.org/LICENSE.TXT.
19 :     #
20 :    
21 :     =head1 Update Sapling Annotations
22 :    
23 :     This script takes as input an annotation file and applies the annotations to
24 :     the current Sapling database.
25 :    
26 :     The single positional parameter is the name of the input file containing the
27 :     annotations.
28 :    
29 :     The currently-supported command-line options are as follows.
30 :    
31 :     =over 4
32 :    
33 :     =item span
34 :    
35 :     Maximum span of time in seconds for two annotations to be considered part of the
36 :     same group. The default is C<30>.
37 :    
38 :     =item user
39 :    
40 :     Name suffix to be used for log files. If omitted, the PID is used.
41 :    
42 :     =item trace
43 :    
44 :     Numeric trace level. A higher trace level causes more messages to appear. The
45 :     default trace level is 2. Tracing will be directly to the standard output
46 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
47 :     where I<User> is the value of the B<user> option above.
48 :    
49 :     =item sql
50 :    
51 :     If specified, turns on tracing of SQL activity.
52 :    
53 :     =item background
54 :    
55 :     Save the standard and error output to files. The files will be created
56 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
57 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
58 :     B<user> option above.
59 :    
60 :     =item h
61 :    
62 :     Display this command's parameters and options.
63 :    
64 :     =item dbname
65 :    
66 :     Name of the Sapling database to use. This option is generally only useful for debugging.
67 :    
68 :     =item dbhost
69 :    
70 :     SQL host for the Sapling database to use. This option is generally only useful for debugging.
71 :    
72 :     =item dbport
73 :    
74 :     Database port to use for the Sapling database. This option is generally only useful for debugging.
75 :    
76 :    
77 :     =back
78 :    
79 :     =cut
80 :    
81 : parrello 1.3 use strict;
82 :     use Tracer;
83 :     use SaplingFunctionLoader;
84 :     use AnnotationGroup;
85 :     use Sapling;
86 :     use Stats;
87 :     use FIG;
88 :     use ERDBTypeText;
89 :    
90 :     # Parse the command line.
91 :     my ($options, @parameters) = StandardSetup([qw(SaplingFunctionLoader SaplingDataLoader)],
92 :     {span => [30, "maximum time span of an annotation group"],
93 : parrello 1.1 dbname => [$FIG_Config::saplingDB, "name of the Sapling database to use"],
94 :     dbhost => ["", "host containing the Sapling database"],
95 :     dbport => ["", "port for connecting to the Sapling database"],
96 : parrello 1.3 },
97 :     "<annotationFile>", @ARGV);
98 :     # Create the statistics object.
99 :     my $stats = Stats->new();
100 :     # Get the Sapling database.
101 :     my $sap = Sapling->new(dbName => $options->{dbname}, dbhost => $options->{dbhost},
102 :     port => $options->{dbport});
103 :     # Get the function loader object.
104 :     my $loader = SaplingFunctionLoader->new($sap);
105 :     # Get the input file.
106 :     Trace("Reading annotations from $parameters[0].") if T(2);
107 :     my $ih = Open(undef, "<$parameters[0]");
108 :     # Create the first annotation group.
109 :     my $group = AnnotationGroup->new();
110 :     $group->Add(AnnotationGroup::Read($ih));
111 :     $stats->Add(annotations => 1);
112 :     # Loop through the annotation file.
113 :     while (! eof $ih) {
114 :     # Read the next annotation.
115 :     my ($fid, $time, $user, $data) = AnnotationGroup::Read($ih);
116 :     $stats->Add(annotations => 1);
117 :     # Is it a member of the current group?
118 :     if ($group->fid ne $fid || $time - $group->time0 > $options->{span}) {
119 :     # No. Process the old group.
120 :     ProcessGroup($group, $stats, $sap, $loader);
121 :     # Start a new group.
122 :     $group = AnnotationGroup->new();
123 :     }
124 :     # Add this annotation to the group.
125 :     $group->Add($fid, $time, $user, $data);
126 :     }
127 :     Trace("Statistics for this run:\n" . $stats->Show());
128 :    
129 : parrello 1.1 ## ProcessGroup
130 :     #
131 :     # Process an annotation group. If it is an assignment and the assignment has
132 :     # already been made, we discard it. If it is an assignment and the assignment
133 :     # has not already been made, we make the assignment and add the annotations.
134 :     # Otherwise, we add the annotations without preamble.
135 :     #
136 :     sub ProcessGroup {
137 : parrello 1.3 # Get the parameters.
138 :     my ($group, $stats, $sap, $loader) = @_;
139 :     # We need this to encode annotations for the duplicate-check filter.
140 :     my $encoder = ERDBTypeText->new();
141 :     # Get the feature ID and time.
142 :     my $fid = $group->fid;
143 :     my $time0 = $group->time0;
144 :     Trace("Processing annotation group at $time0 for $fid.") if T(2);
145 :     # We'll turn off this flag if we don't want to apply the annotations.
146 :     my $annotate = 1;
147 :     # Is there an assignment?
148 :     my $assignment = $group->assignment;
149 :     if (defined $assignment) {
150 :     # Yes. Check to see if it's already in place.
151 :     my ($current) = $sap->GetEntityValues(Feature => $fid, ['function']);
152 :     if ($current eq $assignment) {
153 :     # It is, so skip this annotation.
154 :     $annotate = 0;
155 :     $stats->Add(groupSkipped => 1);
156 :     } else {
157 :     # It isn't, so make the assignment.
158 :     $loader->UpdateFunction($fid, $assignment);
159 :     $stats->Add(newAssignment => 1);
160 :     }
161 :     }
162 :     # Do we want to annotate with this group?
163 :     if ($annotate) {
164 :     # Yes. Get the number of annotations.
165 :     my $count = $group->count;
166 :     # Loop through them, checking for duplicates.
167 :     my $idx;
168 :     my @keeping;
169 :     for ($idx = 0; $idx < $count; $idx++) {
170 :     my ($fid, $time, $user, $data) = $group->annotation($idx);
171 :     my ($id) = $sap->GetFlat("Annotation",
172 :     "Annotation(id) LIKE ? AND Annotation(annotation-time) = ? AND Annotation(comment) = ?",
173 :     ["$fid:%", $time, $encoder->encode($data)], 'id');
174 :     if (! defined $id) {
175 :     # This is not a duplicate, so save its array index.
176 :     push @keeping, $idx;
177 :     } else {
178 :     # This is a duplicate.
179 :     $stats->Add(annotationsDup => 1);
180 :     }
181 :     }
182 :     # Did we find any non-duplicates?
183 :     if (@keeping) {
184 :     # Yes. First, annotate the fact we're doing this.
185 :     $loader->MakeAnnotation($fid, scalar(@keeping) . " annotations imported to Sapling from PUBSEED.",
186 :     "sapling_updater", $group->time0);
187 :     $stats->Add(annotationWrappers => 1);
188 :     # Now loop through the annotations in order, applying them.
189 :     for $idx (@keeping) {
190 :     my ($fid, $time, $user, $data) = $group->annotation($idx);
191 :     $loader->MakeAnnotation($fid, $data, $user, $time);
192 :     $stats->Add(annotationsMade => 1);
193 :     }
194 :     }
195 :     }
196 :     # Record this group.
197 :     $stats->Add(annotationGroups => 1);
198 : parrello 1.1 }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3