[Bio] / Sprout / UpdateSaplingAnnotations.pl Repository:
ViewVC logotype

View of /Sprout/UpdateSaplingAnnotations.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Wed Sep 21 21:05:15 2011 UTC (7 years, 4 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2014_0912, rast_rel_2014_0729, mgrast_release_3_1_2, rast_rel_2011_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_10262011, HEAD
Changes since 1.2: +106 -106 lines
Changes to support complexes.

#!/usr/bin/perl -w

# -*- perl -*-
#
# Copyright (c) 2003-2011 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

=head1 Update Sapling Annotations

This script takes as input an annotation file and applies the annotations to
the current Sapling database.

The single positional parameter is the name of the input file containing the
annotations.

The currently-supported command-line options are as follows.

=over 4

=item span

Maximum span of time in seconds for two annotations to be considered part of the 
same group. The default is C<30>.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directed to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item dbname

Name of the Sapling database to use. This option is generally only useful for debugging.

=item dbhost

SQL host for the Sapling database to use. This option is generally only useful for debugging.

=item dbport

Database port to use for the Sapling database. This option is generally only useful for debugging.


=back

=cut

    use strict;
    use Tracer;
    use SaplingFunctionLoader;
    use AnnotationGroup;
    use Sapling;
    use Stats;
    use FIG;
    use ERDBTypeText;
    
    # Parse the command line. The single positional parameter is the name of
    # the annotation file; the options are described in the POD above.
    my ($options, @parameters) = StandardSetup([qw(SaplingFunctionLoader SaplingDataLoader)],
        {span => [30, "maximum time span of an annotation group"],
         dbname => [$FIG_Config::saplingDB, "name of the Sapling database to use"],
         dbhost => ["", "host containing the Sapling database"],
         dbport => ["", "port for connecting to the Sapling database"],
        },
        "<annotationFile>", @ARGV);
    # Create the statistics object.
    my $stats = Stats->new();
    # Get the Sapling database.
    # NOTE(review): the option keys are cased differently ("dbName" vs.
    # "dbhost") -- confirm both are what Sapling->new expects.
    my $sap = Sapling->new(dbName => $options->{dbname}, dbhost => $options->{dbhost},
        port => $options->{dbport});
    # Get the function loader object.
    my $loader = SaplingFunctionLoader->new($sap);
    # Get the input file.
    Trace("Reading annotations from $parameters[0].") if T(2);
    my $ih = Open(undef, "<$parameters[0]");
    # Only build groups if the file actually contains something; otherwise we
    # would count (and try to group) an annotation that was never read.
    if (! eof $ih) {
        # Create the first annotation group and seed it with the first annotation.
        my $group = AnnotationGroup->new();
        $group->Add(AnnotationGroup::Read($ih));
        $stats->Add(annotations => 1);
        # Loop through the rest of the annotation file.
        while (! eof $ih) {
            # Read the next annotation.
            my ($fid, $time, $user, $data) = AnnotationGroup::Read($ih);
            $stats->Add(annotations => 1);
            # Is it a member of the current group? It is not if it belongs to a
            # different feature or falls outside the time span of the group.
            if ($group->fid ne $fid || $time - $group->time0 > $options->{span}) {
                # No. Process the old group.
                ProcessGroup($group, $stats, $sap, $loader);
                # Start a new group.
                $group = AnnotationGroup->new();
            }
            # Add this annotation to the group.
            $group->Add($fid, $time, $user, $data);
        }
        # The loop above only processes a group when the NEXT group begins, so
        # the final group is still pending when we hit end-of-file. Flush it
        # here; otherwise the last group's annotations are silently dropped.
        ProcessGroup($group, $stats, $sap, $loader);
    }
    # We are done with the input file.
    close $ih;
    Trace("Statistics for this run:\n" . $stats->Show());
    
## ProcessGroup
#
# Process an annotation group. If it is an assignment and the assignment has
# already been made, we discard the whole group. If it is an assignment and
# the assignment has not already been made, we make the assignment and add the
# annotations. Otherwise, we add the annotations without preamble.
#
# Annotations that already exist in the database (same feature, time, and
# comment text) are counted as duplicates and not re-applied. When at least
# one annotation survives, a wrapper annotation recording the import is
# written first.
#
# Parameters:
#   $group      annotation group to process
#   $stats      statistics object; counters are updated via its Add method
#   $sap        Sapling database object used for the lookups
#   $loader     function loader used to update functions and make annotations
#
sub ProcessGroup {
    # Get the parameters.
    my ($group, $stats, $sap, $loader) = @_;
    # We need this to encode annotations for the duplicate-check filter.
    my $encoder = ERDBTypeText->new();
    # Get the feature ID and time.
    my $fid = $group->fid;
    my $time0 = $group->time0;
    Trace("Processing annotation group at $time0 for $fid.") if T(2);
    # We'll turn off this flag if we don't want to apply the annotations.
    my $annotate = 1;
    # Is there an assignment?
    my $assignment = $group->assignment;
    if (defined $assignment) {
        # Yes. Check to see if it's already in place.
        my ($current) = $sap->GetEntityValues(Feature => $fid, ['function']);
        # The lookup may yield nothing. Comparing undef with "eq" treats it as
        # an empty string but warns under -w, so make that explicit.
        $current = '' if ! defined $current;
        if ($current eq $assignment) {
            # It is, so skip this group.
            $annotate = 0;
            $stats->Add(groupSkipped => 1);
        } else {
            # It isn't, so make the assignment.
            $loader->UpdateFunction($fid, $assignment);
            $stats->Add(newAssignment => 1);
        }
    }
    # Do we want to annotate with this group?
    if ($annotate) {
        # Yes. Get the number of annotations.
        my $count = $group->count;
        # Loop through them, keeping the ones that are not already in the
        # database. Each kept entry is a [fid, time, user, data] tuple so the
        # annotation does not have to be fetched from the group a second time.
        my @keeping;
        for my $idx (0 .. $count - 1) {
            my ($annFid, $time, $user, $data) = $group->annotation($idx);
            # Annotation IDs are matched by the "<fid>:" prefix used in the filter.
            my ($id) = $sap->GetFlat("Annotation",
                "Annotation(id) LIKE ? AND Annotation(annotation-time) = ? AND Annotation(comment) = ?",
                ["$annFid:%", $time, $encoder->encode($data)], 'id');
            if (! defined $id) {
                # This is not a duplicate, so save it.
                push @keeping, [$annFid, $time, $user, $data];
            } else {
                # This is a duplicate.
                $stats->Add(annotationsDup => 1);
            }
        }
        # Did we find any non-duplicates?
        if (@keeping) {
            # Yes. First, annotate the fact we're doing this.
            $loader->MakeAnnotation($fid, scalar(@keeping) . " annotations imported to Sapling from PUBSEED.",
                "sapling_updater", $time0);
            $stats->Add(annotationWrappers => 1);
            # Now loop through the annotations in order, applying them.
            for my $kept (@keeping) {
                my ($annFid, $time, $user, $data) = @$kept;
                $loader->MakeAnnotation($annFid, $data, $user, $time);
                $stats->Add(annotationsMade => 1);
            }
        }
    }
    # Record this group.
    $stats->Add(annotationGroups => 1);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3