[Bio] / Sprout / FamilySaplingLoader.pm Repository:
ViewVC logotype

View of /Sprout/FamilySaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Thu Apr 2 01:40:32 2009 UTC (10 years, 5 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2009_05_18
Changes since 1.1: +1 -1 lines
Fixed to clean subsystem objects.

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package FamilySaplingLoader;

    use strict;
    use Tracer;
    use ERDB;
    use FFs;
    use base 'BaseSaplingLoader';

=head1 Sapling Family Load Group Class

=head2 Introduction

The Family Load Group includes all of the major family and pairing tables, plus
the feature annotation tables (B<Annotation> and B<IsAnnotatedBy>).

=head3 new

    my $sl = FamilySaplingLoader->new($erdb, $options);

Construct a new FamilySaplingLoader object.

=over 4

=item erdb

[[SaplingPm]] object for the database being loaded.

=item options

Reference to a hash of command-line options.

=back

=cut

sub new {
    my ($class, $erdb, $options) = @_;
    # These are the tables owned by this load group. They are handed to the
    # base class in sorted order.
    my @groupTables = qw(
        Family HasMember IsInPair Pairing IsDeterminedBy
        PairSet OccursIn Cluster FamilyName Annotation
        IsAnnotatedBy
    );
    @groupTables = sort @groupTables;
    # The base-class constructor does the real work; it is called as a plain
    # function with our class name as its first argument.
    my $loader = BaseSaplingLoader::new($class, $erdb, $options, @groupTables);
    return $loader;
}

=head2 Public Methods

=head3 Generate

    $sl->Generate();

Generate the data for the family and pairing files. For a genome section, this
also generates the annotation records for the genome's features.

=cut

sub Generate {
    # Produce the load data for the current section. The global section loads
    # the coupling tables from flat files; any other section is taken to be a
    # genome ID, for which FIGfam membership and annotation records are
    # generated.
    my ($self) = @_;
    # Get the database object.
    my $erdb = $self->db();
    # Get the source object.
    my $fig = $self->source();
    # Is this the global section?
    if ($self->global()) {
        # Here we load the coupling data. The coupling data is stored in flat files
        # in a Sapling data subdirectory.
        my $couplingDir = $erdb->LoadDirectory() . '/FamilyData/Sapling';
        $self->LoadFromFile(Pairing => "$couplingDir/Pairing", qw(id));
        $self->LoadFromFile(Cluster => "$couplingDir/Cluster", qw(id));
        $self->LoadFromFile(IsDeterminedBy => "$couplingDir/IsDeterminedBy",
                            qw(from-link inverted to-link));
        # Note that the next two tables are read from files whose names differ
        # from the table names (IsPairOf and OccursInCluster).
        $self->LoadFromFile(IsInPair => "$couplingDir/IsPairOf",
                            qw(from-link to-link));
        # NOTE(review): the field list here is to-link first, then from-link;
        # presumably this matches the column order of the file -- confirm.
        $self->LoadFromFile(OccursIn => "$couplingDir/OccursInCluster",
                            qw(to-link from-link));
        $self->LoadFromFile(PairSet => "$couplingDir/PairSet",
                            qw(id score));
    } else {
        # Get the section ID. For a non-global section this is used as a genome ID.
        my $genomeID = $self->section();
        # Here we load the FIGfams for the selected genome. First we need
        # access to the FIGfam data.
        my $figfam_data = FIG::get_figfams_data();
        # Use an explicit class-method call. Indirect-object syntax ("new FFs")
        # is fragile here because this package defines its own "new".
        my $ffs = FFs->new($figfam_data, $fig);
        # Get this genome's features.
        my @fidList = $fig->all_features($genomeID);
        # Loop through the features, generating their FIGfam data.
        for my $featureID (@fidList) {
            $self->Track(Features => $featureID, 1000);
            # Ask for the figfams.
            my @fams = $ffs->families_containing_peg($featureID);
            # Connect them to the feature (if any).
            for my $fam (@fams) {
                $self->PutE(Family => $fam);
                $self->PutR(HasMember => $fam, $featureID);
            }
        }
        # Now we process the annotations for the specified genome.
        # Create a hash of timestamps. We use this to prevent duplicate time stamps
        # from showing up for a single PEG's annotations.
        my %seenTimestamps = ();
        # Get the genome's annotations.
        my @annotations = $fig->read_all_annotations($genomeID);
        Trace("Processing annotations.") if T(2);
        for my $tuple (@annotations) {
            # Get the annotation tuple.
            my ($peg, $timestamp, $user, $text) = @{$tuple};
            $self->Track(Annotations => "$peg:$timestamp", 1000);
            # Change assignments by the master user to FIG assignments.
            $text =~ s/Set master function/Set FIG function/s;
            # Ensure the time stamp is valid.
            if ($timestamp =~ /^\d+$/) {
                # Here it's a number. We need to ensure the one we use to form
                # the key is unique. If this PEG already has an annotation with
                # the same stamp, bump the key stamp until it is unused.
                my $keyStamp = $timestamp;
                while ($seenTimestamps{"$peg:$keyStamp"}) {
                    $keyStamp++;
                }
                # The ID is the PEG ID followed by the padded complement of the
                # key stamp. NOTE(review): presumably the complement makes newer
                # annotations sort first, and Pad left-fills with zeros to ten
                # characters -- confirm against Tracer::Pad.
                my $annotationID = "$peg:" . Tracer::Pad(9999999999 - $keyStamp, 10,
                                                         1, "0");
                $seenTimestamps{"$peg:$keyStamp"} = 1;
                # Generate the annotation. The stored time is the original time
                # stamp, not the adjusted key stamp.
                $self->PutE(Annotation => $annotationID, annotation_time => $timestamp,
                            comment => $text, annotator => $user);
                $self->PutR(IsAnnotatedBy => $peg, $annotationID);
            } else {
                # Here we have an invalid time stamp.
                Trace("Invalid time stamp \"$timestamp\" in annotations for $peg.") if T(1);
            }
        }
    }
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3