[Bio] / Sprout / FamilySaplingLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/FamilySaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package FamilySaplingLoader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use FFs;
26 : parrello 1.3 use FF;
27 : parrello 1.7 use SeedUtils;
28 :     use LoaderUtils;
29 : parrello 1.1 use base 'BaseSaplingLoader';
30 :    
31 :     =head1 Sapling Family Load Group Class
32 :    
33 :     =head2 Introduction
34 :    
35 :     The Family Load Group includes all of the major family and pairing tables.
36 :    
37 :     =head3 new
38 :    
39 :     my $sl = FamilySaplingLoader->new($erdb, $options);
40 :    
41 :     Construct a new FamilySaplingLoader object.
42 :    
43 :     =over 4
44 :    
45 :     =item erdb
46 :    
47 : parrello 1.4 L<Sapling> object for the database being loaded.
48 : parrello 1.1
49 :     =item options
50 :    
51 :     Reference to a hash of command-line options.
52 :    
53 :     =back
54 :    
55 :     =cut
56 :    
# Construct a new FamilySaplingLoader object.
#
# Parameters:
#   $class    name of the class being instantiated
#   $erdb     database object for the database being loaded
#   $options  reference to a hash of command-line options
#
# Returns whatever BaseSaplingLoader::new returns for this load group.
sub new {
    my ($class, $erdb, $options) = @_;
    # The tables belonging to this load group, handed to the base class
    # in sorted order.
    my @loadTables = sort qw(
        Family HasMember IsInPair Pairing IsDeterminedBy
        PairSet OccursIn Cluster FamilyName IsFamilyFor
        HasRepresentativeOf IsCoupledTo
    );
    # Delegate construction to the base loader and hand back its result.
    return BaseSaplingLoader::new($class, $erdb, $options, @loadTables);
}
69 :    
70 :     =head2 Public Methods
71 :    
72 :     =head3 Generate
73 :    
74 :     $sl->Generate();
75 :    
76 :     Generate the data for the family and pairing files.
77 :    
78 :     =cut
79 :    
# Generate the data for the family and pairing tables.
#
# All of the work happens only when this is the global section
# ($self->global() is true); otherwise nothing is generated.
#
# In the global section this method:
#   1. Passes the coupling flat files (Pairing, Cluster, IsDeterminedBy,
#      IsInPair, OccursIn, PairSet) found under the database load
#      directory to LoadFromFile.
#   2. Locates the newest "Release*" directory under /vol/figfam-prod that
#      contains a coupling.values file, and dies via Confess if none exists.
#   3. Reads family.functions, families.2c and coupling.values from that
#      directory and emits Family, FamilyName, IsFamilyFor, HasMember,
#      HasRepresentativeOf and IsCoupledTo records.
#
# Takes no arguments beyond the invocant; callers are not expected to use
# the return value.
sub Generate {
    # Get the parameters.
    my ($self) = @_;
    # Get the database object.
    my $erdb = $self->db();
    # Get the source object.
    # NOTE(review): $fig is never used below. The call is retained in case
    # source() has side effects -- confirm and remove if it does not.
    my $fig = $self->source();
    # Is this the global section?
    if ($self->global()) {
        # Here we load the coupling data. The coupling data is stored in flat
        # files in a Sapling data subdirectory.
        my $couplingDir = $erdb->LoadDirectory() . '/FamilyData/Sapling';
        $self->LoadFromFile(Pairing => "$couplingDir/Pairing.dtx", qw(id));
        $self->LoadFromFile(Cluster => "$couplingDir/Cluster.dtx", qw(id));
        $self->LoadFromFile(IsDeterminedBy => "$couplingDir/IsDeterminedBy.dtx",
                            qw(from-link to-link inverted));
        $self->LoadFromFile(IsInPair => "$couplingDir/IsInPair.dtx",
                            qw(from-link to-link));
        $self->LoadFromFile(OccursIn => "$couplingDir/OccursIn.dtx",
                            qw(from-link to-link));
        $self->LoadFromFile(PairSet => "$couplingDir/PairSet.dtx",
                            qw(id score));
        # The next step is to load all the FIGfam data. This data is found in
        # the latest usable figfam-prod release directory.
        my $figFamRoot = "/vol/figfam-prod";
        my @releases = sort { Tracer::Cmp($a, $b) }
                       grep { $_ =~ /^Release\d+/ } OpenDir($figFamRoot);
        # Walk the releases from newest to oldest and stop at the first one
        # that actually contains a coupling.values file.
        my $figFamDir;
        for my $release (reverse @releases) {
            my $testDir = "$figFamRoot/$release";
            if (-f "$testDir/coupling.values") {
                $figFamDir = $testDir;
                last;
            }
        }
        if (! $figFamDir) {
            Confess("No FIGfam directory found.");
        } else {
            Trace("FIGfams will be loaded from $figFamDir.") if T(ERDBLoadGroup => 2);
            # We will keep the FIGfam IDs in here. We need them to filter the
            # coupling file.
            my %figFams;
            # Read the family functions.
            my $ih = Open(undef, "<$figFamDir/family.functions");
            while (! eof $ih) {
                my ($fam, $function) = Tracer::GetLine($ih);
                $self->Track(familyFunctionRecord => $fam, 1000);
                # Output the family record.
                $self->PutE(Family => $fam);
                $self->PutE(FamilyName => $fam,
                            family_function => $function);
                # Remember that this is a valid family.
                $figFams{$fam} = 1;
                # Connect the family to its roles.
                my ($roles, $errors) = LoaderUtils::RolesForLoading($function);
                if (! defined $roles) {
                    # Here the family function was suspicious.
                    $self->Add(suspiciousFamilyFunction => 1);
                } else {
                    # Here we have a good function.
                    for my $role (@$roles) {
                        $self->Add(figfamRole => 1);
                        $self->PutR(IsFamilyFor => $fam, $role);
                    }
                    $self->Add(badFigfamRoles => $errors);
                }
            }
            close $ih;
            # Read the memberships.
            $ih = Open(undef, "<$figFamDir/families.2c");
            while (! eof $ih) {
                my ($fam, $featureID) = Tracer::GetLine($ih);
                $self->Track(familyFeatureRecord => "$fam:$featureID", 5000);
                # Connect the family to the feature.
                $self->PutR(HasMember => $fam, $featureID);
                # Extract the genome ID from the front of the feature ID.
                if ($featureID =~ /^fig\|(\d+\.\d+)/) {
                    # Connect the family to the genome.
                    $self->PutR(HasRepresentativeOf => $1, $fam);
                }
            }
            close $ih;
            # Now read the coupling data.
            $ih = Open(undef, "<$figFamDir/coupling.values");
            while (! eof $ih) {
                my ($from, $to, $expScore, $contigScore) = Tracer::GetLine($ih);
                $self->Track(familyCouplingRecord => "$from:$to", 1000);
                # Verify that both FIGfams are ours and are distinct.
                if (! $figFams{$from} || ! $figFams{$to}) {
                    $self->Add(couplingFigFamNotFound => 1);
                } elsif ($from eq $to) {
                    $self->Add(couplingFigFamReflexive => 1);
                } else {
                    # Everything's okay, so we can connect the two figfams
                    # together. Insure the ordering is correct. The IDs are
                    # compared as strings: a numeric ">" on non-numeric IDs
                    # treats both sides as 0 and would never swap the pair.
                    if ($from gt $to) {
                        ($from, $to) = ($to, $from);
                    }
                    # Forge the connection.
                    $self->PutR(IsCoupledTo => $from, $to,
                                co_occurrence_evidence => $contigScore,
                                co_expression_evidence => $expScore);
                }
            }
            close $ih;
        }
    }
}
185 :    
186 :    
187 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3