[Bio] / Sprout / BaseSproutLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/BaseSproutLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Tue Sep 30 15:23:55 2008 UTC revision 1.7, Tue Jun 30 19:53:00 2009 UTC
# Line 22  Line 22 
22      use strict;      use strict;
23      use Tracer;      use Tracer;
24      use ERDB;      use ERDB;
25        use FIG;
26        use Time::HiRes;
27      use base 'ERDBLoadGroup';      use base 'ERDBLoadGroup';
28    
29      # Name of the global section      # Name of the global section
# Line 36  Line 38 
38    
39  =head3 new  =head3 new
40    
41      my $sl = BaseSproutLoader->new($erdb, $source, $options, @tables);      my $sl = BaseSproutLoader->new($erdb, $options, @tables);
42    
43  Construct a new BaseSproutLoader object.  Construct a new BaseSproutLoader object.
44    
# Line 48  Line 50 
50    
51  =item source  =item source
52    
53  [[FigPm]] object used to access the source data.  L<FIG> object used to access the source data.
54    
55  =item options  =item options
56    
# Line 64  Line 66 
66    
67  sub new {  sub new {
68      # Get the parameters.      # Get the parameters.
69      my ($class, $erdb, $source, $options, @tables) = @_;      my ($class, $erdb, $options, @tables) = @_;
70      # Create the BaseSproutLoader object.      # Create the base load group object.
71      my $retVal = ERDBLoadGroup::new($class, $source, $erdb, $erdb->LoadDirectory(),      my $retVal = ERDBLoadGroup::new($class, $erdb, $options, @tables);
72                                      $options, @tables);      # Return it.
     # Bless and return it.  
     bless $retVal, $class;  
73      return $retVal;      return $retVal;
74  }  }
75    
# Line 78  Line 78 
78    
79  =head3 GetGenomeAttributes  =head3 GetGenomeAttributes
80    
81      my $aHashRef = $sl->GetGenomeAttributes($genomeID);      my $aHashRef = $sl->GetGenomeAttributes($genomeID, \@fids);
82    
83  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related
84  attributes for all the features of a genome in a single call, then organizes them into  attributes for all the features of a genome in a single call, then organizes them into
# Line 92  Line 92 
92    
93  =item genomeID  =item genomeID
94    
95  ID of the genome who's attributes are desired.  ID of the genome whose attributes are desired.
96    
97  =item fids  =item fids (optional)
98    
99  Reference to a list of the feature IDs whose attributes are to be kept.  Reference to a list of feature IDs whose attributes are to be kept. If it is a list
100    of lists, the feature IDs will be taken from the first element in each sub-list.
 =item propKeys  
   
 A list of the keys to retrieve.  
101    
102  =item RETURN  =item RETURN
103    
# Line 114  Line 111 
111    
112  sub GetGenomeAttributes {  sub GetGenomeAttributes {
113      # Get the parameters.      # Get the parameters.
114      my ($self, $genomeID) = @_;      my ($self, $genomeID, $fids) = @_;
     # Declare the return variable.  
     my $retVal = {};  
115      # Get the source object.      # Get the source object.
116      my $fig = $self->source();      my $fig = $self->source();
117      # Get the features for this genome.      # Start a timer.
118      my @fids = $fig->all_features($genomeID);      my $start = time();
119      # Initialize the hash. This not only enables us to easily determine which FIDs to      # Initialize the FID list if we don't already have it.
120      # keep, it insures that the caller sees a list reference for every known fid,      if (! defined $fids) {
121      # simplifying the logic.          $fids = [ $fig->all_features($genomeID) ];
     for my $fid (@fids) {  
         $retVal->{$fid} = [];  
122      }      }
123      # Get the list of NMPDR-related attributes.      # Declare the return variable and initialize it with all the features.
124      my @propKeys = $fig->get_group_keys("NMPDR");      my %retVal = map { (ref $_ ? $_->[0] : $_) => [] } @$fids;
125      # Get the attributes. If ev_code_cron is running, we may get a timeout error, so      # Get the attributes. If ev_code_cron is running, we may get a timeout error, so
126      # an eval is used.      # an eval is used.
127      my @aList = ();      my @aList = ();
128      eval {      eval {
129          @aList = $fig->get_attributes("fig|$genomeID%", \@propKeys);          @aList = $fig->get_attributes("fig|$genomeID%");
130          Trace(scalar(@aList) . " attributes returned for genome $genomeID.") if T(ERDBLoadGroup => 3);          Trace(scalar(@aList) . " attributes returned for genome $genomeID.") if T(ERDBLoadGroup => 3);
131      };      };
132      # Check for a problem.      # Check for a problem.
# Line 141  Line 134 
134          Trace("Retrying attributes for $genomeID due to error: $@") if T(ERDBLoadGroup => 1);          Trace("Retrying attributes for $genomeID due to error: $@") if T(ERDBLoadGroup => 1);
135          # Our fallback plan is to process the attributes in blocks of 100. This is much slower,          # Our fallback plan is to process the attributes in blocks of 100. This is much slower,
136          # but allows us to continue processing.          # but allows us to continue processing.
137          my $nFids = scalar @fids;          my $nFids = scalar @$fids;
138          for (my $i = 0; $i < $nFids; $i += 100) {          for (my $i = 0; $i < $nFids; $i += 100) {
139              # Determine the index of the last feature ID we'll be specifying on this pass.              # Determine the index of the last feature ID we'll be specifying on this pass.
140              # Normally it's $i + 99, but if we're close to the end it may be less.              # Normally it's $i + 99, but if we're close to the end it may be less.
141              my $end = ($i + 100 > $nFids ? $nFids - 1 : $i + 99);              my $end = ($i + 100 > $nFids ? $nFids - 1 : $i + 99);
142              # Get a slice of the fid list.              # Get a slice of the fid list.
143              my @slice = @fids[$i .. $end];              my @slice = @{$fids}[$i .. $end];
144              # Get the relevant attributes.              # Get the relevant attributes.
145              Trace("Retrieving attributes for fids $i to $end.") if T(ERDBLoadGroup => 3);              Trace("Retrieving attributes for fids $i to $end.") if T(ERDBLoadGroup => 3);
146              my @aShort = $fig->get_attributes(\@slice, \@propKeys);              my @aShort = $fig->get_attributes(\@slice);
147              Trace(scalar(@aShort) . " attributes returned for fids $i to $end.") if T(ERDBLoadGroup => 3);              Trace(scalar(@aShort) . " attributes returned for fids $i to $end.") if T(ERDBLoadGroup => 3);
148              push @aList, @aShort;              push @aList, @aShort;
149          }          }
# Line 159  Line 152 
152      # them.      # them.
153      for my $aListEntry (@aList) {      for my $aListEntry (@aList) {
154          my $fid = $aListEntry->[0];          my $fid = $aListEntry->[0];
155          if (exists $retVal->{$fid}) {          if (exists $retVal{$fid}) {
156              push @{$retVal->{$fid}}, $aListEntry;              push @{$retVal{$fid}}, $aListEntry;
157                $self->Add(attributes => 1);
158          }          }
159      }      }
160        $self->Add('attribute-time' => time() - $start);
161      # Return the result.      # Return the result.
162      return $retVal;      return \%retVal;
163  }  }
164    
165  =head3 GetSubsystems  =head3 GetSubsystems
# Line 196  Line 191 
191          # No, so compute the list and then create the file.          # No, so compute the list and then create the file.
192          my @subs = $fig->all_subsystems();          my @subs = $fig->all_subsystems();
193          for my $sub (@subs) {          for my $sub (@subs) {
194              if ($fig->nmpdr_subsystem($sub)) {              # Only keep NMPDR subsystems that exist on disk.
195                if ($fig->nmpdr_subsystem($sub) && ! $fig->is_experimental_subsystem($sub)) {
196                  push @retVal, $sub;                  push @retVal, $sub;
197              }              }
198          }          }
199          Tracer::PutFile($subFileName, \@retVal);          Tracer::PutFile($subFileName, \@retVal);
200      }      }
201        Trace(scalar(@retVal) . " subsystems in list.") if T(ERDBLoadGroup => 3);
202      # Return the result.      # Return the result.
203      my %retVal = map { $_ => 1 } @retVal;      my %retVal = map { $_ => 1 } @retVal;
204      return \%retVal;      return \%retVal;
# Line 210  Line 207 
207    
208  =head3 GetSectionList  =head3 GetSectionList
209    
210      my @sections = BaseSproutLoader::GetSectionList($sprout, $fig);      my @sections = BaseSproutLoader::GetSectionList($sprout, $fig, $directory);
211    
212  Return a list of the sections for a Sprout load. The section list is  Return a list of the sections for a Sprout load. The section list is
213  normally determined by retrieving a list of all the complete genomes and  normally determined by retrieving a list of all the complete genomes and
# Line 227  Line 224 
224    
225  =item fig  =item fig
226    
227  [[FigPm]] object from which the data is being retrieved.  L<FIG> object from which the data is being retrieved.
228    
229    =item directory (optional)
230    
231    Directory from which the Sprout tables are being loaded.
232    
233  =item RETURN  =item RETURN
234    
# Line 238  Line 239 
239  =cut  =cut
240    
241  sub GetSectionList {  sub GetSectionList {
242      my ($sprout, $fig) = @_;      my ($sprout, $fig, $directory) = @_;
243      # Declare the return variable.      # Declare the return variable.
244      my @retVal;      my @retVal;
245        # Insure we have a data directory.
246        $directory ||= $sprout->LoadDirectory();
247      # Look for the section list in the data directory.      # Look for the section list in the data directory.
248      my $sectionFileName = $sprout->LoadDirectory() . "/" .      my $sectionFileName = $directory . "/" .
249          ERDBGenerate::CreateFileName('section_master', undef, 'control');          ERDBGenerate::CreateFileName('section_master', undef, 'control');
250      if (-f $sectionFileName) {      if (-f $sectionFileName) {
251          # It's there. Get the list from it.          # It's there. Get the list from it.
252          @retVal = Tracer::GetFile($sectionFileName);          @retVal = Tracer::GetFile($sectionFileName);
253      } else {      } else {
254          # We need to create it. First, we get the list: all the complete          # We need to create it. First, we get the list of all complete
255          # genomes followed by a global section indicator. The genomes are sorted          # genomes. As a safety feature, we only include genomes with
256          # in lexical order.          # an organism directory.
257          @retVal = sort { $a cmp $b } $fig->genomes(1);          my @genomes = grep { -d "$FIG_Config::organisms/$_" } $fig->genomes(1);
258            # Sort the results and add the GLOBAL tag.
259            @retVal = sort { $a cmp $b } @genomes;
260          push @retVal, GLOBAL;          push @retVal, GLOBAL;
261          # Write the list to a file for future use. This insures that if the source          # Write the list to a file for future use. This insures that if the source
262          # data changes, we have a consistent section list.          # data changes, we have a consistent section list.

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3