[Bio] / Sprout / CustomAttributes.pm Repository:
ViewVC logotype

Diff of /Sprout/CustomAttributes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Tue Feb 6 16:28:40 2007 UTC revision 1.19, Fri Feb 9 22:59:18 2007 UTC
# Line 28  Line 28 
28  The actual attribute values are stored as a relationship between the attribute  The actual attribute values are stored as a relationship between the attribute
29  keys and the objects. There can be multiple values for a single key/object pair.  keys and the objects. There can be multiple values for a single key/object pair.
30    
31    =head3 Object IDs
32    
33    The object ID is normally represented as
34    
35        I<type>:I<id>
36    
37    where I<type> is the object type (C<Role>, C<Coupling>, etc.) and I<id> is
38    the actual object ID. Note that the object type must consist of only upper- and
39    lower-case letters! Thus, C<GenomeGroup> is a valid object type, but
40    C<genome_group> is not. Given that restriction, the object ID
41    
42        Family:aclame|cluster10
43    
44    would represent the FIG family C<aclame|cluster10>. For historical reasons,
45    there are three exceptions: subsystems, genomes, and features do not need
46    a type. So, for PEG 3361 of Streptomyces coelicolor A3(2), you simply code
47    
48        fig|100226.1.peg.3361
49    
50    The methods L</ParseID> and L</FormID> can be used to make this all seem
51    more consistent. Given any object ID string, L</ParseID> will convert it to an
52    object type and ID, and given any object type and ID, L</FormID> will
53    convert it to an object ID string. The attribute database is pretty
54    freewheeling about what it will allow for an ID; however, for best
55    results, the type should match an entity type from a Sprout genetics
56    database. If this rule is followed, then the database object
57    corresponding to an ID in the attribute database can be retrieved using
58    the L</GetTargetObject> method.
59    
60        my $object = CustomAttributes::GetTargetObject($sprout, $idValue);
61    
62    =head3 Retrieval and Logging
63    
64  The full suite of ERDB retrieval capabilities is provided. In addition,  The full suite of ERDB retrieval capabilities is provided. In addition,
65  custom methods are provided specific to this application. To get all  custom methods are provided specific to this application. To get all
66  the values of the attribute C<essential> in a specified B<Feature>, you  the values of the attribute C<essential> in a specified B<Feature>, you
# Line 216  Line 249 
249  tab-delimited file with internal tab and new-line characters escaped. This is  tab-delimited file with internal tab and new-line characters escaped. This is
250  the typical TBL-style file used by most FIG applications. One of the columns  the typical TBL-style file used by most FIG applications. One of the columns
251  in the input file must contain the appropriate object id value and the other the  in the input file must contain the appropriate object id value and the other the
252  corresponding attribute value.  corresponding attribute value. The current contents of the attribute database will
253    be erased before loading, unless the C<keep> option is specified.
254    
255  =over 4  =over 4
256    
# Line 251  Line 285 
285    
286  =over 4  =over 4
287    
288  =item erase  =item keep
289    
290    If specified, the existing attribute values will not be erased.
291    
292  If TRUE, the key's values will all be erased before loading. (Doing so  =item archive
293  makes for a faster load.)  
294    If specified, this is the name of a file into which a copy of the incoming file will be saved.
295    
296  =back  =back
297    
# Line 267  Line 304 
304      my $retVal = Stats->new("lineIn", "shortLine");      my $retVal = Stats->new("lineIn", "shortLine");
305      # Compute the minimum number of fields required in each input line. The user specifies two      # Compute the minimum number of fields required in each input line. The user specifies two
306      # columns, and we need to make sure both columns are in every record.      # columns, and we need to make sure both columns are in every record.
307      my $minCols = ($idCol < $dataCol ? $idCol : $idCol) + 1;      my $minCols = ($idCol < $dataCol ? $dataCol : $idCol) + 1;
308        Trace("Minimum column count is $minCols.") if T(3);
309        #
310      # Insure the attribute key exists.      # Insure the attribute key exists.
311      my $found = $self->GetEntity('AttributeKey', $keyName);      my $found = $self->GetEntity('AttributeKey', $keyName);
312      if (! defined $found) {      if (! defined $found) {
313          Confess("Attribute key \"$keyName\" not found in database.");          Confess("Attribute key \"$keyName\" not found in database.");
314      } else {      } else {
315          # Erase the key's current values.          # Erase the key's current values (unless, of course, the caller specified the "keep" option).
316            if (! $options{keep}) {
317          $self->EraseAttribute($keyName);          $self->EraseAttribute($keyName);
318            }
319            # Check for a save file. In the main loop, we'll know a save file is needed if $sh is
320            # defined.
321            my $sh;
322            if ($options{archive}) {
323                $sh = Open(undef, ">$options{archive}");
324                Trace("Attribute $keyName upload saved in $options{archive}.") if T(2);
325            }
326          # Save a list of the object IDs we need to add.          # Save a list of the object IDs we need to add.
327          my %objectIDs = ();          my %objectIDs = ();
328          # Loop through the input file.          # Loop through the input file.
# Line 282  Line 330 
330              # Get the next line of the file.              # Get the next line of the file.
331              my @fields = Tracer::GetLine($fh);              my @fields = Tracer::GetLine($fh);
332              $retVal->Add(lineIn => 1);              $retVal->Add(lineIn => 1);
333              # Now we need to validate the line.              my $count = scalar @fields;
334              if (scalar(@fields) < $minCols) {              Trace("Field count is $count. First field is \"$fields[0]\".") if T(4);
335                # Archive it if necessary.
336                if (defined $sh) {
337                    Tracer::PutLine($sh, \@fields);
338                }
339                # Now we need to check for comments and validate the line.
340                if ($fields[0] =~ /^\s*$/) {
341                    # Blank line: skip it.
342                    $retVal->Add(blank => 1);
343                } elsif (substr($fields[0],0,1) eq '#') {
344                    # Comment line: skip it.
345                    $retVal->Add(comment => 1);
346                } elsif ($count < $minCols) {
347                    # Line is too short: we have an error.
348                  $retVal->Add(shortLine => 1);                  $retVal->Add(shortLine => 1);
349              } else {              } else {
350                  # It's valid, so get the ID and value.                  # It's valid, so get the ID and value.
# Line 291  Line 352 
352                  # Denote we're using this input line.                  # Denote we're using this input line.
353                  $retVal->Add(lineUsed => 1);                  $retVal->Add(lineUsed => 1);
354                  # Now we insert the attribute.                  # Now we insert the attribute.
355                  $self->InsertObject('HasValueFor', { from => $keyName, to => $id,                  $self->InsertObject('HasValueFor', { 'from-link' => $keyName,
356                                                         'to-link' => $id,
357                                                       value => $value });                                                       value => $value });
358                  $retVal->Add(newValue => 1);                  $retVal->Add(newValue => 1);
359              }              }
360          }          }
361          # Log this operation.          # Log this operation.
362          $self->LogOperation("Load Key", $keyName, $retVal->Display());          $self->LogOperation("Load Key", $keyName, $retVal->Display());
363            # If there's an archive, close it.
364            if (defined $sh) {
365                close $sh;
366            }
367      }      }
368      # Return the statistics.      # Return the statistics.
369      return $retVal;      return $retVal;
# Line 1615  Line 1681 
1681      return sort @groups;      return sort @groups;
1682  }  }
1683    
1684    =head3 ParseID
1685    
1686    C<< my ($type, $id) = CustomAttributes::ParseID($idValue); >>
1687    
1688    Determine the type and object ID corresponding to an ID value from the attribute database.
1689    Most ID values consist of a type name and an ID, separated by a colon (e.g. C<Family:aclame|cluster10>);
1690    however, Genomes, Features, and Subsystems are not stored with a type name, so we need to
1691    deduce the type from the ID value structure.
1692    
1693    The theory here is that you can plug the ID and type directly into a Sprout database method, as
1694    follows
1695    
1696        my ($type, $id) = CustomAttributes::ParseID($attrList[$num]->[0]);
1697        my $target = $sprout->GetEntity($type, $id);
1698    
1699    =over 4
1700    
1701    =item idValue
1702    
1703    ID value taken from the attribute database.
1704    
1705    =item RETURN
1706    
1707    Returns a two-element list. The first element is the type of object indicated by the ID value,
1708    and the second element is the actual object ID.
1709    
1710    =back
1711    
1712    =cut
1713    
# ParseID: split an ID value from the attribute database into its object
# type and object ID.
#
# Parameters:
#   idValue   ID value taken from the attribute database.
#
# Returns a two-element list: the object type followed by the object ID.
#
# Typed IDs look like "Type:id", where the type consists only of letters.
# Features, genomes, and subsystems are stored without a type, so their
# type is deduced from the structure of the ID itself:
#   - a value beginning with "fig|" is a feature ID;
#   - a value of the exact form <digits>.<digits> is a genome ID;
#   - anything else is taken to be a subsystem ID.
sub ParseID {
    # Get the parameters.
    my ($idValue) = @_;
    # Declare the return variables.
    my ($type, $id);
    # We first check for the presence of an entity name. Entity names can
    # only contain letters, which helps to ensure typed object IDs don't
    # collide with subsystem names (which are untyped).
    if ($idValue =~ /^([A-Za-z]+):(.+)/) {
        # Here we have a typed ID.
        ($type, $id) = ($1, $2);
    } elsif ($idValue =~ /\Afig\|/) {
        # Here we have a feature ID. The pattern is anchored at the start so
        # that a name which merely contains "fig|" is not mistaken for one.
        ($type, $id) = (Feature => $idValue);
    } elsif ($idValue =~ /\A\d+\.\d+\z/) {
        # Here we have a genome ID. The whole value must be <digits>.<digits>;
        # an unanchored match would wrongly claim any subsystem name that
        # happens to contain a version-like number such as "1.2".
        ($type, $id) = (Genome => $idValue);
    } else {
        # The default is a subsystem ID.
        ($type, $id) = (Subsystem => $idValue);
    }
    # Return the results.
    return ($type, $id);
}
1738    
1739    =head3 FormID
1740    
1741    C<< my $idValue = CustomAttributes::FormID($type, $id); >>
1742    
1743    Convert an object type and ID pair into an object ID string for the attribute system. Subsystems,
1744    genomes, and features are stored in the database without type information, but all other object IDs
1745    must be prefixed with the object type.
1746    
1747    =over 4
1748    
1749    =item type
1750    
1751    Relevant object type.
1752    
1753    =item id
1754    
1755    ID of the object in question.
1756    
1757    =item RETURN
1758    
1759    Returns a string that will be recognized as an object ID in the attribute database.
1760    
1761    =back
1762    
1763    =cut
1764    
# FormID: build the object ID string used by the attribute system from an
# object type and an object ID.
#
# Parameters:
#   type   Relevant object type.
#   id     ID of the object in question.
#
# Returns the bare ID for features, genomes, and subsystems, and
# "type:id" for every other object type.
sub FormID {
    my ($type, $id) = @_;
    # These three object types are stored without a type prefix.
    my %isUntyped = map { $_ => 1 } qw(Feature Genome Subsystem);
    return $id if $isUntyped{$type};
    # Everything else carries its type in front of the ID.
    return join(':', $type, $id);
}
1779    
1780    =head3 GetTargetObject
1781    
1782    C<< my $object = CustomAttributes::GetTargetObject($erdb, $idValue); >>
1783    
1784    Return the database object corresponding to the specified attribute object ID. The
1785    object type associated with the ID value must correspond to an entity name in the
1786    specified database.
1787    
1788    =over 4
1789    
1790    =item erdb
1791    
1792    B<ERDB> object for accessing the target database.
1793    
1794    =item idValue
1795    
1796    ID value retrieved from the attribute database.
1797    
1798    =item RETURN
1799    
1800    Returns a B<DBObject> for the attribute value's target object.
1801    
1802    =back
1803    
1804    =cut
1805    
# GetTargetObject: fetch the database object that an attribute object ID
# refers to.
#
# Parameters:
#   erdb      Object used to access the target database; its GetEntity
#             method is called with the parsed type and ID.
#   idValue   ID value retrieved from the attribute database.
#
# Returns whatever GetEntity returns for the type and ID that ParseID
# extracts from the ID value.
sub GetTargetObject {
    my ($erdb, $idValue) = @_;
    # Break the ID value into its (type, id) pair.
    my @entityKey = ParseID($idValue);
    # Look the entity up; the call is made in scalar context.
    my $target = $erdb->GetEntity(@entityKey);
    return $target;
}
1818    
1819  1;  1;

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3