[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.76, Sun Jun 25 18:03:29 2006 UTC revision 1.98, Tue Apr 10 06:13:33 2007 UTC
# Line 5  Line 5 
5      @ISA = qw(Exporter ERDB);      @ISA = qw(Exporter ERDB);
6      use Data::Dumper;      use Data::Dumper;
7      use strict;      use strict;
     use Carp;  
8      use DBKernel;      use DBKernel;
9      use XML::Simple;      use XML::Simple;
10      use DBQuery;      use DBQuery;
11      use DBObject;      use ERDBObject;
12      use Tracer;      use Tracer;
13      use FIGRules;      use FIGRules;
14      use FidCheck;      use FidCheck;
# Line 92  Line 91 
91  sub new {  sub new {
92      # Get the parameters.      # Get the parameters.
93      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
94        # Compute the DBD directory.
95        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
96                                                      $FIG_Config::fig );
97      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
98      # the incoming data.      # the incoming data.
99      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 99  Line 101 
101                                                          # database type                                                          # database type
102                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
103                                                          # data file directory                                                          # data file directory
104                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
105                                                          # database definition file name                                                          # database definition file name
106                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
107                                                          # user name and password                                                          # user name and password
108                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
109                                                          # database connection port                                                          # database connection port
110                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::dbsock,
111                           host         => $FIG_Config::dbhost,
112                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
113                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
114                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 119  Line 122 
122      my $dbh;      my $dbh;
123      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
124          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
125                                  $password, $optionTable->{port}, undef, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
126      }      }
127      # Create the ERDB object.      # Create the ERDB object.
128      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
# Line 127  Line 130 
130      # Add the option table and XML file name.      # Add the option table and XML file name.
131      $retVal->{_options} = $optionTable;      $retVal->{_options} = $optionTable;
132      $retVal->{_xmlName} = $xmlFileName;      $retVal->{_xmlName} = $xmlFileName;
133        # Set up space for the group file data.
134        $retVal->{groupHash} = undef;
135      # Return it.      # Return it.
136      return $retVal;      return $retVal;
137  }  }
# Line 336  Line 341 
341    
342  =head3 GeneMenu  =head3 GeneMenu
343    
344  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params); >>  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >>
345    
346  Return an HTML select menu of genomes. Each genome will be an option in the menu,  Return an HTML select menu of genomes. Each genome will be an option in the menu,
347  and will be displayed by name with the ID and a contig count attached. The selection  and will be displayed by name with the ID and a contig count attached. The selection
# Line 358  Line 363 
363  Reference to a list of values to be substituted in for the parameter marks in  Reference to a list of values to be substituted in for the parameter marks in
364  the filter string.  the filter string.
365    
366    =item selected (optional)
367    
368    ID of the genome to be initially selected.
369    
370    =item fast (optional)
371    
372    If specified and TRUE, the contig counts will be omitted to improve performance.
373    
374  =item RETURN  =item RETURN
375    
376  Returns an HTML select menu with the specified genomes as selectable options.  Returns an HTML select menu with the specified genomes as selectable options.
# Line 368  Line 381 
381    
382  sub GeneMenu {  sub GeneMenu {
383      # Get the parameters.      # Get the parameters.
384      my ($self, $attributes, $filterString, $params) = @_;      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
385        my $slowMode = ! $fast;
386        # Default to nothing selected. This prevents an execution warning if "$selected"
387        # is undefined.
388        $selected = "" unless defined $selected;
389        Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
390      # Start the menu.      # Start the menu.
391      my $retVal = "<select " .      my $retVal = "<select " .
392          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
# Line 385  Line 403 
403          # Get the data for this genome.          # Get the data for this genome.
404          my ($genomeID, $genus, $species, $strain) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
405          # Get the contig count.          # Get the contig count.
406            my $contigInfo = "";
407            if ($slowMode) {
408          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
409          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
410                $contigInfo = "[$count $counting]";
411            }
412            # Find out if we're selected.
413            my $selectOption = ($selected eq $genomeID ? " selected" : "");
414          # Build the option tag.          # Build the option tag.
415          $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
         Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);  
416      }      }
417      # Close the SELECT tag.      # Close the SELECT tag.
418      $retVal .= "</select>\n";      $retVal .= "</select>\n";
419      # Return the result.      # Return the result.
420      return $retVal;      return $retVal;
421  }  }
422    
423  =head3 Build  =head3 Build
424    
425  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 630  Line 654 
654      return ($contigID, $start, $dir, $len);      return ($contigID, $start, $dir, $len);
655  }  }
656    
657    
658    
659  =head3 PointLocation  =head3 PointLocation
660    
661  C<< my $found = Sprout::PointLocation($location, $point); >>  C<< my $found = Sprout::PointLocation($location, $point); >>
# Line 894  Line 920 
920      my ($self, $genomeID) = @_;      my ($self, $genomeID) = @_;
921      # Declare the return variable.      # Declare the return variable.
922      my $retVal = {};      my $retVal = {};
923      # Query the genome's features and annotations. We'll put the oldest annotations      # Query the genome's features.
924      # first so that the last assignment to go into the hash will be the correct one.      my $query = $self->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ?",
     my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],  
                            "HasFeature(from-link) = ? ORDER BY Annotation(time)",  
925                             [$genomeID]);                             [$genomeID]);
926      # Loop through the annotations.      # Loop through the features.
927      while (my $data = $query->Fetch) {      while (my $data = $query->Fetch) {
928          # Get the feature ID and annotation text.          # Get the feature ID and assignment.
929          my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',          my ($fid, $assignment) = $data->Values(['Feature(id)', 'Feature(assignment)']);
930                                                  'Annotation(annotation)']);          if ($assignment) {
         # Check to see if this is an assignment. Note that the user really  
         # doesn't matter to us, other than we use it to determine whether or  
         # not this is an assignment.  
         my ($user, $assignment) = _ParseAssignment('fig', $annotation);  
         if ($user) {  
             # Here it's an assignment. We put it in the return hash, overwriting  
             # any older assignment that might be present.  
931              $retVal->{$fid} = $assignment;              $retVal->{$fid} = $assignment;
932          }          }
933      }      }
# Line 1272  Line 1289 
1289  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1290    
1291  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1292  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then the functional
1293  assignment is a type of annotation. The format of an assignment is described in  assignment is taken from the B<Feature> or C<Annotation> table, depending.
 L</ParseAssignment>. Its worth noting that we cannot filter on the content of the  
 annotation itself because it's a text field; however, this is not a big problem because  
 most features only have a small number of annotations.  
1294    
1295  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1296  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1295  Line 1309 
1309    
1310  =item userID (optional)  =item userID (optional)
1311    
1312  ID of the user whose function determination is desired. If omitted, only the latest  ID of the user whose function determination is desired. If omitted, the primary
1313  C<FIG> assignment will be returned.  functional assignment in the B<Feature> table will be returned.
1314    
1315  =item RETURN  =item RETURN
1316    
# Line 1313  Line 1327 
1327      my $retVal;      my $retVal;
1328      # Determine the ID type.      # Determine the ID type.
1329      if ($featureID =~ m/^fig\|/) {      if ($featureID =~ m/^fig\|/) {
1330          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID.
1331          # users.          if (!$userID) {
1332                # Use the primary assignment.
1333                ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);
1334            } else {
1335                # We must build the list of trusted users.
1336          my %trusteeTable = ();          my %trusteeTable = ();
1337          # Check the user ID.          # Check the user ID.
1338          if (!$userID) {          if (!$userID) {
# Line 1357  Line 1375 
1375                  }                  }
1376              }              }
1377          }          }
1378            }
1379      } else {      } else {
1380          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1381          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
# Line 1472  Line 1491 
1491      my %retVal = ();      my %retVal = ();
1492      # Loop through the incoming features.      # Loop through the incoming features.
1493      for my $featureID (@{$featureList}) {      for my $featureID (@{$featureList}) {
1494          # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1495          my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
                                "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",  
                                [$featureID, $genomeID]);  
1496          # Peel off the BBHs found.          # Peel off the BBHs found.
1497          my @found = ();          my @found = ();
1498          while (my $bbh = $query->Fetch) {          for my $bbh (@bbhData) {
1499              push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)');              my $fid = $bbh->[0];
1500                my $bbGenome = $self->GenomeOf($fid);
1501                if ($bbGenome eq $genomeID) {
1502                    push @found, $fid;
1503                }
1504          }          }
1505          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1506      }      }
# Line 1493  Line 1514 
1514    
1515  Return a list of the similarities to the specified feature.  Return a list of the similarities to the specified feature.
1516    
1517  Sprout does not support real similarities, so this method just returns the bidirectional  This method just returns the bidirectional best hits for performance reasons.
 best hits.  
1518    
1519  =over 4  =over 4
1520    
# Line 1514  Line 1534 
1534      # Get the parameters.      # Get the parameters.
1535      my ($self, $featureID, $count) = @_;      my ($self, $featureID, $count) = @_;
1536      # Ask for the best hits.      # Ask for the best hits.
1537      my @lists = $self->GetAll(['IsBidirectionalBestHitOf'],      my @lists = FIGRules::BBHData($featureID);
                               "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC",  
                               [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'],  
                               $count);  
1538      # Create the return value.      # Create the return value.
1539      my %retVal = ();      my %retVal = ();
1540      for my $tuple (@lists) {      for my $tuple (@lists) {
# Line 1527  Line 1544 
1544      return %retVal;      return %retVal;
1545  }  }
1546    
   
   
1547  =head3 IsComplete  =head3 IsComplete
1548    
1549  C<< my $flag = $sprout->IsComplete($genomeID); >>  C<< my $flag = $sprout->IsComplete($genomeID); >>
# Line 1656  Line 1671 
1671  sub CoupledFeatures {  sub CoupledFeatures {
1672      # Get the parameters.      # Get the parameters.
1673      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1674        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1675      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1676      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1677                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1668  Line 1684 
1684          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1685          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1686                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1687            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1688          # Get the other feature that participates in the coupling.          # Get the other feature that participates in the coupling.
1689          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1690                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1691                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1692            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1693          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1694          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1695          $found = 1;          $found = 1;
# Line 2657  Line 2675 
2675      return $retVal;      return $retVal;
2676  }  }
2677    
2678    =head3 PropertyID
2679    
2680    C<< my $id = $sprout->PropertyID($propName, $propValue); >>
2681    
2682    Return the ID of the specified property name and value pair, if the
2683    pair exists.
2684    
2685    =over 4
2686    
2687    =item propName
2688    
2689    Name of the desired property.
2690    
2691    =item propValue
2692    
2693    Value expected for the desired property.
2694    
2695    =item RETURN
2696    
2697    Returns the ID of the name/value pair, or C<undef> if the pair does not exist.
2698    
2699    =back
2700    
2701    =cut
2702    
2703    sub PropertyID {
2704        # Get the parameters.
2705        my ($self, $propName, $propValue) = @_;
2706        # Try to find the ID.
2707        my ($retVal) = $self->GetFlat(['Property'],
2708                                      "Property(property-name) = ? AND Property(property-value) = ?",
2709                                      [$propName, $propValue], 'Property(id)');
2710        # Return the result.
2711        return $retVal;
2712    }
2713    
2714  =head3 MergedAnnotations  =head3 MergedAnnotations
2715    
2716  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>
# Line 2854  Line 2908 
2908      # Get the parameters.      # Get the parameters.
2909      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2910      # Get the list of names.      # Get the list of names.
2911      my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?",      my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",
2912                                  [$featureID], 'HasSSCell(from-link)');                                  [$featureID], 'HasRoleInSubsystem(to-link)');
2913        # Return the result, sorted.
2914        return sort @retVal;
2915    }
2916    
2917    =head3 GenomeSubsystemData
2918    
2919    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2920    
2921    Return a hash mapping genome features to their subsystem roles.
2922    
2923    =over 4
2924    
2925    =item genomeID
2926    
2927    ID of the genome whose subsystem feature map is desired.
2928    
2929    =item RETURN
2930    
2931    Returns a hash mapping each feature of the genome to a list of 2-tuples. Each
2932    2-tuple contains a subsystem name followed by a role ID.
2933    
2934    =back
2935    
2936    =cut
2937    
2938    sub GenomeSubsystemData {
2939        # Get the parameters.
2940        my ($self, $genomeID) = @_;
2941        # Declare the return variable.
2942        my %retVal = ();
2943        # Get a list of the genome features that participate in subsystems. For each
2944        # feature we get its spreadsheet cells and the corresponding roles.
2945        my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
2946                                 "HasFeature(from-link) = ?", [$genomeID],
2947                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
2948        # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems
2949        # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the
2950        # list. We use it at the beginning to get all the spreadsheet cells for the genome and
2951        # again at the end to filter out participation in subsystems with a negative variant code.
2952        my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
2953                                     "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
2954                                     [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
2955        # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.
2956        # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We
2957        # link these two lists together to create the result. First, we want a hash mapping
2958        # spreadsheet cells to subsystem names.
2959        my %subHash = map { $_->[0] => $_->[1] } @cellData;
2960        # We loop through @cellData to build the hash.
2961        for my $roleEntry (@roleData) {
2962            # Get the data for this feature and cell.
2963            my ($fid, $cellID, $role) = @{$roleEntry};
2964            # Check for a subsystem name.
2965            my $subsys = $subHash{$cellID};
2966            if ($subsys) {
2967                # Insure this feature has an entry in the return hash.
2968                if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
2969                # Merge in this new data.
2970                push @{$retVal{$fid}}, [$subsys, $role];
2971            }
2972        }
2973      # Return the result.      # Return the result.
2974      return @retVal;      return %retVal;
2975  }  }
2976    
2977  =head3 RelatedFeatures  =head3 RelatedFeatures
# Line 2895  Line 3009 
3009      # Get the parameters.      # Get the parameters.
3010      my ($self, $featureID, $function, $userID) = @_;      my ($self, $featureID, $function, $userID) = @_;
3011      # Get a list of the features that are BBHs of the incoming feature.      # Get a list of the features that are BBHs of the incoming feature.
3012      my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                      "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                      'IsBidirectionalBestHitOf(to-link)');  
3013      # Now we loop through the features, pulling out the ones that have the correct      # Now we loop through the features, pulling out the ones that have the correct
3014      # functional assignment.      # functional assignment.
3015      my @retVal = ();      my @retVal = ();
# Line 3033  Line 3145 
3145      # Loop through the input triples.      # Loop through the input triples.
3146      my $n = length $sequence;      my $n = length $sequence;
3147      for (my $i = 0; $i < $n; $i += 3) {      for (my $i = 0; $i < $n; $i += 3) {
3148          # Get the current triple from the sequence.          # Get the current triple from the sequence. Note we convert to
3149          my $triple = substr($sequence, $i, 3);          # upper case to insure a match.
3150            my $triple = uc substr($sequence, $i, 3);
3151          # Translate it using the table.          # Translate it using the table.
3152          my $protein = "X";          my $protein = "X";
3153          if (exists $table->{$triple}) { $protein = $table->{$triple}; }          if (exists $table->{$triple}) { $protein = $table->{$triple}; }
# Line 3067  Line 3180 
3180      return @retVal;      return @retVal;
3181  }  }
3182    
3183    =head3 BBHMatrix
3184    
3185    C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>
3186    
3187    Find all the bidirectional best hits for the features of a genome in a
3188    specified list of target genomes. The return value will be a hash mapping
3189    features in the original genome to their bidirectional best hits in the
3190    target genomes.
3191    
3192    =over 4
3193    
3194    =item genomeID
3195    
3196    ID of the genome whose features are to be examined for bidirectional best hits.
3197    
3198    =item cutoff
3199    
3200    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3201    
3202    =item targets
3203    
3204    List of target genomes. Only pairs originating in the original
3205    genome and landing in one of the target genomes will be returned.
3206    
3207    =item RETURN
3208    
3209    Returns a hash mapping each feature in the original genome to a hash mapping its
3210    BBH pegs in the target genomes to their scores.
3211    
3212    =back
3213    
3214    =cut
3215    
3216    sub BBHMatrix {
3217        # Get the parameters.
3218        my ($self, $genomeID, $cutoff, @targets) = @_;
3219        # Declare the return variable.
3220        my %retVal = ();
3221        # Ask for the BBHs.
3222        my @bbhList = FIGRules::BatchBBHs("fig|$genomeID.%", $cutoff, @targets);
3223        # We now have a set of 4-tuples that we need to convert into a hash of hashes.
3224        for my $bbhData (@bbhList) {
3225            my ($peg1, $peg2, $score) = @{$bbhData};
3226            if (! exists $retVal{$peg1}) {
3227                $retVal{$peg1} = { $peg2 => $score };
3228            } else {
3229                $retVal{$peg1}->{$peg2} = $score;
3230            }
3231        }
3232        # Return the result.
3233        return %retVal;
3234    }
3235    
3236    
3237    =head3 SimMatrix
3238    
3239    C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>
3240    
3241    Find all the similarities for the features of a genome in a
3242    specified list of target genomes. The return value will be a hash mapping
3243    features in the original genome to their similarities in the
3244    target genomes.
3245    
3246    =over 4
3247    
3248    =item genomeID
3249    
3250    ID of the genome whose features are to be examined for similarities.
3251    
3252    =item cutoff
3253    
3254    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3255    
3256    =item targets
3257    
3258    List of target genomes. Only pairs originating in the original
3259    genome and landing in one of the target genomes will be returned.
3260    
3261    =item RETURN
3262    
3263    Returns a hash mapping each feature in the original genome to a hash mapping its
3264    similar pegs in the target genomes to their scores.
3265    
3266    =back
3267    
3268    =cut
3269    
3270    sub SimMatrix {
3271        # Get the parameters.
3272        my ($self, $genomeID, $cutoff, @targets) = @_;
3273        # Declare the return variable.
3274        my %retVal = ();
3275        # Get the list of features in the source organism.
3276        my @fids = $self->FeaturesOf($genomeID);
3277        # Ask for the sims. We only want similarities to fig features.
3278        my $simList = FIGRules::GetNetworkSims($self, \@fids, {}, 1000, $cutoff, "fig");
3279        if (! defined $simList) {
3280            Confess("Unable to retrieve similarities from server.");
3281        } else {
3282            Trace("Processing sims.") if T(3);
3283            # We now have a set of sims that we need to convert into a hash of hashes. First, we
3284            # Create a hash for the target genomes.
3285            my %targetHash = map { $_ => 1 } @targets;
3286            for my $simData (@{$simList}) {
3287                # Get the PEGs and the score.
3288                my ($peg1, $peg2, $score) = ($simData->id1, $simData->id2, $simData->psc);
3289                # Insure the second ID is in the target list.
3290                my ($genome2) = FIGRules::ParseFeatureID($peg2);
3291                if (exists $targetHash{$genome2}) {
3292                    # Here it is. Now we need to add it to the return hash. How we do that depends
3293                    # on whether or not $peg1 is new to us.
3294                    if (! exists $retVal{$peg1}) {
3295                        $retVal{$peg1} = { $peg2 => $score };
3296                    } else {
3297                        $retVal{$peg1}->{$peg2} = $score;
3298                    }
3299                }
3300            }
3301        }
3302        # Return the result.
3303        return %retVal;
3304    }
3305    
3306    
3307  =head3 LowBBHs  =head3 LowBBHs
3308    
3309  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
# Line 3098  Line 3335 
3335      my ($self, $featureID, $cutoff) = @_;      my ($self, $featureID, $cutoff) = @_;
3336      # Create the return hash.      # Create the return hash.
3337      my %retVal = ();      my %retVal = ();
3338      # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3339      my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                 [$cutoff, $featureID],  
                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3340      # Form the results into the return hash.      # Form the results into the return hash.
3341      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3342          $retVal{$pair->[0]} = $pair->[1];          my $fid = $pair->[0];
3343            if ($self->Exists('Feature', $fid)) {
3344                $retVal{$fid} = $pair->[1];
3345            }
3346      }      }
3347      # Return the result.      # Return the result.
3348      return %retVal;      return %retVal;
# Line 3123  Line 3360 
3360  Similarities can be either raw or expanded. The raw similarities are basic  Similarities can be either raw or expanded. The raw similarities are basic
3361  hits between features with similar DNA. Expanding a raw similarity drags in any  hits between features with similar DNA. Expanding a raw similarity drags in any
3362  features considered substantially identical. So, for example, if features B<A1>,  features considered substantially identical. So, for example, if features B<A1>,
3363  B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity  B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3364  B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.  B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3365    
3366  =over 4  =over 4
# Line 3176  Line 3413 
3413      return $retVal;      return $retVal;
3414  }  }
3415    
3416    =head3 IsAllGenomes
3417    
3418    C<< my $flag = $sprout->IsAllGenomes(\@list, \@checkList); >>
3419    
3420    Return TRUE if all genomes in the second list are represented in the first list at
3421    least once. Otherwise, return FALSE. If the second list is omitted, the first list is
3422    compared to a list of all the genomes.
3423    
3424    =over 4
3425    
3426    =item list
3427    
3428    Reference to the list to be compared to the second list.
3429    
3430    =item checkList (optional)
3431    
3432    Reference to the comparison target list. Every genome ID in this list must occur at
3433    least once in the first list. If this parameter is omitted, a list of all the genomes
3434    is used.
3435    
3436    =item RETURN
3437    
3438    Returns TRUE if every item in the second list appears at least once in the
3439    first list, else FALSE.
3440    
3441    =back
3442    
3443    =cut
3444    
3445    sub IsAllGenomes {
3446        # Get the parameters.
3447        my ($self, $list, $checkList) = @_;
3448        # Supply the checklist if it was omitted.
3449        $checkList = [$self->Genomes()] if ! defined($checkList);
3450        # Create a hash of the original list.
3451        my %testList = map { $_ => 1 } @{$list};
3452        # Declare the return variable. We assume that the representation
3453        # is complete and stop at the first failure.
3454        my $retVal = 1;
3455        my $n = scalar @{$checkList};
3456        for (my $i = 0; $retVal && $i < $n; $i++) {
3457            if (! $testList{$checkList->[$i]}) {
3458                $retVal = 0;
3459            }
3460        }
3461        # Return the result.
3462        return $retVal;
3463    }
3464    
3465  =head3 GetGroups  =head3 GetGroups
3466    
3467  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>
# Line 3197  Line 3483 
3483          # Here we have a group list. Loop through them individually,          # Here we have a group list. Loop through them individually,
3484          # getting a list of the relevant genomes.          # getting a list of the relevant genomes.
3485          for my $group (@{$groupList}) {          for my $group (@{$groupList}) {
3486              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(primary-group) = ?",
3487                  [$group], "Genome(id)");                  [$group], "Genome(id)");
3488              $retVal{$group} = \@genomeIDs;              $retVal{$group} = \@genomeIDs;
3489          }          }
# Line 3205  Line 3491 
3491          # Here we need all of the groups. In this case, we run through all          # Here we need all of the groups. In this case, we run through all
3492          # of the genome records, putting each one found into the appropriate          # of the genome records, putting each one found into the appropriate
3493          # group. Note that we use a filter clause to insure that only genomes          # group. Note that we use a filter clause to insure that only genomes
3494          # in groups are included in the return set.          # in real NMPDR groups are included in the return set.
3495          my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],          my @genomes = $self->GetAll(['Genome'], "Genome(primary-group) <> ?",
3496                                      ['Genome(id)', 'Genome(group-name)']);                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3497          # Loop through the genomes found.          # Loop through the genomes found.
3498          for my $genome (@genomes) {          for my $genome (@genomes) {
3499              # Pop this genome's ID off the current list.              # Pop this genome's ID off the current list.
# Line 3325  Line 3611 
3611      # Get the parameters.      # Get the parameters.
3612      my ($self, $genomeID, $testFlag) = @_;      my ($self, $genomeID, $testFlag) = @_;
3613      # Perform the delete for the genome's features.      # Perform the delete for the genome's features.
3614      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", $testFlag);      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", testMode => $testFlag);
3615      # Perform the delete for the primary genome data.      # Perform the delete for the primary genome data.
3616      my $stats = $self->Delete('Genome', $genomeID, $testFlag);      my $stats = $self->Delete('Genome', $genomeID, testMode => $testFlag);
3617      $retVal->Accumulate($stats);      $retVal->Accumulate($stats);
3618      # Return the result.      # Return the result.
3619      return $retVal;      return $retVal;
3620  }  }
3621    
3622  =head2 Internal Utility Methods  =head3 Fix
3623    
3624  =head3 ParseAssignment  C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3625    
3626  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Prepare a genome group hash (like that returned by L</GetGroups>) for processing.
3627  the user, function text, and assigning user will be returned as a 3-element list. If it  Groups with the same primary name will be combined. The primary name is the
3628  isn't, an empty list will be returned.  first capitalized word in the group name.
3629    
3630  A functional assignment is always of the form  =over 4
3631    
3632      C<set >I<YYYY>C< function to\n>I<ZZZZZ>  =item groupHash
3633    
3634  where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,  Hash to be fixed up.
 the user and the assigning user (from MadeAnnotation) will be the same, but that is  
 not always the case.  
3635    
3636  In addition, the functional role may contain extra data that is stripped, such as  =item RETURN
 terminating spaces or a comment separated from the rest of the text by a tab.  
3637    
3638  This is a static method.  Returns a fixed-up version of the hash.
3639    
3640  =over 4  =back
3641    
3642  =item user  =cut
3643    
3644  Name of the assigning user.  sub Fix {
3645        # Get the parameters.
3646        my (%groupHash) = @_;
3647        # Create the result hash.
3648        my %retVal = ();
3649        # Copy over the genomes.
3650        for my $groupID (keys %groupHash) {
3651            # Make a safety copy of the group ID.
3652            my $realGroupID = $groupID;
3653            # Yank the primary name.
3654            if ($groupID =~ /([A-Z]\w+)/) {
3655                $realGroupID = $1;
3656            }
3657            # Append this group's genomes into the result hash.
3658            Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});
3659        }
3660        # Return the result hash.
3661        return %retVal;
3662    }
3663    
3664  =item text  =head3 GroupPageName
3665    
3666  Text of the annotation.  C<< my $name = $sprout->GroupPageName($group); >>
3667    
3668    Return the name of the page for the specified NMPDR group.
3669    
3670    =over 4
3671    
3672    =item group
3673    
3674    Name of the relevant group.
3675    
3676  =item RETURN  =item RETURN
3677    
3678  Returns an empty list if the annotation is not a functional assignment; otherwise, returns  Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3679  a two-element list containing the user name and the function text.  memory it will be read in.
3680    
3681  =back  =back
3682    
3683  =cut  =cut
3684    
3685  sub _ParseAssignment {  sub GroupPageName {
3686      # Get the parameters.      # Get the parameters.
3687      my ($user, $text) = @_;      my ($self, $group) = @_;
3688      # Declare the return value.      # Declare the return variable.
3689      my @retVal = ();      my $retVal;
3690      # Check to see if this is a functional assignment.      # Check for the group file data.
3691      my ($type, $function) = split(/\n/, $text);      if (! defined $self->{groupHash}) {
3692      if ($type =~ m/^set function to$/i) {          # Read the group file.
3693          # Here we have an assignment without a user, so we use the incoming user ID.          my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
3694          @retVal = ($user, $function);          # Store it in our object.
3695      } elsif ($type =~ m/^set (\S+) function to$/i) {          $self->{groupHash} = \%groupData;
         # Here we have an assignment with a user that is passed back to the caller.  
         @retVal = ($1, $function);  
3696      }      }
3697      # If we have an assignment, we need to clean the function text. There may be      # Compute the real group name.
3698      # extra junk at the end added as a note from the user.      my $realGroup = $group;
3699      if (@retVal) {      if ($group =~ /([A-Z]\w+)/) {
3700          $retVal[1] =~ s/(\t\S)?\s*$//;          $realGroup = $1;
3701      }      }
3702      # Return the result list.      # Return the page name.
3703      return @retVal;      $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[1];
3704        # Return the result.
3705        return $retVal;
3706  }  }
3707    
3708  =head3 FriendlyTimestamp  =head3 ReadGroupFile
3709    
3710  Convert a time number to a user-friendly time stamp for display.  C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3711    
3712  This is a static method.  Read in the data from the specified group file. The group file contains information
3713    about each of the NMPDR groups. Each line of the file describes one group and contains the following tab-delimited fields.
3714    
3715  =over 4  =over 4
3716    
3717  =item timeValue  =item name
3718    
3719  Numeric time value.  Name of the group.
3720    
3721    =item page
3722    
3723    Name of the group's page on the web site (e.g. C<campy.php> for
3724    Campylobacter)
3725    
3726    =item genus
3727    
3728    Genus of the group
3729    
3730    =item species
3731    
3732    Species of the group, or an empty string if the group is for an entire
3733    genus. If the group contains more than one species, the species names
3734    should be separated by commas.
3735    
3736    =back
3737    
3738    The parameters to this method are as follows
3739    
3740    =over 4
3741    
3742    =item groupFile
3743    
3744    Name of the file containing the group data.
3745    
3746  =item RETURN  =item RETURN
3747    
3748  Returns a string containing the same time in user-readable format.  Returns a hash keyed on group name. The value of each hash entry is a reference to a list containing the group's page name, genus, and species.
3749    
3750  =back  =back
3751    
3752  =cut  =cut
3753    
3754  sub FriendlyTimestamp {  sub ReadGroupFile {
3755      my ($timeValue) = @_;      # Get the parameters.
3756      my $retVal = localtime($timeValue);      my ($groupFileName) = @_;
3757      return $retVal;      # Declare the return variable.
3758        my %retVal;
3759        # Read the group file.
3760        my @groupLines = Tracer::GetFile($groupFileName);
3761        for my $groupLine (@groupLines) {
3762            my ($name, $page, $genus, $species) = split(/\t/, $groupLine);
3763            $retVal{$name} = [$page, $genus, $species];
3764        }
3765        # Return the result.
3766        return %retVal;
3767  }  }
3768    
3769  =head3 AddProperty  =head3 AddProperty
# Line 3466  Line 3810 
3810      if (@properties) {      if (@properties) {
3811          # Here the property is already in the database. We save its ID.          # Here the property is already in the database. We save its ID.
3812          $propID = $properties[0];          $propID = $properties[0];
3813        } else {
3814          # Here the property value does not exist. We need to generate an ID. It will be set          # Here the property value does not exist. We need to generate an ID. It will be set
3815          # to a number one greater than the maximum value in the database. This call to          # to a number one greater than the maximum value in the database. This call to
3816          # GetAll will stop after one record.          # GetAll will stop after one record.
# Line 3479  Line 3824 
3824      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3825  }  }
3826    
3827    =head2 Virtual Methods
3828    
3829    =head3 CleanKeywords
3830    
3831    C<< my $cleanedString = $sprout->CleanKeywords($searchExpression); >>
3832    
3833    Clean up a search expression or keyword list. This involves converting the periods
3834    in EC numbers to underscores, converting non-leading minus signs to underscores,
3835    converting a vertical bar or colon to an apostrophe, and forcing lower case for all alphabetic
3836    characters. In addition, any extra spaces are removed.
3837    
3838    =over 4
3839    
3840    =item searchExpression
3841    
3842    Search expression or keyword list to clean. Note that a search expression may
3843    contain boolean operators which need to be preserved. This includes leading
3844    minus signs.
3845    
3846    =item RETURN
3847    
3848    Cleaned expression or keyword list.
3849    
3850    =back
3851    
3852    =cut
3853    
3854    sub CleanKeywords {
3855        # Get the parameters.
3856        my ($self, $searchExpression) = @_;
3857        # Perform the standard cleanup.
3858        my $retVal = $self->ERDB::CleanKeywords($searchExpression);
3859        # Fix the periods in EC and TC numbers.
3860        $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
3861        # Fix non-trailing periods.
3862        $retVal =~ s/\.(\w)/_$1/g;
3863        # Fix non-leading minus signs.
3864        $retVal =~ s/(\w)[\-]/$1_/g;
3865        # Fix the vertical bars and colons
3866        $retVal =~ s/(\w)[|:](\w)/$1'$2/g;
3867        # Return the result.
3868        return $retVal;
3869    }
3870    
3871    =head2 Internal Utility Methods
3872    
3873    =head3 ParseAssignment
3874    
3875    Parse annotation text to determine whether or not it is a functional assignment. If it is,
3876    the user and function text will be returned as a 2-element list. If it
3877    isn't, an empty list will be returned.
3878    
3879    A functional assignment is always of the form
3880    
3881        C<set >I<YYYY>C< function to\n>I<ZZZZZ>
3882    
3883    where I<YYYY> is the B<user>, and I<ZZZZZ> is the actual functional role. In most cases,
3884    the user and the assigning user (from MadeAnnotation) will be the same, but that is
3885    not always the case.
3886    
3887    In addition, the functional role may contain extra data that is stripped, such as
3888    terminating spaces or a comment separated from the rest of the text by a tab.
3889    
3890    This is a static method.
3891    
3892    =over 4
3893    
3894    =item user
3895    
3896    Name of the assigning user.
3897    
3898    =item text
3899    
3900    Text of the annotation.
3901    
3902    =item RETURN
3903    
3904    Returns an empty list if the annotation is not a functional assignment; otherwise, returns
3905    a two-element list containing the user name and the function text.
3906    
3907    =back
3908    
3909    =cut
3910    
3911    sub _ParseAssignment {
3912        # Get the parameters.
3913        my ($user, $text) = @_;
3914        # Declare the return value.
3915        my @retVal = ();
3916        # Check to see if this is a functional assignment.
3917        my ($type, $function) = split(/\n/, $text);
3918        if ($type =~ m/^set function to$/i) {
3919            # Here we have an assignment without a user, so we use the incoming user ID.
3920            @retVal = ($user, $function);
3921        } elsif ($type =~ m/^set (\S+) function to$/i) {
3922            # Here we have an assignment with a user that is passed back to the caller.
3923            @retVal = ($1, $function);
3924        }
3925        # If we have an assignment, we need to clean the function text. There may be
3926        # extra junk at the end added as a note from the user.
3927        if (defined( $retVal[1] )) {
3928            $retVal[1] =~ s/(\t\S)?\s*$//;
3929        }
3930        # Return the result list.
3931        return @retVal;
3932    }
3933    
3934    =head3 FriendlyTimestamp
3935    
3936    Convert a time number to a user-friendly time stamp for display.
3937    
3938    This is a static method.
3939    
3940    =over 4
3941    
3942    =item timeValue
3943    
3944    Numeric time value.
3945    
3946    =item RETURN
3947    
3948    Returns a string containing the same time in user-readable format.
3949    
3950    =back
3951    
3952    =cut
3953    
3954    sub FriendlyTimestamp {
3955        my ($timeValue) = @_;
3956        my $retVal = localtime($timeValue);
3957        return $retVal;
3958    }
3959    
3960    
3961  1;  1;

Legend:
Removed from v.1.76  
changed lines
  Added in v.1.98

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3