[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Tue Jan 25 01:36:09 2005 UTC revision 1.9, Mon Feb 7 00:32:07 2005 UTC
# Line 36  Line 36 
36    
37  #: Constructor SFXlate->new_sprout_only();  #: Constructor SFXlate->new_sprout_only();
38    
   
39  =head2 Public Methods  =head2 Public Methods
40    
41  =head3 new  =head3 new
# Line 424  Line 423 
423  =back  =back
424    
425  =cut  =cut
426  #: Return Type %;  #: Return Type $%;
427  sub LoadUpdate {  sub LoadUpdate {
428          # Get the parameters.          # Get the parameters.
429          my $self = shift @_;          my $self = shift @_;
# Line 853  Line 852 
852  Returns a three-element list. The first element is a list of feature IDs for the features that  Returns a three-element list. The first element is a list of feature IDs for the features that
853  overlap the region of interest. The second and third elements are the minimum and maximum  overlap the region of interest. The second and third elements are the minimum and maximum
854  locations of the features provided on the specified contig. These may extend outside  locations of the features provided on the specified contig. These may extend outside
855  the start and stop values.  the start and stop values. The first element (that is, the list of features) is sorted
856    roughly by location.
857    
858  =back  =back
859    
860  =cut  =cut
861  #: Return Type @;  #: Return Type @@;
862  sub GenesInRegion {  sub GenesInRegion {
863          # Get the parameters.          # Get the parameters.
864          my $self = shift @_;          my $self = shift @_;
# Line 866  Line 866 
866          # Get the maximum segment length.          # Get the maximum segment length.
867          my $maximumSegmentLength = $self->MaxSegment;          my $maximumSegmentLength = $self->MaxSegment;
868          # Create a hash to receive the feature list. We use a hash so that we can eliminate          # Create a hash to receive the feature list. We use a hash so that we can eliminate
869          # duplicates easily.          # duplicates easily. The hash key will be the feature ID. The value will be a two-element
870            # containing the minimum and maximum offsets. We will use the offsets to sort the results
871            # when we're building the result set.
872          my %featuresFound = ();          my %featuresFound = ();
873          # Prime the values we'll use for the returned beginning and end.          # Prime the values we'll use for the returned beginning and end.
874          my ($min, $max) = ($self->ContigLength($contigID), 0);          my @initialMinMax = ($self->ContigLength($contigID), 0);
875            my ($min, $max) = @initialMinMax;
876          # Create a table of parameters for each query. Each query looks for features travelling in          # Create a table of parameters for each query. Each query looks for features travelling in
877          # a particular direction. The query parameters include the contig ID, the feature direction,          # a particular direction. The query parameters include the contig ID, the feature direction,
878          # the lowest possible start position, and the highest possible start position. This works          # the lowest possible start position, and the highest possible start position. This works
# Line 899  Line 902 
902                                          $found = 1;                                          $found = 1;
903                                  }                                  }
904                          } elsif ($dir eq '-') {                          } elsif ($dir eq '-') {
905                                  $end = $beg - $len;                                  # Note we switch things around so that the beginning is to the left of the
906                                  if ($end <= $stop) {                                  # ending.
907                                    ($beg, $end) = ($beg - $len, $beg);
908                                    if ($beg <= $stop) {
909                                          # Denote we found a useful feature.                                          # Denote we found a useful feature.
910                                          $found = 1;                                          $found = 1;
911                                  }                                  }
912                          }                          }
913                          if ($found) {                          if ($found) {
914                                  # Here we need to record the feature and update the minimum and maximum.                                  # Here we need to record the feature and update the minima and maxima. First,
915                                  $featuresFound{$featureID} = 1;                                  # get the current entry for the specified feature.
916                                  if ($beg < $min) { $min = $beg; }                                  my ($loc1, $loc2) = (exists $featuresFound{$featureID} ? @{$featuresFound{$featureID}} :
917                                  if ($end < $min) { $min = $end; }                                                                           @initialMinMax);
918                                  if ($beg > $max) { $max = $beg; }                                  # Merge the current segment's begin and end into the feature begin and end and the
919                                  if ($end > $max) { $max = $end; }                                  # global min and max.
920                                    if ($beg < $loc1) {
921                                            $loc1 = $beg;
922                                            $min = $beg if $beg < $min;
923                                    }
924                                    if ($end > $loc2) {
925                                            $loc2 = $end;
926                                            $max = $end if $end > $max;
927                                    }
928                                    # Store the entry back into the hash table.
929                                    $featuresFound{$featureID} = [$loc1, $loc2];
930                          }                          }
931                  }                  }
932          }          }
933          # Compute a list of the IDs for the features found.          # Now we must compute the list of the IDs for the features found. We start with a list
934          my @list = (sort (keys %featuresFound));          # of midpoints / feature ID pairs. (It's not really a midpoint, it's twice the midpoint,
935            # but the result of the sort will be the same.)
936            my @list = map { [$featuresFound{$_}->[0] + $featuresFound{$_}->[1], $_] } keys %featuresFound;
937            # Now we sort by midpoint and yank out the feature IDs.
938            my @retVal = map { $_->[1] } sort { $a->[0] <=> $b->[0] } @list;
939          # Return it along with the min and max.          # Return it along with the min and max.
940          return (\@list, $min, $max);          return (\@retVal, $min, $max);
941  }  }
942    
943  =head3 FType  =head3 FType
# Line 1016  Line 1035 
1035  It has the format "XXXX\nset XXXX function to\nYYYYY". In this instance, XXXX is the user ID  It has the format "XXXX\nset XXXX function to\nYYYYY". In this instance, XXXX is the user ID
1036  and YYYYY is the functional assignment text. Its worth noting that we cannot filter on the content  and YYYYY is the functional assignment text. Its worth noting that we cannot filter on the content
1037  of the annotation itself because it's a text field; however, this is not a big problem because most  of the annotation itself because it's a text field; however, this is not a big problem because most
1038  features only have a small number of annotations.  features only have a small number of annotations. Finally, if a single user has multiple
1039    functional assignments, we will only keep the most recent one.
1040    
1041  =over 4  =over 4
1042    
# Line 1037  Line 1057 
1057          my $self = shift @_;          my $self = shift @_;
1058          my ($featureID) = @_;          my ($featureID) = @_;
1059          # Get all of the feature's annotations.          # Get all of the feature's annotations.
1060          my @query = $self->GetFlat(['IsTargetOfAnnotation', 'Annotation'],      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation'],
1061                                                      "IsTargetOfAnnotation(from-link) = ?",                                                      "IsTargetOfAnnotation(from-link) = ?",
1062                                                          [$featureID], 'Annotation(annotation)');                              [$featureID], ['Annotation(time)', 'Annotation(annotation)']);
1063          # Declare the return hash.          # Declare the return hash.
1064          my %retVal;          my %retVal;
1065        # Declare a hash for insuring we only make one assignment per user.
1066        my %timeHash = ();
1067        # Now we sort the assignments by timestamp in reverse.
1068        my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query;
1069          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1070          for my $text (@query) {      for my $annotation (@sortedQuery) {
1071            # Get the annotation fields.
1072            my ($timeStamp, $text) = @{$annotation};
1073                  # Check to see if this is a functional assignment.                  # Check to see if this is a functional assignment.
1074                  my ($user, $function) = ParseAssignment($text);                  my ($user, $function) = ParseAssignment($text);
1075                  if ($user) {          if ($user && ! exists $timeHash{$user}) {
1076                          # Here it is, so stuff it in the return hash.              # Here it is a functional assignment and there has been no
1077                # previous assignment for this user, so we stuff it in the
1078                # return hash.
1079                          $retVal{$function} = $user;                          $retVal{$function} = $user;
1080                # Insure we don't assign to this user again.
1081                $timeHash{$user} = 1;
1082                  }                  }
1083          }          }
1084          # Return the hash of assignments found.          # Return the hash of assignments found.
# Line 2539  Line 2569 
2569          return %retVal;          return %retVal;
2570  }  }
2571    
2572    =head3 GetGroups
2573    
2574    C<< my %groups = $sprout->GetGroups(\@groupList); >>
2575    
2576    Return a hash mapping each group to the IDs of the genomes in the group.
2577    A list of groups may be specified, in which case only those groups will be
2578    shown. Alternatively, if no parameter is supplied, all groups will be
2579    included. Genomes that are not in any group are omitted.
2580    
2581    =cut
2582    #: Return Type %@;
2583    sub GetGroups {
2584        # Get the parameters.
2585        my $self = shift @_;
2586        my ($groupList) = @_;
2587        # Declare the return value.
2588        my %retVal = ();
2589        # Determine whether we are getting all the groups or just some.
2590        if (defined $groupList) {
2591            # Here we have a group list. Loop through them individually,
2592            # getting a list of the relevant genomes.
2593            for my $group (@{$groupList}) {
2594                my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",
2595                    [$group], "Genome(id)");
2596                $retVal{$group} = \@genomeIDs;
2597            }
2598        } else {
2599            # Here we need all of the groups. In this case, we run through all
2600            # of the genome records, putting each one found into the appropriate
2601            # group. Note that we use a filter clause to insure that only genomes
2602            # in groups are included in the return set.
2603            my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],
2604                                        ['Genome(id)', 'Genome(group-name)']);
2605            # Loop through the genomes found.
2606            for my $genome (@genomes) {
2607                # Pop this genome's ID off the current list.
2608                my @groups = @{$genome};
2609                my $genomeID = shift @groups;
2610                # Loop through the groups, adding the genome ID to each group's
2611                # list.
2612                for my $group (@groups) {
2613                    if (exists $retVal{$group}) {
2614                        push @{$retVal{$group}}, $genomeID;
2615                    } else {
2616                        $retVal{$group} = [$genomeID];
2617                    }
2618                }
2619            }
2620        }
2621        # Return the hash we just built.
2622        return %retVal;
2623    }
2624    
2625  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2626    
2627  =head3 ParseAssignment  =head3 ParseAssignment

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3