[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.99, Fri Apr 27 22:21:46 2007 UTC revision 1.116, Tue Sep 9 21:02:10 2008 UTC
# Line 1  Line 1 
1  package Sprout;  package Sprout;
2    
     require Exporter;  
     use ERDB;  
     @ISA = qw(Exporter ERDB);  
3      use Data::Dumper;      use Data::Dumper;
4      use strict;      use strict;
5      use DBKernel;      use DBKernel;
# Line 17  Line 14 
14      use BasicLocation;      use BasicLocation;
15      use CustomAttributes;      use CustomAttributes;
16      use RemoteCustomAttributes;      use RemoteCustomAttributes;
17        use CGI;
18        use WikiTools;
19        use base qw(ERDB);
20    
21  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
22    
# Line 29  Line 29 
29  on the constructor. For example, the following invocation specifies a PostgreSQL database named I<GenDB>  on the constructor. For example, the following invocation specifies a PostgreSQL database named I<GenDB>
30  whose definition and data files are in a co-directory named F<Data>.  whose definition and data files are in a co-directory named F<Data>.
31    
32  C<< my $sprout = Sprout->new('GenDB', { dbType => 'pg', dataDir => '../Data', xmlFileName => '../Data/SproutDBD.xml' }); >>      my $sprout = Sprout->new('GenDB', { dbType => 'pg', dataDir => '../Data', xmlFileName => '../Data/SproutDBD.xml' });
33    
34  Once you have a sprout object, you may use it to re-create the database, load the tables from  Once you have a sprout object, you may use it to re-create the database, load the tables from
35  tab-delimited flat files and perform queries. Several special methods are provided for common  tab-delimited flat files and perform queries. Several special methods are provided for common
36  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and  query tasks. For example, L</Genomes> lists the IDs of all the genomes in the database and
37  L</dna_seq> returns the DNA sequence for a specified genome location.  L</DNASeq> returns the DNA sequence for a specified genome location.
38    
39  The Sprout object is a subclass of the ERDB object and inherits all its properties and methods.  The Sprout object is a subclass of the ERDB object and inherits all its properties and methods.
40    
# Line 46  Line 46 
46    
47  =head3 new  =head3 new
48    
49  C<< my $sprout = Sprout->new($dbName, \%options); >>      my $sprout = Sprout->new($dbName, \%options);
50    
51  This is the constructor for a sprout object. It connects to the database and loads the  This is the constructor for a sprout object. It connects to the database and loads the
52  database definition into memory. The positional first parameter specifies the name of the  database definition into memory. The positional first parameter specifies the name of the
# Line 80  Line 80 
80    
81  * B<noDBOpen> suppresses the connection to the database if TRUE, else FALSE  * B<noDBOpen> suppresses the connection to the database if TRUE, else FALSE
82    
83    * B<host> name of the database host
84    
85  =back  =back
86    
87  For example, the following constructor call specifies a database named I<Sprout> and a user name of  For example, the following constructor call specifies a database named I<Sprout> and a user name of
88  I<fig> with a password of I<admin>. The database load files are in the directory  I<fig> with a password of I<admin>. The database load files are in the directory
89  F</usr/fig/SproutData>.  F</usr/fig/SproutData>.
90    
91  C<< my $sprout = Sprout->new('Sprout', { userData =>; 'fig/admin', dataDir => '/usr/fig/SproutData' }); >>      my $sprout = Sprout->new('Sprout', { userData => 'fig/admin', dataDir => '/usr/fig/SproutData' });
92    
93  =cut  =cut
94    
# Line 105  Line 107 
107                                                          # data file directory                                                          # data file directory
108                         xmlFileName  => "$dbd_dir/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
109                                                          # database definition file name                                                          # database definition file name
110                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::sproutUser/$FIG_Config::sproutPass",
111                                                          # user name and password                                                          # user name and password
112                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::sproutPort,
113                                                          # database connection port                                                          # database connection port
114                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::sproutSock,
115                         host         => $FIG_Config::dbhost,                         host         => $FIG_Config::sprout_host,
116                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
117                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
118                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 123  Line 125 
125      # Connect to the database.      # Connect to the database.
126      my $dbh;      my $dbh;
127      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
128            Trace("Connect data: host = $optionTable->{host}, port = $optionTable->{port}.") if T(3);
129          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
130                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
131      }      }
# Line 134  Line 137 
137      $retVal->{_xmlName} = $xmlFileName;      $retVal->{_xmlName} = $xmlFileName;
138      # Set up space for the group file data.      # Set up space for the group file data.
139      $retVal->{groupHash} = undef;      $retVal->{groupHash} = undef;
140        # Set up space for the genome hash. We use this to identify NMPDR genomes.
141        $retVal->{genomeHash} = undef;
142      # Connect to the attributes.      # Connect to the attributes.
143      if ($FIG_Config::attrURL) {      if ($FIG_Config::attrURL) {
144          Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);          Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);
# Line 147  Line 152 
152      return $retVal;      return $retVal;
153  }  }
154    
155    =head3 CoreGenomes
156    
157        my @genomes = $sprout->CoreGenomes($scope);
158    
159    Return the IDs of NMPDR genomes in the specified scope.
160    
161    =over 4
162    
163    =item scope
164    
165    Scope of the desired genomes. C<core> covers the original core genomes,
166    C<nmpdr> covers all genomes in NMPDR groups, and C<all> covers all
167    genomes in the system.
168    
169    =item RETURN
170    
171    Returns a list of the IDs for the genomes in the specified scope.
172    
173    =back
174    
175    =cut
176    
177    sub CoreGenomes {
178        # Get the parameters.
179        my ($self, $scope) = @_;
180        # Declare the return variable.
181        my @retVal = ();
182        # If we want all genomes, then this is easy.
183        if ($scope eq 'all') {
184            @retVal = $self->Genomes();
185        } else {
186            # Here we're dealing with groups. Get the hash of all the
187            # genome groups.
188            my %groups = $self->GetGroups();
189            # Loop through the groups, keeping the ones that we want.
190            for my $group (keys %groups) {
191                # Decide if we want to keep this group.
192                my $keepGroup = 0;
193                if ($scope eq 'nmpdr') {
194                    # NMPDR mode: keep all groups.
195                    $keepGroup = 1;
196                } elsif ($scope eq 'core') {
197                    # CORE mode. Only keep real core groups.
198                    if (grep { $group =~ /$_/ } @{$FIG_Config::realCoreGroups}) {
199                        $keepGroup = 1;
200                    }
201                }
202                # Add this group if we're keeping it.
203                if ($keepGroup) {
204                    push @retVal, @{$groups{$group}};
205                }
206            }
207        }
208        # Return the result.
209        return @retVal;
210    }
211    
212    =head3 SuperGroup
213    
214        my $superGroup = $sprout->SuperGroup($groupName);
215    
216    Return the name of the super-group containing the specified NMPDR genome
217    group. If no appropriate super-group can be found, an error will be
218    thrown.
219    
220    =over 4
221    
222    =item groupName
223    
224    Name of the group whose super-group is desired.
225    
226    =item RETURN
227    
228    Returns the name of the super-group containing the incoming group.
229    
230    =back
231    
232    =cut
233    
234    sub SuperGroup {
235        # Get the parameters.
236        my ($self, $groupName) = @_;
237        # Declare the return variable.
238        my $retVal;
239        # Get the group hash.
240        my %groupHash = $self->CheckGroupFile();
241        # Find the super-group genus.
242        $groupName =~ /([A-Z]\w+)/;
243        my $nameThing = $1;
244        # See if it's directly in the group hash.
245        if (exists $groupHash{$nameThing}) {
246            # Yes, then it's our result.
247            $retVal = $nameThing;
248        } else {
249            # No, so we have to search.
250            for my $superGroup (keys %groupHash) {
251                # Get this super-group's item list.
252                my $list = $groupHash{$superGroup}->{contents};
253                # Search it.
254                if (grep { $_->[0] eq $nameThing } @{$list}) {
255                    $retVal = $superGroup;
256                }
257            }
258            # Make sure we found something.
259            if (! $retVal) {
260                Confess("No super-group found for \"$groupName\".");
261            }
262        }
263        # Return the result.
264        return $retVal;
265    }
266    
267  =head3 MaxSegment  =head3 MaxSegment
268    
269  C<< my $length = $sprout->MaxSegment(); >>      my $length = $sprout->MaxSegment();
270    
271  This method returns the maximum permissible length of a feature segment. The length is important  This method returns the maximum permissible length of a feature segment. The length is important
272  because it enables us to make reasonable guesses at how to find features inside a particular  because it enables us to make reasonable guesses at how to find features inside a particular
# Line 166  Line 283 
283    
284  =head3 MaxSequence  =head3 MaxSequence
285    
286  C<< my $length = $sprout->MaxSequence(); >>      my $length = $sprout->MaxSequence();
287    
288  This method returns the maximum permissible length of a contig sequence. A contig is broken  This method returns the maximum permissible length of a contig sequence. A contig is broken
289  into sequences in order to save memory resources. In particular, when manipulating features,  into sequences in order to save memory resources. In particular, when manipulating features,
# Line 181  Line 298 
298    
299  =head3 Load  =head3 Load
300    
301  C<< $sprout->Load($rebuild); >>;      $sprout->Load($rebuild);;
302    
303  Load the database from files in the data directory, optionally re-creating the tables.  Load the database from files in the data directory, optionally re-creating the tables.
304    
# Line 221  Line 338 
338    
339  =head3 LoadUpdate  =head3 LoadUpdate
340    
341  C<< my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >>      my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList);
342    
343  Load updates to one or more database tables. This method enables the client to make changes to one  Load updates to one or more database tables. This method enables the client to make changes to one
344  or two tables without reloading the whole database. For each table, there must be a corresponding  or two tables without reloading the whole database. For each table, there must be a corresponding
# Line 267  Line 384 
384              Trace("No load file found for $tableName in $dataDir.") if T(0);              Trace("No load file found for $tableName in $dataDir.") if T(0);
385          } else {          } else {
386              # Attempt to load this table.              # Attempt to load this table.
387              my $result = $self->LoadTable($fileName, $tableName, $truncateFlag);              my $result = $self->LoadTable($fileName, $tableName, truncate => $truncateFlag);
388              # Accumulate the resulting statistics.              # Accumulate the resulting statistics.
389              $retVal->Accumulate($result);              $retVal->Accumulate($result);
390          }          }
# Line 278  Line 395 
395    
396  =head3 GenomeCounts  =head3 GenomeCounts
397    
398  C<< my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete); >>      my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete);
399    
400  Count the number of genomes in each domain. If I<$complete> is TRUE, only complete  Count the number of genomes in each domain. If I<$complete> is TRUE, only complete
401  genomes will be included in the counts.  genomes will be included in the counts.
# Line 323  Line 440 
440    
441  =head3 ContigCount  =head3 ContigCount
442    
443  C<< my $count = $sprout->ContigCount($genomeID); >>      my $count = $sprout->ContigCount($genomeID);
444    
445  Return the number of contigs for the specified genome ID.  Return the number of contigs for the specified genome ID.
446    
# Line 350  Line 467 
467      return $retVal;      return $retVal;
468  }  }
469    
470  =head3 GeneMenu  =head3 GenomeMenu
471    
472  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >>      my $html = $sprout->GenomeMenu(%options);
473    
474  Return an HTML select menu of genomes. Each genome will be an option in the menu,  Generate a genome selection control with the specified name and options.
475  and will be displayed by name with the ID and a contig count attached. The selection  This control is almost but not quite the same as the genome control in the
476  value will be the genome ID. The genomes will be sorted by genus/species name.  B<SearchHelper> class. Eventually, the two will be combined.
477    
478  =over 4  =over 4
479    
480  =item attributes  =item options
481    
482    Optional parameters for the control (see below).
483    
484    =item RETURN
485    
486    Returns the HTML for a genome selection control on a form (sometimes called a popup menu).
487    
488    =back
489    
490    The valid options are as follows.
491    
492    =over 4
493    
494    =item name
495    
496    Name to give this control for use in passing it to the form. The default is C<myGenomeControl>.
497    Terrible things will happen if you have two controls with the same name on the same page.
498    
499  Reference to a hash mapping attributes to values for the SELECT tag generated.  =item filter
500    
501  =item filterString  If specified, a filter for the list of genomes to display. The filter should be in the form of a
502    list reference. The first element of the list should be the filter string, and the remaining elements
503    the filter parameters.
504    
505    =item multiSelect
506    
507    If TRUE, then the user can select multiple genomes. If FALSE, the user can only select one genome.
508    
509    =item size
510    
511    Number of rows to display in the control. The default is C<10>
512    
513    =item id
514    
515    ID to give this control. The default is the value of the C<name> option. Nothing will work correctly
516    unless this ID is unique.
517    
518    =item selected
519    
520    A comma-delimited list of selected genomes, or a reference to a list of selected genomes. The
521    default is none.
522    
523    =item class
524    
525    If specified, a style class to assign to the genome control.
526    
527    =back
528    
529    =cut
530    
531    sub GenomeMenu {
532        # Get the parameters.
533        my ($self, %options) = @_;
534        # Get the control's name and ID.
535        my $menuName = $options{name} || $options{id} || 'myGenomeControl';
536        my $menuID = $options{id} || $menuName;
537        Trace("Genome menu name = $menuName with ID $menuID.") if T(3);
538        # Compute the IDs for the status display.
539        my $divID = "${menuID}_status";
540        my $urlID = "${menuID}_url";
541        # Compute the code to show selected genomes in the status area.
542        my $showSelect = "showSelected('$menuID', '$divID', '$urlID', 1000)";
543        # Check for single-select or multi-select.
544        my $multiSelect = $options{multiSelect} || 0;
545        # Get the style data.
546        my $class = $options{class} || '';
547        # Get the list of pre-selected items.
548        my $selections = $options{selected} || [];
549        if (ref $selections ne 'ARRAY') {
550            $selections = [ split /\s*,\s*/, $selections ];
551        }
552        my %selected = map { $_ => 1 } @{$selections};
553        # Extract the filter information. The default is no filtering. It can be passed as a tab-delimited
554        # string or a list reference.
555        my $filterParms = $options{filter} || "";
556        if (! ref $filterParms) {
557            $filterParms = [split /\t|\\t/, $filterParms];
558        }
559        my $filterString = shift @{$filterParms};
560        # Get a list of all the genomes in group order. In fact, we only need them ordered
561        # by name (genus,species,strain), but putting primary-group in front enables us to
562        # take advantage of an existing index.
563        my @genomeList = $self->GetAll(['Genome'], "$filterString ORDER BY Genome(primary-group), Genome(genus), Genome(species), Genome(unique-characterization)",
564                                       $filterParms,
565                                       [qw(Genome(primary-group) Genome(id) Genome(genus) Genome(species) Genome(unique-characterization) Genome(taxonomy) Genome(contigs))]);
566        # Create a hash to organize the genomes by group. Each group will contain a list of
567        # 2-tuples, the first element being the genome ID and the second being the genome
568        # name.
569        my %gHash = ();
570        for my $genome (@genomeList) {
571            # Get the genome data.
572            my ($group, $genomeID, $genus, $species, $strain, $taxonomy, $contigs) = @{$genome};
573            # Compute its name. This is the genus, species, strain (if any), and the contig count.
574            my $name = "$genus $species ";
575            $name .= "$strain " if $strain;
576            my $contigCount = ($contigs == 1 ? "" : ", $contigs contigs");
577            # Now we get the domain. The domain tells us the display style of the organism.
578            my ($domain) = split /\s*;\s*/, $taxonomy, 2;
579            # Now compute the display group. This is normally the primary group, but if the
580            # organism is supporting, we blank it out.
581            my $displayGroup = ($group eq $FIG_Config::otherGroup ? "" : $group);
582            # Push the genome into the group's list. Note that we use the real group
583            # name for the hash key here, not the display group name.
584            push @{$gHash{$group}}, [$genomeID, $name, $contigCount, $domain];
585        }
586        # We are almost ready to unroll the menu out of the group hash. The final step is to separate
587        # the supporting genomes by domain. First, we extract the NMPDR groups and sort them. They
588        # are sorted by the first capitalized word. Groups with "other" are sorted after groups
589        # that aren't "other". At some point, we will want to make this less complicated.
590        my %sortGroups = map { $_ =~ /(other)?(.*)([A-Z].+)/; "$3$1$2" => $_ }
591                             grep { $_ ne $FIG_Config::otherGroup } keys %gHash;
592        my @groups = map { $sortGroups{$_} } sort keys %sortGroups;
593        # Remember the number of NMPDR groups.
594        my $nmpdrGroupCount = scalar @groups;
595        # Loop through the supporting genomes, classifying them by domain. We'll also keep a list
596        # of the domains found.
597        my @otherGenomes = @{$gHash{$FIG_Config::otherGroup}};
598        my @domains = ();
599        for my $genomeData (@otherGenomes) {
600            my ($genomeID, $name, $contigCount, $domain) = @{$genomeData};
601            if (exists $gHash{$domain}) {
602                push @{$gHash{$domain}}, $genomeData;
603            } else {
604                $gHash{$domain} = [$genomeData];
605                push @domains, $domain;
606            }
607        }
608        # Add the domain groups at the end of the main group list. The main group list will now
609        # contain all the categories we need to display the genomes.
610        push @groups, sort @domains;
611        # Delete the supporting group.
612        delete $gHash{$FIG_Config::otherGroup};
613        # Now it gets complicated. We need a way to mark all the NMPDR genomes. We take advantage
614        # of the fact they come first in the list. We'll accumulate a count of the NMPDR genomes
615        # and use that to make the selections.
616        my $nmpdrCount = 0;
617        # Create the type counters.
618        my $groupCount = 1;
619        # Get the number of rows to display.
620        my $rows = $options{size} || 10;
621        # If we're multi-row, create an onChange event.
622        my $onChangeTag = ( $rows > 1 ? " onChange=\"$showSelect;\" onFocus=\"$showSelect;\"" : "" );
623        # Set up the multiple-select flag.
624        my $multipleTag = ($multiSelect ? " multiple" : "" );
625        # Set up the style class.
626        my $classTag = ($class ? " class=\"$class\"" : "" );
627        # Create the SELECT tag and stuff it into the output array.
628        my @lines = ("<SELECT name=\"$menuName\" id=\"$menuID\" $onChangeTag$multipleTag$classTag size=\"$rows\">");
629        # Loop through the groups.
630        for my $group (@groups) {
631            # Get the genomes in the group.
632            for my $genome (@{$gHash{$group}}) {
633                # If this is an NMPDR organism, we add an extra style and count it.
634                my $nmpdrStyle = "";
635                if ($nmpdrGroupCount > 0) {
636                    $nmpdrCount++;
637                    $nmpdrStyle = " Core";
638                }
639                # Get the organism ID, name, contig count, and domain.
640                my ($genomeID, $name, $contigCount, $domain) = @{$genome};
641                # See if we're pre-selected.
642                my $selectTag = ($selected{$genomeID} ? " SELECTED" : "");
643                # Compute the display name.
644                my $nameString = "$name ($genomeID$contigCount)";
645                # Generate the option tag.
646                my $optionTag = "<OPTION class=\"$domain$nmpdrStyle\" title=\"$group\" value=\"$genomeID\"$selectTag>$nameString</OPTION>";
647                push @lines, "    $optionTag";
648            }
649            # Record this group in the nmpdrGroup count. When that gets to 0, we've finished the NMPDR
650            # groups.
651            $nmpdrGroupCount--;
652        }
653        # Close the SELECT tag.
654        push @lines, "</SELECT>";
655        if ($rows > 1) {
656            # We're in a non-compact mode, so we need to add some selection helpers. First is
657            # the search box. This allows the user to type text and change which genomes are
658            # displayed. For multiple-select mode, we include a button that selects the displayed
659            # genes. For single-select mode, we use a plain label instead.
660            my $searchThingName = "${menuID}_SearchThing";
661            my $searchThingLabel = ($multiSelect ? "<INPUT type=\"button\" name=\"MacroSearch\" class=\"button\" value=\"Select genomes containing\" onClick=\"selectShowing('$menuID', '$searchThingName'); $showSelect;\" />"
662                                                 : "Show genomes containing");
663            push @lines, "<br />$searchThingLabel&nbsp;" .
664                         "<INPUT type=\"text\" id=\"$searchThingName\" name=\"$searchThingName\" size=\"30\" onKeyup=\"showTyped('$menuID', '$searchThingName');\" />" .
665                         Hint("GenomeControl", "Type here to filter the genomes displayed.") . "<br />";
666            # For multi-select mode, we also have buttons to set and clear selections.
667            if ($multiSelect) {
668                push @lines, "<INPUT type=\"button\" name=\"ClearAll\" class=\"bigButton\"  value=\"Clear All\" onClick=\"clearAll('$menuID'); $showSelect\" />";
669                push @lines, "<INPUT type=\"button\" name=\"SelectAll\" class=\"bigButton\" value=\"Select All\" onClick=\"selectAll('$menuID'); $showSelect\" />";
670                push @lines, "<INPUT type=\"button\" name=\"NMPDROnly\" class=\"bigButton\"  value=\"Select NMPDR\" onClick=\"selectSome('$menuID', $nmpdrCount, true); $showSelect;\" />";
671            }
672            # Add a hidden field we can use to generate organism page hyperlinks.
673            push @lines, "<INPUT type=\"hidden\" id=\"$urlID\" value=\"$FIG_Config::cgi_url/wiki/rest.cgi/NmpdrPlugin/SeedViewer?page=Organism;organism=\" />";
674            # Add the status display. This tells the user what's selected no matter where the list is scrolled.
675            push @lines, "<DIV id=\"$divID\" class=\"Panel\"></DIV>";
676        }
677        # Assemble all the lines into a string.
678        my $retVal = join("\n", @lines, "");
679        # Return the result.
680        return $retVal;
681    }
682    
 A filter string for use in selecting the genomes. The filter string must conform  
 to the rules for the C<< ERDB->Get >> method.  
683    
684  =item params  =head3 Stem
685    
686  Reference to a list of values to be substituted in for the parameter marks in      my $stem = $sprout->Stem($word);
 the filter string.  
687    
688  =item selected (optional)  Return the stem of the specified word, or C<undef> if the word is not
689    stemmable. Note that even if the word is stemmable, the stem may be
690    the same as the original word.
691    
692  ID of the genome to be initially selected.  =over 4
693    
694  =item fast (optional)  =item word
695    
696  If specified and TRUE, the contig counts will be omitted to improve performance.  Word to convert into a stem.
697    
698  =item RETURN  =item RETURN
699    
700  Returns an HTML select menu with the specified genomes as selectable options.  Returns a stem of the word (which may be the word itself), or C<undef> if
701    the word is not stemmable.
702    
703  =back  =back
704    
705  =cut  =cut
706    
707  sub GeneMenu {  sub Stem {
708      # Get the parameters.      # Get the parameters.
709      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;      my ($self, $word) = @_;
710      my $slowMode = ! $fast;      # Declare the return variable.
711      # Default to nothing selected. This prevents an execution warning if "$selected"      my $retVal;
712      # is undefined.      # See if it's stemmable.
713      $selected = "" unless defined $selected;      if ($word =~ /^[A-Za-z]+$/) {
714      Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);          # Compute the stem. Currently, it's just the word.
715      # Start the menu.          my $stem = $word;
716      my $retVal = "<select " .          # Check to see if it's long enough.
717          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .          if (length $stem >= 3) {
718          ">\n";              # Yes, keep it.
719      # Get the genomes.              $retVal = $stem;
720      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',          } else {
721                                                                       'Genome(genus)',              # No, use the original word.
722                                                                       'Genome(species)',              $retVal = $word;
723                                                                       'Genome(unique-characterization)']);          }
     # Sort them by name.  
     my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;  
     # Loop through the genomes, creating the option tags.  
     for my $genomeData (@sorted) {  
         # Get the data for this genome.  
         my ($genomeID, $genus, $species, $strain) = @{$genomeData};  
         # Get the contig count.  
         my $contigInfo = "";  
         if ($slowMode) {  
             my $count = $self->ContigCount($genomeID);  
             my $counting = ($count == 1 ? "contig" : "contigs");  
             $contigInfo = "[$count $counting]";  
         }  
         # Find out if we're selected.  
         my $selectOption = ($selected eq $genomeID ? " selected" : "");  
         # Build the option tag.  
         $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";  
724      }      }
     # Close the SELECT tag.  
     $retVal .= "</select>\n";  
725      # Return the result.      # Return the result.
726      return $retVal;      return $retVal;
727  }  }
728    
729    
730  =head3 Build  =head3 Build
731    
732  C<< $sprout->Build(); >>      $sprout->Build();
733    
734  Build the database. The database will be cleared and the tables re-created from the metadata.  Build the database. The database will be cleared and the tables re-created from the metadata.
735  This method is useful when a database is brand new or when the database definition has  This method is useful when a database is brand new or when the database definition has
# Line 450  Line 746 
746    
747  =head3 Genomes  =head3 Genomes
748    
749  C<< my @genomes = $sprout->Genomes(); >>      my @genomes = $sprout->Genomes();
750    
751  Return a list of all the genome IDs.  Return a list of all the genome IDs.
752    
# Line 467  Line 763 
763    
764  =head3 GenusSpecies  =head3 GenusSpecies
765    
766  C<< my $infoString = $sprout->GenusSpecies($genomeID); >>      my $infoString = $sprout->GenusSpecies($genomeID);
767    
768  Return the genus, species, and unique characterization for a genome.  Return the genus, species, and unique characterization for a genome.
769    
# Line 499  Line 795 
795    
796  =head3 FeaturesOf  =head3 FeaturesOf
797    
798  C<< my @features = $sprout->FeaturesOf($genomeID, $ftype); >>      my @features = $sprout->FeaturesOf($genomeID, $ftype);
799    
800  Return a list of the features relevant to a specified genome.  Return a list of the features relevant to a specified genome.
801    
# Line 544  Line 840 
840    
841  =head3 FeatureLocation  =head3 FeatureLocation
842    
843  C<< my @locations = $sprout->FeatureLocation($featureID); >>      my @locations = $sprout->FeatureLocation($featureID);
844    
845  Return the location of a feature in its genome's contig segments. In a list context, this method  Return the location of a feature in its genome's contig segments. In a list context, this method
846  will return a list of the locations. In a scalar context, it will return the locations as a space-  will return a list of the locations. In a scalar context, it will return the locations as a space-
# Line 568  Line 864 
864  =item RETURN  =item RETURN
865    
866  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
867  context and as a comma-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context. An empty list means the feature
868    wasn't found.
869    
870  =back  =back
871    
872  =cut  =cut
873  #: Return Type @;  
 #: Return Type $;  
874  sub FeatureLocation {  sub FeatureLocation {
875      # Get the parameters.      # Get the parameters.
876      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
877      # Create a query for the feature locations.      # Declare the return variable.
     my $query = $self->Get(['IsLocatedIn'], "IsLocatedIn(from-link) = ? ORDER BY IsLocatedIn(locN)",  
                            [$featureID]);  
     # Create the return list.  
878      my @retVal = ();      my @retVal = ();
879      # Set up the variables used to determine if we have adjacent segments. This initial setup will      # Get the feature record.
880      # not match anything.      my $object = $self->GetEntity('Feature', $featureID);
881      my ($prevContig, $prevBeg, $prevDir, $prevLen) = ("", 0, "0", 0);      # Only proceed if we found it.
882      # Loop through the query results, creating location specifiers.      if (defined $object) {
883      while (my $location = $query->Fetch()) {          # Get the location string.
884          # Get the location parameters.          my $locString = $object->PrimaryValue('Feature(location-string)');
885          my ($contigID, $beg, $dir, $len) = $location->Values(['IsLocatedIn(to-link)',          # Create the return list.
886              'IsLocatedIn(beg)', 'IsLocatedIn(dir)', 'IsLocatedIn(len)']);          @retVal = split /\s*,\s*/, $locString;
         # Check to see if we are adjacent to the previous segment.  
         if ($prevContig eq $contigID && $dir eq $prevDir) {  
             # Here the new segment is in the same direction on the same contig. Insure the  
             # new segment's beginning is next to the old segment's end.  
             if ($dir eq "-" && $beg + $len == $prevBeg) {  
                 # Here we're merging two backward blocks, so we keep the new begin point  
                 # and adjust the length.  
                 $len += $prevLen;  
                 # Pop the old segment off. The new one will replace it later.  
                 pop @retVal;  
             } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) {  
                 # Here we need to merge two forward blocks. Adjust the beginning and  
                 # length values to include both segments.  
                 $beg = $prevBeg;  
                 $len += $prevLen;  
                 # Pop the old segment off. The new one will replace it later.  
                 pop @retVal;  
             }  
         }  
         # Remember this specifier for the adjacent-segment test the next time through.  
         ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);  
         # Compute the initial base pair.  
         my $start = ($dir eq "+" ? $beg : $beg + $len - 1);  
         # Add the specifier to the list.  
         push @retVal, "${contigID}_$start$dir$len";  
887      }      }
888      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
889      return (wantarray ? @retVal : join(',', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
# Line 623  Line 891 
891    
892  =head3 ParseLocation  =head3 ParseLocation
893    
894  C<< my ($contigID, $start, $dir, $len) = Sprout::ParseLocation($location); >>      my ($contigID, $start, $dir, $len) = Sprout::ParseLocation($location);
895    
896  Split a location specifier into the contig ID, the starting point, the direction, and the  Split a location specifier into the contig ID, the starting point, the direction, and the
897  length.  length.
# Line 642  Line 910 
910  =back  =back
911    
912  =cut  =cut
913  #: Return Type @;  
914  sub ParseLocation {  sub ParseLocation {
915      # Get the parameter. Note that if we're called as an instance method, we ignore      # Get the parameter. Note that if we're called as an instance method, we ignore
916      # the first parameter.      # the first parameter.
# Line 669  Line 937 
937    
938  =head3 PointLocation  =head3 PointLocation
939    
940  C<< my $found = Sprout::PointLocation($location, $point); >>      my $found = Sprout::PointLocation($location, $point);
941    
942  Return the offset into the specified location of the specified point on the contig. If  Return the offset into the specified location of the specified point on the contig. If
943  the specified point is before the location, a negative value will be returned. If it is  the specified point is before the location, a negative value will be returned. If it is
# Line 698  Line 966 
966  =back  =back
967    
968  =cut  =cut
969  #: Return Type $;  
970  sub PointLocation {  sub PointLocation {
971      # Get the parameter. Note that if we're called as an instance method, we ignore      # Get the parameter. Note that if we're called as an instance method, we ignore
972      # the first parameter.      # the first parameter.
# Line 721  Line 989 
989    
990  =head3 DNASeq  =head3 DNASeq
991    
992  C<< my $sequence = $sprout->DNASeq(\@locationList); >>      my $sequence = $sprout->DNASeq(\@locationList);
993    
994  This method returns the DNA sequence represented by a list of locations. The list of locations  This method returns the DNA sequence represented by a list of locations. The list of locations
995  should be of the form returned by L</featureLocation> when in a list context. In other words,  should be of the form returned by L</featureLocation> when in a list context. In other words,
# Line 805  Line 1073 
1073    
1074  =head3 AllContigs  =head3 AllContigs
1075    
1076  C<< my @idList = $sprout->AllContigs($genomeID); >>      my @idList = $sprout->AllContigs($genomeID);
1077    
1078  Return a list of all the contigs for a genome.  Return a list of all the contigs for a genome.
1079    
# Line 835  Line 1103 
1103    
1104  =head3 GenomeLength  =head3 GenomeLength
1105    
1106  C<< my $length = $sprout->GenomeLength($genomeID); >>      my $length = $sprout->GenomeLength($genomeID);
1107    
1108  Return the length of the specified genome in base pairs.  Return the length of the specified genome in base pairs.
1109    
# Line 870  Line 1138 
1138    
1139  =head3 FeatureCount  =head3 FeatureCount
1140    
1141  C<< my $count = $sprout->FeatureCount($genomeID, $type); >>      my $count = $sprout->FeatureCount($genomeID, $type);
1142    
1143  Return the number of features of the specified type in the specified genome.  Return the number of features of the specified type in the specified genome.
1144    
# Line 905  Line 1173 
1173    
1174  =head3 GenomeAssignments  =head3 GenomeAssignments
1175    
1176  C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>      my $fidHash = $sprout->GenomeAssignments($genomeID);
1177    
1178  Return a list of a genome's assigned features. The return hash will contain each  Return a list of a genome's assigned features. The return hash will contain each
1179  assigned feature of the genome mapped to the text of its most recent functional  assigned feature of the genome mapped to the text of its most recent functional
# Line 948  Line 1216 
1216    
1217  =head3 ContigLength  =head3 ContigLength
1218    
1219  C<< my $length = $sprout->ContigLength($contigID); >>      my $length = $sprout->ContigLength($contigID);
1220    
1221  Compute the length of a contig.  Compute the length of a contig.
1222    
# Line 987  Line 1255 
1255    
1256  =head3 ClusterPEGs  =head3 ClusterPEGs
1257    
1258  C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >>      my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs);
1259    
1260  Cluster the PEGs in a list according to the cluster coding scheme of the specified  Cluster the PEGs in a list according to the cluster coding scheme of the specified
1261  subsystem. In order for this to work properly, the subsystem object must have  subsystem. In order for this to work properly, the subsystem object must have
1262  been used recently to retrieve the PEGs using the B<get_pegs_from_cell> method.  been used recently to retrieve the PEGs using the B<get_pegs_from_cell> or
1263  This causes the cluster numbers to be pulled into the subsystem's color hash.  B<get_row> methods. This causes the cluster numbers to be pulled into the
1264  If a PEG is not found in the color hash, it will not appear in the output  subsystem's color hash. If a PEG is not found in the color hash, it will not
1265  sequence.  appear in the output sequence.
1266    
1267  =over 4  =over 4
1268    
# Line 1035  Line 1303 
1303    
1304  =head3 GenesInRegion  =head3 GenesInRegion
1305    
1306  C<< my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop); >>      my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop);
1307    
1308  List the features which overlap a specified region in a contig.  List the features which overlap a specified region in a contig.
1309    
# Line 1064  Line 1332 
1332  =back  =back
1333    
1334  =cut  =cut
1335  #: Return Type @@;  
1336  sub GenesInRegion {  sub GenesInRegion {
1337      # Get the parameters.      # Get the parameters.
1338      my ($self, $contigID, $start, $stop) = @_;      my ($self, $contigID, $start, $stop) = @_;
1339      # Get the maximum segment length.      # Get the maximum segment length.
1340      my $maximumSegmentLength = $self->MaxSegment;      my $maximumSegmentLength = $self->MaxSegment;
     # Create a hash to receive the feature list. We use a hash so that we can eliminate  
     # duplicates easily. The hash key will be the feature ID. The value will be a two-element  
     # containing the minimum and maximum offsets. We will use the offsets to sort the results  
     # when we're building the result set.  
     my %featuresFound = ();  
1341      # Prime the values we'll use for the returned beginning and end.      # Prime the values we'll use for the returned beginning and end.
1342      my @initialMinMax = ($self->ContigLength($contigID), 0);      my @initialMinMax = ($self->ContigLength($contigID), 0);
1343      my ($min, $max) = @initialMinMax;      my ($min, $max) = @initialMinMax;
1344      # Create a table of parameters for each query. Each query looks for features travelling in      # Get the overlapping features.
1345        my @featureObjects = $self->GeneDataInRegion($contigID, $start, $stop);
1346        # We'l use this hash to help us track the feature IDs and sort them. The key is the
1347        # feature ID and the value is a [$left,$right] pair indicating the maximum extent
1348        # of the feature's locations.
1349        my %featureMap = ();
1350        # Loop through them to do the begin/end analysis.
1351        for my $featureObject (@featureObjects) {
1352            # Get the feature's location string. This may contain multiple actual locations.
1353            my ($locations, $fid) = $featureObject->Values([qw(Feature(location-string) Feature(id))]);
1354            my @locationSegments = split /\s*,\s*/, $locations;
1355            # Loop through the locations.
1356            for my $locationSegment (@locationSegments) {
1357                # Construct an object for the location.
1358                my $locationObject = BasicLocation->new($locationSegment);
1359                # Merge the current segment's begin and end into the min and max.
1360                my ($left, $right) = ($locationObject->Left, $locationObject->Right);
1361                my ($beg, $end);
1362                if (exists $featureMap{$fid}) {
1363                    ($beg, $end) = @{$featureMap{$fid}};
1364                    $beg = $left if $left < $beg;
1365                    $end = $right if $right > $end;
1366                } else {
1367                    ($beg, $end) = ($left, $right);
1368                }
1369                $min = $beg if $beg < $min;
1370                $max = $end if $end > $max;
1371                # Store the feature's new extent back into the hash table.
1372                $featureMap{$fid} = [$beg, $end];
1373            }
1374        }
1375        # Now we must compute the list of the IDs for the features found. We start with a list
1376        # of midpoints / feature ID pairs. (It's not really a midpoint, it's twice the midpoint,
1377        # but the result of the sort will be the same.)
1378        my @list = map { [$featureMap{$_}->[0] + $featureMap{$_}->[1], $_] } keys %featureMap;
1379        # Now we sort by midpoint and yank out the feature IDs.
1380        my @retVal = map { $_->[1] } sort { $a->[0] <=> $b->[0] } @list;
1381        # Return it along with the min and max.
1382        return (\@retVal, $min, $max);
1383    }
1384    
1385    =head3 GeneDataInRegion
1386    
1387        my @featureList = $sprout->GenesInRegion($contigID, $start, $stop);
1388    
1389    List the features which overlap a specified region in a contig.
1390    
1391    =over 4
1392    
1393    =item contigID
1394    
1395    ID of the contig containing the region of interest.
1396    
1397    =item start
1398    
1399    Offset of the first residue in the region of interest.
1400    
1401    =item stop
1402    
1403    Offset of the last residue in the region of interest.
1404    
1405    =item RETURN
1406    
1407    Returns a list of B<ERDBObjects> for the desired features. Each object will
1408    contain a B<Feature> record.
1409    
1410    =back
1411    
1412    =cut
1413    
1414    sub GeneDataInRegion {
1415        # Get the parameters.
1416        my ($self, $contigID, $start, $stop) = @_;
1417        # Get the maximum segment length.
1418        my $maximumSegmentLength = $self->MaxSegment;
1419        # Create a hash to receive the feature list. We use a hash so that we can eliminate
1420        # duplicates easily. The hash key will be the feature ID. The value will be the feature's
1421        # ERDBObject from the query.
1422        my %featuresFound = ();
1423        # Create a table of parameters for the queries. Each query looks for features travelling in
1424      # a particular direction. The query parameters include the contig ID, the feature direction,      # a particular direction. The query parameters include the contig ID, the feature direction,
1425      # the lowest possible start position, and the highest possible start position. This works      # the lowest possible start position, and the highest possible start position. This works
1426      # because each feature segment length must be no greater than the maximum segment length.      # because each feature segment length must be no greater than the maximum segment length.
# Line 1087  Line 1429 
1429      # Loop through the query parameters.      # Loop through the query parameters.
1430      for my $parms (values %queryParms) {      for my $parms (values %queryParms) {
1431          # Create the query.          # Create the query.
1432          my $query = $self->Get(['IsLocatedIn'],          my $query = $self->Get([qw(Feature IsLocatedIn)],
1433              "IsLocatedIn(to-link)= ? AND IsLocatedIn(dir) = ? AND IsLocatedIn(beg) >= ? AND IsLocatedIn(beg) <= ?",              "IsLocatedIn(to-link)= ? AND IsLocatedIn(dir) = ? AND IsLocatedIn(beg) >= ? AND IsLocatedIn(beg) <= ?",
1434              $parms);              $parms);
1435          # Loop through the feature segments found.          # Loop through the feature segments found.
1436          while (my $segment = $query->Fetch) {          while (my $segment = $query->Fetch) {
1437              # Get the data about this segment.              # Get the data about this segment.
1438              my ($featureID, $dir, $beg, $len) = $segment->Values(['IsLocatedIn(from-link)',              my ($featureID, $contig, $dir, $beg, $len) = $segment->Values([qw(IsLocatedIn(from-link)
1439                  'IsLocatedIn(dir)', 'IsLocatedIn(beg)', 'IsLocatedIn(len)']);                  IsLocatedIn(to-link) IsLocatedIn(dir) IsLocatedIn(beg) IsLocatedIn(len))]);
1440              # Determine if this feature actually overlaps the region. The query insures that              # Determine if this feature segment actually overlaps the region. The query insures that
1441              # this will be the case if the segment is the maximum length, so to fine-tune              # this will be the case if the segment is the maximum length, so to fine-tune
1442              # the results we insure that the inequality from the query holds using the actual              # the results we insure that the inequality from the query holds using the actual
1443              # length.              # length.
1444              my ($found, $end) = (0, 0);              my $loc = BasicLocation->new($contig, $beg, $dir, $len);
1445              if ($dir eq '+') {              my $found = $loc->Overlap($start, $stop);
                 $end = $beg + $len;  
                 if ($end >= $start) {  
                     # Denote we found a useful feature.  
                     $found = 1;  
                 }  
             } elsif ($dir eq '-') {  
                 # Note we switch things around so that the beginning is to the left of the  
                 # ending.  
                 ($beg, $end) = ($beg - $len, $beg);  
                 if ($beg <= $stop) {  
                     # Denote we found a useful feature.  
                     $found = 1;  
                 }  
             }  
1446              if ($found) {              if ($found) {
1447                  # Here we need to record the feature and update the minima and maxima. First,                  # Save this feature in the result list.
1448                  # get the current entry for the specified feature.                  $featuresFound{$featureID} = $segment;
                 my ($loc1, $loc2) = (exists $featuresFound{$featureID} ? @{$featuresFound{$featureID}} :  
                                      @initialMinMax);  
                 # Merge the current segment's begin and end into the feature begin and end and the  
                 # global min and max.  
                 if ($beg < $loc1) {  
                     $loc1 = $beg;  
                     $min = $beg if $beg < $min;  
                 }  
                 if ($end > $loc2) {  
                     $loc2 = $end;  
                     $max = $end if $end > $max;  
1449                  }                  }
                 # Store the entry back into the hash table.  
                 $featuresFound{$featureID} = [$loc1, $loc2];  
1450              }              }
1451          }          }
1452      }      # Return the ERDB objects for the features found.
1453      # Now we must compute the list of the IDs for the features found. We start with a list      return values %featuresFound;
     # of midpoints / feature ID pairs. (It's not really a midpoint, it's twice the midpoint,  
     # but the result of the sort will be the same.)  
     my @list = map { [$featuresFound{$_}->[0] + $featuresFound{$_}->[1], $_] } keys %featuresFound;  
     # Now we sort by midpoint and yank out the feature IDs.  
     my @retVal = map { $_->[1] } sort { $a->[0] <=> $b->[0] } @list;  
     # Return it along with the min and max.  
     return (\@retVal, $min, $max);  
1454  }  }
1455    
1456  =head3 FType  =head3 FType
1457    
1458  C<< my $ftype = $sprout->FType($featureID); >>      my $ftype = $sprout->FType($featureID);
1459    
1460  Return the type of a feature.  Return the type of a feature.
1461    
# Line 1177  Line 1485 
1485    
1486  =head3 FeatureAnnotations  =head3 FeatureAnnotations
1487    
1488  C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >>      my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag);
1489    
1490  Return the annotations of a feature.  Return the annotations of a feature.
1491    
# Line 1240  Line 1548 
1548    
1549  =head3 AllFunctionsOf  =head3 AllFunctionsOf
1550    
1551  C<< my %functions = $sprout->AllFunctionsOf($featureID); >>      my %functions = $sprout->AllFunctionsOf($featureID);
1552    
1553  Return all of the functional assignments for a particular feature. The data is returned as a  Return all of the functional assignments for a particular feature. The data is returned as a
1554  hash of functional assignments to user IDs. A functional assignment is a type of annotation,  hash of functional assignments to user IDs. A functional assignment is a type of annotation,
# Line 1295  Line 1603 
1603    
1604  =head3 FunctionOf  =head3 FunctionOf
1605    
1606  C<< my $functionText = $sprout->FunctionOf($featureID, $userID); >>      my $functionText = $sprout->FunctionOf($featureID, $userID);
1607    
1608  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1609    
# Line 1399  Line 1707 
1707    
1708  =head3 FunctionsOf  =head3 FunctionsOf
1709    
1710  C<< my @functionList = $sprout->FunctionOf($featureID, $userID); >>      my @functionList = $sprout->FunctionOf($featureID, $userID);
1711    
1712  Return the functional assignments of a particular feature.  Return the functional assignments of a particular feature.
1713    
# Line 1471  Line 1779 
1779    
1780  =head3 BBHList  =head3 BBHList
1781    
1782  C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >>      my $bbhHash = $sprout->BBHList($genomeID, \@featureList);
1783    
1784  Return a hash mapping the features in a specified list to their bidirectional best hits  Return a hash mapping the features in a specified list to their bidirectional best hits
1785  on a specified target genome.  on a specified target genome.
# Line 1521  Line 1829 
1829    
1830  =head3 SimList  =head3 SimList
1831    
1832  C<< my %similarities = $sprout->SimList($featureID, $count); >>      my %similarities = $sprout->SimList($featureID, $count);
1833    
1834  Return a list of the similarities to the specified feature.  Return a list of the similarities to the specified feature.
1835    
# Line 1557  Line 1865 
1865    
1866  =head3 IsComplete  =head3 IsComplete
1867    
1868  C<< my $flag = $sprout->IsComplete($genomeID); >>      my $flag = $sprout->IsComplete($genomeID);
1869    
1870  Return TRUE if the specified genome is complete, else FALSE.  Return TRUE if the specified genome is complete, else FALSE.
1871    
# Line 1585  Line 1893 
1893      my $genomeData = $self->GetEntity('Genome', $genomeID);      my $genomeData = $self->GetEntity('Genome', $genomeID);
1894      if ($genomeData) {      if ($genomeData) {
1895          # The genome exists, so get the completeness flag.          # The genome exists, so get the completeness flag.
1896          ($retVal) = $genomeData->Value('Genome(complete)');          $retVal = $genomeData->PrimaryValue('Genome(complete)');
1897      }      }
1898      # Return the result.      # Return the result.
1899      return $retVal;      return $retVal;
# Line 1593  Line 1901 
1901    
1902  =head3 FeatureAliases  =head3 FeatureAliases
1903    
1904  C<< my @aliasList = $sprout->FeatureAliases($featureID); >>      my @aliasList = $sprout->FeatureAliases($featureID);
1905    
1906  Return a list of the aliases for a specified feature.  Return a list of the aliases for a specified feature.
1907    
# Line 1616  Line 1924 
1924      # Get the parameters.      # Get the parameters.
1925      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1926      # Get the desired feature's aliases      # Get the desired feature's aliases
1927      my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(alias)']);      my @retVal = $self->GetFlat(['IsAliasOf'], "IsAliasOf(to-link) = ?", [$featureID], 'IsAliasOf(from-link)');
1928      # Return the result.      # Return the result.
1929      return @retVal;      return @retVal;
1930  }  }
1931    
1932  =head3 GenomeOf  =head3 GenomeOf
1933    
1934  C<< my $genomeID = $sprout->GenomeOf($featureID); >>      my $genomeID = $sprout->GenomeOf($featureID);
1935    
1936  Return the genome that contains a specified feature or contig.  Return the genome that contains a specified feature or contig.
1937    
# Line 1645  Line 1953 
1953  sub GenomeOf {  sub GenomeOf {
1954      # Get the parameters.      # Get the parameters.
1955      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
     # Create a query to find the genome associated with the incoming ID.  
     my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ? OR HasContig(to-link) = ?",  
                            [$featureID, $featureID]);  
1956      # Declare the return value.      # Declare the return value.
1957      my $retVal;      my $retVal;
1958      # Get the genome ID.      # Parse the genome ID from the feature ID.
1959      if (my $relationship = $query->Fetch()) {      if ($featureID =~ /^fig\|(\d+\.\d+)/) {
1960          ($retVal) = $relationship->Value('HasContig(from-link)');          $retVal = $1;
1961        } else {
1962            # Find the feature by alias.
1963            my ($realFeatureID) = $self->FeaturesByAlias($featureID);
1964            if ($realFeatureID && $realFeatureID =~ /^fig\|(\d+\.\d+)/) {
1965                $retVal = $1;
1966            } else {
1967                # Use the external table.
1968                my ($org) = $self->GetFlat(['ExternalAliasOrg'], "ExternalAliasOrg(id) = ?",
1969                                           [$featureID], "ExternalAliasOrg(org)");
1970                if ($org) {
1971                    $retVal = $org;
1972                } else {
1973                    Confess("Invalid feature ID $featureID.");
1974                }
1975            }
1976      }      }
1977      # Return the value found.      # Return the value found.
1978      return $retVal;      return $retVal;
# Line 1660  Line 1980 
1980    
1981  =head3 CoupledFeatures  =head3 CoupledFeatures
1982    
1983  C<< my %coupleHash = $sprout->CoupledFeatures($featureID); >>      my %coupleHash = $sprout->CoupledFeatures($featureID);
1984    
1985  Return the features functionally coupled with a specified feature. Features are considered  Return the features functionally coupled with a specified feature. Features are considered
1986  functionally coupled if they tend to be clustered on the same chromosome.  functionally coupled if they tend to be clustered on the same chromosome.
# Line 1682  Line 2002 
2002  sub CoupledFeatures {  sub CoupledFeatures {
2003      # Get the parameters.      # Get the parameters.
2004      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2005        # Ask the coupling server for the data.
2006      Trace("Looking for features coupled to $featureID.") if T(coupling => 3);      Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
2007      # Create a query to retrieve the functionally-coupled features.      my @rawPairs = FIGRules::NetCouplingData('coupled_to', id1 => $featureID);
2008      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      Trace(scalar(@rawPairs) . " couplings returned.") if T(coupling => 3);
2009                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);      # Form them into a hash.
     # This value will be set to TRUE if we find at least one coupled feature.  
     my $found = 0;  
     # Create the return hash.  
2010      my %retVal = ();      my %retVal = ();
2011      # Retrieve the relationship records and store them in the hash.      for my $pair (@rawPairs) {
2012      while (my $clustering = $query->Fetch()) {          # Get the feature ID and score.
2013          # Get the ID and score of the coupling.          my ($featureID2, $score) = @{$pair};
2014          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          # Only proceed if the feature is in NMPDR.
2015                                                          'Coupling(score)']);          if ($self->_CheckFeature($featureID2)) {
2016          Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);              $retVal{$featureID2} = $score;
2017          # Get the other feature that participates in the coupling.          }
         my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],  
                                            "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",  
                                            [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');  
         Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);  
         # Attach the other feature's score to its ID.  
         $retVal{$otherFeatureID} = $score;  
         $found = 1;  
2018      }      }
2019      # Functional coupling is reflexive. If we found at least one coupled feature, we must add      # Functional coupling is reflexive. If we found at least one coupled feature, we must add
2020      # the incoming feature as well.      # the incoming feature as well.
2021      if ($found) {      if (keys %retVal) {
2022          $retVal{$featureID} = 9999;          $retVal{$featureID} = 9999;
2023      }      }
2024      # Return the hash.      # Return the hash.
# Line 1716  Line 2027 
2027    
2028  =head3 CouplingEvidence  =head3 CouplingEvidence
2029    
2030  C<< my @evidence = $sprout->CouplingEvidence($peg1, $peg2); >>      my @evidence = $sprout->CouplingEvidence($peg1, $peg2);
2031    
2032  Return the evidence for a functional coupling.  Return the evidence for a functional coupling.
2033    
# Line 1764  Line 2075 
2075      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
2076      # Declare the return variable.      # Declare the return variable.
2077      my @retVal = ();      my @retVal = ();
2078      # Our first task is to find out the nature of the coupling: whether or not      # Get the coupling and evidence data.
2079      # it exists, its score, and whether the features are stored in the same      my @rawData = FIGRules::NetCouplingData('coupling_evidence', id1 => $peg1, id2 => $peg2);
2080      # order as the ones coming in.      # Loop through the raw data, saving the ones that are in NMPDR genomes.
2081      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);      for my $rawTuple (@rawData) {
2082      # Only proceed if a coupling exists.          if ($self->_CheckFeature($rawTuple->[0]) && $self->_CheckFeature($rawTuple->[1])) {
2083      if ($couplingID) {              push @retVal, $rawTuple;
         # Determine the ordering to place on the evidence items. If we're  
         # inverted, we want to see feature 2 before feature 1 (descending); otherwise,  
         # we want feature 1 before feature 2 (normal).  
         Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);  
         my $ordering = ($inverted ? "DESC" : "");  
         # Get the coupling evidence.  
         my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],  
                                           "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",  
                                           [$couplingID],  
                                           ['PCH(used)', 'UsesAsEvidence(to-link)']);  
         # Loop through the evidence items. Each piece of evidence is represented by two  
         # positions in the evidence list, one for each feature on the other side of the  
         # evidence link. If at some point we want to generalize to couplings with  
         # more than two positions, this section of code will need to be re-done.  
         while (@evidenceList > 0) {  
             my $peg1Data = shift @evidenceList;  
             my $peg2Data = shift @evidenceList;  
             Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);  
             push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];  
         }  
         Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);  
2084      }      }
     # Return the result.  
     return @retVal;  
 }  
   
 =head3 GetCoupling  
   
 C<< my ($couplingID, $inverted, $score) = $sprout->GetCoupling($peg1, $peg2); >>  
   
 Return the coupling (if any) for the specified pair of PEGs. If a coupling  
 exists, we return the coupling ID along with an indicator of whether the  
 coupling is stored as C<(>I<$peg1>C<, >I<$peg2>C<)> or C<(>I<$peg2>C<, >I<$peg1>C<)>.  
 In the second case, we say the coupling is I<inverted>. The importance of an  
 inverted coupling is that the PEGs in the evidence will appear in reverse order.  
   
 =over 4  
   
 =item peg1  
   
 ID of the feature of interest.  
   
 =item peg2  
   
 ID of the potentially coupled feature.  
   
 =item RETURN  
   
 Returns a three-element list. The first element contains the database ID of  
 the coupling. The second element is FALSE if the coupling is stored in the  
 database in the caller specified order and TRUE if it is stored in the  
 inverted order. The third element is the coupling's score. If the coupling  
 does not exist, all three list elements will be C<undef>.  
   
 =back  
   
 =cut  
 #: Return Type $%@;  
 sub GetCoupling {  
     # Get the parameters.  
     my ($self, $peg1, $peg2) = @_;  
     # Declare the return values. We'll start with the coupling ID and undefine the  
     # flag and score until we have more information.  
     my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);  
     # Find the coupling data.  
     my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],  
                                  "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",  
                                  [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);  
     # Check to see if we found anything.  
     if (!@pegs) {  
         Trace("No coupling found.") if T(Coupling => 4);  
         # No coupling, so undefine the return value.  
         $retVal = undef;  
     } else {  
         # We have a coupling! Get the score and check for inversion.  
         $score = $pegs[0]->[1];  
         my $firstFound = $pegs[0]->[0];  
         $inverted = ($firstFound ne $peg1);  
         Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);  
2085      }      }
2086      # Return the result.      # Return the result.
2087      return ($retVal, $inverted, $score);      return @retVal;
2088  }  }
2089    
2090  =head3 GetSynonymGroup  =head3 GetSynonymGroup
2091    
2092  C<< my $id = $sprout->GetSynonymGroup($fid); >>      my $id = $sprout->GetSynonymGroup($fid);
2093    
2094  Return the synonym group name for the specified feature.  Return the synonym group name for the specified feature.
2095    
# Line 1895  Line 2128 
2128    
2129  =head3 GetBoundaries  =head3 GetBoundaries
2130    
2131  C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>      my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList);
2132    
2133  Determine the begin and end boundaries for the locations in a list. All of the  Determine the begin and end boundaries for the locations in a list. All of the
2134  locations must belong to the same contig and have mostly the same direction in  locations must belong to the same contig and have mostly the same direction in
# Line 1957  Line 2190 
2190      return ($contig, $beg, $end);      return ($contig, $beg, $end);
2191  }  }
2192    
 =head3 CouplingID  
   
 C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>  
   
 Return the coupling ID for a pair of feature IDs.  
   
 The coupling ID is currently computed by joining the feature IDs in  
 sorted order with a space. Client modules (that is, modules which  
 use Sprout) should not, however, count on this always being the  
 case. This method provides a way for abstracting the concept of a  
 coupling ID. All that we know for sure about it is that it can be  
 generated easily from the feature IDs and the order of the IDs  
 in the parameter list does not matter (i.e. C<CouplingID("a1", "b1")>  
 will have the same value as C<CouplingID("b1", "a1")>.  
   
 =over 4  
   
 =item peg1  
   
 First feature of interest.  
   
 =item peg2  
   
 Second feature of interest.  
   
 =item RETURN  
   
 Returns the ID that would be used to represent a functional coupling of  
 the two specified PEGs.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub CouplingID {  
     my ($self, @pegs) = @_;  
     return $self->DigestKey(join " ", sort @pegs);  
 }  
   
2193  =head3 ReadFasta  =head3 ReadFasta
2194    
2195  C<< my %sequenceData = Sprout::ReadFasta($fileName, $prefix); >>      my %sequenceData = Sprout::ReadFasta($fileName, $prefix);
2196    
2197  Read sequence data from a FASTA-format file. Each sequence in a FASTA file is represented by  Read sequence data from a FASTA-format file. Each sequence in a FASTA file is represented by
2198  one or more lines of data. The first line begins with a > character and contains an ID.  one or more lines of data. The first line begins with a > character and contains an ID.
# Line 2064  Line 2258 
2258    
2259  =head3 FormatLocations  =head3 FormatLocations
2260    
2261  C<< my @locations = $sprout->FormatLocations($prefix, \@locations, $oldFormat); >>      my @locations = $sprout->FormatLocations($prefix, \@locations, $oldFormat);
2262    
2263  Insure that a list of feature locations is in the Sprout format. The Sprout feature location  Insure that a list of feature locations is in the Sprout format. The Sprout feature location
2264  format is I<contig>_I<beg*len> where I<*> is C<+> for a forward gene and C<-> for a backward  format is I<contig>_I<beg*len> where I<*> is C<+> for a forward gene and C<-> for a backward
# Line 2129  Line 2323 
2323    
2324  =head3 DumpData  =head3 DumpData
2325    
2326  C<< $sprout->DumpData(); >>      $sprout->DumpData();
2327    
2328  Dump all the tables to tab-delimited DTX files. The files will be stored in the data directory.  Dump all the tables to tab-delimited DTX files. The files will be stored in the data directory.
2329    
# Line 2146  Line 2340 
2340    
2341  =head3 XMLFileName  =head3 XMLFileName
2342    
2343  C<< my $fileName = $sprout->XMLFileName(); >>      my $fileName = $sprout->XMLFileName();
2344    
2345  Return the name of this database's XML definition file.  Return the name of this database's XML definition file.
2346    
# Line 2157  Line 2351 
2351      return $self->{_xmlName};      return $self->{_xmlName};
2352  }  }
2353    
2354    =head3 GetGenomeNameData
2355    
2356        my ($genus, $species, $strain) = $sprout->GenomeNameData($genomeID);
2357    
2358    Return the genus, species, and unique characterization for a genome. This
2359    is similar to L</GenusSpecies>, with the exception that it returns the
2360    values in three seperate fields.
2361    
2362    =over 4
2363    
2364    =item genomeID
2365    
2366    ID of the genome whose name data is desired.
2367    
2368    =item RETURN
2369    
2370    Returns a three-element list, consisting of the genus, species, and strain
2371    of the specified genome. If the genome is not found, an error occurs.
2372    
2373    =back
2374    
2375    =cut
2376    
2377    sub GetGenomeNameData {
2378        # Get the parameters.
2379        my ($self, $genomeID) = @_;
2380        # Get the desired values.
2381        my ($genus, $species, $strain) = $self->GetEntityValues('Genome', $genomeID =>
2382                                                                [qw(Genome(genus) Genome(species) Genome(unique-characterization))]);
2383        # Throw an error if they were not found.
2384        if (! defined $genus) {
2385            Confess("Genome $genomeID not found in database.");
2386        }
2387        # Return the results.
2388        return ($genus, $species, $strain);
2389    }
2390    
2391    =head3 GetGenomeByNameData
2392    
2393        my @genomes = $sprout->GetGenomeByNameData($genus, $species, $strain);
2394    
2395    Return a list of the IDs of the genomes with the specified genus,
2396    species, and strain. In almost every case, there will be either zero or
2397    one IDs returned; however, two or more IDs could be returned if there are
2398    multiple versions of the genome in the database.
2399    
2400    =over 4
2401    
2402    =item genus
2403    
2404    Genus of the desired genome.
2405    
2406    =item species
2407    
2408    Species of the desired genome.
2409    
2410    =item strain
2411    
2412    Strain (unique characterization) of the desired genome. This may be an empty
2413    string, in which case it is presumed that the desired genome has no strain
2414    specified.
2415    
2416    =item RETURN
2417    
2418    Returns a list of the IDs of the genomes having the specified genus, species, and
2419    strain.
2420    
2421    =back
2422    
2423    =cut
2424    
2425    sub GetGenomeByNameData {
2426        # Get the parameters.
2427        my ($self, $genus, $species, $strain) = @_;
2428        # Try to find the genomes.
2429        my @retVal = $self->GetFlat(['Genome'], "Genome(genus) = ? AND Genome(species) = ? AND Genome(unique-characterization) = ?",
2430                                    [$genus, $species, $strain], 'Genome(id)');
2431        # Return the result.
2432        return @retVal;
2433    }
2434    
2435  =head3 Insert  =head3 Insert
2436    
2437  C<< $sprout->Insert($objectType, \%fieldHash); >>      $sprout->Insert($objectType, \%fieldHash);
2438    
2439  Insert an entity or relationship instance into the database. The entity or relationship of interest  Insert an entity or relationship instance into the database. The entity or relationship of interest
2440  is defined by a type name and then a hash of field names to values. Field values in the primary  is defined by a type name and then a hash of field names to values. Field values in the primary
# Line 2168  Line 2443 
2443  list references. For example, the following line inserts an inactive PEG feature named  list references. For example, the following line inserts an inactive PEG feature named
2444  C<fig|188.1.peg.1> with aliases C<ZP_00210270.1> and C<gi|46206278>.  C<fig|188.1.peg.1> with aliases C<ZP_00210270.1> and C<gi|46206278>.
2445    
2446  C<< $sprout->Insert('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>      $sprout->Insert('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']});
2447    
2448  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
2449  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
2450    
2451  C<< $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>      $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'});
2452    
2453  =over 4  =over 4
2454    
# Line 2198  Line 2473 
2473    
2474  =head3 Annotate  =head3 Annotate
2475    
2476  C<< my $ok = $sprout->Annotate($fid, $timestamp, $user, $text); >>      my $ok = $sprout->Annotate($fid, $timestamp, $user, $text);
2477    
2478  Annotate a feature. This inserts an Annotation record into the database and links it to the  Annotate a feature. This inserts an Annotation record into the database and links it to the
2479  specified feature and user.  specified feature and user.
# Line 2252  Line 2527 
2527    
2528  =head3 AssignFunction  =head3 AssignFunction
2529    
2530  C<< my $ok = $sprout->AssignFunction($featureID, $user, $function, $assigningUser); >>      my $ok = $sprout->AssignFunction($featureID, $user, $function, $assigningUser);
2531    
2532  This method assigns a function to a feature. Functions are a special type of annotation. The general  This method assigns a function to a feature. Functions are a special type of annotation. The general
2533  format is described in L</ParseAssignment>.  format is described in L</ParseAssignment>.
# Line 2312  Line 2587 
2587    
2588  =head3 FeaturesByAlias  =head3 FeaturesByAlias
2589    
2590  C<< my @features = $sprout->FeaturesByAlias($alias); >>      my @features = $sprout->FeaturesByAlias($alias);
2591    
2592  Returns a list of features with the specified alias. The alias is parsed to determine  Returns a list of features with the specified alias. The alias is parsed to determine
2593  the type of the alias. A string of digits is a GenBack ID and a string of exactly 6  the type of the alias. A string of digits is a GenBack ID and a string of exactly 6
# Line 2346  Line 2621 
2621          push @retVal, $mappedAlias;          push @retVal, $mappedAlias;
2622      } else {      } else {
2623          # Here we have a non-FIG alias. Get the features with the normalized alias.          # Here we have a non-FIG alias. Get the features with the normalized alias.
2624          @retVal = $self->GetFlat(['Feature'], 'Feature(alias) = ?', [$mappedAlias], 'Feature(id)');          @retVal = $self->GetFlat(['IsAliasOf'], 'IsAliasOf(from-link) = ?', [$mappedAlias], 'IsAliasOf(to-link)');
2625      }      }
2626      # Return the result.      # Return the result.
2627      return @retVal;      return @retVal;
# Line 2354  Line 2629 
2629    
2630  =head3 FeatureTranslation  =head3 FeatureTranslation
2631    
2632  C<< my $translation = $sprout->FeatureTranslation($featureID); >>      my $translation = $sprout->FeatureTranslation($featureID);
2633    
2634  Return the translation of a feature.  Return the translation of a feature.
2635    
# Line 2382  Line 2657 
2657    
2658  =head3 Taxonomy  =head3 Taxonomy
2659    
2660  C<< my @taxonomyList = $sprout->Taxonomy($genome); >>      my @taxonomyList = $sprout->Taxonomy($genome);
2661    
2662  Return the taxonomy of the specified genome. This will be in the form of a list  Return the taxonomy of the specified genome. This will be in the form of a list
2663  containing the various classifications in order from domain (eg. C<Bacteria>, C<Archaea>,  containing the various classifications in order from domain (eg. C<Bacteria>, C<Archaea>,
2664  or C<Eukaryote>) to sub-species. For example,  or C<Eukaryote>) to sub-species. For example,
2665    
2666  C<< (Bacteria, Proteobacteria, Gammaproteobacteria, Enterobacteriales, Enterobacteriaceae, Escherichia, Escherichia coli, Escherichia coli K12) >>      (Bacteria, Proteobacteria, Gammaproteobacteria, Enterobacteriales, Enterobacteriaceae, Escherichia, Escherichia coli, Escherichia coli K12)
2667    
2668  =over 4  =over 4
2669    
# Line 2423  Line 2698 
2698    
2699  =head3 CrudeDistance  =head3 CrudeDistance
2700    
2701  C<< my $distance = $sprout->CrudeDistance($genome1, $genome2); >>      my $distance = $sprout->CrudeDistance($genome1, $genome2);
2702    
2703  Returns a crude estimate of the distance between two genomes. The distance is construed so  Returns a crude estimate of the distance between two genomes. The distance is construed so
2704  that it will be 0 for genomes with identical taxonomies and 1 for genomes from different domains.  that it will be 0 for genomes with identical taxonomies and 1 for genomes from different domains.
# Line 2475  Line 2750 
2750    
2751  =head3 RoleName  =head3 RoleName
2752    
2753  C<< my $roleName = $sprout->RoleName($roleID); >>      my $roleName = $sprout->RoleName($roleID);
2754    
2755  Return the descriptive name of the role with the specified ID. In general, a role  Return the descriptive name of the role with the specified ID. In general, a role
2756  will only have a descriptive name if it is coded as an EC number.  will only have a descriptive name if it is coded as an EC number.
# Line 2509  Line 2784 
2784    
2785  =head3 RoleDiagrams  =head3 RoleDiagrams
2786    
2787  C<< my @diagrams = $sprout->RoleDiagrams($roleID); >>      my @diagrams = $sprout->RoleDiagrams($roleID);
2788    
2789  Return a list of the diagrams containing a specified functional role.  Return a list of the diagrams containing a specified functional role.
2790    
# Line 2537  Line 2812 
2812      return @retVal;      return @retVal;
2813  }  }
2814    
 =head3 GetProperties  
   
 C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>  
   
 Return a list of the properties with the specified characteristics.  
   
 Properties are the Sprout analog of the FIG attributes. The call is  
 passed directly to the CustomAttributes or RemoteCustomAttributes object  
 contained in this object.  
   
 This method returns a series of tuples that match the specified criteria. Each tuple  
 will contain an object ID, a key, and one or more values. The parameters to this  
 method therefore correspond structurally to the values expected in each tuple. In  
 addition, you can ask for a generic search by suffixing a percent sign (C<%>) to any  
 of the parameters. So, for example,  
   
     my @attributeList = $sprout->GetProperties('fig|100226.1.peg.1004', 'structure%', 1, 2);  
   
 would return something like  
   
     ['fig}100226.1.peg.1004', 'structure', 1, 2]  
     ['fig}100226.1.peg.1004', 'structure1', 1, 2]  
     ['fig}100226.1.peg.1004', 'structure2', 1, 2]  
     ['fig}100226.1.peg.1004', 'structureA', 1, 2]  
   
 Use of C<undef> in any position acts as a wild card (all values). You can also specify  
 a list reference in the ID column. Thus,  
   
     my @attributeList = $sprout->GetProperties(['100226.1', 'fig|100226.1.%'], 'PUBMED');  
   
 would get the PUBMED attribute data for Streptomyces coelicolor A3(2) and all its  
 features.  
   
 In addition to values in multiple sections, a single attribute key can have multiple  
 values, so even  
   
     my @attributeList = $sprout->GetProperties($peg, 'virulent');  
   
 which has no wildcard in the key or the object ID, may return multiple tuples.  
   
 =over 4  
   
 =item objectID  
   
 ID of object whose attributes are desired. If the attributes are desired for multiple  
 objects, this parameter can be specified as a list reference. If the attributes are  
 desired for all objects, specify C<undef> or an empty string. Finally, you can specify  
 attributes for a range of object IDs by putting a percent sign (C<%>) at the end.  
   
 =item key  
   
 Attribute key name. A value of C<undef> or an empty string will match all  
 attribute keys. If the values are desired for multiple keys, this parameter can be  
 specified as a list reference. Finally, you can specify attributes for a range of  
 keys by putting a percent sign (C<%>) at the end.  
   
 =item values  
   
 List of the desired attribute values, section by section. If C<undef>  
 or an empty string is specified, all values in that section will match. A  
 generic match can be requested by placing a percent sign (C<%>) at the end.  
 In that case, all values that match up to and not including the percent sign  
 will match. You may also specify a regular expression enclosed  
 in slashes. All values that match the regular expression will be returned. For  
 performance reasons, only values have this extra capability.  
   
 =item RETURN  
   
 Returns a list of tuples. The first element in the tuple is an object ID, the  
 second is an attribute key, and the remaining elements are the sections of  
 the attribute value. All of the tuples will match the criteria set forth in  
 the parameter list.  
   
 =back  
   
 =cut  
   
 sub GetProperties {  
     # Get the parameters.  
     my ($self, @parms) = @_;  
     # Declare the return variable.  
     my @retVal = $self->{_ca}->GetAttributes(@parms);  
     # Return the result.  
     return @retVal;  
 }  
   
2815  =head3 FeatureProperties  =head3 FeatureProperties
2816    
2817  C<< my @properties = $sprout->FeatureProperties($featureID); >>      my @properties = $sprout->FeatureProperties($featureID);
2818    
2819  Return a list of the properties for the specified feature. Properties are key-value pairs  Return a list of the properties for the specified feature. Properties are key-value pairs
2820  that specify special characteristics of the feature. For example, a property could indicate  that specify special characteristics of the feature. For example, a property could indicate
# Line 2664  Line 2853 
2853    
2854  =head3 DiagramName  =head3 DiagramName
2855    
2856  C<< my $diagramName = $sprout->DiagramName($diagramID); >>      my $diagramName = $sprout->DiagramName($diagramID);
2857    
2858  Return the descriptive name of a diagram.  Return the descriptive name of a diagram.
2859    
# Line 2692  Line 2881 
2881    
2882  =head3 PropertyID  =head3 PropertyID
2883    
2884  C<< my $id = $sprout->PropertyID($propName, $propValue); >>      my $id = $sprout->PropertyID($propName, $propValue);
2885    
2886  Return the ID of the specified property name and value pair, if the  Return the ID of the specified property name and value pair, if the
2887  pair exists. Only a small subset of the FIG attributes are stored as  pair exists. Only a small subset of the FIG attributes are stored as
# Line 2729  Line 2918 
2918    
2919  =head3 MergedAnnotations  =head3 MergedAnnotations
2920    
2921  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>      my @annotationList = $sprout->MergedAnnotations(\@list);
2922    
2923  Returns a merged list of the annotations for the features in a list. Each annotation is  Returns a merged list of the annotations for the features in a list. Each annotation is
2924  represented by a 4-tuple of the form C<($fid, $timestamp, $userID, $annotation)>, where  represented by a 4-tuple of the form C<($fid, $timestamp, $userID, $annotation)>, where
# Line 2778  Line 2967 
2967    
2968  =head3 RoleNeighbors  =head3 RoleNeighbors
2969    
2970  C<< my @roleList = $sprout->RoleNeighbors($roleID); >>      my @roleList = $sprout->RoleNeighbors($roleID);
2971    
2972  Returns a list of the roles that occur in the same diagram as the specified role. Because  Returns a list of the roles that occur in the same diagram as the specified role. Because
2973  diagrams and roles are in a many-to-many relationship with each other, the list is  diagrams and roles are in a many-to-many relationship with each other, the list is
# Line 2821  Line 3010 
3010    
3011  =head3 FeatureLinks  =head3 FeatureLinks
3012    
3013  C<< my @links = $sprout->FeatureLinks($featureID); >>      my @links = $sprout->FeatureLinks($featureID);
3014    
3015  Return a list of the web hyperlinks associated with a feature. The web hyperlinks are  Return a list of the web hyperlinks associated with a feature. The web hyperlinks are
3016  to external websites describing either the feature itself or the organism containing it  to external websites describing either the feature itself or the organism containing it
# Line 2852  Line 3041 
3041    
3042  =head3 SubsystemsOf  =head3 SubsystemsOf
3043    
3044  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>      my %subsystems = $sprout->SubsystemsOf($featureID);
3045    
3046  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
3047  to the roles the feature performs.  to the roles the feature performs.
# Line 2900  Line 3089 
3089    
3090  =head3 SubsystemList  =head3 SubsystemList
3091    
3092  C<< my @subsystems = $sprout->SubsystemList($featureID); >>      my @subsystems = $sprout->SubsystemList($featureID);
3093    
3094  Return a list containing the names of the subsystems in which the specified  Return a list containing the names of the subsystems in which the specified
3095  feature participates. Unlike L</SubsystemsOf>, this method only returns the  feature participates. Unlike L</SubsystemsOf>, this method only returns the
# Line 2924  Line 3113 
3113      # Get the parameters.      # Get the parameters.
3114      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
3115      # Get the list of names.      # Get the list of names.
3116      my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",      ##HACK: we do a join to the Subsystem table because we have missing subsystems in
3117        ## the Sprout database!
3118        my @retVal = $self->GetFlat(['HasRoleInSubsystem', 'Subsystem'], "HasRoleInSubsystem(from-link) = ?",
3119                                  [$featureID], 'HasRoleInSubsystem(to-link)');                                  [$featureID], 'HasRoleInSubsystem(to-link)');
3120      # Return the result, sorted.      # Return the result, sorted.
3121      return sort @retVal;      return sort @retVal;
# Line 2932  Line 3123 
3123    
3124  =head3 GenomeSubsystemData  =head3 GenomeSubsystemData
3125    
3126  C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>      my %featureData = $sprout->GenomeSubsystemData($genomeID);
3127    
3128  Return a hash mapping genome features to their subsystem roles.  Return a hash mapping genome features to their subsystem roles.
3129    
# Line 2992  Line 3183 
3183    
3184  =head3 RelatedFeatures  =head3 RelatedFeatures
3185    
3186  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>      my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID);
3187    
3188  Return a list of the features which are bi-directional best hits of the specified feature and  Return a list of the features which are bi-directional best hits of the specified feature and
3189  have been assigned the specified function by the specified user. If no such features exists,  have been assigned the specified function by the specified user. If no such features exists,
# Line 3043  Line 3234 
3234    
3235  =head3 TaxonomySort  =head3 TaxonomySort
3236    
3237  C<< my @sortedFeatureIDs = $sprout->TaxonomySort(\@featureIDs); >>      my @sortedFeatureIDs = $sprout->TaxonomySort(\@featureIDs);
3238    
3239  Return a list formed by sorting the specified features by the taxonomy of the containing  Return a list formed by sorting the specified features by the taxonomy of the containing
3240  genome. This will cause genomes from similar organisms to float close to each other.  genome. This will cause genomes from similar organisms to float close to each other.
# Line 3078  Line 3269 
3269          my ($taxonomy) = $self->GetFlat(['IsLocatedIn', 'HasContig', 'Genome'], "IsLocatedIn(from-link) = ?",          my ($taxonomy) = $self->GetFlat(['IsLocatedIn', 'HasContig', 'Genome'], "IsLocatedIn(from-link) = ?",
3270                                          [$fid], 'Genome(taxonomy)');                                          [$fid], 'Genome(taxonomy)');
3271          # Add this feature to the hash buffer.          # Add this feature to the hash buffer.
3272          Tracer::AddToListMap(\%hashBuffer, $taxonomy, $fid);          push @{$hashBuffer{$taxonomy}}, $fid;
3273      }      }
3274      # Sort the keys and get the elements.      # Sort the keys and get the elements.
3275      my @retVal = ();      my @retVal = ();
# Line 3091  Line 3282 
3282    
3283  =head3 Protein  =head3 Protein
3284    
3285  C<< my $protein = Sprout::Protein($sequence, $table); >>      my $protein = Sprout::Protein($sequence, $table);
3286    
3287  Translate a DNA sequence into a protein sequence.  Translate a DNA sequence into a protein sequence.
3288    
# Line 3177  Line 3368 
3368    
3369  =head3 LoadInfo  =head3 LoadInfo
3370    
3371  C<< my ($dirName, @relNames) = $sprout->LoadInfo(); >>      my ($dirName, @relNames) = $sprout->LoadInfo();
3372    
3373  Return the name of the directory from which data is to be loaded and a list of the relation  Return the name of the directory from which data is to be loaded and a list of the relation
3374  names. This information is useful when trying to analyze what needs to be put where in order  names. This information is useful when trying to analyze what needs to be put where in order
# Line 3198  Line 3389 
3389    
3390  =head3 BBHMatrix  =head3 BBHMatrix
3391    
3392  C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>      my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets);
3393    
3394  Find all the bidirectional best hits for the features of a genome in a  Find all the bidirectional best hits for the features of a genome in a
3395  specified list of target genomes. The return value will be a hash mapping  specified list of target genomes. The return value will be a hash mapping
# Line 3252  Line 3443 
3443    
3444  =head3 SimMatrix  =head3 SimMatrix
3445    
3446  C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>      my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets);
3447    
3448  Find all the similarities for the features of a genome in a  Find all the similarities for the features of a genome in a
3449  specified list of target genomes. The return value will be a hash mapping  specified list of target genomes. The return value will be a hash mapping
# Line 3322  Line 3513 
3513    
3514  =head3 LowBBHs  =head3 LowBBHs
3515    
3516  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>      my %bbhMap = $sprout->LowBBHs($featureID, $cutoff);
3517    
3518  Return the bidirectional best hits of a feature whose score is no greater than a  Return the bidirectional best hits of a feature whose score is no greater than a
3519  specified cutoff value. A higher cutoff value will allow inclusion of hits with  specified cutoff value. A higher cutoff value will allow inclusion of hits with
# Line 3366  Line 3557 
3557    
3558  =head3 Sims  =head3 Sims
3559    
3560  C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>      my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters);
3561    
3562  Get a list of similarities for a specified feature. Similarity information is not kept in the  Get a list of similarities for a specified feature. Similarity information is not kept in the
3563  Sprout database; rather, they are retrieved from a network server. The similarities are  Sprout database; rather, they are retrieved from a network server. The similarities are
# Line 3383  Line 3574 
3574    
3575  =item fid  =item fid
3576    
3577  ID of the feature whose similarities are desired.  ID of the feature whose similarities are desired, or reference to a list of IDs
3578    of features whose similarities are desired.
3579    
3580  =item maxN  =item maxN
3581    
# Line 3431  Line 3623 
3623    
3624  =head3 IsAllGenomes  =head3 IsAllGenomes
3625    
3626  C<< my $flag = $sprout->IsAllGenomes(\@list, \@checkList); >>      my $flag = $sprout->IsAllGenomes(\@list, \@checkList);
3627    
3628  Return TRUE if all genomes in the second list are represented in the first list at  Return TRUE if all genomes in the second list are represented in the first list at
3629  least one. Otherwise, return FALSE. If the second list is omitted, the first list is  least one. Otherwise, return FALSE. If the second list is omitted, the first list is
# Line 3480  Line 3672 
3672    
3673  =head3 GetGroups  =head3 GetGroups
3674    
3675  C<< my %groups = $sprout->GetGroups(\@groupList); >>      my %groups = $sprout->GetGroups(\@groupList);
3676    
3677  Return a hash mapping each group to the IDs of the genomes in the group.  Return a hash mapping each group to the IDs of the genomes in the group.
3678  A list of groups may be specified, in which case only those groups will be  A list of groups may be specified, in which case only those groups will be
# Line 3512  Line 3704 
3704                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3705          # Loop through the genomes found.          # Loop through the genomes found.
3706          for my $genome (@genomes) {          for my $genome (@genomes) {
3707              # Pop this genome's ID off the current list.              # Get the genome ID and group, and add this genome to the group's list.
3708              my @groups = @{$genome};              my ($genomeID, $group) = @{$genome};
3709              my $genomeID = shift @groups;              push @{$retVal{$group}}, $genomeID;
             # Loop through the groups, adding the genome ID to each group's  
             # list.  
             for my $group (@groups) {  
                 Tracer::AddToListMap(\%retVal, $group, $genomeID);  
             }  
3710          }          }
3711      }      }
3712      # Return the hash we just built.      # Return the hash we just built.
# Line 3528  Line 3715 
3715    
3716  =head3 MyGenomes  =head3 MyGenomes
3717    
3718  C<< my @genomes = Sprout::MyGenomes($dataDir); >>      my @genomes = Sprout::MyGenomes($dataDir);
3719    
3720  Return a list of the genomes to be included in the Sprout.  Return a list of the genomes to be included in the Sprout.
3721    
# Line 3560  Line 3747 
3747    
3748  =head3 LoadFileName  =head3 LoadFileName
3749    
3750  C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>      my $fileName = Sprout::LoadFileName($dataDir, $tableName);
3751    
3752  Return the name of the load file for the specified table in the specified data  Return the name of the load file for the specified table in the specified data
3753  directory.  directory.
# Line 3601  Line 3788 
3788    
3789  =head3 DeleteGenome  =head3 DeleteGenome
3790    
3791  C<< my $stats = $sprout->DeleteGenome($genomeID, $testFlag); >>      my $stats = $sprout->DeleteGenome($genomeID, $testFlag);
3792    
3793  Delete a genome from the database.  Delete a genome from the database.
3794    
# Line 3637  Line 3824 
3824    
3825  =head3 Fix  =head3 Fix
3826    
3827  C<< my %fixedHash = Sprout::Fix(%groupHash); >>      my %fixedHash = $sprout->Fix(%groupHash);
3828    
3829  Prepare a genome group hash (like that returned by L</GetGroups> for processing.  Prepare a genome group hash (like that returned by L</GetGroups>) for processing.
3830  Groups with the same primary name will be combined. The primary name is the  The groups will be combined into the appropriate super-groups.
 first capitalized word in the group name.  
3831    
3832  =over 4  =over 4
3833    
# Line 3659  Line 3845 
3845    
3846  sub Fix {  sub Fix {
3847      # Get the parameters.      # Get the parameters.
3848      my (%groupHash) = @_;      my ($self, %groupHash) = @_;
3849      # Create the result hash.      # Create the result hash.
3850      my %retVal = ();      my %retVal = ();
3851      # Copy over the genomes.      # Copy over the genomes.
3852      for my $groupID (keys %groupHash) {      for my $groupID (keys %groupHash) {
3853          # Make a safety copy of the group ID.          # Get the super-group name.
3854          my $realGroupID = $groupID;          my $realGroupID = $self->SuperGroup($groupID);
3855          # Yank the primary name.          # Append this group's genomes into the result hash
3856          if ($groupID =~ /([A-Z]\w+)/) {          # using the super-group name.
3857              $realGroupID = $1;          push @{$retVal{$realGroupID}}, @{$groupHash{$groupID}};
         }  
         # Append this group's genomes into the result hash.  
         Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});  
3858      }      }
3859      # Return the result hash.      # Return the result hash.
3860      return %retVal;      return %retVal;
# Line 3679  Line 3862 
3862    
3863  =head3 GroupPageName  =head3 GroupPageName
3864    
3865  C<< my $name = $sprout->GroupPageName($group); >>      my $name = $sprout->GroupPageName($group);
3866    
3867  Return the name of the page for the specified NMPDR group.  Return the name of the page for the specified NMPDR group.
3868    
# Line 3701  Line 3884 
3884  sub GroupPageName {  sub GroupPageName {
3885      # Get the parameters.      # Get the parameters.
3886      my ($self, $group) = @_;      my ($self, $group) = @_;
     # Declare the return variable.  
     my $retVal;  
3887      # Check for the group file data.      # Check for the group file data.
3888      if (! defined $self->{groupHash}) {      my %superTable = $self->CheckGroupFile();
         # Read the group file.  
         my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");  
         # Store it in our object.  
         $self->{groupHash} = \%groupData;  
     }  
3889      # Compute the real group name.      # Compute the real group name.
3890      my $realGroup = $group;      my $realGroup = $self->SuperGroup($group);
3891      if ($group =~ /([A-Z]\w+)/) {      # Get the associated page name.
3892          $realGroup = $1;      my $retVal = "../content/$superTable{$realGroup}->{page}";
     }  
     # Return the page name.  
     $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[1];  
3893      # Return the result.      # Return the result.
3894      return $retVal;      return $retVal;
3895  }  }
3896    
 =head3 ReadGroupFile  
   
 C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>  
   
 Read in the data from the specified group file. The group file contains information  
 about each of the NMPDR groups.  
   
 =over 4  
3897    
3898  =item name  =head3 AddProperty
   
 Name of the group.  
   
 =item page  
   
 Name of the group's page on the web site (e.g. C<campy.php> for  
 Campylobacter)  
   
 =item genus  
   
 Genus of the group  
3899    
3900  =item species      $sprout->AddProperty($featureID, $key, @values);
3901    
3902  Species of the group, or an empty string if the group is for an entire  Add a new attribute value (Property) to a feature.
 genus. If the group contains more than one species, the species names  
 should be separated by commas.  
3903    
3904  =back  =over 4
3905    
3906  The parameters to this method are as follows  =item peg
3907    
3908  =over 4  ID of the feature to which the attribute is to be added.
3909    
3910  =item groupFile  =item key
3911    
3912  Name of the file containing the group data.  Name of the attribute (key).
3913    
3914  =item RETURN  =item values
3915    
3916  Returns a hash keyed on group name. The value of each hash  Values of the attribute.
3917    
3918  =back  =back
3919    
3920  =cut  =cut
3921    #: Return Type ;
3922  sub ReadGroupFile {  sub AddProperty {
3923      # Get the parameters.      # Get the parameters.
3924      my ($groupFileName) = @_;      my ($self, $featureID, $key, @values) = @_;
3925      # Declare the return variable.      # Add the property using the attached attributes object.
3926      my %retVal;      $self->{_ca}->AddAttribute($featureID, $key, @values);
     # Read the group file.  
     my @groupLines = Tracer::GetFile($groupFileName);  
     for my $groupLine (@groupLines) {  
         my ($name, $page, $genus, $species) = split(/\t/, $groupLine);  
         $retVal{$name} = [$page, $genus, $species];  
     }  
     # Return the result.  
     return %retVal;  
3927  }  }
3928    
3929  =head3 AddProperty  =head3 CheckGroupFile
   
 C<< my  = $sprout->AddProperty($featureID, $key, @values); >>  
3930    
3931  Add a new attribute value (Property) to a feature.      my %groupData = $sprout->CheckGroupFile();
3932    
3933  =over 4  Get the group file hash. The group file hash describes the relationship
3934    between a group and the super-group to which it belongs for purposes of
3935    display. The super-group name is computed from the first capitalized word
3936    in the actual group name. For each super-group, the group file contains
3937    the page name and a list of the species expected to be in the group.
3938    Each species is specified by a genus and a species name. A species name
3939    of C<0> implies an entire genus.
3940    
3941  =item peg  This method returns a hash from super-group names to a hash reference. Each
3942    resulting hash reference contains the following fields.
3943    
3944  ID of the feature to which the attribute is to be added.  =over 4
3945    
3946  =item key  =item page
3947    
3948  Name of the attribute (key).  The super-group's web page in the NMPDR.
3949    
3950  =item values  =item contents
3951    
3952  Values of the attribute.  A list of 2-tuples, each containing a genus name followed by a species name
3953    (or 0, indicating all species). This list indicates which organisms belong
3954    in the super-group.
3955    
3956  =back  =back
3957    
3958  =cut  =cut
3959  #: Return Type ;  
3960  sub AddProperty {  sub CheckGroupFile {
3961      # Get the parameters.      # Get the parameters.
3962      my ($self, $featureID, $key, @values) = @_;      my ($self) = @_;
3963      # Add the property using the attached attributes object.      # Check to see if we already have this hash.
3964      $self->{_ca}->AddAttribute($featureID, $key, @values);      if (! defined $self->{groupHash}) {
3965            # We don't, so we need to read it in.
3966            my %groupHash;
3967            # Read the group file.
3968            my @groupLines = Tracer::GetFile("$FIG_Config::sproutData/groups.tbl");
3969            # Loop through the list of sort-of groups.
3970            for my $groupLine (@groupLines) {
3971                my ($name, $page, @contents) = split /\t/, $groupLine;
3972                $groupHash{$name} = { page => $page,
3973                                      contents => [ map { [ split /\s*,\s*/, $_ ] } @contents ]
3974                                    };
3975            }
3976            # Save the hash.
3977            $self->{groupHash} = \%groupHash;
3978        }
3979        # Return the result.
3980        return %{$self->{groupHash}};
3981  }  }
3982    
3983  =head2 Virtual Methods  =head2 Virtual Methods
3984    
3985  =head3 CleanKeywords  =head3 CleanKeywords
3986    
3987  C<< my $cleanedString = $sprout->CleanKeywords($searchExpression); >>      my $cleanedString = $sprout->CleanKeywords($searchExpression);
3988    
3989  Clean up a search expression or keyword list. This involves converting the periods  Clean up a search expression or keyword list. This involves converting the periods
3990  in EC numbers to underscores, converting non-leading minus signs to underscores,  in EC numbers to underscores, converting non-leading minus signs to underscores,
# Line 3844  Line 4011 
4011      # Get the parameters.      # Get the parameters.
4012      my ($self, $searchExpression) = @_;      my ($self, $searchExpression) = @_;
4013      # Perform the standard cleanup.      # Perform the standard cleanup.
4014      my $retVal = $self->ERDB::CleanKeywords($searchExpression);      my $words = $self->ERDB::CleanKeywords($searchExpression);
4015      # Fix the periods in EC and TC numbers.      # Fix the periods in EC and TC numbers.
4016      $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;      $words =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
4017      # Fix non-trailing periods.      # Fix non-trailing periods.
4018      $retVal =~ s/\.(\w)/_$1/g;      $words =~ s/\.(\w)/_$1/g;
4019      # Fix non-leading minus signs.      # Fix non-leading minus signs.
4020      $retVal =~ s/(\w)[\-]/$1_/g;      $words =~ s/(\w)[\-]/$1_/g;
4021      # Fix the vertical bars and colons      # Fix the vertical bars and colons
4022      $retVal =~ s/(\w)[|:](\w)/$1'$2/g;      $words =~ s/(\w)[|:](\w)/$1'$2/g;
4023        # Now split up the list so that each keyword is in its own string. We keep the delimiters
4024        # because they may contain boolean expression data.
4025        my @words = split /([^A-Za-z'0-9_]+)/, $words;
4026        # We'll convert the stemmable words into stems and re-assemble the result.
4027        my $retVal = "";
4028        for my $word (@words) {
4029            my $stem = $self->Stem($word);
4030            if (defined $stem) {
4031                $retVal .= $stem;
4032            } else {
4033                $retVal .= $word;
4034            }
4035        }
4036        Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);
4037      # Return the result.      # Return the result.
4038      return $retVal;      return $retVal;
4039  }  }
# Line 3867  Line 4048 
4048    
4049  A functional assignment is always of the form  A functional assignment is always of the form
4050    
4051      C<set >I<YYYY>C< function to\n>I<ZZZZZ>      set YYYY function to
4052        ZZZZ
4053    
4054  where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,  where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,
4055  the user and the assigning user (from MadeAnnotation) will be the same, but that is  the user and the assigning user (from MadeAnnotation) will be the same, but that is
# Line 3920  Line 4102 
4102      return @retVal;      return @retVal;
4103  }  }
4104    
4105    =head3 _CheckFeature
4106    
4107        my $flag = $sprout->_CheckFeature($fid);
4108    
4109    Return TRUE if the specified FID is probably an NMPDR feature ID, else FALSE.
4110    
4111    =over 4
4112    
4113    =item fid
4114    
4115    Feature ID to check.
4116    
4117    =item RETURN
4118    
4119    Returns TRUE if the FID is for one of the NMPDR genomes, else FALSE.
4120    
4121    =back
4122    
4123    =cut
4124    
4125    sub _CheckFeature {
4126        # Get the parameters.
4127        my ($self, $fid) = @_;
4128        # Insure we have a genome hash.
4129        if (! defined $self->{genomeHash}) {
4130            my %genomeHash = map { $_ => 1 } $self->GetFlat(['Genome'], "", [], 'Genome(id)');
4131            $self->{genomeHash} = \%genomeHash;
4132        }
4133        # Get the feature's genome ID.
4134        my ($genomeID) = FIGRules::ParseFeatureID($fid);
4135        # Return an indicator of whether or not the genome ID is in the hash.
4136        return ($self->{genomeHash}->{$genomeID} ? 1 : 0);
4137    }
4138    
4139  =head3 FriendlyTimestamp  =head3 FriendlyTimestamp
4140    
4141  Convert a time number to a user-friendly time stamp for display.  Convert a time number to a user-friendly time stamp for display.
# Line 3947  Line 4163 
4163  }  }
4164    
4165    
4166    =head3 Hint
4167    
4168        my $htmlText = SearchHelper::Hint($wikiPage, $hintText);
4169    
4170    Return the HTML for a small question mark that displays the specified hint text when it is clicked.
4171    This HTML can be put in forms to provide a useful hinting mechanism.
4172    
4173    =over 4
4174    
4175    =item wikiPage
4176    
4177    Name of the wiki page to be popped up when the hint mark is clicked.
4178    
4179    =item hintText
4180    
4181    Text to display for the hint. It is raw html, but may not contain any double quotes.
4182    
4183    =item RETURN
4184    
4185    Returns the html for the hint facility. The resulting html shows a small button-like thing that
4186    uses the standard FIG popup technology.
4187    
4188    =back
4189    
4190    =cut
4191    
4192    sub Hint {
4193        # Get the parameters.
4194        my ($wikiPage, $hintText) = @_;
4195        # Escape the single quotes in the hint text.
4196        my $quotedText = $hintText;
4197        $quotedText =~ s/'/\\'/g;
4198        # Convert the wiki page name to a URL.
4199        my $wikiURL = join("", map { ucfirst $_ } split /\s+/, $wikiPage);
4200        $wikiURL = "$FIG_Config::cgi_url/wiki/view.cgi/FIG/$wikiURL";
4201        # Compute the mouseover script.
4202        my $mouseOver = "doTooltip(this, '$quotedText')";
4203        # Create the html.
4204        my $retVal = "&nbsp;<a href=\"$wikiURL\"><img src=\"$FIG_Config::cgi_url/Html/button-h.png\" class=\"helpicon\" onmouseover=\"$mouseOver\"/></a>";
4205        # Return it.
4206        return $retVal;
4207    }
4208    
4209  1;  1;

Legend:
Removed from v.1.99  
changed lines
  Added in v.1.116

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3