[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.108, Thu Feb 14 19:13:33 2008 UTC revision 1.111, Wed May 7 23:11:51 2008 UTC
# Line 14  Line 14 
14      use BasicLocation;      use BasicLocation;
15      use CustomAttributes;      use CustomAttributes;
16      use RemoteCustomAttributes;      use RemoteCustomAttributes;
17        use CGI;
18        use WikiTools;
19      use base qw(ERDB);      use base qw(ERDB);
20    
21  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
# Line 146  Line 148 
148          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));
149          $retVal->{_ca} = CustomAttributes->new(user => $user);          $retVal->{_ca} = CustomAttributes->new(user => $user);
150      }      }
151        # Insure we have access to the stem module.
152        WikiUse('Lingua::Stem');
153        $retVal->{stemmer} = Lingua::Stem->new();
154        $retVal->{stemmer}->stem_caching({ -level => 2 });
155      # Return it.      # Return it.
156      return $retVal;      return $retVal;
157  }  }
# Line 465  Line 471 
471      return $retVal;      return $retVal;
472  }  }
473    
474  =head3 GeneMenu  =head3 GenomeMenu
475    
476      my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected);      my $html = $sprout->GenomeMenu(%options);
477    
478  Return an HTML select menu of genomes. Each genome will be an option in the menu,  Generate a genome selection control with the specified name and options.
479  and will be displayed by name with the ID and a contig count attached. The selection  This control is almost but not quite the same as the genome control in the
480  value will be the genome ID. The genomes will be sorted by genus/species name.  B<SearchHelper> class. Eventually, the two will be combined.
481    
482  =over 4  =over 4
483    
484  =item attributes  =item options
485    
486  Reference to a hash mapping attributes to values for the SELECT tag generated.  Optional parameters for the control (see below).
487    
488  =item filterString  =item RETURN
489    
490  A filter string for use in selecting the genomes. The filter string must conform  Returns the HTML for a genome selection control on a form (sometimes called a popup menu).
 to the rules for the C<< ERDB->Get >> method.  
491    
492  =item params  =back
493    
494  Reference to a list of values to be substituted in for the parameter marks in  The valid options are as follows.
 the filter string.  
495    
496  =item selected (optional)  =over 4
497    
498  ID of the genome to be initially selected.  =item name
499    
500  =item fast (optional)  Name to give this control for use in passing it to the form. The default is C<myGenomeControl>.
501    Terrible things will happen if you have two controls with the same name on the same page.
502    
503  If specified and TRUE, the contig counts will be omitted to improve performance.  =item filter
504    
505  =item RETURN  If specified, a filter for the list of genomes to display. The filter should be in the form of a
506    list reference. The first element of the list should be the filter string, and the remaining elements
507    the filter parameters.
508    
509    =item multiSelect
510    
511    If TRUE, then the user can select multiple genomes. If FALSE, the user can only select one genome.
512    
513    =item size
514    
515    Number of rows to display in the control. The default is C<10>
516    
517    =item id
518    
519    ID to give this control. The default is the value of the C<name> option. Nothing will work correctly
520    unless this ID is unique.
521    
522    =item selected
523    
524    A comma-delimited list of selected genomes, or a reference to a list of selected genomes. The
525    default is none.
526    
527    =item class
528    
529  Returns an HTML select menu with the specified genomes as selectable options.  If specified, a style class to assign to the genome control.
530    
531  =back  =back
532    
533  =cut  =cut
534    
535  sub GeneMenu {  sub GenomeMenu {
536      # Get the parameters.      # Get the parameters.
537      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;      my ($self, %options) = @_;
538      my $slowMode = ! $fast;      # Get the control's name and ID.
539      # Default to nothing selected. This prevents an execution warning if "$selected"      my $menuName = $options{name} || 'myGenomeControl';
540      # is undefined.      my $menuID = $options{id} || $menuName;
541      $selected = "" unless defined $selected;      # Compute the IDs for the status display.
542      Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);      my $divID = "${menuID}_status";
543      # Start the menu.      my $urlID = "${menuID}_url";
544      my $retVal = "<select " .      # Compute the code to show selected genomes in the status area.
545          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .      my $showSelect = "showSelected('$menuID', '$divID', '$urlID', 1000)";
546          ">\n";      # Check for single-select or multi-select.
547      # Get the genomes.      my $multiSelect = $options{multiSelect} || 0;
548      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',      # Get the style data.
549                                                                       'Genome(genus)',      my $class = $options{class} || '';
550                                                                       'Genome(species)',      # Get the list of pre-selected items.
551                                                                       'Genome(unique-characterization)']);      my $selections = $options{selected} || [];
552      # Sort them by name.      if (ref $selections ne 'ARRAY') {
553      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;          $selections = [ split /\s*,\s*/, $selections ];
554      # Loop through the genomes, creating the option tags.      }
555      for my $genomeData (@sorted) {      my %selected = map { $_ => } @{$selections};
556          # Get the data for this genome.      # Extract the filter information. The default is no filtering. It can be passed as a tab-delimited
557          my ($genomeID, $genus, $species, $strain) = @{$genomeData};      # string or a list reference.
558          # Get the contig count.      my $filterParms = $options{filter} || "";
559          my $contigInfo = "";      if (! ref $filterParms) {
560          if ($slowMode) {          $filterParms = [split /\t|\\t/, $filterParms];
561              my $count = $self->ContigCount($genomeID);      }
562              my $counting = ($count == 1 ? "contig" : "contigs");      my $filterString = shift @{$filterParms};
563              $contigInfo = "[$count $counting]";      # Get a list of all the genomes in group order. In fact, we only need them ordered
564          }      # by name (genus,species,strain), but putting primary-group in front enables us to
565          # Find out if we're selected.      # take advantage of an existing index.
566          my $selectOption = ($selected eq $genomeID ? " selected" : "");      my @genomeList = $self->GetAll(['Genome'], "$filterString ORDER BY Genome(primary-group), Genome(genus), Genome(species), Genome(unique-characterization)",
567          # Build the option tag.                                     $filterParms,
568          $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";                                     [qw(Genome(primary-group) Genome(id) Genome(genus) Genome(species) Genome(unique-characterization) Genome(taxonomy) Genome(contigs))]);
569        # Create a hash to organize the genomes by group. Each group will contain a list of
570        # 2-tuples, the first element being the genome ID and the second being the genome
571        # name.
572        my %gHash = ();
573        for my $genome (@genomeList) {
574            # Get the genome data.
575            my ($group, $genomeID, $genus, $species, $strain, $taxonomy, $contigs) = @{$genome};
576            # Compute its name. This is the genus, species, strain (if any), and the contig count.
577            my $name = "$genus $species ";
578            $name .= "$strain " if $strain;
579            my $contigCount = ($contigs == 1 ? "" : ", $contigs contigs");
580            # Now we get the domain. The domain tells us the display style of the organism.
581            my ($domain) = split /\s*;\s*/, $taxonomy, 2;
582            # Now compute the display group. This is normally the primary group, but if the
583            # organism is supporting, we blank it out.
584            my $displayGroup = ($group eq $FIG_Config::otherGroup ? "" : $group);
585            # Push the genome into the group's list. Note that we use the real group
586            # name for the hash key here, not the display group name.
587            push @{$gHash{$group}}, [$genomeID, $name, $contigCount, $domain];
588        }
589        # We are almost ready to unroll the menu out of the group hash. The final step is to separate
590        # the supporting genomes by domain. First, we extract the NMPDR groups and sort them. They
591        # are sorted by the first capitalized word. Groups with "other" are sorted after groups
592        # that aren't "other". At some point, we will want to make this less complicated.
593        my %sortGroups = map { $_ =~ /(other)?(.*)([A-Z].+)/; "$3$1$2" => $_ }
594                             grep { $_ ne $FIG_Config::otherGroup } keys %gHash;
595        my @groups = map { $sortGroups{$_} } sort keys %sortGroups;
596        # Remember the number of NMPDR groups.
597        my $nmpdrGroupCount = scalar @groups;
598        # Loop through the supporting genomes, classifying them by domain. We'll also keep a list
599        # of the domains found.
600        my @otherGenomes = @{$gHash{$FIG_Config::otherGroup}};
601        my @domains = ();
602        for my $genomeData (@otherGenomes) {
603            my ($genomeID, $name, $contigCount, $domain) = @{$genomeData};
604            if (exists $gHash{$domain}) {
605                push @{$gHash{$domain}}, $genomeData;
606            } else {
607                $gHash{$domain} = [$genomeData];
608                push @domains, $domain;
609            }
610        }
611        # Add the domain groups at the end of the main group list. The main group list will now
612        # contain all the categories we need to display the genomes.
613        push @groups, sort @domains;
614        # Delete the supporting group.
615        delete $gHash{$FIG_Config::otherGroup};
616        # Now it gets complicated. We need a way to mark all the NMPDR genomes. We take advantage
617        # of the fact they come first in the list. We'll accumulate a count of the NMPDR genomes
618        # and use that to make the selections.
619        my $nmpdrCount = 0;
620        # Create the type counters.
621        my $groupCount = 1;
622        # Get the number of rows to display.
623        my $rows = $options{size} || 10;
624        # If we're multi-row, create an onChange event.
625        my $onChangeTag = ( $rows > 1 ? " onChange=\"$showSelect;\" onFocus=\"$showSelect;\"" : "" );
626        # Set up the multiple-select flag.
627        my $multipleTag = ($multiSelect ? " multiple" : "" );
628        # Set up the style class.
629        my $classTag = ($class ? " class=\"$class\"" : "" );
630        # Create the SELECT tag and stuff it into the output array.
631        my @lines = ("<SELECT name=\"$menuID\" id=\"$menuID\" $onChangeTag$multipleTag$classTag size=\"$rows\">");
632        # Loop through the groups.
633        for my $group (@groups) {
634            # Get the genomes in the group.
635            for my $genome (@{$gHash{$group}}) {
636                # If this is an NMPDR organism, we add an extra style and count it.
637                my $nmpdrStyle = "";
638                if ($nmpdrGroupCount > 0) {
639                    $nmpdrCount++;
640                    $nmpdrStyle = " Core";
641                }
642                # Get the organism ID, name, contig count, and domain.
643                my ($genomeID, $name, $contigCount, $domain) = @{$genome};
644                # See if we're pre-selected.
645                my $selectTag = ($selected{$genomeID} ? " SELECTED" : "");
646                # Compute the display name.
647                my $nameString = "$name ($genomeID$contigCount)";
648                # Generate the option tag.
649                my $optionTag = "<OPTION class=\"$domain$nmpdrStyle\" title=\"$group\" value=\"$genomeID\"$selectTag>$nameString</OPTION>";
650                push @lines, "    $optionTag";
651            }
652            # Record this group in the nmpdrGroup count. When that gets to 0, we've finished the NMPDR
653            # groups.
654            $nmpdrGroupCount--;
655      }      }
656      # Close the SELECT tag.      # Close the SELECT tag.
657      $retVal .= "</select>\n";      push @lines, "</SELECT>";
658        if ($rows > 1) {
659            # We're in a non-compact mode, so we need to add some selection helpers. First is
660            # the search box. This allows the user to type text and change which genomes are
661            # displayed. For multiple-select mode, we include a button that selects the displayed
662            # genes. For single-select mode, we use a plain label instead.
663            my $searchThingName = "${menuID}_SearchThing";
664            my $searchThingLabel = ($multiSelect ? "<INPUT type=\"button\" name=\"MacroSearch\" class=\"button\" value=\"Select genomes containing\" onClick=\"selectShowing('$menuID', '$searchThingName'); $showSelect;\" />"
665                                                 : "Show genomes containing");
666            push @lines, "<br />$searchThingLabel&nbsp;" .
667                         "<INPUT type=\"text\" id=\"$searchThingName\" name=\"$searchThingName\" size=\"30\" onKeyup=\"showTyped('$menuID', '$searchThingName');\" />";
668            # For multi-select mode, we also have buttons to set and clear selections.
669            if ($multiSelect) {
670                push @lines, "<INPUT type=\"button\" name=\"ClearAll\" class=\"bigButton\"  value=\"Clear All\" onClick=\"clearAll('$menuID'); $showSelect\" />";
671                push @lines, "<INPUT type=\"button\" name=\"SelectAll\" class=\"bigButton\" value=\"Select All\" onClick=\"selectAll('$menuID'); $showSelect\" />";
672                push @lines, "<INPUT type=\"button\" name=\"NMPDROnly\" class=\"bigButton\"  value=\"Select NMPDR\" onClick=\"selectSome('$menuID', $nmpdrCount, true); $showSelect;\" />";
673            }
674            # Add a hidden field we can use to generate organism page hyperlinks.
675            push @lines, "<INPUT type=\"hidden\" id=\"$urlID\" value=\"$FIG_Config::cgi_url/seedviewer.cgi?page=Organism;organism=\" />";
676            # Add the status display. This tells the user what's selected no matter where the list is scrolled.
677            push @lines, "<DIV id=\"$divID\" class=\"Panel\"></DIV>";
678        }
679        # Assemble all the lines into a string.
680        my $retVal = join("\n", @lines, "");
681        # Return the result.
682        return $retVal;
683    }
684    
685    
686    =head3 Stem
687    
688        my $stem = $sprout->Stem($word);
689    
690    Return the stem of the specified word, or C<undef> if the word is not
691    stemmable. Note that even if the word is stemmable, the stem may be
692    the same as the original word.
693    
694    =over 4
695    
696    =item word
697    
698    Word to convert into a stem.
699    
700    =item RETURN
701    
702    Returns a stem of the word (which may be the word itself), or C<undef> if
703    the word is not stemmable.
704    
705    =back
706    
707    =cut
708    
709    sub Stem {
710        # Get the parameters.
711        my ($self, $word) = @_;
712        # Declare the return variable.
713        my $retVal;
714        # See if it's stemmable.
715        if ($word =~ /^[A-Za-z]+$/) {
716            # Compute the stem.
717            my $stemList = $self->{stemmer}->stem($word);
718            my $stem = $stemList->[0];
719            # Check to see if it's long enough.
720            if (length $stem >= 3) {
721                # Yes, keep it.
722                $retVal = $stem;
723            } else {
724                # No, use the original word.
725                $retVal = $word;
726            }
727        }
728      # Return the result.      # Return the result.
729      return $retVal;      return $retVal;
730  }  }
731    
732    
733  =head3 Build  =head3 Build
734    
735      $sprout->Build();      $sprout->Build();
# Line 683  Line 867 
867  =item RETURN  =item RETURN
868    
869  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
870  context and as a comma-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context. An empty list means the feature
871    wasn't found.
872    
873  =back  =back
874    
# Line 692  Line 877 
877  sub FeatureLocation {  sub FeatureLocation {
878      # Get the parameters.      # Get the parameters.
879      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
880        # Declare the return variable.
881        my @retVal = ();
882      # Get the feature record.      # Get the feature record.
883      my $object = $self->GetEntity('Feature', $featureID);      my $object = $self->GetEntity('Feature', $featureID);
884      Confess("Feature $featureID not found.") if ! defined($object);      # Only proceed if we found it.
885        if (defined $object) {
886      # Get the location string.      # Get the location string.
887      my $locString = $object->PrimaryValue('Feature(location-string)');      my $locString = $object->PrimaryValue('Feature(location-string)');
888      # Create the return list.      # Create the return list.
889      my @retVal = split /\s*,\s*/, $locString;          @retVal = split /\s*,\s*/, $locString;
890        }
891      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
892      return (wantarray ? @retVal : join(',', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
893  }  }
# Line 3896  Line 4085 
4085      # Get the parameters.      # Get the parameters.
4086      my ($self, $searchExpression) = @_;      my ($self, $searchExpression) = @_;
4087      # Perform the standard cleanup.      # Perform the standard cleanup.
4088      my $retVal = $self->ERDB::CleanKeywords($searchExpression);      my $words = $self->ERDB::CleanKeywords($searchExpression);
4089      # Fix the periods in EC and TC numbers.      # Fix the periods in EC and TC numbers.
4090      $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;      $words =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
4091      # Fix non-trailing periods.      # Fix non-trailing periods.
4092      $retVal =~ s/\.(\w)/_$1/g;      $words =~ s/\.(\w)/_$1/g;
4093      # Fix non-leading minus signs.      # Fix non-leading minus signs.
4094      $retVal =~ s/(\w)[\-]/$1_/g;      $words =~ s/(\w)[\-]/$1_/g;
4095      # Fix the vertical bars and colons      # Fix the vertical bars and colons
4096      $retVal =~ s/(\w)[|:](\w)/$1'$2/g;      $words =~ s/(\w)[|:](\w)/$1'$2/g;
4097        # Now split up the list so that each keyword is in its own string. We keep the delimiters
4098        # because they may contain boolean expression data.
4099        my @words = split /([^A-Za-z'0-9_]+)/, $words;
4100        # We'll convert the stemmable words into stems and re-assemble the result.
4101        my $retVal = "";
4102        for my $word (@words) {
4103            my $stem = $self->Stem($word);
4104            if (defined $stem) {
4105                $retVal .= $stem;
4106            } else {
4107                $retVal .= $word;
4108            }
4109        }
4110        Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);
4111      # Return the result.      # Return the result.
4112      return $retVal;      return $retVal;
4113  }  }

Legend:
Removed from v.1.108  
changed lines
  Added in v.1.111

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3