[Bio] / Sprout / SearchHelper.pm Repository:
ViewVC logotype

Diff of /Sprout/SearchHelper.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.31, Fri May 11 06:28:21 2007 UTC revision 1.32, Thu May 17 23:43:30 2007 UTC
# Line 791  Line 791 
791      # Write the column headers and close the file.      # Write the column headers and close the file.
792      Tracer::PutLine($handle1, \@colNames);      Tracer::PutLine($handle1, \@colNames);
793      close $handle1;      close $handle1;
794        Trace("Column headers are: " . join("; ", @colNames) . ".") if T(3);
795      # Now open the sort pipe and save the file handle. Note how we append the      # Now open the sort pipe and save the file handle. Note how we append the
796      # sorted data to the column header row already in place. The output will      # sorted data to the column header row already in place. The output will
797      # contain a sort key followed by the real columns. The sort key is      # contain a sort key followed by the real columns. The sort key is
# Line 824  Line 825 
825      my ($self, $key, @colValues) = @_;      my ($self, $key, @colValues) = @_;
826      # Write them to the cache file.      # Write them to the cache file.
827      Tracer::PutLine($self->{fileHandle}, [$key, @colValues]);      Tracer::PutLine($self->{fileHandle}, [$key, @colValues]);
828        Trace("Column data is " . join("; ", $key, @colValues) . ".") if T(4);
829  }  }
830    
831  =head3 CloseSession  =head3 CloseSession
# Line 887  Line 889 
889    
890  =item RETURN  =item RETURN
891    
892  Returns a list of two items. The first item in the list is the organism name,  Returns a list of three items. The first item in the list is the organism name,
893  and the second is the name of the NMPDR group, or an empty string if the  and the second is the name of the NMPDR group, or an empty string if the
894  organism is not in an NMPDR group.  organism is not in an NMPDR group. The third item is the organism's domain.
895    
896  =back  =back
897    
# Line 899  Line 901 
901      # Get the parameters.      # Get the parameters.
902      my ($self, $genomeID) = @_;      my ($self, $genomeID) = @_;
903      # Declare the return variables.      # Declare the return variables.
904      my ($orgName, $group);      my ($orgName, $group, $domain);
905      # Check the cache.      # Check the cache.
906      my $cache = $self->{orgs};      my $cache = $self->{orgs};
907      if (exists $cache->{$genomeID}) {      if (exists $cache->{$genomeID}) {
908          ($orgName, $group) = @{$cache->{$genomeID}};          ($orgName, $group, $domain) = @{$cache->{$genomeID}};
909      } else {      } else {
910          # Here we have to use the database.          # Here we have to use the database.
911          my $sprout = $self->DB();          my $sprout = $self->DB();
912          my ($genus, $species, $strain, $group) = $sprout->GetEntityValues('Genome', $genomeID,          my ($genus, $species, $strain, $group, $taxonomy) = $sprout->GetEntityValues('Genome', $genomeID,
913                                                      ['Genome(genus)', 'Genome(species)',                                                      ['Genome(genus)', 'Genome(species)',
914                                                       'Genome(unique-characterization)',                                                       'Genome(unique-characterization)',
915                                                       'Genome(primary-group)']);                                                                   'Genome(primary-group)',
916                                                                     'Genome(taxonomy)']);
917          # Format and cache the name and display group.          # Format and cache the name and display group.
918          ($orgName, $group) = $self->SaveOrganismData($group, $genomeID, $genus, $species,          ($orgName, $group, $domain) = $self->SaveOrganismData($group, $genomeID, $genus, $species,
919                                                              $strain);                                                                $strain, $taxonomy);
920      }      }
921      # Return the result.      # Return the result.
922      return ($orgName, $group);      return ($orgName, $group, $domain);
923  }  }
924    
925  =head3 Organism  =head3 Organism
# Line 944  Line 947 
947      # Get the parameters.      # Get the parameters.
948      my ($self, $genomeID) = @_;      my ($self, $genomeID) = @_;
949      # Get the organism data.      # Get the organism data.
950      my ($retVal, $group) = $self->OrganismData($genomeID);      my ($retVal) = $self->OrganismData($genomeID);
951      # Return the result.      # Return the result.
952      return $retVal;      return $retVal;
953  }  }
# Line 1024  Line 1027 
1027    
1028  =head3 ComputeFASTA  =head3 ComputeFASTA
1029    
1030  C<< my $fasta = $shelp->ComputeFASTA($desiredType, $sequence); >>  C<< my $fasta = $shelp->ComputeFASTA($desiredType, $sequence, $flankingWidth); >>
1031    
1032  Parse a sequence input and convert it into a FASTA string of the desired type.  Parse a sequence input and convert it into a FASTA string of the desired type with
1033    the desired flanking width.
1034    
1035  =over 4  =over 4
1036    
# Line 1042  Line 1046 
1046  if the input does not begin with a greater-than sign (FASTA label line), a default label  if the input does not begin with a greater-than sign (FASTA label line), a default label
1047  line will be provided.  line will be provided.
1048    
1049    =item flankingWidth
1050    
1051    If the DNA FASTA of a feature is desired, the number of base pairs to either side of the
1052    feature that should be included. Currently we can't do this for Proteins because the
1053    protein translation of a feature doesn't always match the DNA and is taken directly
1054    from the database.
1055    
1056  =item RETURN  =item RETURN
1057    
1058  Returns a string in FASTA format representing the content of the desired sequence with  Returns a string in FASTA format representing the content of the desired sequence with
# Line 1054  Line 1065 
1065    
1066  sub ComputeFASTA {  sub ComputeFASTA {
1067      # Get the parameters.      # Get the parameters.
1068      my ($self, $desiredType, $sequence) = @_;      my ($self, $desiredType, $sequence, $flankingWidth) = @_;
1069      # Declare the return variable. If an error occurs, it will remain undefined.      # Declare the return variable. If an error occurs, it will remain undefined.
1070      my $retVal;      my $retVal;
1071      # This variable will be cleared if an error is detected.      # This variable will be cleared if an error is detected.
# Line 1091  Line 1102 
1102                  $fastaData = $sprout->FeatureTranslation($figID);                  $fastaData = $sprout->FeatureTranslation($figID);
1103                  Trace(length $fastaData . " characters returned for translation of $fastaLabel.") if T(3);                  Trace(length $fastaData . " characters returned for translation of $fastaLabel.") if T(3);
1104              } else {              } else {
1105                  # We want DNA, so get the DNA sequence. This is a two-step process.                  # We want DNA, so get the DNA sequence. This is a two-step process. First, we get the
1106                    # locations.
1107                  my @locList = $sprout->FeatureLocation($figID);                  my @locList = $sprout->FeatureLocation($figID);
1108                    if ($flankingWidth > 0) {
1109                        # Here we need to add flanking data. Convert the locations to a list
1110                        # of location objects.
1111                        my @locObjects = map { BasicLocation->new($_) } @locList;
1112                        # Initialize the return variable. We will put the DNA in here segment by segment.
1113                        $fastaData = "";
1114                        # Now we widen each location by the flanking width and stash the results. This
1115                        # requires getting the contig length for each contig so we don't fall off the end.
1116                        for my $locObject (@locObjects) {
1117                            Trace("Current location is " . $locObject->String . ".") if T(4);
1118                            # Remember the current start and length.
1119                            my ($start, $len) = ($locObject->Left, $locObject->Length);
1120                            # Get the contig length.
1121                            my $contigLen = $sprout->ContigLength($locObject->Contig);
1122                            # Widen the location and get its DNA.
1123                            $locObject->Widen($flankingWidth, $contigLen);
1124                            my $fastaSegment = $sprout->DNASeq([$locObject->String()]);
1125                            # Now we need to do some case changing. The main DNA is upper case and
1126                            # the flanking DNA is lower case.
1127                            my $leftFlank = $start - $locObject->Left;
1128                            my $rightFlank = $leftFlank + $len;
1129                            Trace("Wide location is " . $locObject->String . ". Flanks are $leftFlank and $rightFlank. Contig len is $contigLen.") if T(4);
1130                            my $fancyFastaSegment = lc(substr($fastaSegment, 0, $leftFlank)) .
1131                                                    uc(substr($fastaSegment, $leftFlank, $rightFlank - $leftFlank)) .
1132                                                    lc(substr($fastaSegment, $rightFlank));
1133                            $fastaData .= $fancyFastaSegment;
1134                        }
1135                    } else {
1136                        # Here we have just the raw sequence.
1137                  $fastaData = $sprout->DNASeq(\@locList);                  $fastaData = $sprout->DNASeq(\@locList);
1138                  Trace(length $fastaData . " characters returned for DNA of $fastaLabel.") if T(3);                  }
1139                    Trace((length $fastaData) . " characters returned for DNA of $fastaLabel.") if T(3);
1140              }              }
1141          }          }
1142      } else {      } else {
# Line 1345  Line 1387 
1387      # Get the form name.      # Get the form name.
1388      my $formName = $self->FormName();      my $formName = $self->FormName();
1389      # Check to see if we already have a genome list in memory.      # Check to see if we already have a genome list in memory.
     my $genomes = $self->{genomeList};  
1390      my $groupHash;      my $groupHash;
1391        my @groups;
1392        my $nmpdrGroupCount;
1393        my $genomes = $self->{genomeList};
1394      if (defined $genomes) {      if (defined $genomes) {
1395          # We have a list ready to use.          # We have a list ready to use.
1396          $groupHash = $genomes;          $groupHash = $genomes;
1397            @groups = @{$self->{groupList}};
1398            $nmpdrGroupCount = $self->{groupCount};
1399      } else {      } else {
1400          # Get a list of all the genomes in group order. In fact, we only need them ordered          # Get a list of all the genomes in group order. In fact, we only need them ordered
1401          # by name (genus,species,strain), but putting primary-group in front enables us to          # by name (genus,species,strain), but putting primary-group in front enables us to
# Line 1358  Line 1404 
1404                                           "ORDER BY Genome(primary-group), Genome(genus), Genome(species), Genome(unique-characterization)",                                           "ORDER BY Genome(primary-group), Genome(genus), Genome(species), Genome(unique-characterization)",
1405                                           [], ['Genome(primary-group)', 'Genome(id)',                                           [], ['Genome(primary-group)', 'Genome(id)',
1406                                                'Genome(genus)', 'Genome(species)',                                                'Genome(genus)', 'Genome(species)',
1407                                                'Genome(unique-characterization)']);                                                'Genome(unique-characterization)',
1408                                                  'Genome(taxonomy)']);
1409          # Create a hash to organize the genomes by group. Each group will contain a list of          # Create a hash to organize the genomes by group. Each group will contain a list of
1410          # 2-tuples, the first element being the genome ID and the second being the genome          # 2-tuples, the first element being the genome ID and the second being the genome
1411          # name.          # name.
1412          my %gHash = ();          my %gHash = ();
1413          for my $genome (@genomeList) {          for my $genome (@genomeList) {
1414              # Get the genome data.              # Get the genome data.
1415              my ($group, $genomeID, $genus, $species, $strain) = @{$genome};              my ($group, $genomeID, $genus, $species, $strain, $taxonomy) = @{$genome};
1416              # Compute and cache its name and display group.              # Compute and cache its name and display group.
1417              my ($name, $displayGroup) = $self->SaveOrganismData($group, $genomeID, $genus, $species,              my ($name, $displayGroup, $domain) = $self->SaveOrganismData($group, $genomeID, $genus, $species,
1418                                                                  $strain);                                                                           $strain, $taxonomy);
1419              # Push the genome into the group's list. Note that we use the real group              # Push the genome into the group's list. Note that we use the real group
1420              # name here, not the display group name.              # name here, not the display group name.
1421              push @{$gHash{$group}}, [$genomeID, $name];              push @{$gHash{$group}}, [$genomeID, $name, $domain];
1422          }          }
1423            # We are almost ready to unroll the menu out of the group hash. The final step is to separate
1424            # the supporting genomes by domain. First, we sort the NMPDR groups.
1425            @groups = sort grep { $_ ne $FIG_Config::otherGroup } keys %gHash;
1426            # Remember the number of NMPDR groups.
1427            $nmpdrGroupCount = scalar @groups;
1428            # Loop through the supporting genomes, classifying them by domain. We'll also keep a list
1429            # of the domains found.
1430            my @otherGenomes = @{$gHash{$FIG_Config::otherGroup}};
1431            my @domains = ();
1432            for my $genomeData (@otherGenomes) {
1433                my ($genomeID, $name, $domain) = @{$genomeData};
1434                if (exists $gHash{$domain}) {
1435                    push @{$gHash{$domain}}, $genomeData;
1436                } else {
1437                    $gHash{$domain} = [$genomeData];
1438                    push @domains, $domain;
1439                }
1440            }
1441            # Add the domain groups at the end of the main group list. The main group list will now
1442            # contain all the categories we need to display the genomes.
1443            push @groups, sort @domains;
1444            # Delete the supporting group.
1445            delete $gHash{$FIG_Config::otherGroup};
1446          # Save the genome list for future use.          # Save the genome list for future use.
1447          $self->{genomeList} = \%gHash;          $self->{genomeList} = \%gHash;
1448            $self->{groupList} = \@groups;
1449            $self->{groupCount} = $nmpdrGroupCount;
1450          $groupHash = \%gHash;          $groupHash = \%gHash;
1451      }      }
     # Now we are ready to unroll the menu out of the group hash. First, we sort the groups, putting  
     # the supporting-genome group last.  
     my @groups = sort grep { $_ ne $FIG_Config::otherGroup } keys %{$groupHash};  
     push @groups, $FIG_Config::otherGroup;  
1452      # Next, create a hash that specifies the pre-selected entries. Note that we need to deal      # Next, create a hash that specifies the pre-selected entries. Note that we need to deal
1453      # with the possibility of undefined values in the incoming list.      # with the possibility of undefined values in the incoming list.
1454      my %selectedHash = ();      my %selectedHash = ();
# Line 1417  Line 1485 
1485          # Get the genomes in the group.          # Get the genomes in the group.
1486          for my $genome (@{$groupHash->{$group}}) {          for my $genome (@{$groupHash->{$group}}) {
1487              # Count this organism if it's NMPDR.              # Count this organism if it's NMPDR.
1488              if ($group ne $FIG_Config::otherGroup) {              if ($nmpdrGroupCount > 0) {
1489                  $nmpdrCount++;                  $nmpdrCount++;
1490              }              }
1491              # Get the organism ID and name.              # Get the organism ID, name, and domain.
1492              my ($genomeID, $name) = @{$genome};              my ($genomeID, $name, $domain) = @{$genome};
1493              # See if it's selected.              # See if it's selected.
1494              my $select = ($selectedHash{$genomeID} ? " selected" : "");              my $select = ($selectedHash{$genomeID} ? " selected" : "");
1495              # Generate the option tag.              # Generate the option tag.
1496              my $optionTag = "<OPTION value=\"$genomeID\"$select>$name <em>($genomeID)</em></OPTION>";              my $optionTag = "<OPTION class=\"$domain\" value=\"$genomeID\"$select>$name <em>($genomeID)</em></OPTION>";
1497              push @lines, "    $optionTag";              push @lines, "    $optionTag";
1498          }          }
1499          # Close the option group.          # Close the option group.
1500          push @lines, "  </OPTGROUP>";          push @lines, "  </OPTGROUP>";
1501            # Record this group in the nmpdrGroup count. When that gets to 0, we've finished the NMPDR
1502            # groups.
1503            $nmpdrGroupCount--;
1504      }      }
1505      # Close the SELECT tag.      # Close the SELECT tag.
1506      push @lines, "</SELECT>";      push @lines, "</SELECT>";
# Line 2710  Line 2781 
2781    
2782  =head3 SaveOrganismData  =head3 SaveOrganismData
2783    
2784  C<< my ($name, $displayGroup) = $shelp->SaveOrganismData($group, $genomeID, $genus, $species, $strain); >>  C<< my ($name, $displayGroup) = $shelp->SaveOrganismData($group, $genomeID, $genus, $species, $strain, $taxonomy); >>
2785    
2786  Format the name of an organism and the display version of its group name. The incoming  Format the name of an organism and the display version of its group name. The incoming
2787  data should be the relevant fields from the B<Genome> record in the database. The  data should be the relevant fields from the B<Genome> record in the database. The
# Line 2740  Line 2811 
2811    
2812  Strain of the species represented by the genome.  Strain of the species represented by the genome.
2813    
2814    =item taxonomy
2815    
2816    Taxonomy of the species represented by the genome.
2817    
2818  =item RETURN  =item RETURN
2819    
2820  Returns a two-element list. The first element is the formatted genome name. The second  Returns a three-element list. The first element is the formatted genome name. The second
2821  element is the display name of the genome's group.  element is the display name of the genome's group. The third is the genome's domain.
2822    
2823  =back  =back
2824    
# Line 2751  Line 2826 
2826    
2827  sub SaveOrganismData {  sub SaveOrganismData {
2828      # Get the parameters.      # Get the parameters.
2829      my ($self, $group, $genomeID, $genus, $species, $strain) = @_;      my ($self, $group, $genomeID, $genus, $species, $strain, $taxonomy) = @_;
2830      # Declare the return values.      # Declare the return values.
2831      my ($name, $displayGroup);      my ($name, $displayGroup);
2832      # If the organism does not exist, format an unknown name and a blank group.      # If the organism does not exist, format an unknown name and a blank group.
# Line 2768  Line 2843 
2843          # name unless it's the supporting group, which is nulled out.          # name unless it's the supporting group, which is nulled out.
2844          $displayGroup = ($group eq $FIG_Config::otherGroup ? "" : $group);          $displayGroup = ($group eq $FIG_Config::otherGroup ? "" : $group);
2845      }      }
2846        # Compute the domain from the taxonomy.
2847        my ($domain) = split /\s*;\s*/, $taxonomy, 2;
2848      # Cache the group and organism data.      # Cache the group and organism data.
2849      my $cache = $self->{orgs};      my $cache = $self->{orgs};
2850      $cache->{$genomeID} = [$name, $displayGroup];      $cache->{$genomeID} = [$name, $displayGroup, $domain];
2851      # Return the result.      # Return the result.
2852      return ($name, $displayGroup);      return ($name, $displayGroup, $domain);
2853  }  }
2854    
2855  =head3 ValidateKeywords  =head3 ValidateKeywords
# Line 3082  Line 3159 
3159      return $retVal;      return $retVal;
3160  }  }
3161    
3162    =head3 ColumnTitle
3163    
3164    C<< my $title = $shelp->ColumnTitle($colName); >>
3165    
3166    Return the column heading title to be used for the specified column name. The
3167    default implementation is to simply call L</FeatureColumnTitle>.
3168    
3169    =over 4
3170    
3171    =item colName
3172    
3173    Name of the desired column.
3174    
3175    =item RETURN
3176    
3177    Returns the title to be used as the column header for the named column.
3178    
3179    =back
3180    
3181    =cut
3182    
3183    sub ColumnTitle {
3184        my ($self, $colName) = @_;
3185        return $self->FeatureColumnTitle($colName);
3186    }
3187    
3188    
3189  1;  1;

Legend:
Removed from v.1.31  
changed lines
  Added in v.1.32

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3