[Bio] / FigKernelPackages / SeedSearch.pm Repository:
ViewVC logotype

View of /FigKernelPackages/SeedSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Tue Jun 21 21:20:21 2011 UTC (8 years, 9 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, myrast_rel40, mgrast_release_3_1_2, mgrast_release_3_1_1, rast_rel_2011_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_10262011
Changes since 1.2: +6 -24 lines
OK, index rebuilt, we can switch to the new encoding now.

package SeedSearch;

use FIG_Config;
use Carp;
use strict;

#
# Feature type abbreviations that may appear in a fig| feature id.
# The position of each name in this list is its numeric type code, and
# that code is packed into document ids by fid_to_docid, so the order
# here must stay stable.
#
our @doctypes = qw(
    peg rna atn att bs opr pbs
    pi pp prm pseudo rsw sRNA trm
);

#
# Two-way lookup kept in a single hash: type name => numeric code and
# numeric code => type name.
#
our %tmap = map { ($doctypes[$_], $_, $_, $doctypes[$_]) } 0 .. $#doctypes;

=head1 SeedSearch

This package uses a Sphinx indexing engine to do fast lookups into
a SEED sphinx index.

=cut

#
# Construct a SeedSearch object wrapping a Sphinx::Search client.
#
# $params is normally an array reference holding the argument list for
# Sphinx::Search's SetServer (host and port). When it is omitted, a
# reference to @FIG_Config::search_params is used; if that array is
# empty the constructor confesses.
#
# Returns a hash-based object with the keys "params" (the value used to
# configure the server) and "sphinx" (the Sphinx::Search client).
#
sub new
{
    my($class, $params) = @_;

    if (!defined($params))
    {
	if (@FIG_Config::search_params)
	{
	    $params = \@FIG_Config::search_params;
	}
	else
	{
	    # The @ is escaped so the variable name appears in the message
	    # instead of interpolating the (empty) array.
	    confess "SeedSearch requires a Sphinx configuration in \@FIG_Config::search_params";
	}
    }

    # This file has no "use Sphinx::Search", so load the client on demand.
    require Sphinx::Search;

    my $sphinx = Sphinx::Search->new();

    # SetServer expects its arguments as a list, so expand an array
    # reference; any other value is passed through unchanged.
    $sphinx->SetServer(ref($params) eq 'ARRAY' ? @$params : $params);

    my $self = {
	params => $params,
	sphinx => $sphinx,
    };
    return bless $self, $class;
}

=head2 search

    my @results = $sphinx->search($search_terms,
			      -pagenum => i,
			      -pagesize => N)

No C<search> method is defined in this file yet; C<start_search> below is
an empty stub.

=cut
#
# Placeholder with an empty body: it ignores its arguments, does no work,
# and returns nothing (an empty list, or undef in scalar context).
#
sub start_search
{

}

#
# Encode a feature id of the form fig|GENOME.EXT.TYPE.NUM as an integer
# document id.
#
# Returns the encoded id, or undef when:
#   - the id is undefined or does not match the expected pattern,
#   - TYPE is not one of the names registered in %tmap, or
#   - EXT or NUM is too large for its bit field.
#
# The genome number is shifted left by 36 bits, so the result only fits
# in a perl whose integers are wider than 32 bits.
#
sub fid_to_docid
{
    my($fid) = @_;

    return undef unless defined($fid);

    if ($fid =~ /^fig\|(\d+)\.(\d+)\.([^.]+)\.(\d+)$/)
    {
	my ($g, $ext, $type, $num) = ($1, $2, $3, $4);

	#
	# %tmap holds name => code and code => name entries in one hash.
	# Accept only a lookup that yields a numeric code: an unknown type
	# would otherwise encode as 0 (peg), and a numeric type token
	# would hit a reverse entry and return a name.
	#
	my $tnum = $tmap{$type};
	return undef unless defined($tnum) && $tnum =~ /^\d+$/;

	#
	# right to left: (cumulative)
	# 17 bits for feature number    (0)
	# 4 bits for type		(17)
	# 8 bits for ext		(21)
	# Rest for genome 		(29)
	#
	# New encoding; we ran out of bits in ext
	# 17 bits for feature number    (0)
	# 4 bits for type		(17)
	# 15 bits for ext		(21)
	# Rest for genome 		(36)
	#
	# A value wider than its field would spill into the neighbouring
	# field and decode as a different feature, so refuse it.
	#
	return undef if $num > 0x1ffff || $ext > 0x7fff;

	return ($g << 36) | ($ext << 21) | ($tnum << 17) | $num;
    }

    return undef;
}

#
# Decode an integer document id back into a feature id string of the
# form fig|GENOME.EXT.TYPE.NUM.
#
# Bit layout, low bits first: 17 bits feature number, 4 bits type code,
# 15 bits genome extension, remaining high bits genome number. The type
# code is turned back into its name through %tmap.
#
sub docid_to_fid
{
    my($doc) = @_;

    my $feature_num = $doc & 0x1ffff;
    my $type_code   = ($doc >> 17) & 0xf;
    my $extension   = ($doc >> 21) & 0x7fff;
    my $genome_num  = $doc >> 36;

    return join('.', "fig|$genome_num", $extension, $tmap{$type_code}, $feature_num);
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3