[Bio] / FigKernelScripts / sphinx_index_genome.pl Repository:
ViewVC logotype

View of /FigKernelScripts/sphinx_index_genome.pl

Parent Directory | Revision Log


Revision 1.6 - (download) (as text) (annotate)
Mon Jun 7 21:25:47 2010 UTC (10 years ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2010_0928, rast_rel_2010_1206, rast_rel_2011_0119, rast_rel_2010_0827
Changes since 1.5: +19 -9 lines
speed up alias computation

use strict;
use Encode;
use FIG;
#
# Write a Sphinx xmlpipe2 document stream to STDOUT: one <sphinx:document>
# per feature of every genome returned by $fig->genomes(1), carrying the
# feature id, its function, the genome id and name, its aliases and its
# subsystems. Progress (one genome id per line) and diagnostics go to STDERR.
#
my $fig = FIG->new;

# Two-bit type code stored in the document id for each feature type.
my %tmap = (peg => 1, rna => 2);

# Feature types without an entry in %tmap that have already been reported.
my %unmapped_type_reported;

#
# Escape the characters that are significant in XML element content
# (&, <, >). An undefined value yields the empty string.
#
sub _xml_escape
{
    my($text) = @_;

    return "" unless defined($text);

    # The ampersand must be handled first so the entities added below
    # are not escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

print <<END;
<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>
<sphinx:schema>
<sphinx:field name="fid"/>
<sphinx:field name="annotation"/>
<sphinx:field name="genome"/>
<sphinx:field name="alias"/>
<sphinx:field name="subsystem"/>
</sphinx:schema>
END

for my $genome ($fig->genomes(1))
{
    print STDERR "$genome\n";
    my $gs = _xml_escape($fig->genus_species($genome));

    my $all_data = $fig->all_features_detailed_fast($genome);

    # Fetch every external alias of the genome with a single query instead
    # of one lookup per feature.
    my $ext_aliases_l = $fig->db_handle->SQL(qq(SELECT id, alias
						FROM ext_alias
						WHERE id like 'fig|${genome}.%'));

    # feature id => { alias => occurrence count }
    my %ext_aliases;
    for my $row (@{ $ext_aliases_l || [] })
    {
	$ext_aliases{$row->[0]}->{$row->[1]}++;
    }

    for my $feature (@{ $all_data || [] })
    {
	# Only the id, the comma-separated alias string and the function
	# of each feature tuple are used here.
	my($fid, $aliases, $func) = @{$feature}[0, 2, 6];

	# Subsystem names: drop undefined entries, encode as UTF-8 and
	# show underscores as spaces, one name per line.
	my @ss = grep { defined($_) } $fig->peg_to_subsystems($fid, 1, 1);
	for my $name (@ss)
	{
	    $name = encode_utf8($name);
	    $name =~ s/_/ /g;
	}
	my $ss = _xml_escape(join("\n", @ss));

	$func = _xml_escape(defined($func) ? encode_utf8($func) : "");

	# Union of the aliases stored with the feature and the external
	# ones. The "|| {}" keeps the lookup from creating an empty
	# %ext_aliases entry for every feature without external aliases.
	my %aliases;
	if (defined($aliases))
	{
	    $aliases{$_} = 1 for split(",", $aliases);
	}
	$aliases{$_} = 1 for keys %{ $ext_aliases{$fid} || {} };

	# Sorted so that repeated runs produce identical output.
	my $alias_txt = _xml_escape(join("\n", sort keys %aliases));

	# Features whose id does not have the form
	# fig|<genome>.<version>.<type>.<number> are not written.
	if ($fid =~ /^fig\|(\d+)\.(\d+)\.([^.]+)\.(\d+)$/)
	{
	    my($g, $ext, $fid_type, $num) = ($1, $2, $3, $4);

	    my $tnum = $tmap{$fid_type};
	    if (!defined($tnum))
	    {
		# The code used to become 0 silently in the shift below;
		# the emitted id is unchanged, but each such type is now
		# reported once.
		# NOTE(review): all unmapped types share code 0, so two such
		# features with the same genome and number get the same
		# document id - confirm whether they should be skipped or
		# given codes of their own.
		print STDERR "No type code for feature type '$fid_type' (first seen in $fid); using 0\n"
		    unless $unmapped_type_reported{$fid_type}++;
		$tnum = 0;
	    }

	    # Document id layout: genome number from bit 26 up, version in
	    # bits 18-25, type code in bits 16-17, feature number in bits
	    # 0-15. A value wider than its slot spills into the next field.
	    my $enc = ($g << 26) | ($ext << 18) | ($tnum << 16) | $num;
	    print <<END;
<sphinx:document id="$enc">
<fid>$fid</fid>
<annotation>$func</annotation>
<genome>$genome $gs</genome>
<alias>$alias_txt</alias>
<subsystem>$ss</subsystem>
</sphinx:document>
END
	}
    }
}
print "</sphinx:docset>\n";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3