[Bio] / FigKernelScripts / sphinx_index_genome.pl Repository:
ViewVC logotype

View of /FigKernelScripts/sphinx_index_genome.pl

Parent Directory | Revision Log


Revision 1.7 - (download) (as text) (annotate)
Mon Feb 14 22:47:40 2011 UTC (9 years, 4 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_04082011, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, mgrast_dev_04052011, mgrast_dev_02222011
Changes since 1.6: +32 -2 lines
More fixups.

use Data::Dumper;

use strict;
use Encode;
use FIG;
# Handle on the FIG data store; every genome, feature and alias lookup in
# this script goes through it.  (Direct method call instead of the
# ambiguous indirect-object form "new FIG".)
my $fig = FIG->new;

# Open the Sphinx xmlpipe2 document set and declare the full-text fields
# that each <sphinx:document> written later will carry.
print <<END;
<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>
<sphinx:schema>
<sphinx:field name="fid"/>
<sphinx:field name="annotation"/>
<sphinx:field name="genome"/>
<sphinx:field name="alias"/>
<sphinx:field name="subsystem"/>
</sphinx:schema>
END

# Numeric code for each known feature type; it is packed into the
# document id.  Types not listed here have no code.
my %tmap = (peg => 1, rna => 2);

# Genomes to index: the comma-separated list in the SPHINX_INDEX_ONLY
# environment variable if it is set (and true), otherwise whatever
# $fig->genomes(1) returns.
# NOTE(review): the argument 1 presumably restricts the list to complete
# genomes -- confirm against FIG::genomes.
my @genomes;
if (my $glist = $ENV{SPHINX_INDEX_ONLY})
{
    # Tolerate blanks around the commas and empty entries ("a, b,,c") so
    # that they do not turn into bogus genome ids further down.
    for my $entry (split(/,/, $glist))
    {
	(my $id = $entry) =~ s/^\s+|\s+$//g;
	push(@genomes, $id) if length($id);
    }
}
else
{
    @genomes = $fig->genomes(1);
}

#
# Ingest the subsystem index.
#

#
# Main loop: for every genome, write one <sphinx:document> per feature
# holding its id, function, genome name, aliases and subsystems.
#
for my $genome (@genomes)
{
    print STDERR "$genome\n";

    # $genome is interpolated into the SQL LIKE patterns below and may come
    # straight from the environment, so insist on the digits.digits form.
    # The fid pattern further down requires that form anyway, so nothing
    # that could have been indexed is lost by skipping other values.
    if ($genome !~ /\A\d+\.\d+\z/)
    {
	print STDERR "Skipping malformed genome id '$genome'\n";
	next;
    }

    # protein id => { subsystem name => 1 } for the pegs of this genome.
    # The LEFT JOIN plus "a.subsystem IS NULL" keeps only those index rows
    # whose role has no matching row in aux_roles.
    my %ss_info;

    my $sth = $fig->db_handle->{_dbh}->prepare(qq(SELECT i.protein, i.subsystem
						  FROM subsystem_index i LEFT JOIN aux_roles a ON i.role = a.role
						  WHERE i.protein LIKE 'fig|$genome.peg.%' AND a.subsystem IS NULL),
					   { mysql_use_result => 1});
    $sth->execute();
    while (my $ent = $sth->fetchrow_arrayref())
    {
	my($prot, $ss) = @$ent;
	$ss_info{$prot}->{$ss} = 1;
    }

    # Fetch in scalar context (as before), then escape for the <genome> field.
    my $gs = $fig->genus_species($genome);
    $gs = _xml_text($gs);

    my $all_data = $fig->all_features_detailed_fast($genome);

    # feature id => { alias => 1 } taken from the ext_alias table.
    my $ext_aliases_l = $fig->db_handle->SQL(qq(SELECT id, alias
						FROM ext_alias
						WHERE id like 'fig|${genome}.%'));
    my %ext_aliases;
    for my $row (@{ $ext_aliases_l || [] })
    {
	$ext_aliases{$row->[0]}->{$row->[1]} = 1;
    }

    for my $feature (@$all_data)
    {
	# Only the id (slot 0), the comma-separated alias string (slot 2)
	# and the assigned function (slot 6) of the feature tuple are used.
	my($fid, $aliases, $func) = @$feature[0, 2, 6];

	# Subsystem names, one per line, underscores shown as blanks.
	# Sorted so that repeated runs produce identical output.
	my @ss = map { encode_utf8($_) } sort keys %{$ss_info{$fid}};
	tr/_/ / for @ss;
	my $ss = _xml_text(join("\n", @ss));

	$func = _xml_text(defined($func) ? encode_utf8($func) : "");

	# Union of the feature's own aliases and the ext_alias entries.
	my %aliases;
	if (defined($aliases))
	{
	    $aliases{$_} = 1 for split(",", $aliases);
	}
	$aliases{$_} = 1 for keys %{$ext_aliases{$fid}};
	my $alias_txt = _xml_text(join("\n", sort keys %aliases));

	if ($fid =~ /^fig\|(\d+)\.(\d+)\.([^.]+)\.(\d+)$/)
	{
	    my($g, $ext, $ftype, $num) = ($1, $2, $3, $4);

	    # Feature types missing from %tmap get code 0.
	    my $tnum = $tmap{$ftype} || 0;

	    # Document id layout: feature number in bits 0-15, type code in
	    # bits 16-17, genome version in bits 18-25, taxon id from bit 26
	    # upwards.
	    # NOTE(review): nothing masks the fields, so a feature number
	    # above 65535, a genome version above 255, or two different
	    # unmapped feature types can yield the same id for distinct
	    # features -- confirm this cannot happen in the indexed data.
	    my $enc = $g << 26 | $ext << 18 | $tnum << 16 | $num;
	    print <<END;
<sphinx:document id="$enc">
<fid>$fid</fid>
<annotation>$func</annotation>
<genome>$genome $gs</genome>
<alias>$alias_txt</alias>
<subsystem>$ss</subsystem>
</sphinx:document>
END
	}
    }
}
print "</sphinx:docset>\n";

#
# _xml_text($text)
#
# Return $text made safe for use as XML element content: an undefined
# value becomes the empty string, control characters that XML 1.0 does not
# allow (everything below 0x20 except tab, newline and carriage return)
# are replaced by a blank so that one stray byte cannot make the parser
# reject the document stream, and &, < and > are turned into entities.
#
sub _xml_text
{
    my($text) = @_;
    return "" unless defined($text);

    $text =~ tr/\x00-\x08\x0B\x0C\x0E-\x1F/ /;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3