[Bio] / FigKernelScripts / sphinx_index_genome.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/sphinx_index_genome.pl

Parent Directory | Revision Log


Revision 1.7 - (view) (download) (as text)

#
# sphinx_index_genome.pl
#
# Write a Sphinx XML document stream (sphinx:docset / sphinx:schema /
# sphinx:document elements) to STDOUT, one document per feature of each
# genome, with the text fields fid, annotation, genome, alias and subsystem.
#
# Genomes indexed:
#   - if the environment variable SPHINX_INDEX_ONLY is set, the
#     comma-separated genome ids it lists;
#   - otherwise whatever $fig->genomes(1) returns.
#
# The id of each genome is printed to STDERR as it is processed.
#
# (Reconstructed from the ViewVC annotate listing of revision 1.7.)
#

use strict;
use warnings;

use Data::Dumper;
use Encode;
use FIG;

my $fig = FIG->new;

print <<END;
<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>
<sphinx:schema>
<sphinx:field name="fid"/>
<sphinx:field name="annotation"/>
<sphinx:field name="genome"/>
<sphinx:field name="alias"/>
<sphinx:field name="subsystem"/>
</sphinx:schema>
END

#
# Numeric code for each feature type, packed into the document id below.
# Any type not listed here is encoded as 0.
#
my %tmap = (peg => 1, rna => 2);

#
# Escape the three characters that are significant in XML character data.
# '&' must be replaced first so the entities produced for '<' and '>' are
# not escaped a second time. An undefined value yields the empty string.
#
sub xml_escape
{
    my($text) = @_;
    return "" unless defined($text);
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

my @genomes;
if (my $glist = $ENV{SPHINX_INDEX_ONLY})
{
    #
    # The ids from the environment end up inside SQL LIKE patterns, so accept
    # only the digits.digits form that the feature-id pattern further down
    # requires. Anything else could not produce a document anyway (assuming
    # feature ids are prefixed with their genome id).
    #
    for my $id (split(/,/, $glist))
    {
        $id =~ s/^\s+//;
        $id =~ s/\s+$//;
        next if $id eq '';
        if ($id !~ /^\d+\.\d+$/)
        {
            print STDERR "Skipping malformed genome id '$id' from SPHINX_INDEX_ONLY\n";
            next;
        }
        push(@genomes, $id);
    }
}
else
{
    @genomes = $fig->genomes(1);
}

for my $genome (@genomes)
{
    print STDERR "$genome\n";

    #
    # Ingest the subsystem index: protein id => { subsystem name => 1 }.
    # Rows whose role has a match in aux_roles are excluded by the
    # LEFT JOIN ... IS NULL filter.
    #
    # The handle is looked up and the statement prepared for every genome,
    # as in the original, rather than hoisted, so a handle replaced by FIG
    # between genomes is picked up.
    #
    my %ss_info;

    my $dbh = $fig->db_handle->{_dbh};
    my $sth = $dbh->prepare(qq(SELECT i.protein, i.subsystem
                               FROM subsystem_index i LEFT JOIN aux_roles a ON i.role = a.role
                               WHERE i.protein LIKE ? AND a.subsystem IS NULL),
                            { mysql_use_result => 1 })
        or die "Cannot prepare subsystem_index query: " . $dbh->errstr . "\n";
    $sth->execute("fig|$genome.peg.%");
    while (my $ent = $sth->fetchrow_arrayref())
    {
        my($prot, $ss) = @$ent;
        next unless defined($prot) && defined($ss);
        $ss_info{$prot}->{$ss} = 1;
    }

    my $gs = xml_escape($fig->genus_species($genome));

    my $all_data = $fig->all_features_detailed_fast($genome);

    #
    # External aliases: feature id => { alias => count }.
    # NOTE(review): the genome id is still interpolated here because it is
    # not visible from this file whether db_handle->SQL accepts bind values;
    # switch to a placeholder if it does.
    #
    my $ext_aliases_l = $fig->db_handle->SQL(qq(SELECT id, alias
                                                FROM ext_alias
                                                WHERE id like 'fig|${genome}.%'));
    my %ext_aliases;
    for my $row (@{ $ext_aliases_l || [] })
    {
        my($id, $alias) = @$row;
        next unless defined($id) && defined($alias);
        $ext_aliases{$id}->{$alias}++;
    }

    for my $feature (@{ $all_data || [] })
    {
        #
        # Only the feature id (index 0), the alias list (index 2) and the
        # function (index 6) of each feature tuple are used.
        #
        my($fid, $aliases, $func) = @{$feature}[0, 2, 6];

        # Features whose id is not fig|<g>.<ext>.<type>.<num> are not emitted.
        next unless defined($fid)
            && $fid =~ /^fig\|(\d+)\.(\d+)\.([^.]+)\.(\d+)$/;
        my($g, $ext, $type, $num) = ($1, $2, $3, $4);

        #
        # Document id layout (low bits first):
        #   bits  0-15  feature number
        #   bits 16-17  feature type code from %tmap (0 if unlisted)
        #   bits 18-25  second component of the genome id
        #   bits 26+    first component of the genome id
        # NOTE(review): a component wider than its field spills into the
        # neighbouring field, and the top field needs integers wider than
        # 32 bits for large genome ids -- confirm this is acceptable for the
        # data being indexed. The layout is left unchanged because existing
        # index consumers presumably decode it.
        #
        my $tnum = defined($tmap{$type}) ? $tmap{$type} : 0;
        my $enc = ($g << 26) | ($ext << 18) | ($tnum << 16) | $num;

        # Subsystem names: underscores become spaces, one name per line.
        # Sorted so the output is the same from run to run.
        my @ss = map { my $name = encode_utf8($_); $name =~ tr/_/ /; $name }
                 sort keys %{ $ss_info{$fid} || {} };
        my $ss = xml_escape(join("\n", @ss));

        $func = xml_escape(defined($func) ? encode_utf8($func) : "");

        # Union of the feature's own comma-separated aliases and the
        # external aliases loaded above, one per line.
        my %aliases = map { $_ => 1 } split(/,/, defined($aliases) ? $aliases : "");
        for my $alias (keys %{ $ext_aliases{$fid} || {} })
        {
            $aliases{$alias} = 1;
        }
        my $alias_txt = join("\n", map { xml_escape($_) } sort keys %aliases);

        my $fid_xml = xml_escape($fid);

        print <<END;
<sphinx:document id="$enc">
<fid>$fid_xml</fid>
<annotation>$func</annotation>
<genome>$genome $gs</genome>
<alias>$alias_txt</alias>
<subsystem>$ss</subsystem>
</sphinx:document>
END
    }
}
print "</sphinx:docset>\n";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3