Parent Directory
|
Revision Log
Revision 1.12 - (view) (download) (as text)
1 : | redwards | 1.1 | # -*- perl -*- |
2 : | |||
3 : | =pod | ||
4 : | |||
5 : | =head1 | ||
6 : | |||
7 : | Some routines and things that Rob uses. Please feel free to use at will and incorporate into | ||
8 : | your own code or move them into FIG.pm or elsewhere. | ||
9 : | |||
10 : | =cut | ||
11 : | |||
12 : | package raelib; | ||
13 : | use strict; | ||
14 : | use FIG; | ||
15 : | my $fig=new FIG; | ||
16 : | |||
17 : | redwards | 1.5 | =head2 new |
18 : | |||
19 : | Just instantiate the object and return $self | ||
20 : | |||
21 : | =cut | ||
22 : | |||
23 : | sub new { | ||
24 : | my $self=shift; | ||
25 : | return $self | ||
26 : | } | ||
27 : | |||
28 : | |||
29 : | |||
30 : | |||
31 : | redwards | 1.4 | =head2 features_on_contig |
32 : | |||
33 : | Returns a reference to an array containing all the features on a contig in a genome. | ||
34 : | |||
35 : | use: | ||
36 : | |||
37 : | my $arrayref=$rae->features_on_contig($genome, $contig); | ||
38 : | |||
39 : | or | ||
40 : | |||
41 : | foreach my $peg (@{$rae->features_on_contig($genome, $contig)}) { | ||
42 : | ... blah blah ... | ||
43 : | } | ||
44 : | |||
45 : | returns undef if contig is not a part of genome or there is nothing to return, otherwise returns a list of pegs | ||
46 : | |||
47 : | v. experimental and guaranteed not to work! | ||
48 : | |||
49 : | =cut | ||
50 : | |||
51 : | sub features_on_contig { | ||
52 : | my ($self, $genome, $contig)=@_; | ||
53 : | # were this in FIG.pm you'd use this line: | ||
54 : | #my $rdbH = $self->db_handle; | ||
55 : | |||
56 : | my $rdbH = $fig->db_handle; | ||
57 : | my $relational_db_response=$rdbH->SQL('SELECT id FROM features WHERE (genome = \'' . $genome . '\' AND location ~* \'' . $contig . '\')'); | ||
58 : | # this is complicated. A reference to an array of references to arrays, and we only want the first element. | ||
59 : | # simplify. | ||
60 : | my @results; | ||
61 : | foreach my $res (@$relational_db_response) {push @results, $res->[0]} | ||
62 : | return \@results; | ||
63 : | } | ||
64 : | |||
65 : | |||
66 : | |||
67 : | |||
68 : | |||
69 : | |||
70 : | |||
71 : | redwards | 1.7 | =head2 pirsfcorrespondence |
72 : | redwards | 1.1 | |
73 : | redwards | 1.7 | Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written. |
74 : | redwards | 1.1 | This is so that we can easily go back and forth. |
75 : | |||
76 : | redwards | 1.7 | The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat |
77 : | redwards | 1.1 | |
78 : | redwards | 1.9 | This method takes three arguments: |
79 : | from : pirsfinfo.dat file | ||
80 : | to : file to write information to | ||
81 : | verbose : report on progress | ||
82 : | |||
83 : | Returns the number of lines in the pirsinfo file that were read. | ||
84 : | |||
85 : | redwards | 1.1 | =cut |
86 : | |||
87 : | redwards | 1.7 | sub pirsfcorrespondence { |
88 : | redwards | 1.9 | my ($self, $from, $to, $verbose)=@_; |
89 : | redwards | 1.10 | unless (-e $from) { |
90 : | print STDERR "File $from does not exist as called in $0\n"; | ||
91 : | return 0; | ||
92 : | } | ||
93 : | redwards | 1.1 | open (IN, $from) || die "Can't open $from"; |
94 : | redwards | 1.8 | open (OUT, ">$to") || die "Can't write to $to"; |
95 : | redwards | 1.9 | my $linecount; |
96 : | redwards | 1.1 | while (<IN>) { |
97 : | redwards | 1.9 | $linecount++; |
98 : | unless ($linecount % 10000) {print STDERR "Correspondence of $linecount lines calculated\n"} | ||
99 : | redwards | 1.1 | if (/^>/) {print OUT; next} |
100 : | chomp; | ||
101 : | my $done; | ||
102 : | redwards | 1.2 | foreach my $peg ($fig->by_alias("uni|$_")) { |
103 : | redwards | 1.1 | print OUT $_, "\t", $peg, "\n"; |
104 : | $done=1; | ||
105 : | } | ||
106 : | redwards | 1.12 | unless ($done) { |
107 : | foreach my $peg ($fig->by_alias("tr|$_")) { | ||
108 : | print OUT $_, "\t", $peg, "\n"; | ||
109 : | $done=1; | ||
110 : | } | ||
111 : | } | ||
112 : | unless ($done) { | ||
113 : | foreach my $peg ($fig->by_alias("sp|$_")) { | ||
114 : | print OUT $_, "\t", $peg, "\n"; | ||
115 : | $done=1; | ||
116 : | } | ||
117 : | } | ||
118 : | redwards | 1.1 | unless ($done) {print OUT $_, "\t\n"} |
119 : | } | ||
120 : | close IN; | ||
121 : | close OUT; | ||
122 : | redwards | 1.9 | return $linecount; |
123 : | redwards | 1.1 | } |
124 : | |||
125 : | |||
126 : | =head2 ss_by_id | ||
127 : | |||
128 : | Generate a list of subsystems that a peg occurs in. This is a ; separated list. | ||
129 : | This is a wrapper that removes roles and ignores essential things | ||
130 : | |||
131 : | =cut | ||
132 : | |||
133 : | sub ss_by_id { | ||
134 : | my ($self, $peg)=@_; | ||
135 : | my $ssout; | ||
136 : | foreach my $ss (sort $fig->subsystems_for_peg($peg)) | ||
137 : | { | ||
138 : | next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems | ||
139 : | $ssout.=$$ss[0]."; "; | ||
140 : | } | ||
141 : | $ssout =~ s/; $//; | ||
142 : | return $ssout; | ||
143 : | } | ||
144 : | |||
145 : | redwards | 1.3 | =head2 ss_by_homol |
146 : | |||
147 : | Generate a list of subsystems that homologs of a peg occur in. This is a ; separated list. | ||
148 : | This is also a wrapper around sims and ss, but makes everything unified | ||
149 : | |||
150 : | =cut | ||
151 : | |||
152 : | sub ss_by_homol { | ||
153 : | my ($self, $peg)=@_; | ||
154 : | return unless ($peg); | ||
155 : | my ($maxN, $maxP)=(50, 1e-20); | ||
156 : | |||
157 : | # find the sims | ||
158 : | my @sims=$fig->sims($peg, $maxN, $maxP, 'fig'); | ||
159 : | |||
160 : | # we are only going to keep the best hit for each peg | ||
161 : | # in a subsystem | ||
162 : | my $best_ss_score; my $best_ss_id; | ||
163 : | foreach my $sim (@sims) | ||
164 : | { | ||
165 : | my $simpeg=$$sim[1]; | ||
166 : | my $simscore=$$sim[10]; | ||
167 : | my @subsys=$fig->subsystems_for_peg($simpeg); | ||
168 : | foreach my $ss (@subsys) | ||
169 : | { | ||
170 : | if (! defined $best_ss_score->{$$ss[0]}) {$best_ss_score->{$$ss[0]}=$simscore; $best_ss_id->{$$ss[0]}=$simpeg} | ||
171 : | elsif ($best_ss_score->{$$ss[0]} > $simscore) | ||
172 : | { | ||
173 : | $best_ss_score->{$$ss[0]}=$simscore; | ||
174 : | $best_ss_id->{$$ss[0]}=$simpeg; | ||
175 : | } | ||
176 : | } | ||
177 : | } | ||
178 : | |||
179 : | my $ssoutput=join "", (map {"$_ (".$best_ss_id->{$_}."), "} keys %$best_ss_id); | ||
180 : | |||
181 : | $ssoutput =~ s/, $//; | ||
182 : | return $ssoutput; | ||
183 : | } | ||
184 : | |||
185 : | =head2 tagvalue | ||
186 : | |||
187 : | This will just check for tag value pairs and return either an array of values or a single ; separated list (if called as a scalar) | ||
188 : | |||
189 : | e.g. $values=raelib->tagvalue($peg, "PIRSF"); print join "\n", @$values; | ||
190 : | |||
191 : | Returns an empty array if no tag/value appropriate. | ||
192 : | |||
193 : | Just because I use this a lot I don't want to waste rewriting it. | ||
194 : | |||
195 : | =cut | ||
196 : | |||
197 : | sub tagvalue { | ||
198 : | my ($self, $peg, $tag)=@_; | ||
199 : | my @return; | ||
200 : | my @attr=$fig->feature_attributes($peg); | ||
201 : | foreach my $attr (@attr) { | ||
202 : | redwards | 1.11 | my ($gotpeg, $gottag, $val, $link)=@$attr; |
203 : | redwards | 1.3 | push @return, $val if ($gottag eq $tag); |
204 : | } | ||
205 : | return wantarray ? @return : join "; ", @return; | ||
206 : | } | ||
207 : | redwards | 1.1 | |
208 : | redwards | 1.5 | =head2 locations_on_contig |
209 : | |||
210 : | Return the locations of a sequence on a contig. | ||
211 : | |||
212 : | This will look for exact matches to a sequence on a contig, and return a reference to an array that has all the locations. | ||
213 : | |||
214 : | my $locations=$raelib->locations_on_contig($genome, $contig, 'GATC', undef); | ||
215 : | foreach my $bp (@$locations) { ... do something ... } | ||
216 : | |||
217 : | first argument : genome number | ||
218 : | second argument : contig name | ||
219 : | third argument : sequence to look for | ||
220 : | fourth argument : beginning position to start looking from (can be undef) | ||
221 : | fifth argument : end position to stop looking from (can be undef) | ||
222 : | sixth argument : check reverse complement (0 or undef will check forward, 1 or true will check rc) | ||
223 : | |||
224 : | Note, the position is calculated before the sequence is rc'd | ||
225 : | |||
226 : | =cut | ||
227 : | |||
228 : | sub locations_on_contig { | ||
229 : | my ($self, $genome, $contig, $sequence, $from, $to, $check_reverse)=@_; | ||
230 : | my $return=[]; | ||
231 : | |||
232 : | # get the dna sequence of the contig, and make sure it is uppercase | ||
233 : | my $contig_ln=$fig->contig_ln($genome, $contig); | ||
234 : | return $return unless ($contig_ln); | ||
235 : | unless ($from) {$from=1} | ||
236 : | unless ($to) {$to=$contig_ln} | ||
237 : | if ($from > $to) {($from, $to)=($to, $from)} | ||
238 : | my $dna_seq=$fig->dna_seq($genome, $contig."_".$from."_".$to); | ||
239 : | $dna_seq=uc($dna_seq); | ||
240 : | |||
241 : | # if we want to check the rc, we actually rc the query | ||
242 : | $sequence=$fig->reverse_comp($sequence) if ($check_reverse); | ||
243 : | $sequence=uc($sequence); | ||
244 : | |||
245 : | # now find all the matches | ||
246 : | my $posn=index($dna_seq, $sequence, 0); | ||
247 : | while ($posn > -1) { | ||
248 : | push @$return, $posn; | ||
249 : | $posn=index($dna_seq, $sequence, $posn+1); | ||
250 : | } | ||
251 : | return $return; | ||
252 : | } | ||
253 : | |||
254 : | |||
255 : | =head2 scrolling_org_list | ||
256 : | |||
257 : | This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once. | ||
258 : | |||
259 : | use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple); | ||
260 : | |||
261 : | multiple selections will only be set if $multiple is true | ||
262 : | |||
263 : | =cut | ||
264 : | |||
265 : | sub scrolling_org_list { | ||
266 : | my ($self, $cgi, $multiple)=@_; | ||
267 : | unless ($multiple) {$multiple=0} | ||
268 : | |||
269 : | my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' ); | ||
270 : | |||
271 : | # | ||
272 : | # Canonical names must match the keywords used in the DBMS. They are | ||
273 : | # defined in compute_genome_counts.pl | ||
274 : | # | ||
275 : | my %canonical = ( | ||
276 : | 'All' => undef, | ||
277 : | 'Archaea' => 'Archaea', | ||
278 : | 'Bacteria' => 'Bacteria', | ||
279 : | 'Eucarya' => 'Eukaryota', | ||
280 : | 'Viruses' => 'Virus', | ||
281 : | 'Environmental samples' => 'Environmental Sample' | ||
282 : | ); | ||
283 : | |||
284 : | my $req_dom = $cgi->param( 'domain' ) || 'All'; | ||
285 : | my @domains = $cgi->radio_group( -name => 'domain', | ||
286 : | -default => $req_dom, | ||
287 : | -override => 1, | ||
288 : | -values => [ @display ] | ||
289 : | ); | ||
290 : | |||
291 : | my $n_domain = 0; | ||
292 : | my %dom_num = map { ( $_, $n_domain++ ) } @display; | ||
293 : | my $req_dom_num = $dom_num{ $req_dom } || 0; | ||
294 : | |||
295 : | # | ||
296 : | # Viruses and Environmental samples must have completeness = All (that is | ||
297 : | # how they are in the database). Otherwise, default is Only "complete". | ||
298 : | # | ||
299 : | my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All' | ||
300 : | : $cgi->param( 'complete' ) || 'Only "complete"'; | ||
301 : | my @complete = $cgi->radio_group( -name => 'complete', | ||
302 : | -default => $req_comp, | ||
303 : | -override => 1, | ||
304 : | -values => [ 'All', 'Only "complete"' ] | ||
305 : | ); | ||
306 : | # | ||
307 : | # Use $fig->genomes( complete, restricted, domain ) to get org list: | ||
308 : | # | ||
309 : | my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete"; | ||
310 : | |||
311 : | my $orgs; my $label; | ||
312 : | @$orgs = $fig->genomes( $complete, undef, $canonical{ $req_dom } ); | ||
313 : | |||
314 : | foreach (@$orgs) { | ||
315 : | my $gs = $fig->genus_species($_); | ||
316 : | my $gc=scalar $fig->all_contigs($_); | ||
317 : | $label->{$_} = "$gs ($_) [$gc contigs]"; | ||
318 : | } | ||
319 : | |||
320 : | @$orgs = sort {$label->{$a} cmp $label->{$b}} @$orgs; | ||
321 : | |||
322 : | my $n_genomes = @$orgs; | ||
323 : | |||
324 : | return ( "<TABLE>\n", | ||
325 : | " <TR>\n", | ||
326 : | " <TD>", | ||
327 : | redwards | 1.6 | $cgi->scrolling_list( -name => 'korgs', |
328 : | -values => $orgs, | ||
329 : | -labels => $label, | ||
330 : | -size => 10, | ||
331 : | -multiple => $multiple, | ||
332 : | redwards | 1.5 | ), $cgi->br, |
333 : | "$n_genomes genomes shown ", | ||
334 : | $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br, | ||
335 : | " </TD>", | ||
336 : | " <TD>", | ||
337 : | join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n", | ||
338 : | join( "<br>", "<b>Completeness?</b>", @complete), "\n", | ||
339 : | "</TD>", | ||
340 : | " </TR>\n", | ||
341 : | "</TABLE>\n", | ||
342 : | $cgi->hr | ||
343 : | ); | ||
344 : | } | ||
345 : | |||
346 : | |||
347 : | redwards | 1.1 | |
348 : | |||
349 : | |||
350 : | 1; |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |