[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (view) (download) (as text)

1 : redwards 1.1 # -*- perl -*-
2 :    
3 :     =pod
4 :    
5 : parrello 1.13 =head1 RAE Library
6 : redwards 1.1
7 :     Some routines and things that Rob uses. Please feel free to use at will and incorporate into
8 :     your own code or move them into FIG.pm or elsewhere.
9 :    
10 :     =cut
11 :    
12 :     package raelib;
13 :     use strict;
14 : redwards 1.17 use Bio::SeqIO;
15 :     use Bio::Seq;
16 :     use Bio::SeqFeature::Generic;
17 : redwards 1.1 use FIG;
18 :     my $fig=new FIG;
19 :    
20 : redwards 1.5 =head2 new
21 :    
22 :     Just instantiate the object and return $self
23 :    
24 :     =cut
25 :    
26 :     sub new {
27 :     my $self=shift;
28 :     return $self
29 :     }
30 :    
31 :    
32 :    
33 :    
34 : redwards 1.4 =head2 features_on_contig
35 :    
36 :     Returns a reference to an array containing all the features on a contig in a genome.
37 :    
38 :     use:
39 :    
40 :     my $arrayref=$rae->features_on_contig($genome, $contig);
41 :    
42 :     or
43 :    
44 :     foreach my $peg (@{$rae->features_on_contig($genome, $contig)}) {
45 :     ... blah blah ...
46 :     }
47 :    
48 :     returns undef if contig is not a part of genome or there is nothing to return, otherwise returns a list of pegs
49 :    
50 :     v. experimental and guaranteed not to work!
51 :    
52 :     =cut
53 :    
54 :     sub features_on_contig {
55 :     my ($self, $genome, $contig)=@_;
56 :     # were this in FIG.pm you'd use this line:
57 :     #my $rdbH = $self->db_handle;
58 :    
59 :     my $rdbH = $fig->db_handle;
60 :     my $relational_db_response=$rdbH->SQL('SELECT id FROM features WHERE (genome = \'' . $genome . '\' AND location ~* \'' . $contig . '\')');
61 :     # this is complicated. A reference to an array of references to arrays, and we only want the first element.
62 :     # simplify.
63 :     my @results;
64 :     foreach my $res (@$relational_db_response) {push @results, $res->[0]}
65 :     return \@results;
66 :     }
67 :    
68 :    
69 :    
70 :    
71 :    
72 :    
73 :    
74 : redwards 1.7 =head2 pirsfcorrespondence
75 : redwards 1.1
76 : redwards 1.18 Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written. This is so that we can easily go back and forth.
77 : redwards 1.1
78 : redwards 1.18 The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat
79 : redwards 1.1
80 : redwards 1.18 This method takes three arguments:
81 : redwards 1.9 from : pirsfinfo.dat file
82 :     to : file to write information to
83 :     verbose : report on progress
84 :    
85 : redwards 1.18 Note that if the from filename ends in .gz it assumed to be a gzipped file and will be opened accordingly.
86 :    
87 :     Returns the number of lines in the pirsinfo file that were read.
88 : redwards 1.9
89 : redwards 1.1 =cut
90 :    
91 : redwards 1.7 sub pirsfcorrespondence {
92 : redwards 1.9 my ($self, $from, $to, $verbose)=@_;
93 : redwards 1.10 unless (-e $from) {
94 :     print STDERR "File $from does not exist as called in $0\n";
95 :     return 0;
96 :     }
97 : redwards 1.18 if ($from =~ /\.gz$/) {
98 :     open(IN, "|gunzip -c $from") || die "Can't open $from using a gunzip pipe";
99 :     }
100 :     else {
101 :     open (IN, $from) || die "Can't open $from";
102 :     }
103 : redwards 1.8 open (OUT, ">$to") || die "Can't write to $to";
104 : redwards 1.9 my $linecount;
105 : redwards 1.1 while (<IN>) {
106 : redwards 1.9 $linecount++;
107 : redwards 1.14 if ($verbose && !($linecount % 10000)) {print STDERR "Parsed $linecount lines\n"}
108 : redwards 1.1 if (/^>/) {print OUT; next}
109 :     chomp;
110 : redwards 1.14 foreach my $peg ($self->swiss_pir_ids($_)) {
111 : redwards 1.1 print OUT $_, "\t", $peg, "\n";
112 :     }
113 : redwards 1.14 }
114 :     close IN;
115 :     close OUT;
116 :     return $linecount;
117 :     }
118 :    
119 :     =head2 uniprotcorrespondence
120 :    
121 :     Generate a correspondence table between uniprot knowledge base IDs and FIG ID's.
122 :    
123 :     The uniprot KB file is in the form: UniProtKB_Primary_Accession | UniProtKB_ID | Section | Protein Name
124 :    
125 :     This method takes three arguments:
126 :     from : uniprotKB file
127 :     to : file to write information to
128 :     verbose : report on progress
129 :    
130 : redwards 1.18 Note that if the from filename ends in .gz it assumed to be a gzipped file and will be opened accordingly.
131 :    
132 :     Returns the number of lines in the uniprotkb file that were read.
133 : redwards 1.14
134 :     =cut
135 :    
136 :     sub uniprotcorrespondence {
137 :     my ($self, $from, $to, $verbose)=@_;
138 :     unless (-e $from) {
139 :     print STDERR "File $from does not exist as called in $0\n";
140 :     return 0;
141 :     }
142 : redwards 1.18 if ($from =~ /\.gz$/) {
143 :     open(IN, "|gunzip -c $from") || die "Can't open $from using a gunzip pipe";
144 :     }
145 :     else {
146 :     open (IN, $from) || die "Can't open $from";
147 :     }
148 : redwards 1.14 open (OUT, ">$to") || die "Can't write to $to";
149 :     my $linecount;
150 :     while (<IN>) {
151 :     chomp;
152 :     $linecount++;
153 :     if ($verbose && !($linecount % 10000)) {print STDERR "Parsed $linecount lines\n"}
154 :     my @line=split /\s+\|\s+/;
155 : redwards 1.16 my $added;
156 : redwards 1.14 foreach my $peg ($self->swiss_pir_ids($line[0])) {
157 :     print OUT "$_ | $peg\n";
158 : redwards 1.16 $added=1;
159 : redwards 1.12 }
160 : redwards 1.16 unless ($added) {print OUT "$_\n"}
161 : redwards 1.1 }
162 :     close IN;
163 :     close OUT;
164 : redwards 1.9 return $linecount;
165 : redwards 1.1 }
166 :    
167 : redwards 1.18 =head2 prositecorrespondence
168 :    
169 :     Generate a correspondence table between prosite and seed using sp id's and seed ids.
170 :    
171 :     The SwissProt prosite file is from ftp://ca.expasy.org/databases/prosite/release_with_updates/prosite.dat and is in horrible swiss prot format, so we'll parse out those things that we need and put them in the file
172 :    
173 :     The output file will have the following columns:
174 :    
175 :     prosite family accession number, prosite family name, family type, swiss-prot protein id, fig protein id.
176 :    
177 :     The family type is one of rule, pattern, or matrix. Right now (Prosite Release 19.2 of 24-May-2005) there are 4 rules, 1322 patterns, and 521 matrices.
178 :    
179 :     This method takes three arguments:
180 :     from : prosite file
181 :     to : file to write information to
182 :     verbose : report on progress
183 :    
184 :     Note that if the from filename ends in .gz it assumed to be a gzipped file and will be opened accordingly.
185 :    
186 :     Returns the number of lines in the prosite file that were read.
187 :    
188 :     =cut
189 :    
190 :     sub prositecorrespondence {
191 :     my ($self, $from, $to, $verbose)=@_;
192 :     unless (-e $from) {
193 :     print STDERR "File $from does not exist as called in $0\n";
194 :     return 0;
195 :     }
196 :     if ($from =~ /\.gz$/) {
197 :     open(IN, "|gunzip -c $from") || die "Can't open $from using a gunzip pipe";
198 :     }
199 :     else {
200 :     open (IN, $from) || die "Can't open $from";
201 :     }
202 :     open (OUT, ">$to") || die "Can't write to $to";
203 :     my $linecount;
204 :     my ($famac, $famname, $famtype)=('','','');
205 :     while (<IN>) {
206 :     chomp;
207 :     $linecount++;
208 :     if ($verbose && !($linecount % 10000)) {print STDERR "Parsed $linecount lines\n"}
209 :     if (m#//#) {($famac, $famname, $famtype)=('','',''); next}
210 :     elsif (m/^ID\s*(.*?);\s*(\S+)/) {($famname, $famtype)=($1, $2); next}
211 : redwards 1.19 elsif (m/^AC\s*(\S+)/) {$famac=$1; $famac =~ s/\;\s*$//; next}
212 : redwards 1.18 next unless (m/^DR/); # ignore all the other crap in the prosite file for now. Note we might, at some point, want to grab all that, but that is for another time.
213 :     #
214 :     # this is the format of the DR lines:
215 :     # DR P11460, FATB_VIBAN , T; P40409, FEUA_BACSU , T; P37580, FHUD_BACSU , T;
216 :     s/^DR\s*//;
217 :     foreach my $piece (split /\s*\;\s*/, $_) {
218 :     my ($acc, $nam, $unk)=split /\s*\,\s*/, $piece;
219 :     foreach my $fig ($self->swiss_pir_ids($acc)) {
220 : redwards 1.20 print OUT join "\t", ($famac, $famname, $famtype, $acc, $fig), "\n";
221 : redwards 1.18 }
222 :     }
223 :     }
224 :     }
225 : redwards 1.14
226 :     =head2 swiss_pir_ids()
227 :    
228 : redwards 1.18 SwissProt/PIR have lots of ID's that we want to get, usually in this order - uni --> tr --> sp. This routine will map swissprot/pir ids to fig id's, and return an array of FIG id's that match the ID.
229 : redwards 1.14
230 :     =cut
231 :    
232 :     sub swiss_pir_ids {
233 :     my ($self, $id)=@_;
234 :     return () unless ($id);
235 : redwards 1.18 $id =~ s/^\s+//; $id =~ s/\s+$//; # trim off the whitespace
236 :    
237 : redwards 1.15 my @return=($fig->by_alias("uni|$id"));
238 : redwards 1.14 return @return if ($return[0]);
239 :    
240 : redwards 1.15 @return=($fig->by_alias("tr|$id"));
241 : redwards 1.14 return @return if ($return[0]);
242 :    
243 : redwards 1.15 @return=($fig->by_alias("sp|$id"));
244 : redwards 1.14 return @return if ($return[0]);
245 :    
246 :     return ();
247 :     }
248 : redwards 1.1
249 :     =head2 ss_by_id
250 :    
251 :     Generate a list of subsystems that a peg occurs in. This is a ; separated list.
252 :     This is a wrapper that removes roles and ignores essential things
253 :    
254 :     =cut
255 :    
256 :     sub ss_by_id {
257 :     my ($self, $peg)=@_;
258 :     my $ssout;
259 :     foreach my $ss (sort $fig->subsystems_for_peg($peg))
260 :     {
261 :     next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems
262 :     $ssout.=$$ss[0]."; ";
263 :     }
264 :     $ssout =~ s/; $//;
265 :     return $ssout;
266 :     }
267 :    
268 : redwards 1.3 =head2 ss_by_homol
269 :    
270 :     Generate a list of subsystems that homologs of a peg occur in. This is a ; separated list.
271 :     This is also a wrapper around sims and ss, but makes everything unified
272 :    
273 :     =cut
274 :    
275 :     sub ss_by_homol {
276 :     my ($self, $peg)=@_;
277 :     return unless ($peg);
278 :     my ($maxN, $maxP)=(50, 1e-20);
279 :    
280 :     # find the sims
281 :     my @sims=$fig->sims($peg, $maxN, $maxP, 'fig');
282 :    
283 :     # we are only going to keep the best hit for each peg
284 :     # in a subsystem
285 :     my $best_ss_score; my $best_ss_id;
286 :     foreach my $sim (@sims)
287 :     {
288 :     my $simpeg=$$sim[1];
289 :     my $simscore=$$sim[10];
290 :     my @subsys=$fig->subsystems_for_peg($simpeg);
291 :     foreach my $ss (@subsys)
292 :     {
293 :     if (! defined $best_ss_score->{$$ss[0]}) {$best_ss_score->{$$ss[0]}=$simscore; $best_ss_id->{$$ss[0]}=$simpeg}
294 :     elsif ($best_ss_score->{$$ss[0]} > $simscore)
295 :     {
296 :     $best_ss_score->{$$ss[0]}=$simscore;
297 :     $best_ss_id->{$$ss[0]}=$simpeg;
298 :     }
299 :     }
300 :     }
301 :    
302 :     my $ssoutput=join "", (map {"$_ (".$best_ss_id->{$_}."), "} keys %$best_ss_id);
303 :    
304 :     $ssoutput =~ s/, $//;
305 :     return $ssoutput;
306 :     }
307 :    
308 :     =head2 tagvalue
309 :    
310 :     This will just check for tag value pairs and return either an array of values or a single ; separated list (if called as a scalar)
311 :    
312 :     e.g. $values=raelib->tagvalue($peg, "PIRSF"); print join "\n", @$values;
313 :    
314 :     Returns an empty array if no tag/value appropriate.
315 :    
316 :     Just because I use this a lot I don't want to waste rewriting it.
317 :    
318 :     =cut
319 :    
320 :     sub tagvalue {
321 :     my ($self, $peg, $tag)=@_;
322 :     my @return;
323 :     my @attr=$fig->feature_attributes($peg);
324 :     foreach my $attr (@attr) {
325 : redwards 1.11 my ($gotpeg, $gottag, $val, $link)=@$attr;
326 : redwards 1.3 push @return, $val if ($gottag eq $tag);
327 :     }
328 :     return wantarray ? @return : join "; ", @return;
329 :     }
330 : redwards 1.1
331 : redwards 1.5 =head2 locations_on_contig
332 :    
333 :     Return the locations of a sequence on a contig.
334 :    
335 :     This will look for exact matches to a sequence on a contig, and return a reference to an array that has all the locations.
336 :    
337 :     my $locations=$raelib->locations_on_contig($genome, $contig, 'GATC', undef);
338 :     foreach my $bp (@$locations) { ... do something ... }
339 :    
340 :     first argument : genome number
341 :     second argument : contig name
342 :     third argument : sequence to look for
343 :     fourth argument : beginning position to start looking from (can be undef)
344 :     fifth argument : end position to stop looking from (can be undef)
345 :     sixth argument : check reverse complement (0 or undef will check forward, 1 or true will check rc)
346 :    
347 :     Note, the position is calculated before the sequence is rc'd
348 :    
349 :     =cut
350 :    
351 :     sub locations_on_contig {
352 :     my ($self, $genome, $contig, $sequence, $from, $to, $check_reverse)=@_;
353 :     my $return=[];
354 :    
355 :     # get the dna sequence of the contig, and make sure it is uppercase
356 :     my $contig_ln=$fig->contig_ln($genome, $contig);
357 :     return $return unless ($contig_ln);
358 :     unless ($from) {$from=1}
359 :     unless ($to) {$to=$contig_ln}
360 :     if ($from > $to) {($from, $to)=($to, $from)}
361 :     my $dna_seq=$fig->dna_seq($genome, $contig."_".$from."_".$to);
362 :     $dna_seq=uc($dna_seq);
363 :    
364 :     # if we want to check the rc, we actually rc the query
365 :     $sequence=$fig->reverse_comp($sequence) if ($check_reverse);
366 :     $sequence=uc($sequence);
367 :    
368 :     # now find all the matches
369 :     my $posn=index($dna_seq, $sequence, 0);
370 :     while ($posn > -1) {
371 :     push @$return, $posn;
372 :     $posn=index($dna_seq, $sequence, $posn+1);
373 :     }
374 :     return $return;
375 :     }
376 :    
377 :    
378 :     =head2 scrolling_org_list
379 :    
380 :     This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.
381 :    
382 :     use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple);
383 :    
384 :     multiple selections will only be set if $multiple is true
385 :    
386 :     =cut
387 :    
388 :     sub scrolling_org_list {
389 :     my ($self, $cgi, $multiple)=@_;
390 :     unless ($multiple) {$multiple=0}
391 :    
392 :     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
393 :    
394 :     #
395 :     # Canonical names must match the keywords used in the DBMS. They are
396 :     # defined in compute_genome_counts.pl
397 :     #
398 :     my %canonical = (
399 :     'All' => undef,
400 :     'Archaea' => 'Archaea',
401 :     'Bacteria' => 'Bacteria',
402 :     'Eucarya' => 'Eukaryota',
403 :     'Viruses' => 'Virus',
404 :     'Environmental samples' => 'Environmental Sample'
405 :     );
406 :    
407 :     my $req_dom = $cgi->param( 'domain' ) || 'All';
408 :     my @domains = $cgi->radio_group( -name => 'domain',
409 :     -default => $req_dom,
410 :     -override => 1,
411 :     -values => [ @display ]
412 :     );
413 :    
414 :     my $n_domain = 0;
415 :     my %dom_num = map { ( $_, $n_domain++ ) } @display;
416 :     my $req_dom_num = $dom_num{ $req_dom } || 0;
417 :    
418 :     #
419 :     # Viruses and Environmental samples must have completeness = All (that is
420 :     # how they are in the database). Otherwise, default is Only "complete".
421 :     #
422 :     my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
423 :     : $cgi->param( 'complete' ) || 'Only "complete"';
424 :     my @complete = $cgi->radio_group( -name => 'complete',
425 :     -default => $req_comp,
426 :     -override => 1,
427 :     -values => [ 'All', 'Only "complete"' ]
428 :     );
429 :     #
430 :     # Use $fig->genomes( complete, restricted, domain ) to get org list:
431 :     #
432 :     my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
433 :    
434 :     my $orgs; my $label;
435 :     @$orgs = $fig->genomes( $complete, undef, $canonical{ $req_dom } );
436 :    
437 :     foreach (@$orgs) {
438 :     my $gs = $fig->genus_species($_);
439 :     my $gc=scalar $fig->all_contigs($_);
440 :     $label->{$_} = "$gs ($_) [$gc contigs]";
441 :     }
442 :    
443 :     @$orgs = sort {$label->{$a} cmp $label->{$b}} @$orgs;
444 :    
445 :     my $n_genomes = @$orgs;
446 :    
447 :     return ( "<TABLE>\n",
448 :     " <TR>\n",
449 :     " <TD>",
450 : redwards 1.6 $cgi->scrolling_list( -name => 'korgs',
451 :     -values => $orgs,
452 :     -labels => $label,
453 :     -size => 10,
454 :     -multiple => $multiple,
455 : redwards 1.5 ), $cgi->br,
456 :     "$n_genomes genomes shown ",
457 :     $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
458 :     " </TD>",
459 :     " <TD>",
460 :     join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
461 :     join( "<br>", "<b>Completeness?</b>", @complete), "\n",
462 :     "</TD>",
463 :     " </TR>\n",
464 :     "</TABLE>\n",
465 :     );
466 :     }
467 :    
468 : redwards 1.21
469 :     =head2 scrolling_subsys_list
470 :    
471 :     Create a scrolling list of all subsystems. Just like scrolling_org_list, this will make the list and allow you to select multiples.
472 :    
473 :     use like this
474 :    
475 :     push @$html, $raelib->scrolling_subsys_list($cgi, $multiple);
476 :    
477 :     =cut
478 :    
479 :     sub scrolling_subsys_list {
480 :     my ($self, $cgi, $multiple)=@_;
481 :     $multiple=0 unless (defined $multiple);
482 : redwards 1.22 my @ss=sort {uc($a) cmp uc($b)} $fig->all_subsystems();
483 : redwards 1.21 my $label;
484 :     # generate labels for the list
485 :     foreach my $s (@ss) {my $k=$s; $k =~ s/\_/ /g; $k =~ s/ / /g; $k =~ s/\s+$//; $label->{$s}=$k}
486 :     return $cgi->scrolling_list(
487 :     -name => 'subsystems',
488 :     -values => \@ss,
489 :     -labels => $label,
490 :     -size => 10,
491 :     -multiple=> $multiple,
492 :     );
493 :     }
494 :    
495 :     =head2 subsys_names_for_display
496 :    
497 :     Return a list of subsystem names for display. This will take a list as an argument and return a nice clean list for display.
498 :    
499 :     $raelib->subsys_names_for_display(@ss);
500 :     or
501 :     $raelib->subsys_names_for_display($fig->all_subsystems());
502 :    
503 :     =cut
504 :    
505 :     sub subsys_names_for_display {
506 :     my ($self, @ss)=@_;
507 :     foreach (@ss) {s/\_/ /g; 1 while (s/ / /g); s/\s+$//}
508 :     return @ss;
509 :     }
510 :    
511 : redwards 1.17 =head2 GenBank
512 : redwards 1.5
513 : redwards 1.17 This object will take a genome number and return a Bio::Seq::RichSeq object that has the whole genome
514 :     in GenBank format. This should be a nice way of getting some data out, but will probably be quite slow
515 :     at building the object.
516 : redwards 1.1
517 : redwards 1.17 Note that you need to call this with the genome name and the contig. This will then go through that contig.
518 : redwards 1.1
519 : redwards 1.17 Something like this should work
520 :    
521 :     foreach my $contig ($fig->all_contigs($genome)) {
522 :     my $seqobj=FIGRob->GenBank($genome, $contig);
523 :     # process the contig
524 :     }
525 :    
526 :     =cut
527 :    
528 :     sub GenBank {
529 :     my ($self, $genome, $contig)=@_;
530 :     my $gs=$fig->genus_species($genome);
531 :     return unless ($gs);
532 :     unless ($contig) {
533 :     print STDERR "You didn't provide a contig for $gs. I think that was a mistake. Sorry\n";
534 :     return;
535 :     }
536 :     my $len=$fig->contig_ln($genome, $contig);
537 :     unless ($len) {
538 :     print STDERR "$contig from $gs doesn't appear to have a length. Is it right?\n";
539 :     return;
540 :     }
541 :    
542 :    
543 :     # first find all the pegs ...
544 :     my $features; # all the features in the genome
545 :     my $allpegs; # all the pegs
546 :     my $translation; # all the protein sequences
547 :     foreach my $peg ($fig->pegs_of($genome)) {
548 :     my @location=$fig->feature_location($peg);
549 :     my $func=$fig->function_of($peg);
550 :     foreach my $loc (@location) {
551 :     $loc =~ /^(.*)\_(\d+)\_(\d+)$/;
552 :     my ($cg, $start, $stop)=($1, $2, $3);
553 :     next unless ($cg eq $contig);
554 :     # save this information for later
555 :     $features->{'peg'}->{$loc}=$func;
556 :     $allpegs->{'peg'}->{$loc}=$peg;
557 :     $translation->{'peg'}->{$loc}=$fig->get_translation($peg);
558 :     }
559 :     }
560 :     # ... and all the RNAs
561 :     foreach my $peg ($fig->rnas_of($genome)) {
562 :     my @location=$fig->feature_location($peg);
563 :     my $func=$fig->function_of($peg);
564 :     foreach my $loc (@location) {
565 :     $loc =~ /^(.*)\_(\d+)\_(\d+)$/;
566 :     my ($cg, $start, $stop)=($1, $2, $3);
567 :     next unless ($cg eq $contig);
568 :     # save this information for later
569 :     $features->{'rna'}->{$loc}=$func;
570 :     $allpegs->{'rna'}->{$loc}=$peg;
571 :     }
572 :     }
573 :    
574 :    
575 :     # now get all the contigs out
576 :     my $seq=$fig->dna_seq($genome, $contig."_1_".$len);
577 :     my $description = "Contig $contig from " . $fig->genus_species($genome);
578 :     my $sobj=Bio::Seq->new(
579 :     -seq => $seq,
580 :     -id => $contig,
581 :     -desc => $description,
582 :     -accession_number => $genome
583 :     );
584 :     foreach my $prot (keys %{$features->{'peg'}}) {
585 :     $prot =~ /^(.*)\_(\d+)\_(\d+)$/;
586 :     my ($cg, $start, $stop)=($1, $2, $3);
587 :     my $strand=1;
588 :     if ($stop < $start) {
589 :     ($stop, $start)=($start, $stop);
590 :     $strand=-1;
591 :     }
592 :    
593 :     my $feat=Bio::SeqFeature::Generic->new(
594 :     -start => $start,
595 :     -end => $stop,
596 :     -strand => $strand,
597 :     -primary => 'CDS',
598 :     -display_name => $allpegs->{'peg'}->{$prot},
599 :     -source_tag => 'the SEED',
600 :     -tag =>
601 :     {
602 :     db_xref => $allpegs->{'peg'}->{$prot},
603 :     note => 'Generated by the Fellowship for the Interpretation of Genomes',
604 :     function => $features->{'peg'}->{$prot},
605 :     translation => $translation->{'peg'}->{$prot}
606 :     }
607 :     );
608 :    
609 :     $sobj->add_SeqFeature($feat);
610 :     }
611 :    
612 :     foreach my $prot (keys %{$features->{'rna'}}) {
613 :     $prot =~ /^(.*)\_(\d+)\_(\d+)$/;
614 :     my ($cg, $start, $stop)=($1, $2, $3);
615 :     my $strand=1;
616 :     if ($stop < $start) {
617 :     ($stop, $start)=($start, $stop);
618 :     $strand=-1;
619 :     }
620 :    
621 :     my $feat=Bio::SeqFeature::Generic->new(
622 :     -start => $start,
623 :     -end => $stop,
624 :     -strand => $strand,
625 :     -primary => 'RNA',
626 :     -source_tag => 'the SEED',
627 :     -display_name => $allpegs->{'rna'}->{$prot},
628 :     -tag =>
629 :     {
630 :     db_xref => $allpegs->{'rna'}->{$prot},
631 :     note => 'Generated by the Fellowship for the Interpretation of Genomes',
632 :     function => $features->{'rna'}->{$prot},
633 :     }
634 :     );
635 :    
636 :     $sobj->add_SeqFeature($feat);
637 :     }
638 :     return $sobj;
639 :     }
640 :    
641 :     =head2 best_hit
642 :    
643 :     Returns the FIG id of the single best hit to a peg
644 :    
645 :     eg
646 :    
647 :     my $bh=$fr->best_hit($peg);
648 :     print 'function is ', scalar $fig->function_of($bh);
649 :    
650 :     =cut
651 :    
652 :     sub best_hit {
653 :     my ($self, $peg)=@_;
654 :     return unless ($peg);
655 :    
656 :     my ($maxN, $maxP)=(1, 1e-5);
657 :     my @sims=$fig->sims($peg, $maxN, $maxP, 'fig');
658 :     return ${$sims[0]}[1];
659 :     }
660 : redwards 1.1
661 :     1;
662 : redwards 1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3