[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3, Wed Feb 2 01:07:44 2005 UTC revision 1.13, Wed May 4 03:07:23 2005 UTC
# Line 2  Line 2 
2    
3  =pod  =pod
4    
5  =head1  =head1 RAE Library
6    
7   Some routines and things that Rob uses. Please feel free to use at will and incorporate into   Some routines and things that Rob uses. Please feel free to use at will and incorporate into
8   your own code or move them into FIG.pm or elsewhere.   your own code or move them into FIG.pm or elsewhere.
# Line 14  Line 14 
14  use FIG;  use FIG;
15  my $fig=new FIG;  my $fig=new FIG;
16    
17  =head2 pirsfcorrespondance  =head2 new
18    
19   Generate the pirsf->fig id correspondance. This is only done once and the correspondance file is written.  Just instantiate the object and return $self
20    
21    =cut
22    
23    sub new {
24     my $self=shift;
25     return $self
26    }
27    
28    
29    
30    
31    =head2 features_on_contig
32    
33     Returns a reference to an array containing all the features on a contig in a genome.
34    
35     use:
36    
37     my $arrayref=$rae->features_on_contig($genome, $contig);
38    
39     or
40    
41     foreach my $peg (@{$rae->features_on_contig($genome, $contig)}) {
42      ... blah blah ...
43     }
44    
45     returns undef if contig is not a part of genome or there is nothing to return, otherwise returns a list of pegs
46    
47     v. experimental and guaranteed not to work!
48    
49    =cut
50    
51    sub features_on_contig {
52     my ($self, $genome, $contig)=@_;
53     # were this in FIG.pm you'd use this line:
54     #my $rdbH = $self->db_handle;
55    
56     my $rdbH = $fig->db_handle;
57     my $relational_db_response=$rdbH->SQL('SELECT id FROM features WHERE  (genome = \'' . $genome . '\' AND  location ~* \'' . $contig . '\')');
58     # this is complicated. A reference to an array of references to arrays, and we only want the first element.
59     # simplify.
60     my @results;
61     foreach my $res (@$relational_db_response) {push @results, $res->[0]}
62     return \@results;
63    }
64    
65    
66    
67    
68    
69    
70    
71    =head2 pirsfcorrespondence
72    
73     Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written.
74   This is so that we can easily go back and forth.   This is so that we can easily go back and forth.
75    
76   The correspondance has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat   The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat
77    
78     This method takes three arguments:
79       from    : pirsfinfo.dat file
80       to      : file to write information to
81       verbose : report on progress
82    
83     Returns the number of lines in the pirsinfo file that were read.
84    
85  =cut  =cut
86    
87  sub pirsfcorrespondance {  sub pirsfcorrespondence {
88   my ($self, $from, $to)=@_;   my ($self, $from, $to, $verbose)=@_;
89   die "File $from does not exist as called in $0" unless (-e $from);   unless (-e $from) {
90      print STDERR "File $from does not exist as called in $0\n";
91      return 0;
92     }
93   open (IN, $from) || die "Can't open $from";   open (IN, $from) || die "Can't open $from";
94   open (OUT, ">$to") || die "Can't write tot $to";   open (OUT, ">$to") || die "Can't write to $to";
95     my $linecount;
96   while (<IN>) {   while (<IN>) {
97      $linecount++;
98      unless ($linecount % 10000) {print STDERR "Correspondence of $linecount lines calculated\n"}
99    if (/^>/) {print OUT; next}    if (/^>/) {print OUT; next}
100    chomp;    chomp;
101    my $done;    my $done;
# Line 36  Line 103 
103     print OUT $_, "\t", $peg, "\n";     print OUT $_, "\t", $peg, "\n";
104     $done=1;     $done=1;
105    }    }
106      unless ($done) {
107       foreach my $peg ($fig->by_alias("tr|$_")) {
108        print OUT $_, "\t", $peg, "\n";
109        $done=1;
110       }
111      }
112      unless ($done) {
113       foreach my $peg ($fig->by_alias("sp|$_")) {
114        print OUT $_, "\t", $peg, "\n";
115        $done=1;
116       }
117      }
118    unless ($done) {print OUT $_, "\t\n"}    unless ($done) {print OUT $_, "\t\n"}
119   }   }
120   close IN;   close IN;
121   close OUT;   close OUT;
122     return $linecount;
123  }  }
124    
125    
# Line 119  Line 199 
199   my @return;   my @return;
200   my @attr=$fig->feature_attributes($peg);   my @attr=$fig->feature_attributes($peg);
201   foreach my $attr (@attr) {   foreach my $attr (@attr) {
202    my ($gottag, $val, $link)=@$attr;    my ($gotpeg, $gottag, $val, $link)=@$attr;
203    push @return, $val if ($gottag eq $tag);    push @return, $val if ($gottag eq $tag);
204   }   }
205   return wantarray ? @return : join "; ", @return;   return wantarray ? @return : join "; ", @return;
206  }  }
207    
208    =head2 locations_on_contig
209    
210    Return the locations of a sequence on a contig.
211    
212    This will look for exact matches to a sequence on a contig, and return a reference to an array that has all the locations.
213    
214    my $locations=$raelib->locations_on_contig($genome, $contig, 'GATC', undef);
215    foreach my $bp (@$locations) { ... do something ... }
216    
217    first argument  : genome number
218    second argument : contig name
219    third argument  : sequence to look for
220    fourth argument : beginning position to start looking from (can be undef)
221    fifth argument  : end position to stop looking from (can be undef)
222    sixth argument : check reverse complement (0 or undef will check forward, 1 or true will check rc)
223    
224    Note, the position is calculated before the sequence is rc'd
225    
226    =cut
227    
228    sub locations_on_contig {
229     my ($self, $genome, $contig, $sequence, $from, $to, $check_reverse)=@_;
230     my $return=[];
231    
232     # get the dna sequence of the contig, and make sure it is uppercase
233     my $contig_ln=$fig->contig_ln($genome, $contig);
234     return $return unless ($contig_ln);
235     unless ($from) {$from=1}
236     unless ($to) {$to=$contig_ln}
237     if ($from > $to) {($from, $to)=($to, $from)}
238     my $dna_seq=$fig->dna_seq($genome, $contig."_".$from."_".$to);
239     $dna_seq=uc($dna_seq);
240    
241     # if we want to check the rc, we actually rc the query
242     $sequence=$fig->reverse_comp($sequence) if ($check_reverse);
243     $sequence=uc($sequence);
244    
245     # now find all the matches
246     my $posn=index($dna_seq, $sequence, 0);
247     while ($posn > -1) {
248      push @$return, $posn;
249      $posn=index($dna_seq, $sequence, $posn+1);
250     }
251     return $return;
252    }
253    
254    
255    =head2 scrolling_org_list
256    
257    This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.
258    
259    use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple);
260    
261    multiple selections will only be set if $multiple is true
262    
263    =cut
264    
265    sub scrolling_org_list {
266     my ($self, $cgi, $multiple)=@_;
267     unless ($multiple) {$multiple=0}
268    
269     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
270    
271     #
272     #  Canonical names must match the keywords used in the DBMS.  They are
273     #  defined in compute_genome_counts.pl
274     #
275     my %canonical = (
276            'All'                   =>  undef,
277            'Archaea'               => 'Archaea',
278            'Bacteria'              => 'Bacteria',
279            'Eucarya'               => 'Eukaryota',
280            'Viruses'               => 'Virus',
281            'Environmental samples' => 'Environmental Sample'
282         );
283    
284     my $req_dom = $cgi->param( 'domain' ) || 'All';
285     my @domains = $cgi->radio_group( -name     => 'domain',
286                                         -default  => $req_dom,
287                                         -override => 1,
288                                         -values   => [ @display ]
289                                    );
290    
291     my $n_domain = 0;
292     my %dom_num = map { ( $_, $n_domain++ ) } @display;
293     my $req_dom_num = $dom_num{ $req_dom } || 0;
294    
295     #
296     #  Viruses and Environmental samples must have completeness = All (that is
297     #  how they are in the database).  Otherwise, default is Only "complete".
298     #
299     my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
300                  : $cgi->param( 'complete' ) || 'Only "complete"';
301     my @complete = $cgi->radio_group( -name     => 'complete',
302                                       -default  => $req_comp,
303                                       -override => 1,
304                                        -values   => [ 'All', 'Only "complete"' ]
305                           );
306     #
307     #  Use $fig->genomes( complete, restricted, domain ) to get org list:
308     #
309     my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
310    
311     my $orgs; my $label;
312     @$orgs =  $fig->genomes( $complete, undef, $canonical{ $req_dom } );
313    
314     foreach (@$orgs) {
315       my $gs = $fig->genus_species($_);
316       my $gc=scalar $fig->all_contigs($_);
317       $label->{$_} = "$gs ($_) [$gc contigs]";
318      }
319    
320     @$orgs = sort {$label->{$a} cmp $label->{$b}} @$orgs;
321    
322     my $n_genomes = @$orgs;
323    
324     return (         "<TABLE>\n",
325                      "   <TR>\n",
326                      "      <TD>",
327                      $cgi->scrolling_list( -name     => 'korgs',
328                                            -values   => $orgs,
329                                            -labels   => $label,
330                                            -size     => 10,
331                                            -multiple => $multiple,
332                                          ), $cgi->br,
333                      "$n_genomes genomes shown ",
334                      $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
335                      "      </TD>",
336                      "      <TD>",
337                      join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
338                      join( "<br>", "<b>Completeness?</b>", @complete), "\n",
339                      "</TD>",
340                      "   </TR>\n",
341                      "</TABLE>\n",
342                      $cgi->hr
343            );
344    }
345    
346    
347    
348    
349    

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.13

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3