[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Sat Jan 29 20:58:48 2005 UTC revision 1.13, Wed May 4 03:07:23 2005 UTC
# Line 2  Line 2 
2    
3  =pod  =pod
4    
5  =head1  =head1 RAE Library
6    
7   Some routines and things that Rob uses. Please feel free to use at will and incorporate into   Some routines and things that Rob uses. Please feel free to use at will and incorporate into
8   your own code or move them into FIG.pm or elsewhere.   your own code or move them into FIG.pm or elsewhere.
# Line 14  Line 14 
14  use FIG;  use FIG;
15  my $fig=new FIG;  my $fig=new FIG;
16    
17  =head2 pirsfcorrespondance  =head2 new
18    
19   Generate the pirsf->fig id correspondance. This is only done once and the correspondance file is written.  Just instantiate the object and return $self
20    
21    =cut
22    
23    sub new {
24     my $self=shift;
25     return $self
26    }
27    
28    
29    
30    
31    =head2 features_on_contig
32    
33     Returns a reference to an array containing all the features on a contig in a genome.
34    
35     use:
36    
37     my $arrayref=$rae->features_on_contig($genome, $contig);
38    
39     or
40    
41     foreach my $peg (@{$rae->features_on_contig($genome, $contig)}) {
42      ... blah blah ...
43     }
44    
45     returns undef if contig is not a part of genome or there is nothing to return, otherwise returns a list of pegs
46    
47     v. experimental and guaranteed not to work!
48    
49    =cut
50    
51    sub features_on_contig {
52     my ($self, $genome, $contig)=@_;
53     # were this in FIG.pm you'd use this line:
54     #my $rdbH = $self->db_handle;
55    
56     my $rdbH = $fig->db_handle;
57     my $relational_db_response=$rdbH->SQL('SELECT id FROM features WHERE  (genome = \'' . $genome . '\' AND  location ~* \'' . $contig . '\')');
58     # this is complicated. A reference to an array of references to arrays, and we only want the first element.
59     # simplify.
60     my @results;
61     foreach my $res (@$relational_db_response) {push @results, $res->[0]}
62     return \@results;
63    }
64    
65    
66    
67    
68    
69    
70    
71    =head2 pirsfcorrespondence
72    
73     Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written.
74   This is so that we can easily go back and forth.   This is so that we can easily go back and forth.
75    
76   The correspondance has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat   The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat
77    
78     This method takes three arguments:
79       from    : pirsfinfo.dat file
80       to      : file to write information to
81       verbose : report on progress
82    
83     Returns the number of lines in the pirsinfo file that were read.
84    
85  =cut  =cut
86    
87  sub pirsfcorrespondance {  sub pirsfcorrespondence {
88   my ($self, $from, $to)=@_;   my ($self, $from, $to, $verbose)=@_;
89   die "File $from does not exist as called in $0" unless (-e $from);   unless (-e $from) {
90      print STDERR "File $from does not exist as called in $0\n";
91      return 0;
92     }
93   open (IN, $from) || die "Can't open $from";   open (IN, $from) || die "Can't open $from";
94   open (OUT, ">$to") || die "Can't write tot $to";   open (OUT, ">$to") || die "Can't write to $to";
95     my $linecount;
96   while (<IN>) {   while (<IN>) {
97      $linecount++;
98      unless ($linecount % 10000) {print STDERR "Correspondence of $linecount lines calculated\n"}
99    if (/^>/) {print OUT; next}    if (/^>/) {print OUT; next}
100    chomp;    chomp;
101    my $done;    my $done;
102    foreach my $peg ($fig->by_alias($_)) {    foreach my $peg ($fig->by_alias("uni|$_")) {
103       print OUT $_, "\t", $peg, "\n";
104       $done=1;
105      }
106      unless ($done) {
107       foreach my $peg ($fig->by_alias("tr|$_")) {
108     print OUT $_, "\t", $peg, "\n";     print OUT $_, "\t", $peg, "\n";
109     $done=1;     $done=1;
110    }    }
111      }
112      unless ($done) {
113       foreach my $peg ($fig->by_alias("sp|$_")) {
114        print OUT $_, "\t", $peg, "\n";
115        $done=1;
116       }
117      }
118    unless ($done) {print OUT $_, "\t\n"}    unless ($done) {print OUT $_, "\t\n"}
119   }   }
120   close IN;   close IN;
121   close OUT;   close OUT;
122     return $linecount;
123  }  }
124    
125    
# Line 53  Line 133 
133  sub ss_by_id {  sub ss_by_id {
134   my ($self, $peg)=@_;   my ($self, $peg)=@_;
135   my $ssout;   my $ssout;
  print STDERR "RAELIB Looking for ss from $peg\n";  
136   foreach my $ss (sort $fig->subsystems_for_peg($peg))   foreach my $ss (sort $fig->subsystems_for_peg($peg))
137   {   {
138    next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems    next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems
# Line 63  Line 142 
142   return $ssout;   return $ssout;
143  }  }
144    
145    =head2 ss_by_homol
146    
147     Generate a list of subsystems that homologs of a peg occur in. This is a ; separated list.
148     This is also a wrapper around sims and ss, but makes everything unified
149    
150    =cut
151    
152    sub ss_by_homol {
153     my ($self, $peg)=@_;
154     return unless ($peg);
155     my ($maxN, $maxP)=(50, 1e-20);
156    
157     # find the sims
158     my @sims=$fig->sims($peg, $maxN, $maxP, 'fig');
159    
160     # we are only going to keep the best hit for each peg
161     # in a subsystem
162     my $best_ss_score; my $best_ss_id;
163     foreach my $sim (@sims)
164     {
165      my $simpeg=$$sim[1];
166      my $simscore=$$sim[10];
167      my @subsys=$fig->subsystems_for_peg($simpeg);
168      foreach my $ss (@subsys)
169      {
170       if (! defined $best_ss_score->{$$ss[0]}) {$best_ss_score->{$$ss[0]}=$simscore; $best_ss_id->{$$ss[0]}=$simpeg}
171       elsif ($best_ss_score->{$$ss[0]} > $simscore)
172       {
173        $best_ss_score->{$$ss[0]}=$simscore;
174        $best_ss_id->{$$ss[0]}=$simpeg;
175       }
176      }
177     }
178    
179     my $ssoutput=join "", (map {"$_ (".$best_ss_id->{$_}."), "} keys %$best_ss_id);
180    
181     $ssoutput =~ s/, $//;
182     return $ssoutput;
183    }
184    
185    =head2 tagvalue
186    
187     This will just check for tag value pairs and return either an array of values or a single ; separated list (if called as a scalar)
188    
189     e.g. $values=raelib->tagvalue($peg, "PIRSF"); print join "\n", @$values;
190    
191     Returns an empty array if no tag/value appropriate.
192    
193     Just because I use this a lot I don't want to waste rewriting it.
194    
195    =cut
196    
197    sub tagvalue {
198     my ($self, $peg, $tag)=@_;
199     my @return;
200     my @attr=$fig->feature_attributes($peg);
201     foreach my $attr (@attr) {
202      my ($gotpeg, $gottag, $val, $link)=@$attr;
203      push @return, $val if ($gottag eq $tag);
204     }
205     return wantarray ? @return : join "; ", @return;
206    }
207    
208    =head2 locations_on_contig
209    
210    Return the locations of a sequence on a contig.
211    
212    This will look for exact matches to a sequence on a contig, and return a reference to an array that has all the locations.
213    
214    my $locations=$raelib->locations_on_contig($genome, $contig, 'GATC', undef);
215    foreach my $bp (@$locations) { ... do something ... }
216    
217    first argument  : genome number
218    second argument : contig name
219    third argument  : sequence to look for
220    fourth argument : beginning position to start looking from (can be undef)
221    fifth argument  : end position to stop looking from (can be undef)
222    sixth argument : check reverse complement (0 or undef will check forward, 1 or true will check rc)
223    
224    Note, the position is calculated before the sequence is rc'd
225    
226    =cut
227    
228    sub locations_on_contig {
229     my ($self, $genome, $contig, $sequence, $from, $to, $check_reverse)=@_;
230     my $return=[];
231    
232     # get the dna sequence of the contig, and make sure it is uppercase
233     my $contig_ln=$fig->contig_ln($genome, $contig);
234     return $return unless ($contig_ln);
235     unless ($from) {$from=1}
236     unless ($to) {$to=$contig_ln}
237     if ($from > $to) {($from, $to)=($to, $from)}
238     my $dna_seq=$fig->dna_seq($genome, $contig."_".$from."_".$to);
239     $dna_seq=uc($dna_seq);
240    
241     # if we want to check the rc, we actually rc the query
242     $sequence=$fig->reverse_comp($sequence) if ($check_reverse);
243     $sequence=uc($sequence);
244    
245     # now find all the matches
246     my $posn=index($dna_seq, $sequence, 0);
247     while ($posn > -1) {
248      push @$return, $posn;
249      $posn=index($dna_seq, $sequence, $posn+1);
250     }
251     return $return;
252    }
253    
254    
255    =head2 scrolling_org_list
256    
257    This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.
258    
259    use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple);
260    
261    multiple selections will only be set if $multiple is true
262    
263    =cut
264    
265    sub scrolling_org_list {
266     my ($self, $cgi, $multiple)=@_;
267     unless ($multiple) {$multiple=0}
268    
269     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
270    
271     #
272     #  Canonical names must match the keywords used in the DBMS.  They are
273     #  defined in compute_genome_counts.pl
274     #
275     my %canonical = (
276            'All'                   =>  undef,
277            'Archaea'               => 'Archaea',
278            'Bacteria'              => 'Bacteria',
279            'Eucarya'               => 'Eukaryota',
280            'Viruses'               => 'Virus',
281            'Environmental samples' => 'Environmental Sample'
282         );
283    
284     my $req_dom = $cgi->param( 'domain' ) || 'All';
285     my @domains = $cgi->radio_group( -name     => 'domain',
286                                         -default  => $req_dom,
287                                         -override => 1,
288                                         -values   => [ @display ]
289                                    );
290    
291     my $n_domain = 0;
292     my %dom_num = map { ( $_, $n_domain++ ) } @display;
293     my $req_dom_num = $dom_num{ $req_dom } || 0;
294    
295     #
296     #  Viruses and Environmental samples must have completeness = All (that is
297     #  how they are in the database).  Otherwise, default is Only "complete".
298     #
299     my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
300                  : $cgi->param( 'complete' ) || 'Only "complete"';
301     my @complete = $cgi->radio_group( -name     => 'complete',
302                                       -default  => $req_comp,
303                                       -override => 1,
304                                        -values   => [ 'All', 'Only "complete"' ]
305                           );
306     #
307     #  Use $fig->genomes( complete, restricted, domain ) to get org list:
308     #
309     my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
310    
311     my $orgs; my $label;
312     @$orgs =  $fig->genomes( $complete, undef, $canonical{ $req_dom } );
313    
314     foreach (@$orgs) {
315       my $gs = $fig->genus_species($_);
316       my $gc=scalar $fig->all_contigs($_);
317       $label->{$_} = "$gs ($_) [$gc contigs]";
318      }
319    
320     @$orgs = sort {$label->{$a} cmp $label->{$b}} @$orgs;
321    
322     my $n_genomes = @$orgs;
323    
324     return (         "<TABLE>\n",
325                      "   <TR>\n",
326                      "      <TD>",
327                      $cgi->scrolling_list( -name     => 'korgs',
328                                            -values   => $orgs,
329                                            -labels   => $label,
330                                            -size     => 10,
331                                            -multiple => $multiple,
332                                          ), $cgi->br,
333                      "$n_genomes genomes shown ",
334                      $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
335                      "      </TD>",
336                      "      <TD>",
337                      join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
338                      join( "<br>", "<b>Completeness?</b>", @complete), "\n",
339                      "</TD>",
340                      "   </TR>\n",
341                      "</TABLE>\n",
342                      $cgi->hr
343            );
344    }
345    
346    
347    
348    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.13

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3