[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Mon Jan 31 16:58:38 2005 UTC revision 1.7, Sun Apr 3 02:15:15 2005 UTC
# Line 14  Line 14 
14  use FIG;  use FIG;
15  my $fig=new FIG;  my $fig=new FIG;
16    
17  =head2 pirsfcorrespondance  =head2 new
18    
19   Generate the pirsf->fig id correspondance. This is only done once and the correspondance file is written.  Just instantiate the object and return $self
20    
21    =cut
22    
23    sub new {
24     my $self=shift;
25     return $self
26    }
27    
28    
29    
30    
31    =head2 features_on_contig
32    
33     Returns a reference to an array containing all the features on a contig in a genome.
34    
35     use:
36    
37     my $arrayref=$rae->features_on_contig($genome, $contig);
38    
39     or
40    
41     foreach my $peg (@{$rae->features_on_contig($genome, $contig)}) {
42      ... blah blah ...
43     }
44    
45     returns undef if contig is not a part of genome or there is nothing to return, otherwise returns a list of pegs
46    
47     v. experimental and guaranteed not to work!
48    
49    =cut
50    
51    sub features_on_contig {
52     my ($self, $genome, $contig)=@_;
53     # were this in FIG.pm you'd use this line:
54     #my $rdbH = $self->db_handle;
55    
56     my $rdbH = $fig->db_handle;
57     my $relational_db_response=$rdbH->SQL('SELECT id FROM features WHERE  (genome = \'' . $genome . '\' AND  location ~* \'' . $contig . '\')');
58     # this is complicated. A reference to an array of references to arrays, and we only want the first element.
59     # simplify.
60     my @results;
61     foreach my $res (@$relational_db_response) {push @results, $res->[0]}
62     return \@results;
63    }
64    
65    
66    
67    
68    
69    
70    
71    =head2 pirsfcorrespondence
72    
73     Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written.
74   This is so that we can easily go back and forth.   This is so that we can easily go back and forth.
75    
76   The correspondance has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat   The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat
77    
78  =cut  =cut
79    
80  sub pirsfcorrespondance {  sub pirsfcorrespondence {
81   my ($self, $from, $to)=@_;   my ($self, $from, $to)=@_;
82   die "File $from does not exist as called in $0" unless (-e $from);   die "File $from does not exist as called in $0" unless (-e $from);
83   open (IN, $from) || die "Can't open $from";   open (IN, $from) || die "Can't open $from";
# Line 53  Line 107 
107  sub ss_by_id {  sub ss_by_id {
108   my ($self, $peg)=@_;   my ($self, $peg)=@_;
109   my $ssout;   my $ssout;
  print STDERR "RAELIB Looking for ss from $peg\n";  
110   foreach my $ss (sort $fig->subsystems_for_peg($peg))   foreach my $ss (sort $fig->subsystems_for_peg($peg))
111   {   {
112    next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems    next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems
# Line 63  Line 116 
116   return $ssout;   return $ssout;
117  }  }
118    
119    =head2 ss_by_homol
120    
121     Generate a list of subsystems that homologs of a peg occur in. This is a ; separated list.
122     This is also a wrapper around sims and ss, but makes everything unified
123    
124    =cut
125    
126    sub ss_by_homol {
127     my ($self, $peg)=@_;
128     return unless ($peg);
129     my ($maxN, $maxP)=(50, 1e-20);
130    
131     # find the sims
132     my @sims=$fig->sims($peg, $maxN, $maxP, 'fig');
133    
134     # we are only going to keep the best hit for each peg
135     # in a subsystem
136     my $best_ss_score; my $best_ss_id;
137     foreach my $sim (@sims)
138     {
139      my $simpeg=$$sim[1];
140      my $simscore=$$sim[10];
141      my @subsys=$fig->subsystems_for_peg($simpeg);
142      foreach my $ss (@subsys)
143      {
144       if (! defined $best_ss_score->{$$ss[0]}) {$best_ss_score->{$$ss[0]}=$simscore; $best_ss_id->{$$ss[0]}=$simpeg}
145       elsif ($best_ss_score->{$$ss[0]} > $simscore)
146       {
147        $best_ss_score->{$$ss[0]}=$simscore;
148        $best_ss_id->{$$ss[0]}=$simpeg;
149       }
150      }
151     }
152    
153     my $ssoutput=join "", (map {"$_ (".$best_ss_id->{$_}."), "} keys %$best_ss_id);
154    
155     $ssoutput =~ s/, $//;
156     return $ssoutput;
157    }
158    
159    =head2 tagvalue
160    
161     This will just check for tag value pairs and return either an array of values or a single ; separated list (if called as a scalar)
162    
163     e.g. $values=raelib->tagvalue($peg, "PIRSF"); print join "\n", @$values;
164    
165     Returns an empty array if no tag/value appropriate.
166    
167     Just because I use this a lot I don't want to waste rewriting it.
168    
169    =cut
170    
171    sub tagvalue {
172     my ($self, $peg, $tag)=@_;
173     my @return;
174     my @attr=$fig->feature_attributes($peg);
175     foreach my $attr (@attr) {
176      my ($gottag, $val, $link)=@$attr;
177      push @return, $val if ($gottag eq $tag);
178     }
179     return wantarray ? @return : join "; ", @return;
180    }
181    
182    =head2 locations_on_contig
183    
184    Return the locations of a sequence on a contig.
185    
186    This will look for exact matches to a sequence on a contig, and return a reference to an array that has all the locations.
187    
188    my $locations=$raelib->locations_on_contig($genome, $contig, 'GATC', undef);
189    foreach my $bp (@$locations) { ... do something ... }
190    
191    first argument  : genome number
192    second argument : contig name
193    third argument  : sequence to look for
194    fourth argument : beginning position to start looking from (can be undef)
195    fifth argument  : end position to stop looking from (can be undef)
196    sixth argument : check reverse complement (0 or undef will check forward, 1 or true will check rc)
197    
198    Note, the position is calculated before the sequence is rc'd
199    
200    =cut
201    
202    sub locations_on_contig {
203     my ($self, $genome, $contig, $sequence, $from, $to, $check_reverse)=@_;
204     my $return=[];
205    
206     # get the dna sequence of the contig, and make sure it is uppercase
207     my $contig_ln=$fig->contig_ln($genome, $contig);
208     return $return unless ($contig_ln);
209     unless ($from) {$from=1}
210     unless ($to) {$to=$contig_ln}
211     if ($from > $to) {($from, $to)=($to, $from)}
212     my $dna_seq=$fig->dna_seq($genome, $contig."_".$from."_".$to);
213     $dna_seq=uc($dna_seq);
214    
215     # if we want to check the rc, we actually rc the query
216     $sequence=$fig->reverse_comp($sequence) if ($check_reverse);
217     $sequence=uc($sequence);
218    
219     # now find all the matches
220     my $posn=index($dna_seq, $sequence, 0);
221     while ($posn > -1) {
222      push @$return, $posn;
223      $posn=index($dna_seq, $sequence, $posn+1);
224     }
225     return $return;
226    }
227    
228    
229    =head2 scrolling_org_list
230    
231    This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.
232    
233    use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple);
234    
235    multiple selections will only be set if $multiple is true
236    
237    =cut
238    
239    sub scrolling_org_list {
240     my ($self, $cgi, $multiple)=@_;
241     unless ($multiple) {$multiple=0}
242    
243     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
244    
245     #
246     #  Canonical names must match the keywords used in the DBMS.  They are
247     #  defined in compute_genome_counts.pl
248     #
249     my %canonical = (
250            'All'                   =>  undef,
251            'Archaea'               => 'Archaea',
252            'Bacteria'              => 'Bacteria',
253            'Eucarya'               => 'Eukaryota',
254            'Viruses'               => 'Virus',
255            'Environmental samples' => 'Environmental Sample'
256         );
257    
258     my $req_dom = $cgi->param( 'domain' ) || 'All';
259     my @domains = $cgi->radio_group( -name     => 'domain',
260                                         -default  => $req_dom,
261                                         -override => 1,
262                                         -values   => [ @display ]
263                                    );
264    
265     my $n_domain = 0;
266     my %dom_num = map { ( $_, $n_domain++ ) } @display;
267     my $req_dom_num = $dom_num{ $req_dom } || 0;
268    
269     #
270     #  Viruses and Environmental samples must have completeness = All (that is
271     #  how they are in the database).  Otherwise, default is Only "complete".
272     #
273     my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
274                  : $cgi->param( 'complete' ) || 'Only "complete"';
275     my @complete = $cgi->radio_group( -name     => 'complete',
276                                       -default  => $req_comp,
277                                       -override => 1,
278                                        -values   => [ 'All', 'Only "complete"' ]
279                           );
280     #
281     #  Use $fig->genomes( complete, restricted, domain ) to get org list:
282     #
283     my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
284    
285     my $orgs; my $label;
286     @$orgs =  $fig->genomes( $complete, undef, $canonical{ $req_dom } );
287    
288     foreach (@$orgs) {
289       my $gs = $fig->genus_species($_);
290       my $gc=scalar $fig->all_contigs($_);
291       $label->{$_} = "$gs ($_) [$gc contigs]";
292      }
293    
294     @$orgs = sort {$label->{$a} cmp $label->{$b}} @$orgs;
295    
296     my $n_genomes = @$orgs;
297    
298     return (         "<TABLE>\n",
299                      "   <TR>\n",
300                      "      <TD>",
301                      $cgi->scrolling_list( -name     => 'korgs',
302                                            -values   => $orgs,
303                                            -labels   => $label,
304                                            -size     => 10,
305                                            -multiple => $multiple,
306                                          ), $cgi->br,
307                      "$n_genomes genomes shown ",
308                      $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
309                      "      </TD>",
310                      "      <TD>",
311                      join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
312                      join( "<br>", "<b>Completeness?</b>", @complete), "\n",
313                      "</TD>",
314                      "   </TR>\n",
315                      "</TABLE>\n",
316                      $cgi->hr
317            );
318    }
319    
320    
321    
322    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3