[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Mon Jan 31 16:58:38 2005 UTC revision 1.11, Fri Apr 8 20:22:23 2005 UTC
# Line 14  Line 14 
14  use FIG;  use FIG;
15  my $fig=new FIG;  my $fig=new FIG;
16    
17  =head2 pirsfcorrespondance  =head2 new
18    
19   Generate the pirsf->fig id correspondance. This is only done once and the correspondance file is written.  Just instantiate the object and return $self
20    
21    =cut
22    
23    sub new {
24     my $self=shift;
25     return $self
26    }
27    
28    
29    
30    
31    =head2 features_on_contig
32    
33     Returns a reference to an array containing all the features on a contig in a genome.
34    
35     use:
36    
37     my $arrayref=$rae->features_on_contig($genome, $contig);
38    
39     or
40    
41     foreach my $peg (@{$rae->features_on_contig($genome, $contig)}) {
42      ... blah blah ...
43     }
44    
45     returns undef if contig is not a part of genome or there is nothing to return, otherwise returns a list of pegs
46    
47     v. experimental and guaranteed not to work!
48    
49    =cut
50    
51    sub features_on_contig {
52     my ($self, $genome, $contig)=@_;
53     # were this in FIG.pm you'd use this line:
54     #my $rdbH = $self->db_handle;
55    
56     my $rdbH = $fig->db_handle;
57     my $relational_db_response=$rdbH->SQL('SELECT id FROM features WHERE  (genome = \'' . $genome . '\' AND  location ~* \'' . $contig . '\')');
58     # this is complicated. A reference to an array of references to arrays, and we only want the first element.
59     # simplify.
60     my @results;
61     foreach my $res (@$relational_db_response) {push @results, $res->[0]}
62     return \@results;
63    }
64    
65    
66    
67    
68    
69    
70    
71    =head2 pirsfcorrespondence
72    
73     Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written.
74   This is so that we can easily go back and forth.   This is so that we can easily go back and forth.
75    
76   The correspondance has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat   The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat
77    
78     This method takes three arguments:
79       from    : pirsfinfo.dat file
80       to      : file to write information to
81       verbose : report on progress
82    
83     Returns the number of lines in the pirsinfo file that were read.
84    
85  =cut  =cut
86    
87  sub pirsfcorrespondance {  sub pirsfcorrespondence {
88   my ($self, $from, $to)=@_;   my ($self, $from, $to, $verbose)=@_;
89   die "File $from does not exist as called in $0" unless (-e $from);   unless (-e $from) {
90      print STDERR "File $from does not exist as called in $0\n";
91      return 0;
92     }
93   open (IN, $from) || die "Can't open $from";   open (IN, $from) || die "Can't open $from";
94   open (OUT, ">$to") || die "Can't write tot $to";   open (OUT, ">$to") || die "Can't write to $to";
95     my $linecount;
96   while (<IN>) {   while (<IN>) {
97      $linecount++;
98      unless ($linecount % 10000) {print STDERR "Correspondence of $linecount lines calculated\n"}
99    if (/^>/) {print OUT; next}    if (/^>/) {print OUT; next}
100    chomp;    chomp;
101    my $done;    my $done;
# Line 40  Line 107 
107   }   }
108   close IN;   close IN;
109   close OUT;   close OUT;
110     return $linecount;
111  }  }
112    
113    
# Line 53  Line 121 
121  sub ss_by_id {  sub ss_by_id {
122   my ($self, $peg)=@_;   my ($self, $peg)=@_;
123   my $ssout;   my $ssout;
  print STDERR "RAELIB Looking for ss from $peg\n";  
124   foreach my $ss (sort $fig->subsystems_for_peg($peg))   foreach my $ss (sort $fig->subsystems_for_peg($peg))
125   {   {
126    next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems    next if ($$ss[0] =~ /essential/i); # Ignore the Essential B-subtilis subsystems
# Line 63  Line 130 
130   return $ssout;   return $ssout;
131  }  }
132    
133    =head2 ss_by_homol
134    
135     Generate a list of subsystems that homologs of a peg occur in. This is a ; separated list.
136     This is also a wrapper around sims and ss, but makes everything unified
137    
138    =cut
139    
140    sub ss_by_homol {
141     my ($self, $peg)=@_;
142     return unless ($peg);
143     my ($maxN, $maxP)=(50, 1e-20);
144    
145     # find the sims
146     my @sims=$fig->sims($peg, $maxN, $maxP, 'fig');
147    
148     # we are only going to keep the best hit for each peg
149     # in a subsystem
150     my $best_ss_score; my $best_ss_id;
151     foreach my $sim (@sims)
152     {
153      my $simpeg=$$sim[1];
154      my $simscore=$$sim[10];
155      my @subsys=$fig->subsystems_for_peg($simpeg);
156      foreach my $ss (@subsys)
157      {
158       if (! defined $best_ss_score->{$$ss[0]}) {$best_ss_score->{$$ss[0]}=$simscore; $best_ss_id->{$$ss[0]}=$simpeg}
159       elsif ($best_ss_score->{$$ss[0]} > $simscore)
160       {
161        $best_ss_score->{$$ss[0]}=$simscore;
162        $best_ss_id->{$$ss[0]}=$simpeg;
163       }
164      }
165     }
166    
167     my $ssoutput=join "", (map {"$_ (".$best_ss_id->{$_}."), "} keys %$best_ss_id);
168    
169     $ssoutput =~ s/, $//;
170     return $ssoutput;
171    }
172    
173    =head2 tagvalue
174    
175     This will just check for tag value pairs and return either an array of values or a single ; separated list (if called as a scalar)
176    
177     e.g. $values=raelib->tagvalue($peg, "PIRSF"); print join "\n", @$values;
178    
179     Returns an empty array if no tag/value appropriate.
180    
181     Just because I use this a lot I don't want to waste rewriting it.
182    
183    =cut
184    
185    sub tagvalue {
186     my ($self, $peg, $tag)=@_;
187     my @return;
188     my @attr=$fig->feature_attributes($peg);
189     foreach my $attr (@attr) {
190      my ($gotpeg, $gottag, $val, $link)=@$attr;
191      push @return, $val if ($gottag eq $tag);
192     }
193     return wantarray ? @return : join "; ", @return;
194    }
195    
196    =head2 locations_on_contig
197    
198    Return the locations of a sequence on a contig.
199    
200    This will look for exact matches to a sequence on a contig, and return a reference to an array that has all the locations.
201    
202    my $locations=$raelib->locations_on_contig($genome, $contig, 'GATC', undef);
203    foreach my $bp (@$locations) { ... do something ... }
204    
205    first argument  : genome number
206    second argument : contig name
207    third argument  : sequence to look for
208    fourth argument : beginning position to start looking from (can be undef)
209    fifth argument  : end position to stop looking from (can be undef)
210    sixth argument : check reverse complement (0 or undef will check forward, 1 or true will check rc)
211    
212    Note, the position is calculated before the sequence is rc'd
213    
214    =cut
215    
216    sub locations_on_contig {
217     my ($self, $genome, $contig, $sequence, $from, $to, $check_reverse)=@_;
218     my $return=[];
219    
220     # get the dna sequence of the contig, and make sure it is uppercase
221     my $contig_ln=$fig->contig_ln($genome, $contig);
222     return $return unless ($contig_ln);
223     unless ($from) {$from=1}
224     unless ($to) {$to=$contig_ln}
225     if ($from > $to) {($from, $to)=($to, $from)}
226     my $dna_seq=$fig->dna_seq($genome, $contig."_".$from."_".$to);
227     $dna_seq=uc($dna_seq);
228    
229     # if we want to check the rc, we actually rc the query
230     $sequence=$fig->reverse_comp($sequence) if ($check_reverse);
231     $sequence=uc($sequence);
232    
233     # now find all the matches
234     my $posn=index($dna_seq, $sequence, 0);
235     while ($posn > -1) {
236      push @$return, $posn;
237      $posn=index($dna_seq, $sequence, $posn+1);
238     }
239     return $return;
240    }
241    
242    
243    =head2 scrolling_org_list
244    
245    This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.
246    
247    use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple);
248    
249    multiple selections will only be set if $multiple is true
250    
251    =cut
252    
253    sub scrolling_org_list {
254     my ($self, $cgi, $multiple)=@_;
255     unless ($multiple) {$multiple=0}
256    
257     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
258    
259     #
260     #  Canonical names must match the keywords used in the DBMS.  They are
261     #  defined in compute_genome_counts.pl
262     #
263     my %canonical = (
264            'All'                   =>  undef,
265            'Archaea'               => 'Archaea',
266            'Bacteria'              => 'Bacteria',
267            'Eucarya'               => 'Eukaryota',
268            'Viruses'               => 'Virus',
269            'Environmental samples' => 'Environmental Sample'
270         );
271    
272     my $req_dom = $cgi->param( 'domain' ) || 'All';
273     my @domains = $cgi->radio_group( -name     => 'domain',
274                                         -default  => $req_dom,
275                                         -override => 1,
276                                         -values   => [ @display ]
277                                    );
278    
279     my $n_domain = 0;
280     my %dom_num = map { ( $_, $n_domain++ ) } @display;
281     my $req_dom_num = $dom_num{ $req_dom } || 0;
282    
283     #
284     #  Viruses and Environmental samples must have completeness = All (that is
285     #  how they are in the database).  Otherwise, default is Only "complete".
286     #
287     my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
288                  : $cgi->param( 'complete' ) || 'Only "complete"';
289     my @complete = $cgi->radio_group( -name     => 'complete',
290                                       -default  => $req_comp,
291                                       -override => 1,
292                                        -values   => [ 'All', 'Only "complete"' ]
293                           );
294     #
295     #  Use $fig->genomes( complete, restricted, domain ) to get org list:
296     #
297     my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
298    
299     my $orgs; my $label;
300     @$orgs =  $fig->genomes( $complete, undef, $canonical{ $req_dom } );
301    
302     foreach (@$orgs) {
303       my $gs = $fig->genus_species($_);
304       my $gc=scalar $fig->all_contigs($_);
305       $label->{$_} = "$gs ($_) [$gc contigs]";
306      }
307    
308     @$orgs = sort {$label->{$a} cmp $label->{$b}} @$orgs;
309    
310     my $n_genomes = @$orgs;
311    
312     return (         "<TABLE>\n",
313                      "   <TR>\n",
314                      "      <TD>",
315                      $cgi->scrolling_list( -name     => 'korgs',
316                                            -values   => $orgs,
317                                            -labels   => $label,
318                                            -size     => 10,
319                                            -multiple => $multiple,
320                                          ), $cgi->br,
321                      "$n_genomes genomes shown ",
322                      $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
323                      "      </TD>",
324                      "      <TD>",
325                      join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
326                      join( "<br>", "<b>Completeness?</b>", @complete), "\n",
327                      "</TD>",
328                      "   </TR>\n",
329                      "</TABLE>\n",
330                      $cgi->hr
331            );
332    }
333    
334    
335    
336    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3