[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Fri Feb 11 21:26:25 2005 UTC revision 1.11, Fri Apr 8 20:22:23 2005 UTC
# Line 14  Line 14 
14  use FIG;  use FIG;
15  my $fig=new FIG;  my $fig=new FIG;
16    
17    =head2 new
18    
19    Just instantiate the object and return $self
20    
21    =cut
22    
23    sub new {
24     my $self=shift;
25     return $self
26    }
27    
28    
29    
30    
31  =head2 features_on_contig  =head2 features_on_contig
32    
33   Returns a reference to an array containing all the features on a contig in a genome.   Returns a reference to an array containing all the features on a contig in a genome.
# Line 54  Line 68 
68    
69    
70    
71  =head2 pirsfcorrespondance  =head2 pirsfcorrespondence
72    
73   Generate the pirsf->fig id correspondance. This is only done once and the correspondance file is written.   Generate the pirsf->fig id correspondence. This is only done once and the correspondence file is written.
74   This is so that we can easily go back and forth.   This is so that we can easily go back and forth.
75    
76   The correspondance has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat   The correspondence has PIR ID \t FIG ID\n, and is probably based on ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat
77    
78     This method takes three arguments:
79       from    : pirsfinfo.dat file
80       to      : file to write information to
81       verbose : report on progress
82    
83     Returns the number of lines in the pirsinfo file that were read.
84    
85  =cut  =cut
86    
87  sub pirsfcorrespondance {  sub pirsfcorrespondence {
88   my ($self, $from, $to)=@_;   my ($self, $from, $to, $verbose)=@_;
89   die "File $from does not exist as called in $0" unless (-e $from);   unless (-e $from) {
90      print STDERR "File $from does not exist as called in $0\n";
91      return 0;
92     }
93   open (IN, $from) || die "Can't open $from";   open (IN, $from) || die "Can't open $from";
94   open (OUT, ">$to") || die "Can't write tot $to";   open (OUT, ">$to") || die "Can't write to $to";
95     my $linecount;
96   while (<IN>) {   while (<IN>) {
97      $linecount++;
98      unless ($linecount % 10000) {print STDERR "Correspondence of $linecount lines calculated\n"}
99    if (/^>/) {print OUT; next}    if (/^>/) {print OUT; next}
100    chomp;    chomp;
101    my $done;    my $done;
# Line 80  Line 107 
107   }   }
108   close IN;   close IN;
109   close OUT;   close OUT;
110     return $linecount;
111  }  }
112    
113    
# Line 159  Line 187 
187   my @return;   my @return;
188   my @attr=$fig->feature_attributes($peg);   my @attr=$fig->feature_attributes($peg);
189   foreach my $attr (@attr) {   foreach my $attr (@attr) {
190    my ($gottag, $val, $link)=@$attr;    my ($gotpeg, $gottag, $val, $link)=@$attr;
191    push @return, $val if ($gottag eq $tag);    push @return, $val if ($gottag eq $tag);
192   }   }
193   return wantarray ? @return : join "; ", @return;   return wantarray ? @return : join "; ", @return;
194  }  }
195    
196    =head2 locations_on_contig
197    
198    Return the locations of a sequence on a contig.
199    
200    This will look for exact matches to a sequence on a contig, and return a reference to an array that has all the locations.
201    
202    my $locations=$raelib->locations_on_contig($genome, $contig, 'GATC', undef);
203    foreach my $bp (@$locations) { ... do something ... }
204    
205    first argument  : genome number
206    second argument : contig name
207    third argument  : sequence to look for
208    fourth argument : beginning position to start looking from (can be undef)
209    fifth argument  : end position to stop looking from (can be undef)
210    sixth argument : check reverse complement (0 or undef will check forward, 1 or true will check rc)
211    
212    Note, the position is calculated before the sequence is rc'd
213    
214    =cut
215    
216    sub locations_on_contig {
217     my ($self, $genome, $contig, $sequence, $from, $to, $check_reverse)=@_;
218     my $return=[];
219    
220     # get the dna sequence of the contig, and make sure it is uppercase
221     my $contig_ln=$fig->contig_ln($genome, $contig);
222     return $return unless ($contig_ln);
223     unless ($from) {$from=1}
224     unless ($to) {$to=$contig_ln}
225     if ($from > $to) {($from, $to)=($to, $from)}
226     my $dna_seq=$fig->dna_seq($genome, $contig."_".$from."_".$to);
227     $dna_seq=uc($dna_seq);
228    
229     # if we want to check the rc, we actually rc the query
230     $sequence=$fig->reverse_comp($sequence) if ($check_reverse);
231     $sequence=uc($sequence);
232    
233     # now find all the matches
234     my $posn=index($dna_seq, $sequence, 0);
235     while ($posn > -1) {
236      push @$return, $posn;
237      $posn=index($dna_seq, $sequence, $posn+1);
238     }
239     return $return;
240    }
241    
242    
243    =head2 scrolling_org_list
244    
245    This is the list from index.cgi, that I call often. It has one minor modification: the value returned is solely the organisms id and does not contain genus_species information. I abstracted this here: 1, so I could call it often, and 2, so I could edit it once.
246    
247    use like this push @$html, $raelib->scrolling_org_list($cgi, $multiple);
248    
249    multiple selections will only be set if $multiple is true
250    
251    =cut
252    
253    sub scrolling_org_list {
254     my ($self, $cgi, $multiple)=@_;
255     unless ($multiple) {$multiple=0}
256    
257     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
258    
259     #
260     #  Canonical names must match the keywords used in the DBMS.  They are
261     #  defined in compute_genome_counts.pl
262     #
263     my %canonical = (
264            'All'                   =>  undef,
265            'Archaea'               => 'Archaea',
266            'Bacteria'              => 'Bacteria',
267            'Eucarya'               => 'Eukaryota',
268            'Viruses'               => 'Virus',
269            'Environmental samples' => 'Environmental Sample'
270         );
271    
272     my $req_dom = $cgi->param( 'domain' ) || 'All';
273     my @domains = $cgi->radio_group( -name     => 'domain',
274                                         -default  => $req_dom,
275                                         -override => 1,
276                                         -values   => [ @display ]
277                                    );
278    
279     my $n_domain = 0;
280     my %dom_num = map { ( $_, $n_domain++ ) } @display;
281     my $req_dom_num = $dom_num{ $req_dom } || 0;
282    
283     #
284     #  Viruses and Environmental samples must have completeness = All (that is
285     #  how they are in the database).  Otherwise, default is Only "complete".
286     #
287     my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
288                  : $cgi->param( 'complete' ) || 'Only "complete"';
289     my @complete = $cgi->radio_group( -name     => 'complete',
290                                       -default  => $req_comp,
291                                       -override => 1,
292                                        -values   => [ 'All', 'Only "complete"' ]
293                           );
294     #
295     #  Use $fig->genomes( complete, restricted, domain ) to get org list:
296     #
297     my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
298    
299     my $orgs; my $label;
300     @$orgs =  $fig->genomes( $complete, undef, $canonical{ $req_dom } );
301    
302     foreach (@$orgs) {
303       my $gs = $fig->genus_species($_);
304       my $gc=scalar $fig->all_contigs($_);
305       $label->{$_} = "$gs ($_) [$gc contigs]";
306      }
307    
308     @$orgs = sort {$label->{$a} cmp $label->{$b}} @$orgs;
309    
310     my $n_genomes = @$orgs;
311    
312     return (         "<TABLE>\n",
313                      "   <TR>\n",
314                      "      <TD>",
315                      $cgi->scrolling_list( -name     => 'korgs',
316                                            -values   => $orgs,
317                                            -labels   => $label,
318                                            -size     => 10,
319                                            -multiple => $multiple,
320                                          ), $cgi->br,
321                      "$n_genomes genomes shown ",
322                      $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
323                      "      </TD>",
324                      "      <TD>",
325                      join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
326                      join( "<br>", "<b>Completeness?</b>", @complete), "\n",
327                      "</TD>",
328                      "   </TR>\n",
329                      "</TABLE>\n",
330                      $cgi->hr
331            );
332    }
333    
334    
335    
336    
337    

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3