[Bio] / SeedViewer / MetagenomeAnalysis.pm Repository:
ViewVC logotype

Diff of /SeedViewer/MetagenomeAnalysis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Thu Apr 10 14:46:51 2008 UTC revision 1.3, Fri Apr 18 20:09:19 2008 UTC
# Line 12  Line 12 
12    
13    
14    
15    use constant QUERY_DEFAULTS =>
16      { 1 => { evalue => '1e-05', align_len => 50 }, # RDP
17        2 => { evalue => '0.01' }, # SEED
18        3 => { evalue => '1e-05', align_len => 50 }, # Greengenes
19        4 => { evalue => '1e-05', align_len => 50 }, # LSU
20        5 => { evalue => '1e-05', align_len => 50 }, # SSU
21        6 => { evalue => '0.01' }, # Subsystem
22      };
23    
24    
25    
26    
27  sub new {  sub new {
28    my ($class, $job) = @_;    my ($class, $job) = @_;
# Line 43  Line 54 
54    my $self = { job => $job,    my $self = { job => $job,
55                 dbh => $dbh,                 dbh => $dbh,
56                 key2taxa => undef,                 key2taxa => undef,
57                   query => {},
58               };               };
59    bless $self, $class;    bless $self, $class;
60    
# Line 131  Line 143 
143  }  }
144    
145    
146    #******************************************************************************
147    #* MANAGING QUERY CRITERIA
148    #******************************************************************************
149    
150    =pod
151    
152    =item * B<query_evalue> (I<evalue>)
153    
154    Set/get the expectation value which is currently used to query the database.
155    Parameter I<evalue> has to be a float or in '1e-5'-like format or undef.
156    
157    =cut
158    
159    sub query_evalue {
160      if(scalar(@_)>1) {
161        $_[0]->{query}->{evalue} = $_[1];
162      }
163      return $_[0]->{query}->{evalue};
164    }
165    
166    
167    =pod
168    
169    =item * B<query_bitscore> (I<score>)
170    
171    Set/get the bitscore which is currently used to query the database.
172    Parameter I<score> has to be a float or undef.
173    
174    =cut
175    
176    sub query_bitscore {
177      if(scalar(@_)>1) {
178        $_[0]->{query}->{bitscore} = $_[1];
179      }
180      return $_[0]->{query}->{bitscore};
181    }
182    
183    
184    =pod
185    
186    =item * B<query_align_len> (I<length>)
187    
188    Set/get the minimum alignment which is currently used to query the database.
189    Parameter I<length> has to be a positive integer or undef.
190    
191    =cut
192    
193    sub query_align_len {
194      if(scalar(@_)>1) {
195        if($_[1] and $_[1]<0) {
196          die "Alignment length has to be positive: ".$_[1];
197        }
198        $_[0]->{query}->{align_len} = $_[1];
199      }
200      return $_[0]->{query}->{align_len};
201    }
202    
203    
204    =pod
205    
206    =item * B<query_identity> (I<percent>)
207    
208    Set/get the minimum percent identity which is currently used to query the database.
209    Parameter I<percent> has to be a number in 0..100 or undef.
210    
211    =cut
212    
213    sub query_identity {
214      if(scalar(@_)>1) {
215        if($_[1] and ($_[1]<0 or $_[1]>100)) {
216          die "Identity has to be between 0 and 100: ".$_[1];
217        }
218        $_[0]->{query}->{identity} = $_[1];
219      }
220      return $_[0]->{query}->{identity};
221    }
222    
223    
224    =pod
225    
226    =item * B<query_load_from_cgi> (I<cgi>, [I<dataset>])
227    
228    Sets all query parameter to the values provided in the CGI query object I<cgi>.
229    This method recognises 'evalue', 'pvalue' (bitscore), 'alignment_length' and
230    'percent_identity' as query criteria. Any missing param will be set to undef.
231    If the optional parameter I<dataset> is set to one of the accepted datasets
232    (db types), the method will additionally load the defaults for this type into
233    the CGI object.
234    
235    
236    =cut
237    
238    sub query_load_from_cgi {
239      my ($self, $cgi, $dataset) = @_;
240    
241      unless(ref $cgi and $cgi->isa("CGI")) {
242        die "Query load from cgi requires a valid CGI object.";
243      }
244    
245      # load the defaults if necessary
246      if($dataset and $self->get_dataset_id($dataset)) {
247    
248        my $d = $self->get_dataset_id($dataset);
249    
250        my @v = qw( evalue bitscore align_len identity );
251        foreach my $v (@v) {
252          if(!defined($cgi->param($v)) and QUERY_DEFAULTS->{$d}->{$v}) {
253            $cgi->param($v, QUERY_DEFAULTS->{$d}->{$v});
254          }
255        }
256      }
257    
258      # set the query params
259      my $evalue = $cgi->param('evalue') || '';
260      $self->query_evalue($evalue);
261    
262      my $bitscore = $cgi->param('bitscore') || '';
263      $self->query_bitscore($bitscore);
264    
265      my $align_len = $cgi->param('align_len') || '';
266      $self->query_align_len($align_len);
267    
268      my $identity = $cgi->param('identity') || '';
269      $self->query_identity($identity);
270    
271      return $self;
272    
273    }
274    
275    
276    =pod
277    
278    =item * B<get_where_clause> ()
279    
280    Returns for the current query parameters the where clause as applicable to the
281    tax_sim_XYZ table SQL queries. The method will take care of all conversions to
282    eg the logscore evalues.
283    
284    =cut
285    
286    sub get_where_clause {
287      my ($self) = @_;
288    
289      my @params;
290    
291      if($self->{query}->{evalue}) {
292        push @params, "logpsc<=".$self->evalue2log($self->{query}->{evalue});
293      }
294    
295      if($self->{query}->{bitscore}) {
296        push @params, "bsc>=".$self->{query}->{bitscore};
297      }
298    
299      if($self->{query}->{align_len}) {
300        push @params, "ali_ln>=".$self->{query}->{align_len};
301      }
302    
303      if($self->{query}->{identity}) {
304        push @params, "iden>=".$self->{query}->{identity};
305      }
306    
307      return join(' and ', @params);
308    
309    }
310    
311    
312    
313    #******************************************************************************
314    #* OTHER
315    #******************************************************************************
316    
317    
318    
319    
320    
321    =pod
322    
323  =item * B<get_sequence> (I<sequence_id>)  =item * B<get_sequence> (I<sequence_id>)
324    
325  Retrieve the sequence I<sequence_id> from the metagenome job directory.  Retrieve the sequence I<sequence_id> from the metagenome job directory.
# Line 159  Line 348 
348  }  }
349    
350    
351  =item * B<get_hits_count> (I<dataset_name>, I<where_clause>)  =pod
352    
353    =item * B<get_hits_count> (I<dataset_name>)
354    
355  Given a dataset name (db_id) and optional a where clause, this method returns  Given a dataset name (db_id), this method returns
356  the total number of sequences that contain a hit.  the total number of sequences that contain a hit.
357    
358  =cut  =cut
359    
360  sub get_hits_count {  sub get_hits_count {
361    my ($self, $dataset, $where) = @_;    my ($self, $dataset) = @_;
362    
363    my $table = $self->dbtable;    my $table = $self->dbtable;
364    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
365    $where = '1' unless ($where);    my $where = $self->get_where_clause();
366      $where = ($where) ? "and $where" : '';
367    
368    my $sth = $self->dbh->prepare("select count(*) from ( select id1, min(rank_psc) from $table where dbid=$dbid and $where group by id1) as b");    my $sth = $self->dbh->prepare("select count(*) from ( select id1, min(rank_psc) from $table where dbid=$dbid $where group by id1) as b");
369    $sth->execute;    $sth->execute;
370    my ($result) = $sth->fetchrow_array;    my ($result) = $sth->fetchrow_array;
371    
# Line 182  Line 374 
374  }  }
375    
376    
377    =pod
378    
379    =item * B<get_group_counts> (I<dataset_name>, [I<group>, I<filter1>, I<filter2>])
380    
381    Given a dataset name (db_id), this method returns the total counts for all
382    taxonomy groups of a certain depth which are hit. If no group name I<group>
383    was given, the method returns counts for tax_group_1.
384    Optionally, I<group> may be 'tax_group_2' or 'tax_group_3' and in that case
385    any optional provided filters I<filter1> and I<filter2> will be applied to
386    the column 'tax_group_1' and 'tax_group_2' respectively.
387    
388    =cut
389    
390    sub get_group_counts {
391      my ($self, $dataset, $group, $filter1, $filter2) = @_;
392    
393      my $table = $self->dbtable;
394      my $dbid  = $self->get_dataset_id($dataset);
395      my $where = $self->get_where_clause();
396      $where = ($where) ? "and $where" : '';
397      $group = 'tax_group_1' unless($group);
398    
399      my @filters;
400      push @filters, "tax_group_1='$filter1'" if($filter1);
401      push @filters, "tax_group_2='$filter2'" if($filter2);
402      my $filter = (scalar(@filters)) ? 'and '.join(' and ', @filters) : '';
403    
404      my $sth = $self->dbh->prepare("select s.$group as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid $filter group by s.$group");
405      $sth->execute;
406      my $result = $sth->fetchall_arrayref();
407    
408      return $result;
409    
410    }
411    
412    
413    =pod
414    
415    =item * B<get_taxa_counts> (I<dataset_name>)
416    
417    Given a dataset name (db_id), this method returns the total counts for all
418    taxonomy strings which are hit.
419    
420    =cut
421    
422    sub get_taxa_counts {
423      my ($self, $dataset) = @_;
424    
425      my $table = $self->dbtable;
426      my $dbid  = $self->get_dataset_id($dataset);
427      my $where = $self->get_where_clause();
428      $where = ($where) ? "and $where" : '';
429    
430      my $sth = $self->dbh->prepare("select s.tax_str as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_str");
431    
432      $sth->execute;
433      my $result = $sth->fetchall_arrayref();
434    
435      return $result;
436    
437    }
438    
439    
440    =pod
441    
442    =item * B<get_subsystem_counts> (I<dataset_name>)
443    
444    Given a dataset name (db_id), this method returns the total counts for all
445    subsystems which are hit.
446    
447    =cut
448    
449    sub get_subsystem_counts {
450      my ($self, $dataset) = @_;
451    
452      my $table = $self->dbtable;
453      my $dbid  = $self->get_dataset_id($dataset);
454      my $where = $self->get_where_clause();
455      $where = ($where) ? "and $where" : '';
456    
457      my $sth = $self->dbh->prepare("select s.tax_group_1, s.tax_group_2, s.tax_group_3, s.tax_str, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_group_1, s.tax_group_2, s.tax_group_3");
458    
459      $sth->execute;
460      my $result = $sth->fetchall_arrayref();
461    
462      return $result;
463    
464    }
465    
466    
467    =pod
468    
469    =item * B<get_sequence_subset> (I<dataset_name>, I<filter>)
470    
471    Given a dataset name (db_id), this method returns all sequence ids,
472    the alignment length, the match id and the taxonomy string for all
473    sequences which match the criteria and have their tax_str start with
474    the filter string I<filter>.
475    
476    =cut
477    
478    sub get_sequence_subset {
479      my ($self, $dataset, $filter) = @_;
480    
481      my $table = $self->dbtable;
482      my $dbid  = $self->get_dataset_id($dataset);
483      my $where = $self->get_where_clause();
484      $where = ($where) ? "and $where" : '';
485    
486      my $sth = $self->dbh->prepare("select s.id1, s.ali_ln, s.id2, s.tax_str from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid and s.tax_str like '$filter%'");
487    
488      $sth->execute;
489      my $result = $sth->fetchall_arrayref();
490    
491      return $result;
492    
493    }
494    
495    
496    =pod
497    
498  =item * B<get_align_len_range> (I<dataset_name>)  =item * B<get_align_len_range> (I<dataset_name>)
499    
500  Given a dataset name (db_id), this method returns  Given a dataset name (db_id), this method returns
# Line 195  Line 508 
508    my $table = $self->dbtable;    my $table = $self->dbtable;
509    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
510    
511    my $sth = $self->dbh->prepare("select min(ali_ln), max(ali_ln) from $table");    my $sth = $self->dbh->prepare("select min(ali_ln), max(ali_ln) from $table where dbid=$dbid");
512    $sth->execute;    $sth->execute;
513    my ($min, $max) = $sth->fetchrow_array;    my ($min, $max) = $sth->fetchrow_array;
514    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3