[Bio] / SeedViewer / MetagenomeAnalysis.pm Repository:
ViewVC logotype

Diff of /SeedViewer/MetagenomeAnalysis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Wed May 7 20:31:19 2008 UTC revision 1.14, Thu Jun 12 15:18:36 2008 UTC
# Line 7  Line 7 
7    
8  use FIG_Config;  use FIG_Config;
9  use DBI;  use DBI;
10    use Data::Dumper;
11    
12  1;  1;
13    
   
   
14  use constant QUERY_DEFAULTS =>  use constant QUERY_DEFAULTS =>
15    { 1 => { evalue => '1e-05', align_len => 50 }, # RDP    { 1 => { evalue => '1e-05', align_len => 50 }, # RDP
16      2 => { evalue => '0.01' }, # SEED      2 => { evalue => '0.01' }, # SEED
# Line 21  Line 20 
20      6 => { evalue => '0.01' }, # Subsystem      6 => { evalue => '0.01' }, # Subsystem
21    };    };
22    
   
   
   
23  sub new {  sub new {
24    my ($class, $job) = @_;    my ($class, $job) = @_;
25    
# Line 70  Line 66 
66                 dbh => $dbh,                 dbh => $dbh,
67                 key2taxa => undef,                 key2taxa => undef,
68                 query => {},                 query => {},
69                       dbid_cache => {},
70               };               };
71    bless $self, $class;    bless $self, $class;
72    
# Line 80  Line 77 
77    
78  }  }
79    
   
   
   
80  sub job {  sub job {
81    return $_[0]->{job};    return $_[0]->{job};
82  }  }
# Line 100  Line 94 
94    return $_[0]->{dbtable};    return $_[0]->{dbtable};
95  }  }
96    
97    sub dbtable_best_psc {
98      unless (defined $_[0]->{dbtable}) {
99        $_[0]->{dbtable} = 'tax_sim_best_by_psc_'.$_[0]->job->id;
100      }
101      return $_[0]->{dbtable};
102    }
103    
104    sub dbtable_best_iden {
105      unless (defined $_[0]->{dbtable}) {
106        $_[0]->{dbtable} = 'tax_sim_best_by_iden_'.$_[0]->job->id;
107      }
108      return $_[0]->{dbtable};
109    }
110    
111  sub get_key2taxa_mapping {  sub get_key2taxa_mapping {
112    
# Line 144  Line 151 
151  }  }
152    
153    
154    #
155    # Determine the correct dbid for this job. Use sims.database_list
156    # to find the version that the analysis was run with.
157    #
158  sub get_dataset_id {  sub get_dataset_id {
159    # mapping contained in table db_type      my($self, $dataset) = @_;
   # unstable, changes every time  
   my $db_ids = { 'SEED' => 2,  
                  'Greengenes' => 3,  
                  'RDP' => 1,  
                  'LSU' => 4,  
                  'SSU' => 5,  
                  'Subsystem' => 6,  
                };  
   return $db_ids->{ $_[1] } || undef;  
 }  
160    
161        my $id = $self->{dbid_cache}->{$dataset};
162        return $id if defined($id);
163    
164        my($dbname, $type) = split(/:/, $dataset);
165    
166        my $dbs = $self->job->metaxml->get_metadata('sims.database_list');
167    
168        my @this = grep { $_->{name} eq $dbname } @$dbs;
169        if (@this)
170        {
171            my $vers = $this[0]->{version};
172    
173            #
174            # Now we can find the dbid ni the database.
175            #
176            my $res = $self->dbh->selectcol_arrayref(qq(SELECT dbid
177                                                        FROM seq_db
178                                                        WHERE name = ? AND version = ?
179                                                            AND tax_db_name = ?), undef,
180                                                     $dbname, $vers, $type);
181            if (@$res)
182            {
183                #print STDERR "Found @$res for $dbname $type $vers\n";
184                $id = $res->[0];
185                $self->{dbid_cache}->{$dataset} = $id;
186                return $id;
187            }
188            print STDERR "Did not find anything for dataset='$dataset' '$dbname' '$type' '$vers'\n";
189        }
190        print STDERR "did not find a vers for dataset='$dataset' $dbname $type\n" . Dumper($dbs);
191    }
192    
193  #******************************************************************************  #******************************************************************************
194  #* MANAGING QUERY CRITERIA  #* MANAGING QUERY CRITERIA
# Line 332  Line 364 
364  #******************************************************************************  #******************************************************************************
365    
366    
   
   
   
367  =pod  =pod
368    
369  =item * B<get_sequence> (I<sequence_id>)  =item * B<get_sequence> (I<sequence_id>)
# Line 377  Line 406 
406  sub get_hits_count {  sub get_hits_count {
407    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
408    
409    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
410    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
411    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
412    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
413    
414    my $sth = $self->dbh->prepare("select count(*) from ( select id1, min(rank_psc) from $table where dbid=$dbid $where group by id1) as b");    my $sth = $self->dbh->prepare("select count(distinct id1) from  $table where dbid=$dbid $where");
415    $sth->execute;    $sth->execute;
416    my ($result) = $sth->fetchrow_array;    my ($result) = $sth->fetchrow_array;
417    
# Line 407  Line 436 
436  sub get_group_counts {  sub get_group_counts {
437    my ($self, $dataset, $group, $filter1, $filter2) = @_;    my ($self, $dataset, $group, $filter1, $filter2) = @_;
438    
439    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
440    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
441    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
442    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
# Line 418  Line 447 
447    push @filters, "tax_group_2='$filter2'" if($filter2);    push @filters, "tax_group_2='$filter2'" if($filter2);
448    my $filter = (scalar(@filters)) ? 'and '.join(' and ', @filters) : '';    my $filter = (scalar(@filters)) ? 'and '.join(' and ', @filters) : '';
449    
450    my $sth = $self->dbh->prepare("select s.$group as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid $filter group by s.$group");    my $sth = $self->dbh->prepare("select $group as tax, count(*) as num from $table where dbid=$dbid $where $filter group by tax");
451    $sth->execute;    $sth->execute;
452    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
453    
454      print STDERR "get_group_counts: ds=$dataset group=$group filter1=$filter1 filter2=$filter2\n";
455      print STDERR Dumper($result);
456    return $result;    return $result;
457    
458  }  }
# Line 439  Line 470 
470  sub get_taxa_counts {  sub get_taxa_counts {
471    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
472    
473    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
474    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
475    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
476    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
477    
478    my $sth = $self->dbh->prepare("select s.tax_str as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_str");    my $sth = $self->dbh->prepare("select tax_str as tax, count(*) from $table where dbid=$dbid group by tax");
479    
480    $sth->execute;    $sth->execute;
481    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
# Line 466  Line 497 
497  sub get_subsystem_counts {  sub get_subsystem_counts {
498    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
499    
500    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
501    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
502    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
503    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
504    
505    #print STDERR "select s.tax_group_1, s.tax_group_2, s.tax_group_3, s.tax_str, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_group_1, s.tax_group_2, s.tax_group_3";    my $sth = $self->dbh->prepare("select tax_group_1, tax_group_2, tax_group_3, tax_str, count(*) as num from $table where dbid=$dbid group by tax_group_1, tax_group_2, tax_group_3, tax_str");
  #die;  
   my $sth = $self->dbh->prepare("select s.tax_group_1, s.tax_group_2, s.tax_group_3, s.tax_str, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_group_1, s.tax_group_2, s.tax_group_3");  
506    
507    $sth->execute;    $sth->execute;
508    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
# Line 497  Line 526 
526  sub get_sequence_subset {  sub get_sequence_subset {
527    my ($self, $dataset, $filter) = @_;    my ($self, $dataset, $filter) = @_;
528    
529    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
530    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
531    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
532    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
533    
534    my $sth = $self->dbh->prepare("select s.id1, s.ali_ln, s.id2, s.tax_str from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid and s.tax_str like '$filter%'");    my $sth = $self->dbh->prepare("select id1, ali_ln, id2, tax_str from $table where dbid=$dbid $where and tax_str like '$filter%'");
   
535    $sth->execute;    $sth->execute;
536    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
537    
# Line 526  Line 554 
554  sub get_recruitment_plot_data {  sub get_recruitment_plot_data {
555    my ($self, $genome) = @_;    my ($self, $genome) = @_;
556    
557    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
558    my $dbid  = $self->get_dataset_id("SEED");    my $dbid  = $self->get_dataset_id("SEED:seed_genome_tax");
559    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
560    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
561    
562    my ($tax_id) = $self->dbh->selectrow_array("select tax_str from rdp_to_tax where seq_num='". $genome . "'");    my ($tax_id) = $self->dbh->selectrow_array(qq(SELECT tax_str
563                                                    FROM rdp_to_tax
564                                                    WHERE seq_num= ?), undef, $genome);
565    
566    my $sth = $self->dbh->prepare("select s.id1, s.id2, s.b2, s.e2, s.logpsc from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid and s.tax_str='$tax_id'");    if($tax_id =~ /(\S+)\s/){
567        $tax_id = $1;
568      }
569    
570    $sth->execute;    my $sth = $self->dbh->prepare(qq(SELECT id1, id2, b2, e2, logpsc
571                                       FROM $table
572                                       WHERE dbid=? $where and tax_str=?));
573    
574    
575      $sth->execute($dbid, $tax_id);
576    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
577    
578    return $result;    return $result;
# Line 564  Line 601 
601    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
602    $limit = 10 unless ($limit);    $limit = 10 unless ($limit);
603    
604    my $sth = $self->dbh->prepare("select id2, tax_str, logpsc, bsc, ali_ln, iden, b1, e1 from $table where id1=$id and dbid=$dbid and rank_psc<$limit;");    my $sth = $self->dbh->prepare(qq(SELECT id2, tax_str, logpsc, bsc, ali_ln,
605    $sth->execute;                                          iden, b1, e1
606                                       FROM $table
607                                       WHERE id1=? AND dbid=? AND rank_psc < ?));
608      $sth->execute($id, $dbid, $limit);
609    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
610    
611    return $result;    return $result;
# Line 598  Line 638 
638    
639  =pod  =pod
640    
641    =item * B<get_genome_id> (I<tax_str>)
642    
643    =cut
644    
645    sub get_genome_id {
646      my ($self, $tax_str) = @_;
647      $tax_str =~ s/'/''/g;
648      my $retval =  $self->dbh->selectrow_array("select seq_num from rdp_to_tax where tax_str='". $tax_str . "'");
649      if (ref($retval) eq 'ARRAY') {
650        return $retval->[0];
651      } else {
652        return $retval;
653      }
654    }
655    
656    =pod
657    
658  =back  =back
659    
660  =cut  =cut

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.14

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3