[Bio] / SeedViewer / MetagenomeAnalysis.pm Repository:
ViewVC logotype

Diff of /SeedViewer/MetagenomeAnalysis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Tue Apr 22 19:56:18 2008 UTC revision 1.13, Wed Jun 11 21:17:56 2008 UTC
# Line 7  Line 7 
7    
8  use FIG_Config;  use FIG_Config;
9  use DBI;  use DBI;
10    use Data::Dumper;
11    
12  1;  1;
13    
   
   
14  use constant QUERY_DEFAULTS =>  use constant QUERY_DEFAULTS =>
15    { 1 => { evalue => '1e-05', align_len => 50 }, # RDP    { 1 => { evalue => '1e-05', align_len => 50 }, # RDP
16      2 => { evalue => '0.01' }, # SEED      2 => { evalue => '0.01' }, # SEED
# Line 21  Line 20 
20      6 => { evalue => '0.01' }, # Subsystem      6 => { evalue => '0.01' }, # Subsystem
21    };    };
22    
   
   
   
23  sub new {  sub new {
24    my ($class, $job) = @_;    my ($class, $job) = @_;
25    
# Line 36  Line 32 
32    my $dbh;    my $dbh;
33    eval {    eval {
34    
35        my $dbms     = $FIG_Config::mgrast_dbms;
36      my $host     = $FIG_Config::mgrast_dbhost;      my $host     = $FIG_Config::mgrast_dbhost;
37      my $database = $FIG_Config::mgrast_db;      my $database = $FIG_Config::mgrast_db;
38      my $user     = $FIG_Config::mgrast_dbuser;      my $user     = $FIG_Config::mgrast_dbuser;
39      my $password = '';      my $password = '';
40    
41        if ($dbms eq 'Pg')
42        {
43            $dbh = DBI->connect("DBI:Pg:dbname=$database;host=$host", $user, $password,
44                            { RaiseError => 1, AutoCommit => 0, PrintError => 0 }) ||
45                                die "database connect error.";
46        }
47        elsif ($dbms eq 'mysql' or $dbms eq '') # Default to mysql
48        {
49      $dbh = DBI->connect("DBI:mysql:database=$database;host=$host", $user, $password,      $dbh = DBI->connect("DBI:mysql:database=$database;host=$host", $user, $password,
50                          { RaiseError => 1, AutoCommit => 0, PrintError => 0 }) ||                          { RaiseError => 1, AutoCommit => 0, PrintError => 0 }) ||
51                                            die "database connect error.";                                            die "database connect error.";
52        }
53        else
54        {
55            die "MetagenomeAnalysis: unknown dbms '$dbms'";
56        }
57    
58    };    };
59    if ($@) {    if ($@) {
60      warn "Unable to connect to metagenomics database: $@\n";      warn "Unable to connect to metagenomics database: $@\n";
# Line 55  Line 66 
66                 dbh => $dbh,                 dbh => $dbh,
67                 key2taxa => undef,                 key2taxa => undef,
68                 query => {},                 query => {},
69                       dbid_cache => {},
70               };               };
71    bless $self, $class;    bless $self, $class;
72    
# Line 65  Line 77 
77    
78  }  }
79    
   
   
   
80  sub job {  sub job {
81    return $_[0]->{job};    return $_[0]->{job};
82  }  }
# Line 85  Line 94 
94    return $_[0]->{dbtable};    return $_[0]->{dbtable};
95  }  }
96    
97    sub dbtable_best_psc {
98      unless (defined $_[0]->{dbtable}) {
99        $_[0]->{dbtable} = 'tax_sim_best_by_psc_'.$_[0]->job->id;
100      }
101      return $_[0]->{dbtable};
102    }
103    
104    sub dbtable_best_iden {
105      unless (defined $_[0]->{dbtable}) {
106        $_[0]->{dbtable} = 'tax_sim_best_by_iden_'.$_[0]->job->id;
107      }
108      return $_[0]->{dbtable};
109    }
110    
111  sub get_key2taxa_mapping {  sub get_key2taxa_mapping {
112    
# Line 129  Line 151 
151  }  }
152    
153    
154    #
155    # Determine the correct dbid for this job. Use sims.database_list
156    # to find the version that the analysis was run with.
157    #
158  sub get_dataset_id {  sub get_dataset_id {
159    # mapping contained in table db_type      my($self, $dataset) = @_;
160    # unstable, changes every time  
161    my $db_ids = { 'SEED' => 2,      my $id = $self->{dbid_cache}->{$dataset};
162                   'Greengenes' => 3,      return $id if defined($id);
163                   'RDP' => 1,  
164                   'LSU' => 4,      my($dbname, $type) = split(/:/, $dataset);
                  'SSU' => 5,  
                  'Subsystem' => 6,  
                };  
   return $db_ids->{ $_[1] } || undef;  
 }  
165    
166        my $dbs = $self->job->metaxml->get_metadata('sims.database_list');
167    
168        my @this = grep { $_->{name} eq $dbname } @$dbs;
169        if (@this)
170        {
171            my $vers = $this[0]->{version};
172    
173            #
174            # Now we can find the dbid ni the database.
175            #
176            my $res = $self->dbh->selectcol_arrayref(qq(SELECT dbid
177                                                        FROM seq_db
178                                                        WHERE name = ? AND version = ?
179                                                            AND tax_db_name = ?), undef,
180                                                     $dbname, $vers, $type);
181            if (@$res)
182            {
183                print STDERR "Found @$res for $dbname $type $vers\n";
184                $id = $res->[0];
185                $self->{dbid_cache}->{$dataset} = $id;
186                return $id;
187            }
188            print STDERR "Did not find anything for dataset='$dataset' '$dbname' '$type' '$vers'\n";
189        }
190        print STDERR "did not find a vers for dataset='$dataset' $dbname $type\n" . Dumper($dbs);
191    }
192    
193  #******************************************************************************  #******************************************************************************
194  #* MANAGING QUERY CRITERIA  #* MANAGING QUERY CRITERIA
# Line 149  Line 196 
196    
197  =pod  =pod
198    
199    =over 4
200    
201  =item * B<query_evalue> (I<evalue>)  =item * B<query_evalue> (I<evalue>)
202    
203  Set/get the expectation value which is currently used to query the database.  Set/get the expectation value which is currently used to query the database.
# Line 315  Line 364 
364  #******************************************************************************  #******************************************************************************
365    
366    
   
   
   
367  =pod  =pod
368    
369  =item * B<get_sequence> (I<sequence_id>)  =item * B<get_sequence> (I<sequence_id>)
# Line 360  Line 406 
406  sub get_hits_count {  sub get_hits_count {
407    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
408    
409    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
410    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
411    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
412    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
413    
414    my $sth = $self->dbh->prepare("select count(*) from ( select id1, min(rank_psc) from $table where dbid=$dbid $where group by id1) as b");    my $sth = $self->dbh->prepare("select count(distinct id1) from  $table where dbid=$dbid $where");
415    $sth->execute;    $sth->execute;
416    my ($result) = $sth->fetchrow_array;    my ($result) = $sth->fetchrow_array;
417    
# Line 390  Line 436 
436  sub get_group_counts {  sub get_group_counts {
437    my ($self, $dataset, $group, $filter1, $filter2) = @_;    my ($self, $dataset, $group, $filter1, $filter2) = @_;
438    
439    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
440    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
441    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
442    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
# Line 401  Line 447 
447    push @filters, "tax_group_2='$filter2'" if($filter2);    push @filters, "tax_group_2='$filter2'" if($filter2);
448    my $filter = (scalar(@filters)) ? 'and '.join(' and ', @filters) : '';    my $filter = (scalar(@filters)) ? 'and '.join(' and ', @filters) : '';
449    
450    my $sth = $self->dbh->prepare("select s.$group as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid $filter group by s.$group");    my $sth = $self->dbh->prepare("select $group as tax, count(*) as num from $table where dbid=$dbid $where $filter group by tax");
451    $sth->execute;    $sth->execute;
452    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
453    
454      print STDERR "get_group_counts: ds=$dataset group=$group filter1=$filter1 filter2=$filter2\n";
455      print STDERR Dumper($result);
456    return $result;    return $result;
457    
458  }  }
# Line 422  Line 470 
470  sub get_taxa_counts {  sub get_taxa_counts {
471    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
472    
473    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
474    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
475    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
476    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
477    
478    my $sth = $self->dbh->prepare("select s.tax_str as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_str");    my $sth = $self->dbh->prepare("select tax_str as tax, count(*) from $table where dbid=$dbid group by tax");
479    
480    $sth->execute;    $sth->execute;
481    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
# Line 449  Line 497 
497  sub get_subsystem_counts {  sub get_subsystem_counts {
498    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
499    
500    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
501    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
502    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
503    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
504    
505    #print STDERR "select s.tax_group_1, s.tax_group_2, s.tax_group_3, s.tax_str, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_group_1, s.tax_group_2, s.tax_group_3";    my $sth = $self->dbh->prepare("select tax_group_1, tax_group_2, tax_group_3, tax_str, count(*) as num from $table where dbid=$dbid group by tax_group_1, tax_group_2, tax_group_3, tax_str");
  #die;  
   my $sth = $self->dbh->prepare("select s.tax_group_1, s.tax_group_2, s.tax_group_3, s.tax_str, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_group_1, s.tax_group_2, s.tax_group_3");  
506    
507    $sth->execute;    $sth->execute;
508    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
# Line 480  Line 526 
526  sub get_sequence_subset {  sub get_sequence_subset {
527    my ($self, $dataset, $filter) = @_;    my ($self, $dataset, $filter) = @_;
528    
529    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
530    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
531    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
532    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
533    
534    my $sth = $self->dbh->prepare("select s.id1, s.ali_ln, s.id2, s.tax_str from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid and s.tax_str like '$filter%'");    my $sth = $self->dbh->prepare("select id1, ali_ln, id2, tax_str from $table where dbid=$dbid $where and tax_str like '$filter%'");
   
535    $sth->execute;    $sth->execute;
536    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
537    
# Line 497  Line 542 
542    
543  =pod  =pod
544    
545    =item * B<get_recruitment_plot_data> (I<genome>)
546    
547    Given a genome id (83333.1), this method returns all sequence ids,
548    the alignment length, the match id and the taxonomy string for all
549    sequences which match the criteria and have their tax_str start equal
550    the genome tax string I<filter>.
551    
552    =cut
553    
554    sub get_recruitment_plot_data {
555      my ($self, $genome) = @_;
556    
557      my $table = $self->dbtable_best_psc;
558      my $dbid  = $self->get_dataset_id("SEED:seed_genome_tax");
559      my $where = $self->get_where_clause();
560      $where = ($where) ? "and $where" : '';
561    
562      my ($tax_id) = $self->dbh->selectrow_array(qq(SELECT tax_str
563                                                    FROM rdp_to_tax
564                                                    WHERE seq_num= ?), undef, $genome);
565    
566      if($tax_id =~ /(\S+)\s/){
567        $tax_id = $1;
568      }
569    
570      my $sth = $self->dbh->prepare(qq(SELECT id1, id2, b2, e2, logpsc
571                                       FROM $table
572                                       WHERE dbid=? $where and tax_str=?));
573    
574    
575      $sth->execute($dbid, $tax_id);
576      my $result = $sth->fetchall_arrayref();
577    
578      return $result;
579    
580    }
581    
582    
583    
584    
585    =pod
586    
587  =item * B<get_hits_for_sequence> (I<seq_id>, I<dataset>, I<limit>)  =item * B<get_hits_for_sequence> (I<seq_id>, I<dataset>, I<limit>)
588    
589  Given a sequence id I<seq_id> (id1) and a dataset name (db_id), this method returns  Given a sequence id I<seq_id> (id1) and a dataset name (db_id), this method returns
# Line 514  Line 601 
601    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
602    $limit = 10 unless ($limit);    $limit = 10 unless ($limit);
603    
604    my $sth = $self->dbh->prepare("select id2, tax_str, logpsc, bsc, ali_ln, iden, b1, e1 from $table where id1=$id and dbid=$dbid and rank_psc<$limit;");    my $sth = $self->dbh->prepare(qq(SELECT id2, tax_str, logpsc, bsc, ali_ln,
605    $sth->execute;                                          iden, b1, e1
606                                       FROM $table
607                                       WHERE id1=? AND dbid=? AND rank_psc < ?));
608      $sth->execute($id, $dbid, $limit);
609    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
610    
611    return $result;    return $result;
# Line 546  Line 636 
636    
637  }  }
638    
639    =pod
640    
641    =item * B<get_genome_id> (I<tax_str>)
642    
643    =cut
644    
645    sub get_genome_id {
646      my ($self, $tax_str) = @_;
647      return $self->dbh->selectrow_array("select seq_num from rdp_to_tax where tax_str='". $tax_str . "'")->[0];
648    }
649    
650    =pod
651    
652    =back
653    
654    =cut

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.13

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3