[Bio] / SeedViewer / MetagenomeAnalysis.pm Repository:
ViewVC logotype

Diff of /SeedViewer/MetagenomeAnalysis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Wed May 7 20:31:19 2008 UTC revision 1.17, Fri Jun 13 16:47:56 2008 UTC
# Line 7  Line 7 
7    
8  use FIG_Config;  use FIG_Config;
9  use DBI;  use DBI;
10    use Data::Dumper;
11    
12  1;  1;
13    
   
   
14  use constant QUERY_DEFAULTS =>  use constant QUERY_DEFAULTS =>
15    { 1 => { evalue => '1e-05', align_len => 50 }, # RDP    { 1 => { evalue => '1e-05', align_len => 50 }, # RDP
16      2 => { evalue => '0.01' }, # SEED      2 => { evalue => '0.01' }, # SEED
# Line 21  Line 20 
20      6 => { evalue => '0.01' }, # Subsystem      6 => { evalue => '0.01' }, # Subsystem
21    };    };
22    
   
   
   
23  sub new {  sub new {
24    my ($class, $job) = @_;    my ($class, $job) = @_;
25    
# Line 70  Line 66 
66                 dbh => $dbh,                 dbh => $dbh,
67                 key2taxa => undef,                 key2taxa => undef,
68                 query => {},                 query => {},
69                       dbid_cache => {},
70               };               };
71    bless $self, $class;    bless $self, $class;
72    
# Line 80  Line 77 
77    
78  }  }
79    
   
   
   
80  sub job {  sub job {
81    return $_[0]->{job};    return $_[0]->{job};
82  }  }
# Line 100  Line 94 
94    return $_[0]->{dbtable};    return $_[0]->{dbtable};
95  }  }
96    
97    sub dbtable_best_psc {
98      unless (defined $_[0]->{dbtable}) {
99        $_[0]->{dbtable} = 'tax_sim_best_by_psc_'.$_[0]->job->id;
100      }
101      return $_[0]->{dbtable};
102    }
103    
104    sub dbtable_best_iden {
105      unless (defined $_[0]->{dbtable}) {
106        $_[0]->{dbtable} = 'tax_sim_best_by_iden_'.$_[0]->job->id;
107      }
108      return $_[0]->{dbtable};
109    }
110    
111  sub get_key2taxa_mapping {  sub get_key2taxa_mapping {
112    
# Line 144  Line 151 
151  }  }
152    
153    
154    #
155    # Determine the correct dbid for this job. Use sims.database_list
156    # to find the version that the analysis was run with.
157    #
158  sub get_dataset_id {  sub get_dataset_id {
159    # mapping contained in table db_type      my($self, $dataset) = @_;
160    # unstable, changes every time  
161    my $db_ids = { 'SEED' => 2,      my $id = $self->{dbid_cache}->{$dataset};
162                   'Greengenes' => 3,      return $id if defined($id);
                  'RDP' => 1,  
                  'LSU' => 4,  
                  'SSU' => 5,  
                  'Subsystem' => 6,  
                };  
   return $db_ids->{ $_[1] } || undef;  
 }  
163    
164        my($dbname, $type) = split(/:/, $dataset);
165    
166        my $dbs = $self->job->metaxml->get_metadata('sims.database_list');
167    
168        my @this = grep { $_->{name} eq $dbname } @$dbs;
169        if (@this)
170        {
171            my $vers = $this[0]->{version};
172    
173            #
174            # Now we can find the dbid ni the database.
175            #
176            my $res = $self->dbh->selectcol_arrayref(qq(SELECT dbid
177                                                        FROM seq_db
178                                                        WHERE name = ? AND version = ?
179                                                            AND tax_db_name = ?), undef,
180                                                     $dbname, $vers, $type);
181            if (@$res)
182            {
183                #print STDERR "Found @$res for $dbname $type $vers\n";
184                $id = $res->[0];
185                $self->{dbid_cache}->{$dataset} = $id;
186                return $id;
187            }
188            print STDERR "Did not find anything for dataset='$dataset' '$dbname' '$type' '$vers'\n";
189        }
190        print STDERR "did not find a vers for dataset='$dataset' $dbname $type\n" . Dumper($dbs);
191    }
192    
193  #******************************************************************************  #******************************************************************************
194  #* MANAGING QUERY CRITERIA  #* MANAGING QUERY CRITERIA
# Line 332  Line 364 
364  #******************************************************************************  #******************************************************************************
365    
366    
   
   
   
367  =pod  =pod
368    
369  =item * B<get_sequence> (I<sequence_id>)  =item * B<get_sequence> (I<sequence_id>)
# Line 364  Line 393 
393    
394  }  }
395    
396    =pod
397    
398    =item * B<get_sequences_fasta> (I<sequence_ids>)
399    
400    Retrieve the sequences for a given list of I<sequence_ids> from the metagenome job directory in fasta format.
401    
402    =cut
403    
404    sub get_sequences_fasta {
405      my ($self, $ids) = @_;
406    
407      # get the path to the sequence file
408      my $sequence_file = $self->job->org_dir.'/contigs';
409    
410      # hash the ids
411      my %idh = map { $_ => 1 } @$ids;
412    
413      # store the result
414      my $sequence = '';
415    
416      # iterate over the sequence file
417      open(FASTA, "<$sequence_file") or die "Unable to read metagenome sequences: $!";
418      while(<FASTA>) {
419    
420        # look only at id lines
421        next unless /^\>/;
422    
423        # get the id
424        my ($id) = /^\>(.+)/;
425        chomp($id);
426    
427        # find the id in the requested id hash
428        next unless exists ($idh{$id});
429    
430        # id was found, delete it from the hash
431        delete $idh{$id};
432    
433        # print the header line
434        $sequence .= $_;
435    
436        # print all the following lines to the result
437        while(<FASTA>) {
438          last if /^>/;
439          $sequence .= $_;
440        }
441    
442        last unless (scalar(keys(%idh)));
443      }
444    
445      return $sequence;
446    
447    }
448    
449  =pod  =pod
450    
# Line 377  Line 458 
458  sub get_hits_count {  sub get_hits_count {
459    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
460    
461    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
462    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
463    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
464    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
465    
466    my $sth = $self->dbh->prepare("select count(*) from ( select id1, min(rank_psc) from $table where dbid=$dbid $where group by id1) as b");    my $sth = $self->dbh->prepare("select count(distinct id1) from  $table where dbid=$dbid $where");
467    $sth->execute;    $sth->execute;
468    my ($result) = $sth->fetchrow_array;    my ($result) = $sth->fetchrow_array;
469    
# Line 407  Line 488 
488  sub get_group_counts {  sub get_group_counts {
489    my ($self, $dataset, $group, $filter1, $filter2) = @_;    my ($self, $dataset, $group, $filter1, $filter2) = @_;
490    
491    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
492    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
493    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
494    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
# Line 418  Line 499 
499    push @filters, "tax_group_2='$filter2'" if($filter2);    push @filters, "tax_group_2='$filter2'" if($filter2);
500    my $filter = (scalar(@filters)) ? 'and '.join(' and ', @filters) : '';    my $filter = (scalar(@filters)) ? 'and '.join(' and ', @filters) : '';
501    
502    my $sth = $self->dbh->prepare("select s.$group as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid $filter group by s.$group");    my $sth = $self->dbh->prepare("select $group as tax, count(*) as num from $table where dbid=$dbid $where $filter group by tax");
503    $sth->execute;    $sth->execute;
504    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
505    
506      print STDERR "get_group_counts: ds=$dataset group=$group filter1=$filter1 filter2=$filter2\n";
507      print STDERR Dumper($result);
508    return $result;    return $result;
509    
510  }  }
# Line 439  Line 522 
522  sub get_taxa_counts {  sub get_taxa_counts {
523    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
524    
525    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
526    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
527    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
528    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
529    
530    my $sth = $self->dbh->prepare("select s.tax_str as tax, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_str");    my $sth = $self->dbh->prepare("select tax_str as tax, count(*) from $table where dbid=$dbid $where group by tax");
531    
532    $sth->execute;    $sth->execute;
533    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
# Line 466  Line 549 
549  sub get_subsystem_counts {  sub get_subsystem_counts {
550    my ($self, $dataset) = @_;    my ($self, $dataset) = @_;
551    
552    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
553    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
554    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
555    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
556    
557    #print STDERR "select s.tax_group_1, s.tax_group_2, s.tax_group_3, s.tax_str, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_group_1, s.tax_group_2, s.tax_group_3";    my $sth = $self->dbh->prepare("select tax_group_1, tax_group_2, tax_group_3, tax_str, count(*) as num from $table where dbid=$dbid $where group by tax_group_1, tax_group_2, tax_group_3, tax_str");
  #die;  
   my $sth = $self->dbh->prepare("select s.tax_group_1, s.tax_group_2, s.tax_group_3, s.tax_str, count(*) as num from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid group by s.tax_group_1, s.tax_group_2, s.tax_group_3");  
558    
559    $sth->execute;    $sth->execute;
560    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
# Line 497  Line 578 
578  sub get_sequence_subset {  sub get_sequence_subset {
579    my ($self, $dataset, $filter) = @_;    my ($self, $dataset, $filter) = @_;
580    
581    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
582    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
583    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
584    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
585    
586    my $sth = $self->dbh->prepare("select s.id1, s.ali_ln, s.id2, s.tax_str from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid and s.tax_str like '$filter%'");    $filter =~ s/'/''/g;
587    
588      my $sth = $self->dbh->prepare("select id1, ali_ln, id2, tax_str from $table where dbid=$dbid $where and tax_str like '$filter%'");
589    $sth->execute;    $sth->execute;
590    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
591    
# Line 526  Line 608 
608  sub get_recruitment_plot_data {  sub get_recruitment_plot_data {
609    my ($self, $genome) = @_;    my ($self, $genome) = @_;
610    
611    my $table = $self->dbtable;    my $table = $self->dbtable_best_psc;
612    my $dbid  = $self->get_dataset_id("SEED");    my $dbid  = $self->get_dataset_id("SEED:seed_genome_tax");
613    my $where = $self->get_where_clause();    my $where = $self->get_where_clause();
614    $where = ($where) ? "and $where" : '';    $where = ($where) ? "and $where" : '';
615    
616    my ($tax_id) = $self->dbh->selectrow_array("select tax_str from rdp_to_tax where seq_num='". $genome . "'");    my ($tax_id) = $self->dbh->selectrow_array(qq(SELECT tax_str
617                                                    FROM rdp_to_tax
618                                                    WHERE seq_num= ?), undef, $genome);
619    
620    my $sth = $self->dbh->prepare("select s.id1, s.id2, s.b2, s.e2, s.logpsc from ( select id1, min(rank_psc) as rank from $table where dbid=$dbid $where group by id1) as b inner join $table as s on b.id1=s.id1 and b.rank=s.rank_psc where dbid=$dbid and s.tax_str='$tax_id'");    if($tax_id =~ /(\S+)\s/){
621        $tax_id = $1;
622      }
623    
624    $sth->execute;    my $sth = $self->dbh->prepare(qq(SELECT id1, id2, b2, e2, logpsc
625                                       FROM $table
626                                       WHERE dbid=? $where and tax_str=?));
627    
628    
629      $sth->execute($dbid, $tax_id);
630    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
631    
632    return $result;    return $result;
# Line 564  Line 655 
655    my $dbid  = $self->get_dataset_id($dataset);    my $dbid  = $self->get_dataset_id($dataset);
656    $limit = 10 unless ($limit);    $limit = 10 unless ($limit);
657    
658    my $sth = $self->dbh->prepare("select id2, tax_str, logpsc, bsc, ali_ln, iden, b1, e1 from $table where id1=$id and dbid=$dbid and rank_psc<$limit;");    my $sth = $self->dbh->prepare(qq(SELECT id2, tax_str, logpsc, bsc, ali_ln,
659    $sth->execute;                                          iden, b1, e1
660                                       FROM $table
661                                       WHERE id1=? AND dbid=? AND rank_psc < ?));
662      $sth->execute($id, $dbid, $limit);
663    my $result = $sth->fetchall_arrayref();    my $result = $sth->fetchall_arrayref();
664    
665    return $result;    return $result;
# Line 598  Line 692 
692    
693  =pod  =pod
694    
695    =item * B<get_genome_id> (I<tax_str>)
696    
697    =cut
698    
699    sub get_genome_id {
700      my ($self, $tax_str) = @_;
701      $tax_str =~ s/'/''/g;
702      my $retval =  $self->dbh->selectrow_array("select seq_num from rdp_to_tax where tax_str='". $tax_str . "'");
703      if (ref($retval) eq 'ARRAY') {
704        return $retval->[0];
705      } else {
706        return $retval;
707      }
708    }
709    
710    =pod
711    
712  =back  =back
713    
714  =cut  =cut

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3