use FIG_Config; use DBMaster; use MGRAST::MetagenomeAnalysis; use DBMaster; use strict; use warnings; if(scalar(@ARGV) == 0 ){ print "usage: perl compute_PCA_for_metagenomes.pl \$dir_for_output \$metagenome_job_id_1 \$metagenome_job_id_2 ...\n"; exit; } my $output_dir = shift(@ARGV); my @jobs; foreach my $arg (@ARGV){ if($arg =~/\d+/){ push(@jobs,$arg); #print "job: $arg\n"; } } my %all_subsystems; my %mg_ss_total_hits; my %mg_ss_hits; my $jobcache = &_jobdb(); foreach my $job_id (@jobs){ my $total_hits = 0; my $job_obj = &_get_job($job_id); if(! $job_obj){next;} else{print "good id: $job_id\n";} my $mgdb = &get_mgdb($job_obj); my $ss = $mgdb->get_subsystem_counts("SEED:subsystem_tax"); foreach my $s (@$ss){ my $name = $mgdb->key2taxa(@$s[2]); if($name){ $all_subsystems{$name} = 1; my $ss_peg_count = @$s[4]; my $combo = $job_id."_".$name; $mg_ss_hits{$combo} = $ss_peg_count; $total_hits = $ss_peg_count + $total_hits; #print "ss_name: $name\n"; } } $mg_ss_total_hits{$job_id} = $total_hits; #print "$job_id th: $total_hits\n"; } my $data_matrix = open(OUT,">$output_dir/data_matrix.txt"); my @sorted = sort { $a cmp $b } keys(%all_subsystems); my $ss_count = scalar(@sorted); print "ss_count: $ss_count\n"; my $header_ss_string = join("\t",@sorted); print OUT "metagenome\t$header_ss_string\n"; foreach my $job_id (@jobs){ my $total_hits = $mg_ss_total_hits{$job_id}; my @cells = (); push(@cells,$job_id); foreach my $ss (@sorted){ my $combo = $job_id."_".$ss; my $ss_hits = $mg_ss_hits{$combo}; my $cell_entry; if($ss_hits){$cell_entry = $ss_hits/$total_hits;} else{$cell_entry = 0;} push(@cells,$cell_entry); } my $cell_count = scalar(@cells); print "cell_count for $job_id: $cell_count\n"; my $row_string = join("\t",@cells); print OUT "$row_string\n"; } close(OUT); open(OUT,">$output_dir/pca_script.r"); print OUT qq(m= read.table('$output_dir/data_matrix.txt', header= TRUE, row.names = 1, sep = '\\t')\n); print OUT qq(pca_obj = prcomp(m)\n); print OUT qq(s = summary(pca_obj)\n); print OUT qq(write.table(s[2],'$output_dir/pca_rotational_matrix.txt', sep ='\\t')\n); print OUT qq(write.table(s[1],'$output_dir/pca_std_deviation.txt', sep = '\\t')\n); close(OUT); my $cmd = qq( R CMD BATCH $output_dir/pca_script.r ); my $errorlevel = system($cmd); if ($errorlevel) { print "error with pca run\n"; } else{ print "good pca run\n"; } my @Xs; my @Ys; open(IN,"$output_dir/pca_rotational_matrix.txt"); while($_ = ){ chomp($_); if($_ =~/rotation.PC1/){next;} my @parts = split("\t",$_); push(@Xs,$parts[1]); push(@Ys,$parts[2]); } close(IN); my $x_string = join(",",@Xs); my $y_string = join(",",@Ys); open(OUT,">$output_dir/graphic_maker.r"); print OUT "pdf(file ='$output_dir/pc1_vs_pc2.pdf')\n"; print OUT "pc1<-c($x_string)\n"; print OUT "pc2<-c($y_string)\n"; print OUT "plot(pc1,pc2,xlim=c(-1,1),ylim=c(-1,1))\n"; print OUT "dev.off()\n"; close(OUT); my $cmd = qq( R CMD BATCH $output_dir/graphic_maker.r ); my $errorlevel = system($cmd); if ($errorlevel) { print "error with making graphic\n"; } else{ print "png made\n"; } #`R CMD BATCH $output_dir/pca_script.r`; ############################ # # Accessor functions # ############################ sub get_mgdb { my ($job) = @_; unless (ref $job and $job->isa('MG_jobcache::Job')) { warn "ERROR get_database_handle must be called with job object"; return undef; } else { return MGRAST::MetagenomeAnalysis->new($job); } } ############################ # # Internal functions # ############################ sub _get_job { my ($jobnum) = @_; my ($job) = $jobcache->Job->get_objects({id=>$jobnum})->[0]; if (defined $job){ return $job; } else { print "no job returned from jobcache\n"; return undef; } } sub _jobdb { my $handle; print $FIG_Config::mgrast_jobcache_db."\t".$FIG_Config::mgrast_jobcache_host."\t".$FIG_Config::mgrast_jobcache_user."\n"; eval { $handle = DBMaster->new( -database => $FIG_Config::mgrast_jobcache_db || 'JobCacheMGRast', -host => $FIG_Config::mgrast_jobcache_host, -user => $FIG_Config::mgrast_jobcache_user, -password => $FIG_Config::mgrast_jobcache_password ); }; if ($@) { warn "Unable to connect to MGRAST database: $@\n"; $handle = undef; } #else{ print "connected to db\n";} return $handle; } sub get_key2taxa_mapping { unless (defined($_[0]->{key2taxa})) { my $sth = $_[0]->dbh->prepare("select dbkey, str from tax_item"); $sth->execute; $_[0]->{key2taxa} = $sth->fetchall_hashref('dbkey'); } return $_[0]->{key2taxa}; } sub key2taxa { if(defined $_[1]) { my $t = $_[0]->{key2taxa}->{$_[1]}->{str}; $t =~ s/\t+$//; return $t; } return ''; }