[Bio] / FigKernelScripts / compute_PCA_for_metagenomes.pl Repository:
ViewVC logotype

View of /FigKernelScripts/compute_PCA_for_metagenomes.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Jun 10 15:59:11 2009 UTC (10 years, 6 months ago) by mkubal
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
called by PCA_ssav

use FIG_Config;
use DBMaster;
use MGRAST::MetagenomeAnalysis;
use DBMaster;
use strict;
use warnings;

if(scalar(@ARGV) == 0 ){
    print "usage: perl compute_PCA_for_metagenomes.pl \$dir_for_output \$metagenome_job_id_1 \$metagenome_job_id_2 ...\n";
    exit;
}

my $output_dir = shift(@ARGV);

my @jobs;
foreach my $arg (@ARGV){
    if($arg =~/\d+/){
	push(@jobs,$arg);
	#print "job: $arg\n";
    }
}

my %all_subsystems;
my %mg_ss_total_hits;
my %mg_ss_hits;

my $jobcache = &_jobdb();

foreach my $job_id (@jobs){
    my $total_hits = 0;
    my $job_obj = &_get_job($job_id);
    if(! $job_obj){next;}
    else{print "good id: $job_id\n";}
    my $mgdb = &get_mgdb($job_obj);
    
    my $ss = $mgdb->get_subsystem_counts("SEED:subsystem_tax");
    foreach my $s (@$ss){
	my $name = $mgdb->key2taxa(@$s[2]);
	if($name){
	    $all_subsystems{$name} = 1;
	    my $ss_peg_count = @$s[4];
	    my $combo = $job_id."_".$name;
	    $mg_ss_hits{$combo} = $ss_peg_count;
	    $total_hits = $ss_peg_count + $total_hits;
	    #print "ss_name: $name\n";
	}
    }

    $mg_ss_total_hits{$job_id} = $total_hits;
    #print "$job_id th: $total_hits\n";
}

my $data_matrix = open(OUT,">$output_dir/data_matrix.txt");
my @sorted = sort { $a cmp $b } keys(%all_subsystems);

my $ss_count  = scalar(@sorted);

print "ss_count: $ss_count\n";

my $header_ss_string = join("\t",@sorted);

print OUT "metagenome\t$header_ss_string\n";

foreach my $job_id (@jobs){
    my $total_hits = $mg_ss_total_hits{$job_id};
    my @cells = ();
    push(@cells,$job_id);
    foreach my $ss (@sorted){
	my $combo = $job_id."_".$ss;
	my $ss_hits = $mg_ss_hits{$combo};
	my $cell_entry;
	if($ss_hits){$cell_entry = $ss_hits/$total_hits;}
	else{$cell_entry = 0;}
	push(@cells,$cell_entry);
    }

    my $cell_count = scalar(@cells);
    print "cell_count for $job_id: $cell_count\n";
    

    my $row_string = join("\t",@cells);
    print OUT "$row_string\n";
}

close(OUT);

open(OUT,">$output_dir/pca_script.r");
print OUT qq(m= read.table('$output_dir/data_matrix.txt', header= TRUE, row.names = 1, sep = '\\t')\n);
print OUT qq(pca_obj = prcomp(m)\n);
print OUT qq(s = summary(pca_obj)\n);
print OUT qq(write.table(s[2],'$output_dir/pca_rotational_matrix.txt', sep ='\\t')\n);
print OUT qq(write.table(s[1],'$output_dir/pca_std_deviation.txt', sep = '\\t')\n);
close(OUT);

my $cmd = qq( R CMD BATCH $output_dir/pca_script.r );
my $errorlevel = system($cmd);

if ($errorlevel) {
    print "error with pca run\n";
}
else{
    print "good pca run\n";
}


my @Xs;
my @Ys;

open(IN,"$output_dir/pca_rotational_matrix.txt");
while($_ = <IN>){
    chomp($_);
    if($_ =~/rotation.PC1/){next;}
    my @parts = split("\t",$_);
    push(@Xs,$parts[1]);
    push(@Ys,$parts[2]);
}
close(IN);

my $x_string = join(",",@Xs);
my $y_string = join(",",@Ys);
open(OUT,">$output_dir/graphic_maker.r");
print OUT "pdf(file ='$output_dir/pc1_vs_pc2.pdf')\n";
print OUT "pc1<-c($x_string)\n";
print OUT "pc2<-c($y_string)\n";
print OUT "plot(pc1,pc2,xlim=c(-1,1),ylim=c(-1,1))\n";
print OUT "dev.off()\n";
close(OUT);

my $cmd = qq( R CMD BATCH $output_dir/graphic_maker.r );
my $errorlevel = system($cmd);

if ($errorlevel) {
    print "error with making graphic\n";
}
else{
    print "png made\n";
}

#`R CMD BATCH $output_dir/pca_script.r`;


############################
#
# Accessor functions
#
############################

sub get_mgdb {
  my ($job) = @_;  
  unless (ref $job and $job->isa('MG_jobcache::Job')) {
    warn "ERROR get_database_handle must be called with job object";
    return undef;
  } else {
    return MGRAST::MetagenomeAnalysis->new($job);
  }
}

############################
#
# Internal functions
#
############################

sub _get_job {
  my ($jobnum) = @_;
  my ($job) = $jobcache->Job->get_objects({id=>$jobnum})->[0];

  if (defined $job){
    return $job;
  } else {
    print "no job returned from jobcache\n";  
    return undef;
  }
}

sub _jobdb {
  my $handle;
  print $FIG_Config::mgrast_jobcache_db."\t".$FIG_Config::mgrast_jobcache_host."\t".$FIG_Config::mgrast_jobcache_user."\n"; 
  eval {
    $handle = DBMaster->new( -database => $FIG_Config::mgrast_jobcache_db || 'JobCacheMGRast',
			     -host     => $FIG_Config::mgrast_jobcache_host,
			     -user     => $FIG_Config::mgrast_jobcache_user,
			     -password => $FIG_Config::mgrast_jobcache_password );
  };
  if ($@) {
    warn "Unable to connect to MGRAST database: $@\n";
    $handle = undef;
  }
  #else{ print "connected to db\n";}
  
  return $handle;
}

sub get_key2taxa_mapping {

    unless (defined($_[0]->{key2taxa})) {
	my $sth  = $_[0]->dbh->prepare("select dbkey, str from tax_item");
	$sth->execute;
	$_[0]->{key2taxa} = $sth->fetchall_hashref('dbkey');
    }

    return $_[0]->{key2taxa};

}


sub key2taxa {
    if(defined $_[1]) {
	my $t = $_[0]->{key2taxa}->{$_[1]}->{str};
	$t =~ s/\t+$//;
	return $t;
    }
    return '';
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3