[Bio] / FigKernelScripts / GeneSet_P_Values.R Repository:
ViewVC logotype

View of /FigKernelScripts/GeneSet_P_Values.R

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (annotate)
Sat Feb 19 19:27:43 2011 UTC (8 years, 11 months ago) by dejongh
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.2: +1 -1 lines
fixes

##vector of fold changes##
fold_change = scan("gene_values.txt")

##Gene set matrix: gene names for the sets##
gene_sets=readLines("gene_sets.txt")
gene_sets = strsplit(gene_sets, ",")

# gene set names
gene_set_names=readLines("gene_set_names.txt")

##############################################################

##This function computes the up and down regulated one-sided
##p-values (based on 10000 random sets of the same size) for each gene set 
##provided. The statistic is the mean and is computed on 
##expression numbers that are all presumed to be positive (e.g. fold-changes)
##upregulated p-values are for fold-changes>1, downregulated for fold-changes<1##

##############################################################

gsa=function(fold_change,gene_names,gene_sets) {

##Step 0. Find all of the set sizes##
setsize_temp=vector(mode="numeric", length = length(gene_sets))

for (i in 1:length(gene_sets)) {

	setsize_temp[i]=length(gene_sets[[i]])
}
##Setsizes vector##
set_sizes=as.numeric(names(table(setsize_temp)));

##Step 1. Create the random set matrix##
random_set_matrix=matrix(0,nrow=10000,ncol=length(set_sizes));

for (j in 1:length(set_sizes)) {
	cat("computing for size ", set_sizes[j], "\n");
	for (i in 1:10000) {
		random_set_matrix[i,j]=mean(sample(fold_change,set_sizes[j],replace=FALSE));
	}
}

##Step 2. Find the p-values for each set##
output=matrix(0,ncol=4,nrow=length(gene_sets));

for (i in 1:length(gene_sets)) {
	set=gene_sets[[i]]
	run_sum=0;
	
	for (k in 1:length(set)){
		run_sum=run_sum+fold_change[as.numeric(set[k])];
	}

	##Find which column in the random sets matrix corresponds to the set size of interest
	column=which(set_sizes==length(set));
	
	##One-sided pvalues##
	output[i,1]=gene_set_names[i];
	output[i,2]=run_sum/length(set);
	output[i,3]=length(which(random_set_matrix[,column]>=run_sum/length(set)))/10000;
	output[i,4]=length(which(random_set_matrix[,column]<=run_sum/length(set)))/10000;

}

output[rank(as.numeric(output[,3]), ties.method="random"),] = output[c(1:nrow(output)),]

return(output);

}##End of the function

output1 = gsa(fold_change,gene_names,gene_sets);
write.table(output1, file="gsa_output.txt", row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t");


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3