[Bio] / FigKernelScripts / Pipeline.R Repository:
ViewVC logotype

Annotation of /FigKernelScripts/Pipeline.R

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download)

1 : overbeek 1.1 find_low_variability_genes = function(gxp_data)
2 :     {
3 :     ##For each gene##
4 :     output=c();
5 :     for (i in 1:dim(gxp_data)[1])
6 :     {
7 :     percents=c(quantile(gxp_data[i,],probs=c(0.05,0.95)));
8 :     output=rbind(output,cbind(percents[2]-percents[1],sd(gxp_data[i,])));
9 :     }
10 :    
11 :     ##Output contains the peg, range, mid 90%, mid 80%, IQR and std for each gene##
12 :     ##Now convert those to the ranks, 100%=largest, 0%=smallest
13 :     ranks=cbind(rank(output[,1])/dim(gxp_data)[1],rank(output[,2])/dim(gxp_data)[1]);
14 :    
15 :     # create a vector with a zero meaning not flagged, and a one meaning flagged
16 :     retval = vector(mode="integer", length=dim(gxp_data)[1]);
17 :     retval[which(ranks[,1]<.02)] = 1;
18 :     retval[which(ranks[,2]<.02)] = 1;
19 :     return(retval);
20 :     }
21 :    
22 :     split_geneset = function(geneset, gxp_data, pearson_cutoff)
23 :     {
24 :     values=matrix(0,nrow=length(geneset), ncol=length(geneset));
25 :    
26 :     for (i in 1:(length(geneset)-1))
27 :     {
28 :     for (j in (i+1):(length(geneset)))
29 :     {
30 :     values[j,i] = cor(gxp_data[geneset[i],],gxp_data[geneset[j],], use="everything", method="pearson");
31 :     }
32 :     }
33 :    
34 :     values=as.dist(-(values-1));
35 :     hc = hclust(values, method="complete");
36 :     treecut = cutree(hc, h=(-(pearson_cutoff-1)));
37 :     tb=table(treecut);
38 :     num_singletons = sum(tb==1);
39 :     #print(c("num_singletons:", num_singletons, "out of", length(geneset), "genes"), quote=FALSE);
40 :     tb=tb[tb!=1]; # remove singletons
41 :     new_genesets = vector("list", length(tb));
42 :    
43 :     if (length(tb)==0) {return (new_genesets);}
44 :    
45 :     for (i in 1:length(tb))
46 :     {
47 :     cluster = as.integer(names(tb))[i];
48 :     new_genesets[[i]] = geneset[treecut==cluster];
49 :     }
50 :    
51 :     return (new_genesets);
52 :     }
53 :    
54 :     #####################################################################################################
55 :     # ALGORITHM 1
56 :     # find the on/off cutoffs for one experiment
57 :     # genesets is a list containing vectors that represent gene sets;
58 :     # each vector contains row numbers that index into gxp_data
59 :     # gxp_data is a column of expression values for one experiment
60 :    
61 :     find_cutoffs=function(genesets, gxp_data, low_variability_genes)
62 :     {
63 :    
64 :     # Loop through the gene sets and build up a list containing vectors of gene expression values
65 :     # for each gene set; these values are expressed as rank orders normalized from 0 to 1
66 :     ranked_gxp_data = rank(gxp_data, ties.method="first")/length(gxp_data);
67 :     geneset_data = list();
68 :    
69 :     i=1;
70 :     while (i <= length(genesets))
71 :     {
72 :     temp = c();
73 :    
74 :     for (k in genesets[[i]])
75 :     {
76 :     if (low_variability_genes[k]==0)
77 :     {
78 :     temp = cbind(temp, ranked_gxp_data[k]);
79 :     }
80 :     }
81 :    
82 :     if (length(temp) > 1)
83 :     {
84 :     geneset_data[[length(geneset_data)+1]]=temp;
85 :     }
86 :    
87 :     i=i+1;
88 :     }
89 :    
90 :     # this vector will hold the index for the gene sets that still need to be called after Step 1
91 :     not_called_sets=c();
92 :    
93 :     # we are trying to determine the proper values for off and on
94 :     off=0; on=1;
95 :    
96 :     # Step 1a: Make an initial call for those sets that we are quite confident about.
97 :     i=1;
98 :     while (i <= length(geneset_data))
99 :     {
100 :     ##called the set 1= YES, 0=NO##
101 :     called=0;
102 :    
103 :     ##Find the largest and smallest and second smallest and second largest values of the set
104 :     max_exp = max(geneset_data[[i]]);
105 :     min_exp = min(geneset_data[[i]]);
106 :     ranks=rank(geneset_data[[i]], ties.method="first");
107 :     almost_min=geneset_data[[i]][ranks==2];
108 :     almost_max=geneset_data[[i]][ranks==(length(ranks)-1)];
109 :    
110 :     ##1a.1.If the minimum gene expression value in the set is less than 20%, ##
111 :     ##and the range of the set is less than 20%, then the set is definitely OFF.##
112 :     if(min_exp<0.20 && (max_exp-min_exp<0.2))
113 :     {
114 :     ##If the current off is less than max value in the set, update the off cutoff
115 :     if(off<max_exp) {off=max_exp;}
116 :     called=1;
117 :     }
118 :    
119 :     ##1a.2 If the second smallest gene expression value in the set is less than 30%,##
120 :     ## and the range of the set is less than 10%, then the set is definitely OFF.##
121 :     if(almost_min<0.30 && (max_exp-min_exp<0.1))
122 :     {
123 :     ##If the current off is less than max value in the set, update the off cutoff
124 :     if(off<max_exp) {off=max_exp;}
125 :     called=1;
126 :     }
127 :    
128 :     ##1a.3.If the maximum gene expression value in the set is less than 30%, ##
129 :     ##then the gene set is definitely OFF.##
130 :     if(max_exp<0.30)
131 :     {
132 :     ##If the current off is less than max value in the set, update the off cutoff
133 :     if(off<max_exp) {off=max_exp;}
134 :     called=1;
135 :     }
136 :    
137 :     ##1a.4. If the maximum gene expression value in the set is greater than 80%,##
138 :     ## and the range of the set is less than 20%, then the set is definitely ON.##
139 :     if(max_exp>0.80 && (max_exp-min_exp<0.2))
140 :     {
141 :     ## If the current on is more than the min value in the set update the on cutoff
142 :     if(on>min_exp) {on=min_exp;}
143 :     called=1;
144 :     }
145 :    
146 :     ##1a.5. If the second largest gene expression value in the set is greater than 70%,##
147 :     ## and the range of the set is less than 10%, then the set is definitely ON.##
148 :     if(almost_max>0.70 && (max_exp-min_exp<0.1))
149 :     {
150 :     ## If the current on is more than the min value in the set update the on cutoff
151 :     if(on>min_exp) {on=min_exp;}
152 :     called=1;
153 :     }
154 :    
155 :     ##1a.6. If the minimum gene expression value in the set is greater than 70%,##
156 :     ## then the set is definitely ON.##
157 :     if(min_exp>0.70)
158 :     {
159 :     ## If the current on is more than the min value in the set update the on cutoff
160 :     if(on>min_exp) {on=min_exp;}
161 :     called=1;
162 :     }
163 :    
164 :     ##If the gene set hasn't been called##
165 :     if(called==0)
166 :     {
167 :     not_called_sets=cbind(not_called_sets, i);
168 :     }
169 :    
170 :     i=i+1;
171 :     }##End of each gene set
172 :    
173 :     ##STEP 1b. NOW EXPLORE THE NOT CALLED SETS FURTHER TO SEE IF WE CAN IMPROVE THE ON/OFF CALLS AT ALL##
174 :    
175 :     newoff=off; newon=on;
176 :    
177 :     i=1;
178 :     while (i <= length(not_called_sets))
179 :     {
180 :     geneset_index=not_called_sets[i];
181 :    
182 :     ##Find the smallest and second smallest and largest and second largest values of the set
183 :     max_exp = max(geneset_data[[geneset_index]]);
184 :     min_exp = min(geneset_data[[geneset_index]]); ranks=rank(geneset_data[[geneset_index]], ties.method="first");
185 :     almost_min=geneset_data[[geneset_index]][ranks==2];
186 :     almost_max=geneset_data[[geneset_index]][ranks==(length(ranks)-1)];
187 :    
188 :     ##1b.1. If the minimum gene expression value in the set is less than 20%, ##
189 :     ##and the maximum value in the set is less than 45%, then the set is probably OFF.##
190 :     if(min_exp<0.20 && max_exp<0.45)
191 :     {
192 :     ##If the current off is less than second largest value in the set, update the off cutoff
193 :     if(newoff<almost_max) {newoff=almost_max;}
194 :     }
195 :    
196 :     ##1b.2. If the second smallest gene expression value in the set is less than 30%,##
197 :     ## and the maximum value in the set is less than 45%, then the set is probably OFF.##
198 :     if(almost_min<0.30 && max_exp<0.45)
199 :     {
200 :     ##If the current off is less than second largest value in the set, update the off cutoff
201 :     if(newoff<almost_max) {newoff=almost_max;}
202 :     }
203 :    
204 :     ##1b.3. If the second smallest gene expression value in the set is less than 30%,##
205 :     ## and the maximum value in the set is less than 45%, then the set is probably OFF.##
206 :     if(max_exp<0.40)
207 :     {
208 :     ##If the current off is less than second largest value in the set, update the off cutoff
209 :     if(newoff<almost_max) {newoff=almost_max;}
210 :     }
211 :    
212 :     ##1b.4. If the maximum gene expression value in the set is more than 80%, ##
213 :     ## and the minimum value in the set is more than 55%, then the set is probably ON.##
214 :     if(max_exp>0.80 && min_exp>0.55)
215 :     {
216 :     ##If the current off is less than max value in the set, update the off cutoff
217 :     if(newon>almost_min) {newon=almost_min;}
218 :     }
219 :    
220 :     ##1b.5. If the second largest gene expression value in the set is more than 70%, ##
221 :     ## and the minimum value in the set is more than 55%, then the set is probably ON.##
222 :     if(almost_max>0.70 && min_exp>0.55)
223 :     {
224 :     ##If the current off is less than max value in the set, update the off cutoff
225 :     if(newon>almost_min) {newon=almost_min;}
226 :     }
227 :    
228 :     ##1b.6. If the minimum gene expression value in the set is more than 60%, ##
229 :     ## then the set is probably ON.##
230 :     if(min_exp>0.60)
231 :     {
232 :     ##If the current off is less than max value in the set, update the off cutoff
233 :     if(newon>almost_min) {newon=almost_min;}
234 :     }
235 :    
236 :     i=i+1;
237 :     }##End of each set
238 :    
239 :     # Done with Steps 1a and 1b; newoff and newon contain the updated off/on limits
240 :    
241 :     # Start Step 2
242 :    
243 :     ##Each set gets one vote for where the new ON or OFF point should be##
244 :     vote_on=c();
245 :     vote_off=c();
246 :    
247 :     # Steps 2a and 2b are coded together in this for loop
248 :     i=1;
249 :     while (i <= length(not_called_sets))
250 :     {
251 :     geneset_index=not_called_sets[i];
252 :     max_exp = max(geneset_data[[geneset_index]]);
253 :     min_exp = min(geneset_data[[geneset_index]]);
254 :    
255 :     ##If the set is not inconsistent, see where the set would like the new cutpoint to be##
256 :     if (min_exp>newoff || max_exp<newon)
257 :     {
258 :     ##The set is "trying" to be ON##
259 :     if (min_exp>newoff && max_exp>newon)
260 :     {
261 :     vote_on=cbind(vote_on,min_exp);
262 :     }
263 :    
264 :     ##The set is "trying" to be OFF##
265 :     if (max_exp<newon && min_exp<newoff)
266 :     {
267 :     vote_off=cbind(vote_off,max_exp);
268 :     }
269 :    
270 :     ##Ignores sets that have all their genes in the current "gray" area##
271 :     ##Ignores sets that are already all ON or all OFF
272 :     ##Ignores sets that are inconsistent
273 :     }
274 :    
275 :     i=i+1;
276 :     }
277 :    
278 :     ##Step 2c: Tally the votes to modify the calls
279 :     ##Restrictions: the final gray area must be at least 10% wide
280 :     curr_off_cut=newoff; curr_on_cut=newon;
281 :     curr_off_votes=vote_off; curr_on_votes=vote_on;
282 :    
283 :     if (!is.null(vote_off)==TRUE && !is.null(vote_on)==TRUE)
284 :     {
285 :     # make sure each vector has at least two entries, including current cuts
286 :     curr_off_votes=c(curr_off_votes,curr_off_cut);
287 :     curr_on_votes=c(curr_on_votes,curr_on_cut);
288 :    
289 :     ##For each on vote
290 :     done=0;
291 :     while (done==0)
292 :     {
293 :     dropon=0; dropoff=0;
294 :     on_ranks=rank(curr_on_votes, ties.method="first");
295 :     min_on=min(curr_on_votes);
296 :     off_ranks=rank(curr_off_votes, ties.method="first");
297 :     max_off=max(curr_off_votes);
298 :    
299 :     ##Step 2c.1. if the lowest on vote and largest off vote are at least 10% apart, you're done##
300 :     if (min_on>(max_off+0.1))
301 :     {
302 :     curr_on_cut=curr_on_votes[on_ranks==2];
303 :     curr_off_cut=curr_off_votes[off_ranks==(length(curr_off_votes)-1)];
304 :     done=1;
305 :     break;
306 :     }
307 :     ##Step 2c.2. If the current low on vote and largest off vote are less than 10% apart, then ##
308 :     ##Get rid of the more extreme one (farther from the current cutoff)##
309 :     else
310 :     {
311 :     ##If the on vote is more extreme, get rid of it
312 :     if ((curr_on_cut-min_on)>(max_off-curr_off_cut))
313 :     {
314 :     dropon=1;
315 :     curr_on_votes[on_ranks==1]=curr_on_cut;
316 :     }
317 :     ##If the off vote is more extreme, get rid of it
318 :     else
319 :     {
320 :     dropoff=1;
321 :     curr_off_votes[off_ranks==length(curr_off_votes)]=curr_off_cut;
322 :     }
323 :     }
324 :    
325 :     # recalculate since we may have modified these
326 :     if (dropon == 1) {on_ranks=rank(curr_on_votes, ties.method="first"); min_on=min(curr_on_votes);}
327 :     if (dropoff == 1) {off_ranks=rank(curr_off_votes, ties.method="first"); max_off=max(curr_off_votes);}
328 :    
329 :     ##Step 2c.3. if the lowest on vote and largest off vote are at least 10% apart, you're done##
330 :     if (min_on>(max_off+0.1))
331 :     {
332 :     curr_on_cut=curr_on_votes[on_ranks==2];
333 :     curr_off_cut=curr_off_votes[off_ranks==(length(curr_off_votes)-1)];
334 :     done=1;
335 :     break;
336 :     }
337 :     ##Step 2c.4. If the current low on vote and largest off vote are less than 10% apart, then ##
338 :     ##Get rid of the most extreme on or off, whichever one you didn't drop in step 2##
339 :     else
340 :     {
341 :     ##If dumped OFF vote last time, this time dump ON vote
342 :     if (dropoff==1)
343 :     {
344 :     curr_on_votes[on_ranks==1]=curr_on_cut;
345 :     }
346 :     ##If dumped ON vote last time, this time dump OFF vote
347 :     else
348 :     {
349 :     curr_off_votes[off_ranks==length(curr_off_votes)]=curr_off_cut;
350 :     }
351 :     }
352 :     }##End of while votes yield cutpoints less than 10% apart.
353 :    
354 :     }##End of if !null
355 :    
356 :     return(cbind(curr_off_cut,curr_on_cut));
357 :    
358 :     }##End of find cutoffs function
359 :    
360 :     #####################################################################################################
361 :     # ALGORITHM 2
362 :     tweak_calls_by_gene = function(on_off_calls, gxp_data)
363 :     {
364 :     ###Step 1. Find the counts of on/off/gray for each gene
365 :     ### Fourth column is reserved for Step 2.
366 :     call_counts=matrix(0,nrow=dim(gxp_data)[1],ncol=4);
367 :    
368 :     for (i in 1:dim(on_off_calls)[1])
369 :     {
370 :    
371 :     temp=table(as.numeric(on_off_calls[i,]));
372 :    
373 :     if (length(temp)==3)
374 :     {
375 :     # there are on/off/gray calls
376 :     call_counts[i,]=c(temp[1],temp[2],temp[3],0);
377 :     }
378 :     else if (length(temp)==2)
379 :     {
380 :     ##No OFF calls##
381 :     if (names(temp)[1]=="0" && names(temp)[2]=="1") {call_counts[i,]=c(0,temp[1],temp[2],0);}
382 :    
383 :     ##No ON calls##
384 :     else if (names(temp)[1]=="-1" && names(temp)[2]=="0") {call_counts[i,]=c(temp[1],temp[2],0,0);}
385 :    
386 :     ##No gray calls##
387 :     else {call_counts[i,]=c(temp[1],0,temp[2],0);}
388 :     }
389 :     else # (length(temp)==1)
390 :     {
391 :     ##All OFF calls##
392 :     if (names(temp)[1]=="-1") {call_counts[i,]=c(temp,0,0,0);}
393 :    
394 :     ##all gray calls##
395 :     else if (names(temp)[1]=="0") {call_counts[i,]=c(0,temp,0,0);}
396 :    
397 :     ##All ON calls##
398 :     else {call_counts[i,]=c(0,0,temp,0);}
399 :     }
400 :     }
401 :    
402 :     print("Done with Step 1");
403 :    
404 :     ##Step 2. Flag genes that are always on/off
405 :     for (i in 1:dim(call_counts)[1])
406 :     {
407 :     ##Less than 5 off calls, gene is always on##
408 :     if (call_counts[i,1]<5) {call_counts[i,4]=1;}
409 :    
410 :     ##Less than 5 on calls, gene is always off##
411 :     else if (call_counts[i,3]<5) {call_counts[i,4]=-1;}
412 :     }
413 :    
414 :     print("Done with Step 2");
415 :    
416 :     ##Step 3. The main "tweaking" algorithm##
417 :     new_on_off_calls=matrix(0,nrow=dim(gxp_data)[1],ncol=dim(gxp_data)[2]);
418 :    
419 :     ##For each gene##
420 :     for (i in 1:dim(on_off_calls)[1])
421 :     {
422 :     ##STEP 3a. Tweak the gene calls for always on/off genes###
423 :    
424 :     ##If the gene is flagged always OFF in the call_counts file,##
425 :     if (call_counts[i,4]==-1)
426 :     {
427 :     ##For each experiment
428 :     for (j in 1:dim(on_off_calls)[2])
429 :     {
430 :     ##If the gene is called OFF, keep it that way, everything else is gray##
431 :     if (on_off_calls[i,j]==-1) {new_on_off_calls[i,j]=-1;}
432 :     }
433 :     }
434 :    
435 :     ##If the gene is flagged always ON in the call_counts file,##
436 :     else if (call_counts[i,4]==1)
437 :     {
438 :     ##For each experiment
439 :     for (j in 1:dim(on_off_calls)[2])
440 :     {
441 :     ##If the gene is called ON, keep it that way, everything else is gray##
442 :     if (on_off_calls[i,j]==1) {new_on_off_calls[i,j]=1;}
443 :     }
444 :     }
445 :    
446 :     ##Step 3b. Tweak the gene calls for genes that are not flagged as always ON/OFF##
447 :    
448 :     else
449 :     {
450 :     to_boxplot=cbind(as.numeric(on_off_calls[i,]),as.numeric(gxp_data[i,]));
451 :    
452 :     ##Step 3bi. Split the to_boxplot matrix into three vectors based on the original on, off, gray calls##
453 :     off_rows=matrix(to_boxplot[to_boxplot[,1]==-1,],ncol=2);
454 :     on_rows=matrix(to_boxplot[to_boxplot[,1]==1,],ncol=2);
455 :     gray_rows=matrix(to_boxplot[to_boxplot[,1]==0,],ncol=2);
456 :    
457 :     if (length(gray_rows) != 0)
458 :     {
459 :     ##STEP 3bii. Find the 75th percentile of the off calls, the 25th and 75th of gray and 25th of ON##
460 :     off_75=quantile(off_rows[,2],0.75);
461 :     gray_25=quantile(gray_rows[,2],0.25);
462 :     gray_75=quantile(gray_rows[,2],0.75);
463 :     on_25=quantile(on_rows[,2],0.25);
464 :    
465 :     ##Step 3biii. find the new cutpoints based on these percentiles
466 :    
467 :     if (off_75<=gray_25) {new_off=off_75;}
468 :     else {new_off=gray_25;}
469 :    
470 :     if (gray_75<=on_25) {new_on=on_25;}
471 :     else {new_on=gray_75;}
472 :    
473 :     ##Step 3biv. Use the new on/off cutoffs to recall the genes##
474 :     for (j in 1:dim(gxp_data)[2])
475 :     {
476 :     if (as.numeric(gxp_data[i,j])<new_off) {new_on_off_calls[i,j]=-1; }
477 :     else if (as.numeric(gxp_data[i,j])>new_on) {new_on_off_calls[i,j]=1;}
478 :    
479 :     }
480 :     }
481 :     }
482 :     }
483 :    
484 :     print("Done with Step 3");
485 :    
486 :     return(list(new_on_off_calls,call_counts));
487 :     }
488 :    
489 :    
490 :     #####################################################################################################
491 :     # ALGORITHM 3
492 :     # geneset is a vector with row numbers corresponding to genes,
493 :     # gxp_data is the experimental data, on_off_calls is the frozen calls
494 :     recall_genesets=function(geneset, gxp_data, on_off_calls)
495 :     {
496 :     num_experiments = dim(gxp_data)[2];
497 :     geneset_data = matrix(0,nrow=length(geneset),ncol=dim(gxp_data)[2]);
498 :     geneset_calls = matrix(0,nrow=length(geneset),ncol=dim(on_off_calls)[2]);
499 :    
500 :     # this function will modify the on/off calls for only the rows in the geneset
501 :     final_on_off_calls = on_off_calls;
502 :    
503 :     i=1;
504 :     for (k in geneset)
505 :     {
506 :     geneset_data[i,] = gxp_data[k,];
507 :     geneset_calls[i,] = on_off_calls[k,];
508 :     i=i+1;
509 :     }
510 :    
511 :     ##Step 1. Characterize each experiment based on the initial calls##
512 :    
513 :     exptcalls=matrix(0,nrow=num_experiments,ncol=1);
514 :    
515 :     ##For each experiment, look at the genecalls in the second half of the merged_data##
516 :     for (j in 1:num_experiments)
517 :     {
518 :     atleastoneon=sum(geneset_calls[,j]==1);
519 :     atleastonegray=sum(geneset_calls[,j]==0);
520 :     atleastoneoff=sum(geneset_calls[,j]==-1);
521 :    
522 :     ##1 means set is always on##
523 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray==0) {exptcalls[j]=1;}
524 :     ##2 means set is always off##
525 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray==0) {exptcalls[j]=2;}
526 :     ##3 means set is always off AND GRAY##
527 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray>=1) {exptcalls[j]=3;}
528 :     ##4 means set is always on AND GRAY##
529 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[j]=4;}
530 :     ##5 means set is has at least one on and one off##
531 :     if (atleastoneon>=1 && atleastoneoff>=1) {exptcalls[j]=5;}
532 :     ##6 means set is always gray##
533 :     if (atleastoneon==0 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[j]=6;}
534 :     }
535 :    
536 :     ## Step 2. Pull out the experiments that are always on/always off
537 :     ## or inconsistent (some on and some off)##
538 :     alwayson= matrix(geneset_data[,exptcalls==1],nrow=dim(geneset_data)[1]);
539 :     alwaysoff= matrix(geneset_data[,exptcalls==2],nrow=dim(geneset_data)[1]);
540 :     inconsistent= matrix(geneset_data[,exptcalls==5],nrow=dim(geneset_data)[1]);
541 :    
542 :     if (length(alwayson)==0 || length(alwaysoff)==0)
543 :     {
544 :     ## If the set doesn't have at least one experiment always on and
545 :     ## one experiment always off we're done
546 :     finalcalls=geneset_calls;
547 :     finalexptcalls=exptcalls;
548 :     }
549 :     else
550 :     {
551 :     ## Step 3. For each gene in the set, find a "robust" initial "off" or "on" call##
552 :     robuston = apply(alwayson,1,min);
553 :     robustoff = apply(alwaysoff,1,max);
554 :    
555 :     ##Step 4. Check to see if there are inconsistencies. If there are then tweak the ##
556 :     ##robust on/off calls to reduce inconsistencies##
557 :     if (length(inconsistent) != 0)
558 :     {
559 :     ##for each gene
560 :     for (i in 1:dim(geneset_data)[1])
561 :     {
562 :     ##If median on call is bigger than inconsistent on call##
563 :     if (median(as.numeric(alwayson[i,]))>max(as.numeric(inconsistent[i,])))
564 :     {
565 :     robuston[i]=max(as.numeric(inconsistent[i,]));
566 :     }
567 :     ##Else make it the median on call##
568 :     else
569 :     {
570 :     robuston[i]=median(as.numeric(alwayson[i,]));
571 :     }
572 :    
573 :     ##If median off call is smaller than inconsistent off call##
574 :     if (median(as.numeric(alwaysoff[i,]))<min(as.numeric(inconsistent[i,])))
575 :     {
576 :     robustoff[i]=min(as.numeric(inconsistent[i,]));
577 :     }
578 :     ##Else make it the median off call##
579 :     else
580 :     {
581 :     robustoff[i]=median(as.numeric(alwaysoff[i,]));
582 :     }
583 :     }
584 :     }
585 :    
586 :     ##Step 5. Recall the genes in the set using the new thresholds and then reclassify
587 :     ##the experiments
588 :     newcalls=matrix(0,nrow=dim(geneset_data)[1],ncol=num_experiments);
589 :    
590 :     ##For each gene in the set
591 :     for (i in 1:dim(newcalls)[1])
592 :     {
593 :     ##For each experiment
594 :     for (j in 1:num_experiments)
595 :     {
596 :     if (as.numeric(geneset_data[i,j])<as.numeric(robustoff[i])) {newcalls[i,j]=-1;}
597 :     else if (as.numeric(geneset_data[i,j])>as.numeric(robuston[i])) {newcalls[i,j]=1;}
598 :     }
599 :     }
600 :    
601 :     ##Step 6. Recharacterize each experiment based on the initial calls##
602 :     newexptcalls=matrix(0,nrow=dim(geneset_data)[1],ncol=1);
603 :    
604 :     for (j in 1:num_experiments)
605 :     {
606 :     atleastoneon=sum(newcalls[,j]==1);
607 :     atleastonegray=sum(newcalls[,j]==0);
608 :     atleastoneoff=sum(newcalls[,j]==-1);
609 :    
610 :     ##1 means set is always on##
611 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray==0) {newexptcalls[j]=1;}
612 :     ##2 means set is always off##
613 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray==0) {newexptcalls[j]=2;}
614 :     ##3 means set is always off AND GRAY##
615 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray>=1) {newexptcalls[j]=3;}
616 :     ##4 means set is always on AND GRAY##
617 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray>=1) {newexptcalls[j]=4;}
618 :     ##5 means set is has at least one on and one off##
619 :     if (atleastoneon>=1 && atleastoneoff>=1) {newexptcalls[j]=5;}
620 :     ##6 means set is always gray##
621 :     if (atleastoneon==0 && atleastoneoff==0 && atleastonegray>=1) {newexptcalls[j]=6;}
622 :     }
623 :    
624 :     ##Step 7. Modify the on/off cutpoints for each gene in the
625 :     ##experiment based on the off/gray and on/gray experiments##
626 :     new_off_cut=c();
627 :     new_on_cut=c();
628 :    
629 :     ##For each gene in the set
630 :     for (i in 1:dim(newcalls)[1])
631 :     {
632 :     gray=cbind(geneset_data[i,newcalls[i,]==0],newexptcalls[newcalls[i,]==0]);
633 :    
634 :     ##Vote to refine the cutoff##
635 :     offvotes=c();
636 :     onvotes=c();
637 :    
638 :     if (!is.null(gray))
639 :     {
640 :     ##for each experiment where the gene is called gray##
641 :     offvotes=matrix(gray[gray[,2]==3,1],ncol=1);
642 :     onvotes=matrix(gray[gray[,2]==4,1],ncol=1);
643 :     }
644 :    
645 :     ##IF there are no off votes, then set the off vote to current off cut
646 :     if (length(offvotes)==0) {offvotes=robustoff[i];}
647 :     if (length(onvotes)==0) {onvotes=robuston[i];}
648 :    
649 :     ##No overlap for the gene, done##
650 :     if (max(offvotes)<min(onvotes))
651 :     {
652 :     new_off_cut=rbind(new_off_cut,max(offvotes));
653 :     new_on_cut=rbind(new_on_cut,min(onvotes));
654 :     }
655 :     ##If overlap, then be conservative and set the points so no overlap
656 :     else
657 :     {
658 :     new_off_cut=rbind(new_off_cut,min(onvotes)-0.0001);
659 :     new_on_cut=rbind(new_on_cut,(max(offvotes)+0.0001));
660 :     }
661 :     }
662 :    
663 :     ##Step 8. May have introduced some on/off inconsistencies into the ALL GRAY experiments##
664 :    
665 :     nearlyfinalcalls=matrix(0,nrow=dim(geneset_data)[1],ncol=num_experiments);
666 :    
667 :     ##For each gene in the set
668 :     for (i in 1:dim(nearlyfinalcalls)[1]) {
669 :    
670 :     ##For each experiment
671 :     for (j in 1:num_experiments)
672 :     {
673 :     if (as.numeric(geneset_data[i,j])<as.numeric(new_off_cut[i,1])) {nearlyfinalcalls[i,j]=-1;}
674 :     else if (as.numeric(geneset_data[i,j])>as.numeric(new_on_cut[i,1])) {nearlyfinalcalls[i,j]=1;}
675 :     }
676 :     }
677 :    
678 :     ##Step 9. Recharacterize each experiment based on the nearly done calls##
679 :     nearlydoneexptcalls=matrix(0,nrow=num_experiments,ncol=1);
680 :    
681 :     ##For each experiment##
682 :     for (j in 1: num_experiments)
683 :     {
684 :     atleastoneon=sum(nearlyfinalcalls[,j]==1);
685 :     atleastonegray=sum(nearlyfinalcalls[,j]==0);
686 :     atleastoneoff=sum(nearlyfinalcalls[,j]==-1);
687 :    
688 :     ##1 means set is always on##
689 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray==0) {nearlydoneexptcalls[j]=1;}
690 :     ##2 means set is always off##
691 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray==0) {nearlydoneexptcalls[j]=2;}
692 :     ##3 means set is always off AND GRAY##
693 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray>=1) {nearlydoneexptcalls[j]=3;}
694 :     ##4 means set is always on AND GRAY##
695 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray>=1) {nearlydoneexptcalls[j]=4;}
696 :     ##5 means set is has at least one on and one off##
697 :     if (atleastoneon>=1 && atleastoneoff>=1) {nearlydoneexptcalls[j]=5;}
698 :     ##6 means set is always gray##
699 :     if (atleastoneon==0 && atleastoneoff==0 && atleastonegray>=1) {nearlydoneexptcalls[j]=6;}
700 :     }
701 :    
702 :     ##Step 10. Modify the on/off cutpoints for each gene based on the introduced inconsistencies##
703 :     final_off_cut=new_off_cut;
704 :     final_on_cut=new_on_cut;
705 :    
706 :     ##for each experiment where the gene is called gray##
707 :     for (k in 1:length(nearlydoneexptcalls))
708 :     {
709 :     ##Inconsistent case##
710 :     if (nearlydoneexptcalls[k]==5)
711 :     {
712 :     ##Check to make sure the case is still inconsistent since cutpoints are being changed in this loop
713 :     inconsistent=1;
714 :     thereisoff=any(final_off_cut[,1]>as.numeric(geneset_data[,k]));
715 :     thereison=any(final_on_cut[,1]<as.numeric(geneset_data[,k]));
716 :     count=thereisoff+thereison;
717 :    
718 :     ##There is still inconsistency
719 :     if (count==2)
720 :     {
721 :     ##compile the on, off intensities for the genes in the set##
722 :     ##Actually compiling the distance of the on/off intensities from the current on/off cutpoint
723 :     on_dist=matrix(geneset_data[nearlyfinalcalls[,k]==1,k]-final_on_cut[nearlyfinalcalls[,k]==1,1],ncol=1);
724 :     off_dist=matrix(final_off_cut[nearlyfinalcalls[,k]==-1,1]-geneset_data[nearlyfinalcalls[,k]==-1,k],ncol=1);
725 :    
726 :     ##Now, for each inconsistent experiment I have the distances of the genes to the current cuts##
727 :    
728 :     ##If changing the off cuts are easier than changing the on cuts, change them##
729 :     if (max(off_dist)<max(on_dist))
730 :     {
731 :     ##For each gene
732 :     for (i in 1:dim(nearlyfinalcalls)[1])
733 :     {
734 :     ##If off, change off cut appropriately##
735 :     if (nearlyfinalcalls[i,k]==-1) {final_off_cut[i,1]=as.numeric(geneset_data[i,k])-0.0001;}
736 :     }
737 :     }
738 :     ##If changing the on cuts are easier than changing the off cuts, change them##
739 :     else
740 :     {
741 :     ##For each gene
742 :     for (i in 1:dim(nearlyfinalcalls)[1])
743 :     {
744 :     ##If on, change on cut appropriately##
745 :     if (nearlyfinalcalls[i,k]==1) {final_on_cut[i,1]=as.numeric(geneset_data[i,k])+0.0001;}
746 :     }
747 :     }
748 :     }##End of there is still inconsistency
749 :     }
750 :     }
751 :    
752 :     ##Step 11. Do a final call on the genes in the set using the new thresholds and then reclassify
753 :     ##the experiments
754 :     finalcalls=matrix(0,nrow=dim(geneset_data)[1],ncol=num_experiments);
755 :    
756 :     ##For each gene in the set
757 :     for (i in 1:dim(finalcalls)[1])
758 :     {
759 :     ##For each experiment
760 :     for (j in 1:num_experiments)
761 :     {
762 :     if (as.numeric(geneset_data[i,j])<as.numeric(final_off_cut[i,1])) {finalcalls[i,j]=-1;}
763 :     else if (as.numeric(geneset_data[i,j])>as.numeric(final_on_cut[i,1])) {finalcalls[i,j]=1;}
764 :     }
765 :     }
766 :    
767 :     ##Step 12. Recharacterize each experiment based on the initial calls##
768 :    
769 :     finalexptcalls=matrix(0,nrow=num_experiments,ncol=1);
770 :    
771 :     ##For each experiment##
772 :     for (j in 1: num_experiments)
773 :     {
774 :     atleastoneon=sum(finalcalls[,j]==1);
775 :     atleastonegray=sum(finalcalls[,j]==0);
776 :     atleastoneoff=sum(finalcalls[,j]==-1);
777 :    
778 :     ##1 means set is always on##
779 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray==0) {finalexptcalls[j]=1;}
780 :     ##2 means set is always off##
781 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray==0) {finalexptcalls[j]=2;}
782 :     ##3 means set is always off AND GRAY##
783 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray>=1) {finalexptcalls[j]=3;}
784 :     ##4 means set is always on AND GRAY##
785 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray>=1) {finalexptcalls[j]=4;}
786 :     ##5 means set is has at least one on and one off##
787 :     if (atleastoneon>=1 && atleastoneoff>=1) {finalexptcalls[j]=5;}
788 :     ##6 means set is always gray##
789 :     if (atleastoneon==0 && atleastoneoff==0 && atleastonegray>=1) {finalexptcalls[j]=6;}
790 :     }
791 :    
792 :     }##End of there was at least one ON and one OFF experiment to start with
793 :    
794 :     # update the final on/off calls for this geneset
795 :     i=1;
796 :     for (k in geneset)
797 :     {
798 :     final_on_off_calls[k,] = finalcalls[i,];
799 :     i=i+1;
800 :     }
801 :    
802 :     output=list(rbind(t(t(finalcalls)),t(finalexptcalls)),final_on_off_calls);
803 :    
804 :     return(output);
805 :    
806 :     }
807 :     #########END OF FUNCTION
808 :    
809 :     ################################################################################################################
810 :     ############################ MAIN BODY #####################################
811 :     ################################################################################################################
812 :    
813 :     args = commandArgs(TRUE);
814 :     if (length(args) != 2) { print("usage: Rscript Pipeline.R merged_gene_sets pearson_cutoff"); q() }
815 :     merged_gene_sets_file = args[1];
816 :     splitting_cutoff = as.numeric(args[2]);
817 :    
818 :     ## Read in gene expression data ##
819 :     ## This file is the output of 'exprs' from Bioconductor's RMA procedure
820 :     ## Header line contains experiment names
821 :     ## row name is gene id, rest of the columns are normalized expression values per experiment
822 :     all_geneexp=as.matrix(read.table("raw_data.tab",header=TRUE));
823 :     num_genes = dim(all_geneexp)[1];
824 :     num_experiments = dim(all_geneexp)[2];
825 :    
826 :     low_variability_genes = find_low_variability_genes(all_geneexp);
827 :    
828 :     ## Ross_gene_sets_merged.txt contains the gene set information ##
829 :     ## Each line is a comma separated value containing gene ids corresponding to the first column in the raw data ##
830 :     ## Need to convert these to row numbers in the raw data
831 :     input_genesets = readLines(merged_gene_sets_file);
832 :     input_genesets = strsplit(input_genesets, ",");
833 :     merged_genesets = list();
834 :    
835 :     for (i in 1:length(input_genesets))
836 :     {
837 :     input_geneset = input_genesets[[i]];
838 :     merged_geneset = c();
839 :     for (j in 1:length(input_geneset))
840 :     {
841 :     # check if the peg is in the raw_data
842 :     row_num = which(rownames(all_geneexp)==input_geneset[j]);
843 :    
844 :     if (length(row_num) > 0) {
845 :     merged_geneset = cbind(merged_geneset, row_num);
846 :     }
847 :     else {
848 :     print(c("Couldn't find row for", input_geneset[j]), quote=FALSE);
849 :     }
850 :     }
851 :    
852 :     if (length(merged_geneset) > 1) {
853 :     merged_genesets[[length(merged_genesets)+1]] = merged_geneset;
854 :     }
855 :     }
856 :    
857 :     # Now split all the merged_genesets on pearson correlations
858 :     split_genesets=list();
859 :    
860 :     for (i in 1:length(merged_genesets))
861 :     {
862 :     split_genesets = append(split_genesets,split_geneset(merged_genesets[[i]], all_geneexp, splitting_cutoff));
863 :     }
864 :    
865 :     print("Starting Algorithm 1");
866 :    
867 :     per_cutoffs=matrix(0,nrow=num_experiments,ncol=2);
868 :    
869 :     # for each experiment
870 :     for (i in 1:num_experiments)
871 :     {
872 :     per_cutoffs[i,]=find_cutoffs(split_genesets, all_geneexp[,i], low_variability_genes);
873 :     }
874 :    
875 :     ##convert percentiles to intensities##
876 :     cutoffs=matrix(0,nrow=num_experiments,ncol=2);
877 :     for (i in 1:num_experiments)
878 :     {
879 :     cutoffs[i,c(1,2)]=quantile(all_geneexp[,i],probs=c(per_cutoffs[i,1],per_cutoffs[i,2]));
880 :     }
881 :    
882 :     initial_calls=matrix(0,nrow=num_genes,ncol=num_experiments);
883 :    
884 :     for (i in 1:num_genes)
885 :     {
886 :     for (j in 1:num_experiments)
887 :     {
888 :     if (all_geneexp[i,j] < cutoffs[j,1]) { initial_calls[i,j] = -1; }
889 :     if (all_geneexp[i,j] > cutoffs[j,2]) { initial_calls[i,j] = 1; }
890 :     }
891 :     }
892 :    
893 :     print("Starting Algorithm 2");
894 :    
895 :     temp=tweak_calls_by_gene(initial_calls, all_geneexp);
896 :     tweaked_calls = temp[[1]];
897 :     alg2_flagged_genes = cbind(rownames(all_geneexp),temp[[2]][,4]);
898 :    
899 :     # gray out low variability genes
900 :     tweaked_calls[which(low_variability_genes != 0),]=0;
901 :    
902 :     print("Starting Algorithm 3");
903 :    
904 :     finalgenecalls=c();
905 :     finalexptcalls=c();
906 :     final_on_off_calls = tweaked_calls;
907 :    
908 :     for (i in 1:length(split_genesets))
909 :     {
910 :     rgout=recall_genesets(split_genesets[[i]], all_geneexp, final_on_off_calls);
911 :     temp = rgout[[1]];
912 :     finalgenecalls=rbind(finalgenecalls, temp[1:dim(temp)[1]-1,]);
913 :     finalexptcalls=rbind(finalexptcalls, temp[dim(temp)[1],]);
914 :     final_on_off_calls = rgout[[2]];
915 :     }
916 :    
917 :     write.table(alg2_flagged_genes, "flagged_genes.txt", quote=FALSE, sep="\t", col.names=FALSE,row.names=FALSE);
918 :     colnames(final_on_off_calls)=colnames(all_geneexp);
919 :     rownames(final_on_off_calls)=rownames(all_geneexp);
920 :     write.table(final_on_off_calls,"final_on_off_calls.txt", quote=FALSE, sep="\t", col.names=TRUE,row.names=TRUE);
921 :    
922 :    
923 :     ############################################ QUANTIFY IMPROVEMENT OVER THREE ALGORITHMS ########################
924 :    
925 :     # you are in the workspace after running the pipeline
926 :    
927 :     # strip flagged genes from the genesets
928 :     stripped_genesets = list();
929 :    
930 :     for (i in 1:length(split_genesets))
931 :     {
932 :     geneset = split_genesets[[i]];
933 :     stripped_geneset = c();
934 :    
935 :     for (k in geneset)
936 :     {
937 :     # comment out next line if you don't want to strip flagged genes
938 :     #if (alg2_flagged_genes[k,2]==0)
939 :     {stripped_geneset = cbind(stripped_geneset,k)}
940 :     }
941 :    
942 :     if (length(stripped_geneset) > 1)
943 :     {
944 :     stripped_genesets[[length(stripped_genesets)+1]]=stripped_geneset;
945 :     }
946 :     }
947 :    
948 :     final_table = matrix(0,nrow=6,ncol=3);
949 :     colnames(final_table)=c("Alg1","Alg2", "Alg3");
950 :    
951 :     print("Assessing consistency of Alg1 cutoffs");
952 :    
953 :     table2=matrix(0,nrow=length(stripped_genesets),ncol=num_experiments);
954 :     exptcalls = matrix(0,nrow=(length(stripped_genesets)*num_experiments), ncol=1);
955 :     counter = 1;
956 :    
957 :     for (i in 1:length(stripped_genesets))
958 :     {
959 :     geneset = stripped_genesets[[i]];
960 :     geneset_calls = matrix(0,nrow=length(geneset),ncol=num_experiments);
961 :     j=1;
962 :    
963 :     for (k in geneset)
964 :     {
965 :     geneset_calls[j,] = initial_calls[k,];
966 :     j=j+1;
967 :     }
968 :    
969 :     for (j in 1:num_experiments)
970 :     {
971 :     atleastoneon=sum(geneset_calls[,j]==1);
972 :     atleastonegray=sum(geneset_calls[,j]==0);
973 :     atleastoneoff=sum(geneset_calls[,j]==-1);
974 :    
975 :     ##1 means all genes in set are on for this experiment##
976 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray==0) {exptcalls[counter]=1;}
977 :     ##2 means all genes in set are off for this experiment##
978 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray==0) {exptcalls[counter]=2;}
979 :     ##3 means all genes in set are either off or gray for this experiment##
980 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray>=1) {exptcalls[counter]=3;}
981 :     ##4 means all genes in set are either on or gray for this experiment##
982 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[counter]=4;}
983 :     ##5 means at least one gene in set is off and at least one gene is on for this experiment##
984 :     if (atleastoneon>=1 && atleastoneoff>=1) {exptcalls[counter]=5;}
985 :     ##6 means all genes in set are gray for this experiment##
986 :     if (atleastoneon==0 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[counter]=6;}
987 :    
988 :     table2[i,j] = exptcalls[counter];
989 :     counter = counter + 1;
990 :     }
991 :     }
992 :    
993 :    
994 :     for (i in 1:6) {
995 :     final_table[i,1]=sum(exptcalls==i);
996 :     }
997 :    
998 :     print("Assessing consistency of Alg2 cutoffs");
999 :    
1000 :     table3=matrix(0,nrow=length(stripped_genesets),ncol=num_experiments);
1001 :     exptcalls = matrix(0,nrow=(length(stripped_genesets)*num_experiments), ncol=1);
1002 :     counter = 1;
1003 :    
1004 :     for (i in 1:length(stripped_genesets))
1005 :     {
1006 :     geneset = stripped_genesets[[i]];
1007 :     geneset_calls = matrix(0,nrow=length(geneset),ncol=num_experiments);
1008 :     j=1;
1009 :    
1010 :     for (k in geneset)
1011 :     {
1012 :     geneset_calls[j,] = tweaked_calls[k,];
1013 :     j=j+1;
1014 :     }
1015 :    
1016 :     for (j in 1:num_experiments)
1017 :     {
1018 :     atleastoneon=sum(geneset_calls[,j]==1);
1019 :     atleastonegray=sum(geneset_calls[,j]==0);
1020 :     atleastoneoff=sum(geneset_calls[,j]==-1);
1021 :    
1022 :     ##1 means all genes in set are on for this experiment##
1023 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray==0) {exptcalls[counter]=1;}
1024 :     ##2 means all genes in set are off for this experiment##
1025 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray==0) {exptcalls[counter]=2;}
1026 :     ##3 means all genes in set are either off or gray for this experiment##
1027 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray>=1) {exptcalls[counter]=3;}
1028 :     ##4 means all genes in set are either on or gray for this experiment##
1029 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[counter]=4;}
1030 :     ##5 means at least one gene in set is off and at least one gene is on for this experiment##
1031 :     if (atleastoneon>=1 && atleastoneoff>=1) {exptcalls[counter]=5;}
1032 :     ##6 means all genes in set are gray for this experiment##
1033 :     if (atleastoneon==0 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[counter]=6;}
1034 :    
1035 :     table3[i,j] = exptcalls[counter];
1036 :     counter = counter + 1;
1037 :     }
1038 :     }
1039 :    
1040 :    
1041 :     for (i in 1:6) {
1042 :     final_table[i,2]=sum(exptcalls==i);
1043 :     }
1044 :    
1045 :     print("Assessing consistency of Alg3 cutoffs");
1046 :    
1047 :     table4=matrix(0,nrow=length(stripped_genesets),ncol=num_experiments);
1048 :     exptcalls = matrix(0,nrow=(length(stripped_genesets)*num_experiments), ncol=1);
1049 :     counter = 1;
1050 :    
1051 :     for (i in 1:length(stripped_genesets))
1052 :     {
1053 :     geneset = stripped_genesets[[i]];
1054 :     geneset_calls = matrix(0,nrow=length(geneset),ncol=num_experiments);
1055 :     j=1;
1056 :    
1057 :     for (k in geneset)
1058 :     {
1059 :     geneset_calls[j,] = final_on_off_calls[k,];
1060 :     j=j+1;
1061 :     }
1062 :    
1063 :     for (j in 1:num_experiments)
1064 :     {
1065 :     atleastoneon=sum(geneset_calls[,j]==1);
1066 :     atleastonegray=sum(geneset_calls[,j]==0);
1067 :     atleastoneoff=sum(geneset_calls[,j]==-1);
1068 :    
1069 :     ##1 means all genes in set are on for this experiment##
1070 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray==0) {exptcalls[counter]=1;}
1071 :     ##2 means all genes in set are off for this experiment##
1072 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray==0) {exptcalls[counter]=2;}
1073 :     ##3 means all genes in set are either off or gray for this experiment##
1074 :     if (atleastoneon==0 && atleastoneoff>=1 && atleastonegray>=1) {exptcalls[counter]=3;}
1075 :     ##4 means all genes in set are either on or gray for this experiment##
1076 :     if (atleastoneon>=1 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[counter]=4;}
1077 :     ##5 means at least one gene in set is off and at least one gene is on for this experiment##
1078 :     if (atleastoneon>=1 && atleastoneoff>=1) {exptcalls[counter]=5;}
1079 :     ##6 means all genes in set are gray for this experiment##
1080 :     if (atleastoneon==0 && atleastoneoff==0 && atleastonegray>=1) {exptcalls[counter]=6;}
1081 :    
1082 :     table4[i,j] = exptcalls[counter];
1083 :     counter = counter + 1;
1084 :     }
1085 :     }
1086 :    
1087 :    
1088 :     for (i in 1:6) {
1089 :     final_table[i,3]=sum(exptcalls==i);
1090 :     }
1091 :    
1092 :     rownames(final_table)=c("all on", "all off", "off or gray", "on or gray", "inconsistent", "all gray");
1093 :     write.table(final_table, "final_quality.txt", col.names=TRUE, row.names=TRUE, sep="\t", quote=FALSE);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3