[Bio] / FigKernelScripts / RunRMA_SIF_format.R Repository:
ViewVC logotype

Annotation of /FigKernelScripts/RunRMA_SIF_format.R

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download)

1 : olson 1.1 args = commandArgs(TRUE);
2 :     if (length(args) != 3) { print("usage: Rscript RunRMA_SIF_format.R cdf_pkg experiment_dir output_dir"); q() }
3 :     force_chip_descriptor=args[1]
4 :     experiments_dir = args[2];
5 :     output_dir = args[3];
6 :    
7 :     library(affy)
8 :     library(affyio)
9 :     library(preprocessCore)
10 :    
11 :     affy_data = AllButCelsForReadAffy(celfile.path = experiments_dir)
12 :    
13 :     if (length(affy_data$filenames) == 0)
14 :     {
15 :     print("No experiments were found");
16 :     q();
17 :     }
18 :    
19 :     cdfs = unique(lapply(affy_data$filenames, whatcdf));
20 :     if (length(cdfs) > 1)
21 :     {
22 :     print("More than one CDF type appeared in the data");
23 :     q();
24 :     }
25 :     chip_descriptor = cdfs[[1]];
26 :     print(c("Using chip description", chip_descriptor));
27 :    
28 :     # read CDF environment
29 :     cdf_library = cleancdfname(force_chip_descriptor)
30 :     library(cdf_library, character.only=TRUE)
31 :     #require(cdf_library, character.only=TRUE, lib.loc = ".Library")
32 :    
33 :     # OLD code
34 :     # #
35 :     # # Read the Data and perform RMA
36 :     # eset = justRMA(celfile.path=experiments_dir, cdfname="newcdfenv", compress=TRUE, verbose=TRUE)
37 :     # geneexp=exprs(eset)
38 :    
39 :     # # remove the no_match and write the table
40 :     # geneexp=geneexp[rownames(geneexp)!='no_match',]
41 :     # write.table(geneexp, file="rma_normalized.tab", row.names=TRUE, col.names=TRUE, sep="\t", quote=FALSE)
42 :    
43 :     #
44 :     # NEW code. Use the results of the affy read above to get the filename list and other chunks of information
45 :     # that the various stages need.
46 :     #
47 :    
48 :     filenames = affy_data$filenames;
49 :     phenoData = affy_data$phenoData;
50 :    
51 :     checkValidFilenames(filenames)
52 :    
53 :     n <- length(filenames)
54 :    
55 :     pdata <- pData(phenoData)
56 :    
57 :     #
58 :     # Much of the below is lifted from the R code in justrma.R from the affy package.
59 :     #
60 :    
61 :     ##try to read sample names form phenoData. if not there use CEL filenames
62 :    
63 :     #
64 :     # #f empty pdata filename are samplenames
65 :     #
66 :    
67 :     if (dim(pdata)[1] != n)
68 :     {
69 :     warning("Incompatible phenoData object. Created a new one.\n");
70 :    
71 :     samplenames <- gsub("^/?([^/]*/)*", "", unlist(filenames));
72 :     pdata <- data.frame(sample=1:n,row.names=samplenames);
73 :     phenoData <- new("AnnotatedDataFrame",
74 :     data=pdata,
75 :     varMetadata=data.frame(
76 :     labelDescription="arbitrary numbering",
77 :     row.names="sample"));
78 :     } else {
79 :     samplenames <- rownames(pdata);
80 :     }
81 :    
82 :     write.table(samplenames, file=paste(output_dir, "experiment.names", sep="/"),
83 :     row.names = TRUE, col.names = FALSE, sep= "\t", quote = FALSE);
84 :    
85 :     description = affy_data$description;
86 :     if (is.null(description))
87 :     {
88 :     description <- new("MIAME");
89 :     description@preprocessing$filenames <- filenames;
90 :     description@preprocessing$affyversion <- library(help=affy)$info[[2]][[2]][2];
91 :     }
92 :    
93 :     #
94 :     # We're using the CDF that we constructed above.
95 :     #
96 :    
97 :     headdetails <- read.celfile.header(filenames[[1]]);
98 :     cdfname = force_chip_descriptor;
99 :    
100 :     if(is.null(cdfname))
101 :     {
102 :     cdfname <- headdetails[[1]];
103 :     }
104 :    
105 :     scandates <-
106 :     sapply(seq_len(length(filenames)), function(i) {
107 :     sdate <- read.celfile.header(filenames[i], info = "full")[["ScanDate"]];
108 :     if (is.null(sdate) || length(sdate) == 0) NA_character_ else sdate;
109 :     });
110 :     protocol <-
111 :     new("AnnotatedDataFrame",
112 :     data=data.frame("ScanDate"=scandates, row.names = sampleNames(phenoData),
113 :     stringsAsFactors=FALSE),
114 :     dimLabels=c("sampleNames", "sampleColumns"));
115 :     tmp <- new("AffyBatch",
116 :     cdfName=cdfname,
117 :     annotation=cleancdfname(cdfname, addcdf=FALSE));
118 :     pmIndex <- pmindex(tmp);
119 :     probenames <- rep(names(pmIndex), unlist(lapply(pmIndex,length)));
120 :     pNList <- split(0:(length(probenames) -1), probenames);
121 :     print("foo");
122 :    
123 :     chip_tmp <- new("AffyBatch",
124 :     cdfName=cdf_library,
125 :     # cdfName=chip_descriptor,
126 :     annotation=cleancdfname(chip_descriptor, addcdf=FALSE));
127 :     chip_pmIndex <- pmindex(chip_tmp);
128 :     print("foo2");
129 :    
130 :     chip_probenames <- rep(names(chip_pmIndex), unlist(lapply(chip_pmIndex,length)));
131 :     print("foo3");
132 :    
133 :     outer_index = 1
134 :     pfile = file(paste(output_dir, "probe.info", sep="/"), "w");
135 :     for (pname in (names(chip_pmIndex)))
136 :     {
137 :     for (idx in chip_pmIndex[[pname]])
138 :     {
139 :     xy = indices2xy(idx, cdf=cdf_library);
140 :    
141 :     cat(outer_index, "\t", pname, "\t", idx, "\t", xy[[1]], "\t", xy[[2]], "\n", sep="", file = pfile);
142 :     outer_index = outer_index + 1;
143 :     }
144 :     }
145 :     close(pfile);
146 :    
147 :     write.table(chip_probenames, file=paste(output_dir, "probe.names", sep="/"),
148 :     row.names = TRUE, col.names=FALSE, sep="\t", quote=FALSE);
149 :    
150 :     ## read pm data into matrix
151 :    
152 :     probeintensities <- read.probematrix(filenames=filenames, cdfname = cdfname);
153 :    
154 :     #
155 :     # This is the initial data. Write to output file.
156 :     #
157 :     write.table(probeintensities$pm, file=paste(output_dir, "probes.raw", sep="/"),row.names = TRUE, col.names = TRUE, sep="\t", quote=FALSE);
158 :    
159 :     ##pass matrix of pm values to rma
160 :    
161 :     ngenes <- length(geneNames(tmp));
162 :    
163 :     #
164 :     # Default parameters from justrma.R
165 :     #
166 :     background = TRUE;
167 :     bgversion = 2;
168 :     normalize = TRUE
169 :     verbose = TRUE
170 :     notes = ""
171 :    
172 :     #
173 :     # Do bg correction first
174 :     #
175 :    
176 :     #
177 :     # The conditional here is for sanity checks; if you don't take this branch
178 :     # the justRMA code is allowed to do the background correction.
179 :     #
180 :    
181 :     if (1)
182 :     {
183 :     rma.background.correct(probeintensities$pm, FALSE)
184 :    
185 :     #
186 :     # Write the background corrected data.
187 :     #
188 :     write.table(probeintensities$pm, file=paste(output_dir, "probes.bg_corrected", sep="/"),row.names = TRUE, col.names = TRUE, sep="\t", quote=FALSE);
189 :    
190 :     background = FALSE;
191 :     }
192 :    
193 :     #
194 :     # Same story here but for the normalization.
195 :     #
196 :     if (1)
197 :     {
198 :     x = .Call("R_qnorm_c",probeintensities$pm, FALSE, PACKAGE="preprocessCore");
199 :     write.table(probeintensities$pm, file=paste(output_dir, "probes.normalized", sep="/"),row.names = TRUE, col.names = TRUE, sep="\t", quote=FALSE);
200 :     rm(x)
201 :     normalize = FALSE;
202 :     }
203 :    
204 :     #exprs <- .Call("rma_c_complete",new_intensities, pNList, ngenes, normalize, background, bgversion, verbose, PACKAGE="affy");
205 :     exprs <- .Call("rma_c_complete",probeintensities$pm, pNList, ngenes, normalize, background, bgversion, verbose, PACKAGE="affy");
206 :    
207 :     colnames(exprs) <- samplenames;
208 :     se.exprs <- array(NA, dim(exprs),
209 :     dimnames=list(rownames(exprs), colnames(exprs)));
210 :    
211 :     # Read the sif2peg file and extract the geneexp rows that match, substituting peg ids
212 :     #sif2peg = read.table(paste(output_dir, "sif2peg", sep="/"), sep="\t")
213 :     sif2peg = read.table(paste(output_dir, "sif2peg", sep="/"), sep="\t", colClasses="character")
214 :    
215 :    
216 :     geneexp_rn = merge(exprs,sif2peg,by.x=0,by.y=1)
217 :     geneexp = geneexp_rn[,2:(dim(geneexp_rn)[2]-1)]
218 :     row.names(geneexp)=geneexp_rn[,dim(geneexp_rn)[2]]
219 :    
220 :     write.table(geneexp, file=paste(output_dir, "rma_normalized.tab", sep = "/"), row.names = TRUE, col.names = TRUE, sep="\t", quote=FALSE);
221 :    
222 :     # write the probepeg.info using matching pegs where possible
223 :     outer_index = 1
224 :     pfile = file(paste(output_dir, "probepeg.info", sep="/"), "w");
225 :     p2file = file(paste(output_dir, "peg.probe.table", sep="/"), "w");
226 :     for (pname in (names(pmIndex)))
227 :     {
228 :     # changed next three lines
229 :     peg_row = which(sif2peg[,1]==pname);
230 :     if (length(peg_row)==1) {
231 :     peg = sif2peg[peg_row,2];
232 :     for (idx in pmIndex[[pname]])
233 :     {
234 :     xy = indices2xy(idx, cdf=cdf_library);
235 :     cat(outer_index, "\t", peg, "\t", idx, "\t", xy[[1]], "\t", xy[[2]], "\n", sep="", file = pfile);
236 :     cat(peg, "\t", xy[[1]], "_", xy[[2]], "\n", sep="", file=p2file);
237 :     outer_index = outer_index + 1;
238 :     }
239 :     }
240 :     }
241 :     close(pfile);
242 :     close(p2file);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3