5 ##' Normalize using MvA loess using multiple processors
7 ##' For analyses where the number of samples makes all-against-all
8 ##' normalization prohibitive, this variant subselects a set of
9 ##' comparisons and runs them, iterating a specific number of times
10 ##' using as many cores as possible.
11 ##' @title loess_normalization
12 ##' @param expressions Gene expression values with probes in rows and samples in columns
13 ##' @param iterations Number of iterations to run (Default: 2)
14 ##' @param small.positive Number to replace negative and zero expressions with (Default: 0.1).
15 ##' @param sample.size Number of combinations of samples to run (Default: 200).
16 ##' @param num.cores Number of cores to use (Default: half of them)
17 ##' @param noramlization.method Normalization method (Default: mean).
18 ##' @return data.frame of normalized expression values
19 ##' @author Don Armstrong
20 loess_normalization <- function(expressions,iterations=2,small.positive=0.1,sample.size=200,num.cores=max(floor(detectCores()/2),1),normalization.method="mean"){
21 n <- ncol(expressions)
22 ## replace NA with the mean
23 expressions.na <- is.na(expressions)
24 expressions.na.which <- which(expressions.na,arr.ind=TRUE)
25 if (nrow(expressions.na.which)>0) {
26 if (normalization.method=="mean") {
27 expressions[unique(expressions.na.which[,"row"]),] <-
28 apply(expressions[unique(expressions.na.which[,"row"]),],
32 x[] <- small.positive;
34 x[is.na(x)] <- mean(x,na.rm=TRUE);
37 } else if (normalization.method=="knn") {
38 expressions <- impute.knn(expressions)$data
40 stop("Unknown normalization.method; currently supported methods are 'mean' and 'knn'")
44 pb <- txtProgressBar(0,iterations,style=3)
45 for (q in 1:iterations) {
47 combn(ncol(expressions),2)
48 expressions.comb.sample <- NULL
49 if (sample.size >= ncol(expressions.comb)) {
50 expressions.comb.sample <-
53 expressions.comb.sample <-
54 expressions.comb[,sample.int(n=ncol(expressions.comb),
58 ### calculate the n for each sample
59 expressions.comb.sample.n <-
60 apply(sapply(1:ncol(expressions),
61 `==`,expressions.comb.sample),
63 result.expressions <- array(1,dim=dim(expressions))
65 ## do this in batches of no more than 200 to avoid making a list
66 ## which is way too large to fit in memory
67 while (m <= ncol(expressions.comb.sample)) {
69 expressions.comb.sample[,m:min(m+200,ncol(expressions.comb.sample))]
71 mclapply(1:ncol(e.c.subsample),
73 i <- e.c.subsample[1,k]
74 j <- e.c.subsample[2,k]
75 mva <- data.frame(m=log2(expressions[,i]/expressions[,j]),
76 a=0.5*log2(expressions[,i]*expressions[,j]))
78 loess(m~a,mva,control=loess.control(surface="interpolate",
79 statistics="approximate",
80 trace.hat="approximate",
82 ## predict for i, predict for j (\hat{M})
83 ## subtract out \hat{M} from M
84 m.hat <- predict(temp.loess,mva[,"a"])
85 return(data.frame(i=2^(-0.5*m.hat/expressions.comb.sample.n[i]),
86 j=2^(0.5*m.hat/expressions.comb.sample.n[j])))
90 for (k in 1:ncol(e.c.subsample)) {
91 result.expressions[,e.c.subsample[1,k]] <-
92 result.expressions[,e.c.subsample[1,k]] *
93 bar.m.hat.list[[k]][,"i"]
94 result.expressions[,e.c.subsample[2,k]] <-
95 result.expressions[,e.c.subsample[2,k]] *
96 bar.m.hat.list[[k]][,"j"]
98 m <- min(m+200,ncol(expressions.comb.sample))+1
99 setTxtProgressBar(pb,(q-1)+m/(ncol(expressions.comb.sample)+1))
101 setTxtProgressBar(pb,q)
102 ## correct expressionss
103 expressions <- expressions*result.expressions