--- /dev/null
+Package: loessNormalization
+Type: Package
+Title: Loess normalization and plotting routines
+Version: 0.1
+Date: 2016-06-14
+Author: Don Armstrong
+Maintainer: Don Armstrong <don@donarmstrong.com>
+Description: Loess normalization and plotting routines
+License: GPL 3+
--- /dev/null
+exportPattern("^[[:alpha:]]+")
--- /dev/null
+library("stats")
+library("parallel")
+library("impute")
+
+##' Normalize using MvA loess using multiple processors
+##'
+##' For analyses where the number of samples makes all-against-all
+##' normalization prohibitive, this variant subselects a set of
+##' comparisons and runs them, iterating a specific number of times
+##' using as many cores as possible.
+##' @title loess_normalization
+##' @param expressions Gene expression values with probes in rows and samples in columns
+##' @param iterations Number of iterations to run (Default: 2)
+##' @param small.positive Number to replace negative and zero expressions with (Default: 0.1).
+##' @param sample.size Number of combinations of samples to run (Default: 200).
+##' @param num.cores Number of cores to use (Default: half of them)
+##' @param noramlization.method Normalization method (Default: mean).
+##' @return data.frame of normalized expression values
+##' @author Don Armstrong
+loess_normalization <- function(expressions,iterations=2,small.positive=0.1,sample.size=200,num.cores=max(floor(detectCores()/2),1),normalization.method="mean"){
+ n <- ncol(expressions)
+ ## replace NA with the mean
+ expressions.na <- is.na(expressions)
+ expressions.na.which <- which(expressions.na,arr.ind=TRUE)
+ if (nrow(expressions.na.which)>0) {
+ if (normalization.method=="mean") {
+ expressions[unique(expressions.na.which[,"row"]),] <-
+ apply(expressions[unique(expressions.na.which[,"row"]),],
+ 1,
+ function(x){
+ if (all(is.na(x))) {
+ x[] <- small.positive;
+ } else {
+ x[is.na(x)] <- mean(x,na.rm=TRUE);
+ }
+ x})
+ } else if (normalization.method=="knn") {
+ expressions <- impute.knn(expressions)$data
+ } else {
+ stop("Unknown normalization.method; currently supported methods are 'mean' and 'knn'")
+ }
+
+ }
+ pb <- txtProgressBar(0,iterations,style=3)
+ for (q in 1:iterations) {
+ expressions.comb <-
+ combn(ncol(expressions),2)
+ expressions.comb.sample <- NULL
+ if (sample.size >= ncol(expressions.comb)) {
+ expressions.comb.sample <-
+ expressions.comb
+ } else {
+ expressions.comb.sample <-
+ expressions.comb[,sample.int(n=ncol(expressions.comb),
+ size=sample.size,
+ replace=FALSE)]
+ }
+ ### calculate the n for each sample
+ expressions.comb.sample.n <-
+ apply(sapply(1:ncol(expressions),
+ `==`,expressions.comb.sample),
+ 2,sum)
+ result.expressions <- array(1,dim=dim(expressions))
+ m <- 1
+ ## do this in batches of no more than 200 to avoid making a list
+ ## which is way too large to fit in memory
+ while (m <= ncol(expressions.comb.sample)) {
+ e.c.subsample <-
+ expressions.comb.sample[,m:min(m+200,ncol(expressions.comb.sample))]
+ bar.m.hat.list <-
+ mclapply(1:ncol(e.c.subsample),
+ function(k) {
+ i <- e.c.subsample[1,k]
+ j <- e.c.subsample[2,k]
+ mva <- data.frame(m=log2(expressions[,i]/expressions[,j]),
+ a=0.5*log2(expressions[,i]*expressions[,j]))
+ temp.loess <-
+ loess(m~a,mva,control=loess.control(surface="interpolate",
+ statistics="approximate",
+ trace.hat="approximate",
+ iterations=2))
+ ## predict for i, predict for j (\hat{M})
+ ## subtract out \hat{M} from M
+ m.hat <- predict(temp.loess,mva[,"a"])
+ return(data.frame(i=2^(-0.5*m.hat/expressions.comb.sample.n[i]),
+ j=2^(0.5*m.hat/expressions.comb.sample.n[j])))
+ },
+ mc.cores=num.cores
+ )
+ for (k in 1:ncol(e.c.subsample)) {
+ result.expressions[,e.c.subsample[1,k]] <-
+ result.expressions[,e.c.subsample[1,k]] *
+ bar.m.hat.list[[k]][,"i"]
+ result.expressions[,e.c.subsample[2,k]] <-
+ result.expressions[,e.c.subsample[2,k]] *
+ bar.m.hat.list[[k]][,"j"]
+ }
+ m <- min(m+200,ncol(expressions.comb.sample))+1
+ setTxtProgressBar(pb,(q-1)+m/(ncol(expressions.comb.sample)+1))
+ }
+ setTxtProgressBar(pb,q)
+ ## correct expressionss
+ expressions <- expressions*result.expressions
+ }
+ close(pb)
+ return(expressions)
+}
--- /dev/null
+# MvA plotting for microarray data
+
+
+# Copyright 2003 By Don Armstrong
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# $Id: mva_plotting.R,v 1.5 2006/06/10 04:14:45 don Exp $
+
+
+library("grid")
+library("ggplot2")
+
+# mva.plot
+# Plots a pairwise M versus A plot with a loess fit line
+##' Plots a pairwise M versus A plot with a loess fit line
+##'
+##' .. content for \details{} ..
+##' @title mva.plot.pair
+##' @param array Array of data to plot
+##' @param title Title of plot (Default: FALSE; no title)
+##' @param small.positive replace numbers less than 0 with this
+##' @author Don Armstrong \email{don@@donarmstrong.com}
+mva.plot.pair <- function(array,title=FALSE,small.positive=0.1){
+ size <- ncol(array)
+ npair <- size*(size-1)/2
+ height <- min(round((npair*3/4)^0.5),3)
+ width <- min(ceiling(npair/height),3)
+ def.par <- par(no.readonly = TRUE)
+ mva.layout <- grid.layout(ncol=width,nrow=height)
+ pushViewport(viewport(layout=mva.layout))
+ ## we could also use combn here instead
+ num.plot <- 0
+ for(a in 1:(size-1)){
+ for(b in (a+1):size){
+ num.plot <- num.plot + 1
+ group.1 <- array[,a]
+ group.2 <- array[,b]
+ group.keep <- is.finite(group.1) & is.finite(group.2)
+ group.1 <- group.1[group.keep]
+ group.2 <- group.2[group.keep]
+ group.min <- min(c(group.1,group.2))
+ if (group.min <= 0) {
+ group.1 <- group.1 - group.min + small.positive
+ group.2 <- group.2 - group.min + small.positive
+ }
+ A <- 0.5*log2(group.1*group.2)
+ M <- log2(group.1/group.2)
+ temp.frame <- na.omit(data.frame(cbind(A,M)))
+ if (num.plot > 1 &&
+ floor((num.plot -1)/ width) %% height == 0 &&
+ (num.plot-1) %% width == 0) {
+ popViewport(1)
+ grid.text(title,x=unit(0.0,"npc")+unit(1,"lines"),y=unit(0.5,"npc"),just="center",rot=90)
+ grid.newpage()
+ pushViewport(viewport(layout=mva.layout))
+ }
+ ## print(floor((num.plot -1)/ width) %% height+1)
+ ## print((num.plot - 1)%% width+1)
+ print(ggplot(temp.frame,aes(y=M,x=A))+
+ geom_hex()+
+ stat_smooth(method="gam")+
+ ggtitle(paste0(colnames(array)[a],"\nvs\n",colnames(array)[b]))+
+ theme(legend.position="none"),
+ vp=viewport(layout.pos.row=floor((num.plot -1)/ width) %% height+1, layout.pos.col=((num.plot - 1)%% width+1)))
+
+ }
+ }
+ popViewport()
+}
--- /dev/null
+.onAttach <- function(...) {
+ ## do nothing currently.
+}
--- /dev/null
+\name{loessNormalization-package}
+\alias{loessNormalization-package}
+\alias{loessNormalization}
+\docType{package}
+\title{
+\packageTitle{loessNormalization}
+}
+\description{
+\packageDescription{loessNormalization}
+}
+\details{
+
+The DESCRIPTION file:
+\packageDESCRIPTION{loessNormalization}
+\packageIndices{loessNormalization}
+~~ An overview of how to use the package, including the most important ~~
+~~ functions ~~
+}
+\author{
+\packageAuthor{loessNormalization}
+
+Maintainer: \packageMaintainer{loessNormalization}
+}
+\references{
+~~ Literature or other references for background information ~~
+}
+~~ Optionally other standard keywords, one per line, from file KEYWORDS in ~~
+~~ the R documentation directory ~~
+\keyword{ package }
+\seealso{
+~~ Optional links to other man pages, e.g. ~~
+~~ \code{\link[<pkg>:<pkg>-package]{<pkg>}} ~~
+}
+\examples{
+~~ simple examples of the most important functions ~~
+}
--- /dev/null
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/ma_loess_norm.R
+\name{loess_normalization}
+\alias{loess_normalization}
+\title{loess_normalization}
+\usage{
+loess_normalization(expressions, iterations = 2, small.positive = 0.1,
+ sample.size = 200, num.cores = max(floor(detectCores()/2), 1),
+ normalization.method = "mean")
+}
+\arguments{
+\item{expressions}{Gene expression values with probes in rows and samples in columns}
+
+\item{iterations}{Number of iterations to run (Default: 2)}
+
+\item{small.positive}{Number to replace negative and zero expressions with (Default: 0.1).}
+
+\item{sample.size}{Number of combinations of samples to run (Default: 200).}
+
+\item{num.cores}{Number of cores to use (Default: half of them)}
+
+\item{noramlization.method}{Normalization method (Default: mean).}
+}
+\value{
+data.frame of normalized expression values
+}
+\description{
+Normalize using MvA loess using multiple processors
+}
+\details{
+For analyses where the number of samples makes all-against-all
+normalization prohibitive, this variant subselects a set of
+comparisons and runs them, iterating a specific number of times
+using as many cores as possible.
+}
+\author{
+Don Armstrong
+}
+
--- /dev/null
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/mva_plotting.R
+\name{mva.plot.pair}
+\alias{mva.plot.pair}
+\title{mva.plot.pair}
+\usage{
+mva.plot.pair(array, title = F, small.positive = 0.1)
+}
+\arguments{
+\item{array}{Array of data to plot}
+
+\item{title}{Title of plot (Default: FALSE; no title)}
+
+\item{small.positive}{replace numbers less than 0 with this}
+}
+\description{
+Plots a pairwise M versus A plot with a loess fit line
+}
+\details{
+.. content for \details{} ..
+}
+\author{
+Don Armstrong \email{don@donarmstrong.com}
+}
+