add functions

author Don Armstrong <don@donarmstrong.com>

Mon, 3 Mar 2014 05:12:55 +0000 (21:12 -0800)

committer Don Armstrong <don@donarmstrong.com>

Mon, 3 Mar 2014 05:12:55 +0000 (21:12 -0800)
author Don Armstrong <don@donarmstrong.com>
Mon, 3 Mar 2014 05:12:55 +0000 (21:12 -0800)
committer Don Armstrong <don@donarmstrong.com>
Mon, 3 Mar 2014 05:12:55 +0000 (21:12 -0800)
diff --git a/DESCRIPTION b/DESCRIPTION

index f170ab827e0027245789ed567cd15363c0ff24a4..0c87e257f937321c895ef88169112d6811df8990 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -3,8 +3,11 @@ Type: Package
  Title: Don's Microarray Analysis Routines
  Version: 1.0
  Date: 2014-02-28
+Depends: qvalue, hexbin, stats
  Author: Don Armstrong <don@donarmstrong.com>
  Maintainer: Don Armstrong <don@donarmstrong.com>
  Description: Routines which handle analyzing Affymetrix-style microarrays
-License: GPL3+
+License: GPL (>= 3)
+URL: http://www.donarmstrong.com/projects/don_microarray
+
  
diff --git a/R/bayesian_analysis.R b/R/bayesian_analysis.R

new file mode 100644 (file)

index 0000000..438ee59
--- /dev/null
+++ b/R/bayesian_analysis.R
@@ -0,0 +1,453 @@
+#   Copyright (C) 1999 Anthony D. Long, Ecology & Evolutionary Biology
+#      University of California, Irvine (tdlong@uci.edu)
+
+# Portions Copyright 2002,2003 Don Armstrong <don@donarmstrong.com>
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# $Id: bayesian_analysis.R,v 1.5 2006/07/12 03:27:03 don Exp $
+
+pierre.pair.from.unpaired <- function(h,cs,ce,es,ee,end,
+                                      experror=0.05,winsize=101,conf=10,minrep=3,file="pierre.pair",
+                                      file.save=TRUE,returnh=FALSE,qvalues=TRUE)
+{
+  # Use the difference between experimental and control as the test statistic.
+  new.h <- h[,es:ee]-h[,cs:ce]
+  if (cs > 1){
+    new.h <- cbind(h[,1:cs-1],new.h)
+  }
+  totalexpresscol <- apply(h[,c(cs:ce,es:ee)],1,function(x) {mean(log(x[cs:ce]),log(x[es:ee]))});
+  new.h <- cbind(new.h,totalexpresscol)
+  if (end > ee){
+    new.h <- cbind(new.h,h[,(ee+1):end]) 
+  }
+  newer.h <- pierre.pair(new.h,cs,ce,(end-(ee-es)),ce+1,file=file,experror=experror,
+                         winsize=winsize,conf=conf,minrep=minrep,file.save=file.save,returnh=T)
+  if (returnh){
+    return(newer.h)
+  }
+}
+
+pierre.pair <-  function (h, cs, ce, end, totalexpresscol,
+                          experror=0.05, winsize=101, conf=10, minrep=3, file="pierre.pair",
+                          file.save=TRUE,returnh=FALSE,qvalues=TRUE)
+{
+# note 'totalexpresscol' added April 18th 2001:
+# this should be the column number of the column in "h[1:N]" that represents
+# total expression for that gene. 
+# A good value for 'totalexpresscol' would be the mean of the log of the
+# "total expression level" over both treatments (i.e., control and
+# experimentals) and replicates, where the total expression level for each
+# gene/treatment/replicate is given as a fraction of total expression over
+# all genes for that treatment/replicate.
+
+# number of non-zero samples, their mean and st.dev., index_col, running average sd.
+
+     S.N <- apply(h[, cs:ce], 1, function(x) sum(x !=  0 & !is.na(x)))
+     S.mean <- apply(h[, cs:ce], 1, function(x) if (sum(x[x !=
+         0 & !is.na(x)] != 0))
+         mean(x[x != 0 & !is.na(x)])
+     else NA)
+     S.sd <- apply(h[, cs:ce], 1, function(x) if (sum(x[x !=
+         0 & !is.na(x)]  != 0) >= minrep)
+         sqrt(var(x[x != 0 & !is.na(x)]))
+     else NA)
+     index.col <- h[,totalexpresscol]
+
+#   running average standard deviation
+     xxx <- S.sd[!is.na(S.sd)][order(index.col[!is.na(S.sd)])]
+     xxx <- runavg(xxx, winsize)
+     xxx <- xxx[rank(index.col[!is.na(S.sd)])]
+     S.rasd <- rep(NA, nrow(h))
+     S.rasd[!is.na(S.sd)] <- xxx
+
+     h <- cbind(h, S.N, S.mean, S.sd, S.rasd)
+     colnames(h)[(end + 1):(end + 4)] <- c("N", "x", "sd", "rasd")
+     rm(S.N, S.mean, S.sd, S.rasd)
+
+#   regular t-test, Bayesian variance, Bayesian t-test, p-reg, p-Bayes
+     # Regular t
+     temp1 <- sqrt(h[end + 1]) * (h[end + 2]/h[end + 3])
+     # Baysian variance
+     temp2 <- sqrt((conf * (h[end + 4])^2 + (h[end + 1] - 1) * (h[end + 3])^2)/(conf + h[end + 1] - 2))
+     # Baysian t-test
+     temp3 <-  sqrt(h[end + 1]) * (h[end + 2]/temp2)
+#  modify d.f. to reflect effect of conf  Jan. 10 / 2001
+     # calc regular p  (temp4 is the same paired t-test as in doitall.pair)
+     temp4 <- 1 - pf((temp1)^2, 1, h[, end + 1] - 1)
+     # and Bayesian-p.  temp5 is a _paired_ t-test with a Bayesian correction.
+     # The degrees of freedom are  h[,end+1] - 1 + conf - 1.  Since it is a paired
+     # t-test the degrees of  freedom are fewer than non-paired approaches!!
+     temp5 <- 1 - pf((temp3)^2, 1, h[, end + 1] + conf - 2)
+     h <- cbind(h, temp1, temp2, temp3, temp4, temp5)
+     rm(temp1, temp2, temp3, temp4, temp5)
+     colnames(h)[(end + 5):(end + 9)] <- c("reg-t", "Bay-sd", "Bay-t", "reg-p", "Bay-p")
+
+     if (returnh & ! file.save){
+       return(h)
+     }
+     write.table(h, file = paste(file,"_from_R.txt",sep=""), sep = "\t")
+     png(file = paste(file,"_%03d.png",sep=""), width = 1600, height = 1200)
+     plot(h[, end + 2], h[, end + 9], xlab = "difference in expression",
+          ylab = "p-value", main = "Does difference predict significance?",
+          abline(lm(h[, end + 9] ~ h[, end + 2])))
+     graphics.off()
+     if (returnh){
+       return(h)
+     }
+}
+
+tstat.general <- function (n1, n2, x1, x2, sd1, sd2,
+                           small=0, minrep=2)
+{
+     ##  calculates and returns values associated with a t-test of the
+     ## difference between two means
+     ##  x is the dataframe
+     ##  nn1 and nn2 are the number of control and experimental observations respectively
+     ##  xx1 and xx2 are the means of the C's and E's
+     ##  vv1 and vv2 are the standard deviations
+     ##  small is the smallest detectable "signal"
+     ##  minrep defines the smallest number of replicates required to do a two sample t-test
+     ##   this must be AT LEAST TWO otherwise the variance will not be claculated
+     ##  depending on the number of repllicate observations different t-tests are carried out
+
+  ##  do the two sample t-test
+  if (n1 >= minrep & n2 >= minrep) {
+    tt <- -(x1 - x2)/
+      sqrt((((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2)/
+            (n1 + n2 - 2))
+           * ((n1 + n2)/(n1 * n2)))
+    dft <- n1 + n2 - 2
+    rvar <- max((sd1^2)/(sd2^2), (sd2^2)/(sd1^2))
+  }
+
+  ##  do not do a t-test
+  if (n1 < minrep & n2 < minrep) {
+    tt <- NA
+    dft <- n1 + n2 - 2
+    rvar <- NA
+  }
+
+  ##  do the one sample versus a constant t-test -- use the mean for
+  ##  the constant if available, otherwise use "small"
+  if (n1 < minrep & n2 >= minrep) {
+    if(is.na(x1)){
+      tt <- (x2 - small)/(sd2/sqrt(n2))
+    }
+    else{
+      tt <- (x2 - x1)/(sd2/sqrt(n2))
+    }
+    dft <- n2 - 1
+    rvar <- NA
+  }
+  if (n1 >= minrep & n2 < minrep) {
+    if(is.na(x2)){
+      tt <- -(x1 - small)/(sd1/sqrt(n1))
+    }
+    else{
+      tt <- -(x1 - x2)/(sd1/sqrt(n1))
+    }
+    dft <- n1 - 1
+    rvar <- NA
+  }
+  return(list(t=tt,
+              df=dft,
+              rvar=rvar)
+         )
+}
+
+
+microarray.bayesian <-
+  function (data,
+            winsize=101, conf=10,
+            minrep=3) {
+     # experror is the experiment wide probability of a false positive
+     # winsize control the degree of local averaging
+     # conf is the weighting of the Baysian prior relative to the observed within gene variance
+     # temp1 and 2 are the number of control and experimental observations
+  cont <- attr(data,"controls")
+  exp  <- attr(data,"experiments")
+  data[data == 0] <- NA
+  ## temp1 and 2
+  cont.nsamples <- apply(data[,cont],1,function(x){NROW(which(!is.na(x)))})
+  exp.nsamples <- apply(data[,exp],1,function(x){NROW(which(!is.na(x)))})
+  ## temp3 and 4
+  cont.mean <- apply(data[,cont],1,function(x){mean(x,na.rm=TRUE)})
+  exp.mean <-  apply(data[, exp],1,function(x){mean(x,na.rm=TRUE)})
+  ## temp5 and 6
+  cont.sd <- apply(data[,cont],1,function(x){sd(x,na.rm=TRUE)})
+  exp.sd  <- apply(data[, exp],1,function(x){sd(x,na.rm=TRUE)})
+  ## temp7 and 8
+  cont.mean.log <- apply(data[,cont],1,function(x){mean(log(x),na.rm=TRUE)})
+  exp.mean.log  <- apply(data[, exp],1,function(x){mean(log(x),na.rm=TRUE)})
+  # temp9 and 10
+  cont.sd.log <- apply(data[,cont],1,function(x){sd(log(x),na.rm=TRUE)})
+  exp.sd.log  <- apply(data[, exp],1,function(x){sd(log(x),na.rm=TRUE)})
+  ## temp11 and 12 are the running average standard deviations where
+  ## the running averages are based on "winsize" and the function
+  ## runavg. In short all the data within a treatment are sorted by
+  ## average expression level and the running average calculated on
+  ## the estimated st. dev. for each locus. All the wierd code is a
+  ## work around of missing values
+  cont.sd.runavg <- runavg(cont.sd[order(cont.mean)])[order(order(cont.mean))]
+   exp.sd.runavg <- runavg( exp.sd[order( exp.mean)])[order(order( exp.mean))]
+  ## xxx <- temp5[!is.na(temp5)][order(temp3[!is.na(temp5)])]
+  ## xxx <- runavg(xxx, winsize)
+  ## xxx <- xxx[rank(temp3[!is.na(temp5)])]
+  ## temp11 <- rep(NA, nrow(h))
+  ## temp11[!is.na(temp5)] <- xxx
+  ## xxx <- temp6[!is.na(temp6)][order(temp4[!is.na(temp6)])]
+  ## xxx <- runavg(xxx, winsize)
+  ## xxx <- xxx[rank(temp4[!is.na(temp6)])]
+  ## temp12 <- rep(NA, nrow(h))
+  ## temp12[!is.na(temp6)] <- xxx
+  cont.sd.log.runavg <- runavg(cont.sd.log[order(cont.mean.log)])[order(order(cont.mean.log))]
+   exp.sd.log.runavg <- runavg( exp.sd.log[order( exp.mean.log)])[order(order( exp.mean.log))]
+  ## xxx <- temp9[!is.na(temp9)][order(temp7[!is.na(temp9)])]
+  ## xxx <- runavg(xxx, winsize)
+  ## xxx <- xxx[rank(temp7[!is.na(temp9)])]
+  ## ##   like 11 and 12 but for the log transofrmed data
+  ## temp13 <- rep(NA, nrow(h))
+  ## temp13[!is.na(temp9)] <- xxx
+  ## xxx <- temp10[!is.na(temp10)][order(temp8[!is.na(temp10)])]
+  ## xxx <- runavg(xxx, winsize)
+  ## xxx <- xxx[rank(temp8[!is.na(temp10)])]
+  ## temp14 <- rep(NA, nrow(h))
+  ## temp14[!is.na(temp10)] <- xxx
+
+  ## now calculate the weighted average (Baysian) estimate of the
+  ## standard deviation
+  cont.sd.bayes <- sqrt((conf * cont.sd.runavg^2 +
+                        (cont.nsamples - 1) * cont.sd^2)/
+                          (conf + cont.nsamples - 1))
+  exp.sd.bayes <- sqrt((conf * exp.sd.runavg^2 +
+                       (exp.nsamples - 1) * exp.sd^2)/
+                         (conf + exp.nsamples - 1))
+  cont.sd.log.bayes <- sqrt((conf * cont.sd.log.runavg^2 +
+                        (cont.nsamples - 1) * cont.sd.log^2)/
+                          (conf + cont.nsamples - 1))
+  exp.sd.log.bayes <- sqrt((conf * exp.sd.log.runavg^2 +
+                       (exp.nsamples - 1) * exp.sd.log^2)/
+                         (conf + exp.nsamples - 1))
+  ## temp11 <- sqrt((conf * temp11^2 + (temp1 - 1) * temp5^2)/
+  ##                (conf + temp1 - 2))
+  ## temp12 <- sqrt((conf * temp12^2 + (temp2 - 1) * temp6^2)/
+  ##                (conf + temp2 - 2))
+  ## temp13 <- sqrt((conf * temp13^2 + (temp1 - 1) * temp9^2)/(conf +
+  ##                                                          temp1 - 2))
+  ## temp14 <- sqrt((conf * temp14^2 + (temp2 - 1) * temp10^2)/(conf +
+  ##                                                            temp2 - 2)
+  cont.cols <- 1:NCOL(data[,cont])
+  exp.cols <- cont.cols[length(cont.cols)]+1:NCOL(data[,exp])
+  result <- cbind(data[,cont],data[,exp],
+                  cont.nsamples, exp.nsamples,
+                  cont.mean, exp.mean,
+                  cont.sd, exp.sd,
+                  cont.mean.log, exp.mean.log,
+                  cont.sd.log, exp.sd.log,
+                  cont.sd.bayes,exp.sd.bayes,
+                  cont.sd.log.bayes,exp.sd.log.bayes
+                  )
+  colnames(result)[cont.cols] <-
+    sapply(colnames(result)[cont.cols],
+           function(x){
+             paste(sep="",collapse="",
+                   "cont.",x)})
+  colnames(result)[exp.cols] <-
+    sapply(colnames(result)[exp.cols],
+           function(x){
+             paste(sep="",collapse="",
+                   "exp.",x)})
+  colnames(result)[(NCOL(data[,cont|exp])+1):NCOL(result)] <-
+    c("controls.num","experiments.num",
+      "controls.mean","experiments.mean",
+      "controls.sd","experiments.sd",
+      "controls.mean.log","experiments.mean.log",
+      "controls.sd.log","experiments.sd.log",
+      "controls.sd.bayesian","experiments.sd.bayesian",
+      "controls.sd.log.bayesian","experiments.sd.log.bayesian"
+      )
+  ## calculate the "lower bound" on the limit to detection the lower
+  ## bound is simply the 0.0025 quantile of the means
+  threshold <- c(quantile(cont.mean, 0.0025,na.rm=TRUE),
+                 quantile(exp.mean, 0.0025,na.rm=TRUE)
+                 )
+  ## calculate statistics associated with the t-test for the raw data
+  ## (temp1) and log transformed data (temp2). The call to pierre uses
+  ## the st. dev.'s which incorporate the running average
+  temp1 <- apply(result, 1, function(x)
+                 {
+                   temp <- tstat.general(n1=x["controls.num"],  n2=x["experiments.num"],
+                                         x1=x["controls.mean"], x2=x["experiments.mean"],
+                                         sd1=x["controls.sd.bayesian"], sd2=x["experiments.sd.bayesian"],
+                                         small=(exp(threshold[1]) + exp(threshold[2]))/2,
+                                         minrep=minrep)
+                   return(c(temp$t,temp$df,temp$rvar))
+                 }
+                 )
+  temp2 <- apply(result, 1, function(x)
+                 {
+                   temp <- tstat.general(n1=x["controls.num"],  n2=x["experiments.num"],
+                                         x1=x["controls.mean.log"], x2=x["experiments.mean.log"],
+                                         sd1=x["controls.sd.log.bayesian"], sd2=x["experiments.sd.log.bayesian"],
+                                         small=(exp(threshold[1]) + exp(threshold[2]))/2,
+                                         minrep=minrep)
+                   return(c(temp$t,temp$df,temp$rvar))
+                 }
+                 )
+  result <- cbind(result, t(temp1), t(temp2))
+  colnames(result)[(NCOL(result)-5):NCOL(result)] <-
+    c("t.bayes", "df.bayes", "vr.bayes",
+      "t.log.bayes", "df.log.bayes", "vr.log.bayes")
+  rm(temp1, temp2)
+  
+  ##   p-values associated with the two t-stats change d.f. to
+  ##   incorporate Bayesian Jan 10 / 2001 the calculations below are
+  ##   both regular t-tests with a Bayesian correction. The last term
+  ##   'h[,end+16|19]' are degrees of freedom associated with the
+  ##   regular t-test plus the '2*conf - 2' which is the d.f.'s
+  ##   associated with the Bayes estimate
+
+  ## p-raw
+  p.bayes <- 2*pt(-abs(result[,"t.bayes"]), result[,"df.bayes"] + 2*conf - 2)
+  ## p-log
+  p.log.bayes <- 2*pt(-abs(result[,"t.log.bayes"]), result[,"df.log.bayes"] + 2*conf - 2)
+  qvalue.p.bayes <- rep(NA,times=length(p.log.bayes))
+  temp.qvalue <- qvalue(p=na.omit(p.bayes))
+  if (class(temp.qvalue)=="qvalue") {
+    qvalue.p.bayes[!is.na(p.bayes)] <- temp.qvalue$qvalues
+  }
+  qvalue.p.log.bayes <- rep(NA,times=length(p.log.bayes))
+  temp.qvalue <- qvalue(p=na.omit(p.log.bayes))
+  if (class(temp.qvalue)=="qvalue") {
+    qvalue.p.log.bayes[!is.na(p.log.bayes)] <- temp.qvalue$qvalues
+  }
+  result <- cbind(result,
+                  p.bayes,
+                  p.log.bayes,
+                  qvalue.p.bayes,
+                  qvalue.p.log.bayes,
+                  p.adjust(p.bayes,method="BH"),
+                  p.adjust(p.log.bayes,method="BH")
+                  )
+  colnames(result)[(NCOL(result)-5):NCOL(result)] <-
+    c("p.bayes","p.log.bayes",
+      "fdr.q.bayes","fdr.q.log.bayes",
+      "fdr.bh.bayes","fdr.bh.log.bayes"
+      )
+  
+  ## calculate Bonferroni threshold, also calculate fold expression
+  ## (signed) print a file of all genes that pass the Bonferroni
+  ## and print all file that gives the results for all genes
+
+  ## we no longer bother to calculate a bonferoni correction; that's
+  ## the job of things following to do
+  
+  ## Bonf <- 1 - exp(log(1 - experror)/length(h[!is.na(h[, end +
+  ##                                                     22]), end + 22]))
+
+  fold.change <- exp.mean/cont.mean
+  fold.change[fold.change<1] <- - 1 / fold.change[fold.change<1]
+  ## temp1 <- -(h[, end + 3]/h[, end + 4]) *
+  ##   ((h[, end + 3]/h[,end + 4]) >= 1) +
+  ##     (h[, end + 4]/h[, end + 3]) *
+  ##       ((h[,end + 3]/h[, end + 4]) < 1)
+  
+  ##  calculate fold change stuff
+  ## thres <- (exp(threshold[1]) + exp(threshold[2]))/2
+  ## I'm not sure if this correction by threshold is actually
+  ## necessary or a good idea.
+  ## temp2 <- is.na(temp1) *  is.na(h[,end + 3]) * (h[,end+4]/thres)
+  ## temp3 <- is.na(temp1) *  is.na(h[,end + 4]) * -(h[,end+3]/thres)
+  ## ttemp <- cbind(temp1,temp2,temp3)
+  ## rm(temp1,temp2,temp3)
+  ## temp4 <- apply(ttemp,1,function(x) sum(x,na.rm=TRUE))
+  ## rm(ttemp)
+  result <- cbind(result, fold.change)
+  ## rm(temp4)
+  colnames(result)[NCOL(result)] <- "fold.change"
+
+  ## calculate the t-test statistic, p-value and degrees of freedom
+  ## for raw data and log transformed data change the na.action to
+  ## na.omit to allow to continue calculating past bad values
+
+  temp.t <- t(apply(data, 1,
+                    function(x) {
+                      if ((NROW(na.omit(x[cont])) < 2) ||  (NROW(na.omit(x[exp])) < 2)){
+                        return(c(NA,NA,NA))
+                      }
+                      temp <- t.test(na.omit(x[cont]),
+                                     na.omit(x[exp])
+                                     )
+                      c(temp$p.value,temp$statistic,temp$parameter)
+                    }))
+  temp.logt <- t(apply(data, 1,
+                       function(x) {
+                         if ((NROW(na.omit(x[cont])) < 2) ||  (NROW(na.omit(x[exp]))<2)){
+                           return(c(NA,NA,NA))
+                         }
+                         temp <- t.test(na.omit(log(x[cont])),
+                                        na.omit(log(x[exp]))
+                                        )
+                         c(temp$p.value,temp$statistic,temp$parameter)
+                       }))
+  result <- cbind(result,temp.t,temp.logt)
+  colnames(result)[(NCOL(result)-5):NCOL(result)] <- c('t.test.p','t.test.t','t.test.df',
+                                                       't.test.p.log','t.test.t.log','t.test.df.log')
+
+  ## calculate q values for the normal t test results
+  temp.t.test.q <- result[,"t.test.p"]
+  temp.t.test.q[!is.finite(temp.t.test.q)] <- NA
+  temp.qvalue <- qvalue(p=na.omit(temp.t.test.q))
+  if (class(temp.qvalue)=="qvalue")
+    temp.t.test.q[!is.na(temp.t.test.q)] <-  temp.qvalue$qvalues
+  temp.t.test.q.log <- result[,"t.test.p.log"]
+  temp.t.test.q.log[!is.finite(temp.t.test.q.log)] <- NA
+  temp.qvalue <- qvalue(p=na.omit(temp.t.test.q.log))
+  if (class(temp.qvalue)=="qvalue")
+    temp.t.test.q.log[!is.na(temp.t.test.q.log)] <- temp.qvalue$qvalues
+  result <- cbind(result,temp.t.test.q,
+                  temp.t.test.q.log,
+                  p.adjust(result[,"t.test.p"],method="BH"),
+                  p.adjust(result[,"t.test.p.log"],method="BH"))
+  colnames(result)[(NCOL(result)-3):NCOL(result)] <-
+    c('t.test.fdr.q','t.test.fdr.q.log',
+      't.test.fdr.bh','t.test.fdr.bh.log')
+  attr(result,"controls") <- cont.cols
+  attr(result,"experiments") <- exp.cols
+  
+  return(result)
+}
+
+
+runavg <- function(x,k=1){
+  if (k < 0)
+    stop("k must be greater than or equal to 0");
+  if (k==0)
+    return(x)
+  n <- length(x)
+  r <- numeric(n)
+  # one side of the window
+  for (i in 1:n) {
+    if(is.na(x[i])) {
+      r[i] <- NA
+    } else {
+      j <- i - k
+      if (j < 1)
+        j <- 1
+      l <- i + k
+      if (l > n)
+        l <- n
+      r[i] <- mean(x[j:l],na.rm=TRUE)
+    }
+  }
+  return(r)
+}
diff --git a/R/bayesian_analysis_plot.R b/R/bayesian_analysis_plot.R

new file mode 100644 (file)

index 0000000..47dfd6a
--- /dev/null
+++ b/R/bayesian_analysis_plot.R
@@ -0,0 +1,370 @@
+#   Copyright (C) 1999 Anthony D. Long, Ecology & Evolutionary Biology
+#      University of California, Irvine (tdlong@uci.edu)
+
+# Portions Copyright 2002,2003 Don Armstrong <don@donarmstrong.com>
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# $Id: bayesian_analysis.R,v 1.5 2006/07/12 03:27:03 don Exp $
+
+
+microarray.bayesian.plot <-
+  function(data) {
+
+    ## png(file = paste(file,"_%03d.png",sep=""), width = 1600, height = 1200)
+    plotting.function <- getFunction("plot")
+    text.function <- getFunction("text")
+    use.grid <- 0
+    if (NROW(data)>1e4)
+      use.grid <- 1
+    if (use.grid) {
+      nbins <- 80
+      plotting.function <-
+        function(x,y,
+                 xbins = nbins, shape = 1,
+                 xbnds = range(x), ybnds = range(y),
+                 xlab = NULL, ylab = NULL, IDs = FALSE,...) {
+          return(plot(hexbin(x,y,
+                             xbins=xbins,
+                             shape=shape,
+                             xbnds=xbnds,
+                             xlab=xlab,
+                             ylab=ylab,
+                             IDs=IDs),
+                      colramp=BTY,
+                      ...))
+      }
+      text.function <- function(x,y,text) {
+        grid.text(text,x=unit(x,"native"),y=unit(y,"native"))
+      }
+    }
+    plotting.function(data[,    "controls.mean"]/1e3,
+                      data[, "experiments.mean"]/1e3,
+                      xlab = expression(paste("Mean of Control Expression (",10**3,")")),
+                      ylab = expression(paste("Mean of Experimental Expression (",10**3,")")),
+                      main = "Mean expression level comparison"
+                      )
+    plotting.function(data[,    "controls.mean.log"],
+                      data[, "experiments.mean.log"],
+                      xlab = "Mean of Controls",
+                      ylab = "Mean of Experimentals",
+                      main = "log transformed expression level comparison"
+                      )
+    ## controls mean v controls sd
+    temp.y <- data[,"controls.sd"]/1000
+    temp.x <- data[, "controls.mean"]/1000
+    lmline <- lm(temp.y~temp.x)
+    temp <-
+      plotting.function(data[, "controls.mean"]/1e3,
+                        data[, "controls.sd"]/1e3,
+                        xlab = expression(paste("Mean of Control Expression (",10**3,")")),
+                        ylab = expression(paste("SD of Control Expression (",10**3,")")),
+                        main = "Control SD as a function of Control Mean"
+                        )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+    }
+    else {
+      abline(lmline)
+    }
+    xlim <- range(data[,"controls.mean"]/1e3)
+    ylim <- range(data[,"controls.sd"]/1e3)
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+    if (use.grid)
+      popViewport(1)
+    
+    ## experiments mean v experiments sd
+    temp.y <- data[,"experiments.sd"]/1000
+    temp.x <- data[,"experiments.mean"]/1000
+    temp <-
+      plotting.function(data[, "experiments.mean"]/1e3,
+                        data[, "experiments.sd"]/1e3,
+                        xlab = expression(paste("Mean of Experimental Expression (",10**3,")")),
+                        ylab = expression(paste(  "SD of Experimental Expression (",10**3,")")),
+                        main = "Experiment SD as a function of Experiment Mean"
+                        )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+    }
+    else {
+      abline(lmline)
+    }
+    xlim <- range(data[,"experiments.mean"])
+    ylim <- range(data[,"experiments.sd"])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+    if (use.grid)
+      popViewport(1)
+                                
+    ## controls log mean v controls log sd
+    lmline <- lm(data[,"controls.sd.log"]~data[, "controls.mean.log"])
+    temp <- plotting.function(data[, "controls.mean.log"],
+                              data[, "controls.sd.log"],
+                              xlab = "Mean of log(Control Expression)",
+                              ylab = "SD of log(Control Expression)",
+                              main = "log(Control SD) as a function of log(Control Mean)"
+                              )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+    }
+    else {
+      abline(lmline)
+    }
+    xlim <- range(data[,"controls.mean.log"])
+    ylim <- range(data[,"controls.sd.log"])
+    print(xlim)
+    print(ylim)
+    print(summary(lmline))
+    print(0.5*(xlim[2]-xlim[1])+xlim[1])
+    print(0.9*(ylim[2]-ylim[1])+ylim[1])
+    if (use.grid)
+      print(current.vpTree())
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+    if (use.grid)
+      popViewport(1)
+
+    ## experiments mean v experiments sd
+    lmline <- lm(data[,"experiments.sd.log"]~data[, "experiments.mean.log"])
+    temp <- plotting.function(data[, "experiments.mean.log"],
+                              data[, "experiments.sd.log"],
+                              xlab = "Mean of log(Experiment Expression)",
+                              ylab = "SD of log(Experiment Expression)",
+                              main = "log(Experiment SD) as a function of log(Experiment Mean)"
+                              )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+    }
+    else {
+      abline(lmline)
+    }
+    xlim <- range(data[,"experiments.mean.log"])
+    ylim <- range(data[,"experiments.sd.log"])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                           digits=4)))
+         )
+    if (use.grid)
+      popViewport(1)
+
+    ## mean expression vs significance
+    mean.expression <- apply(data[,c("experiments.mean","controls.mean")],1,mean)
+    lmline <- lm(data[,"t.test.p"] ~
+                 mean.expression)
+    temp <- plotting.function(mean.expression,
+                              data[, "t.test.p"],
+                              xlab = "Mean Expression",
+                              ylab = "p value of T Test",
+                              main = "Significance as a function of Mean Expression"
+                              )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+      panel.abline(h=0.05)
+    }
+    else {
+      abline(lmline)
+      abline(h=0.05)
+    }
+    xlim <- range(mean.expression)
+    ylim <- range(data[, "t.test.p"])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+
+    if (use.grid)
+      popViewport(1)
+
+    ## log transformed mean expression vs significance
+    mean.expression <- apply(data[,c("experiments.mean.log","controls.mean.log")],1,mean)
+    lmline <- lm(data[, "t.test.p.log"] ~
+                 mean.expression)
+    temp <-
+      plotting.function(mean.expression,
+                        data[, "t.test.p.log"],
+                        xlab = "Mean Expression",
+                        ylab = "p value of T Test log(Expression)",
+                        main = "Significance of log(Expression) as a function of Mean Expression"
+                        )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+      panel.abline(h=0.05)
+    }
+    else {
+      abline(lmline)
+      abline(h=0.05)
+    }
+    xlim <- range(mean.expression)
+    ylim <- range(data[, "t.test.p.log"])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+    if (use.grid)
+      popViewport(1)
+
+
+    
+    ## Fold change vs significance 
+    lmline <- lm(data[abs(data[, "fold.change"]) < 20, "t.test.p"] ~
+                 abs(data[abs(data[, "fold.change"]) < 20, "fold.change"]))
+    temp <- plotting.function(abs(data[abs(data[,"fold.change"]) < 20, "fold.change"]),
+                              data[abs(data[,"fold.change"]) < 20, "t.test.p"],
+                              xlab = "Fold Change of Expression",
+                              ylab = "p value of T Test",
+                              main = "Significance as a function of Fold Change"
+                              )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+      panel.abline(h=0.05)
+    }
+    else {
+      abline(lmline)
+      abline(h=0.05)
+    }
+    xlim <- range(abs(data[abs(data[,"fold.change"]) < 20, "fold.change"]))
+    ylim <- range(data[abs(data[,"fold.change"]) < 20, "t.test.p"])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+
+    if (use.grid)
+      popViewport(1)
+
+    ## log transformed fold change vs significance
+    print("log transformed fold change vs significance")
+    temp.data <- na.omit(cbind(data[abs(data[, "fold.change"]) < 20, "t.test.p.log"],
+                               abs(data[abs(data[, "fold.change"]) < 20, "fold.change"])))
+    lmline <- lm(temp.data[,1]~temp.data[,2])
+    temp <-
+      plotting.function(temp.data[,2],temp.data[,1],
+                        xlab = "Fold Change of Expression",
+                        ylab = "p value of T Test log(Expression)",
+                        main = "Significance of log(Expression) as a function of Fold Change"
+                        )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+      panel.abline(h=0.05)
+    }
+    else {
+      abline(lmline)
+      abline(h=0.05)
+    }
+    xlim <- range(temp.data[,2])
+    ylim <- range(temp.data[,1])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+    if (use.grid)
+      popViewport(1)
+
+    ## bayesian vs t test
+    temp.data <- log(na.omit(cbind(data[,"t.test.p"],
+                                   data[,"p.bayes"])))
+    lmline <- lm(temp.data[,1]~temp.data[,2])
+    temp <-
+      plotting.function(temp.data[,2],temp.data[,1],
+                        xlab = "log(p value) of Bayesian",
+                        ylab = "log(p value) of t test",
+                        main = "Correspondance of t-test and bayesian p values"
+                        )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+    }
+    else {
+      abline(lmline)
+    }
+    xlim <- range(temp.data[,2])
+    ylim <- range(temp.data[,1])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+
+    if (use.grid)
+      popViewport(1)
+
+    ## log(bayesian) vs t test
+    print("log(bayesian) vs t test")
+    temp.data <- log(na.omit(cbind(data[,"t.test.p.log"],
+                                   data[,"p.log.bayes"])))
+    lmline <- lm(temp.data[,1]~temp.data[,2])
+    temp <-
+      plotting.function(temp.data[,2],temp.data[,1],
+                        xlab = "log(p value) of Bayesian (log transformed)",
+                        ylab = "log(p value) of t test (log transformed)",
+                        main = "Correspondance of t-test and bayesian p values of log transformed values"
+                        )
+    if (use.grid) {
+      pushHexport(temp$plot.vp)
+      panel.abline(lmline)
+    }
+    else {
+      abline(lmline)
+    }
+    xlim <- range(temp.data[,2])
+    ylim <- range(temp.data[,1])
+    text.function(0.5*(xlim[2]-xlim[1])+xlim[1],
+         0.9*(ylim[2]-ylim[1])+ylim[1],
+         substitute(r**2 == y,
+                    list(y=format(summary(lmline)$r.squared,
+                                    digits=4)))
+         )
+
+    if (use.grid)
+      popViewport(1)
+
+
+    ## histograms
+    print("histograms")
+    plot(hist(data[,"t.test.p"],breaks=40,plot=FALSE),main="Histogram of T Test p values")
+    plot(hist(data[,"t.test.p.log"],breaks=40,plot=FALSE),main="Histogram of T Test p values for log transformed data")
+    plot(hist(data[,"p.bayes"],breaks=40,plot=FALSE),main="Histogram of Bayesian p values")
+    plot(hist(data[,"p.log.bayes"],breaks=40,plot=FALSE),main="Histogram of Bayesian p values for log transformed data")
+    
+  }
+
diff --git a/R/ma_loess_norm.R b/R/ma_loess_norm.R

new file mode 100644 (file)

index 0000000..250fb81
--- /dev/null
+++ b/R/ma_loess_norm.R
@@ -0,0 +1,86 @@
+library(stats)
+
+
+# the appropriate method is to average over the \hat{M}'s to find the
+# average fit, and then use that to correct the data.
+
+# ma.loessnorm <- function(array,iterations=2){
+#   n <- ncol(array)
+#   for (q in 1:iterations){
+#     bar.m.hat.array <- array(0,dim=dim(array))
+#     for (i in 1:(n-1)){
+#       for (j in (i+1):n){
+#         print(paste(i,j))
+#         # fit lowess
+#         # [we have to use loess so we can predict using predict.loess
+#         mva <- cbind(log2(array[,i]/array[,j]),0.5*log2(array[,i]*array[,j]))
+#         colnames(mva) <- c("m","a")
+#         mva <- data.frame(na.omit(mva))
+#         temp.loess <- loess(mva[,"m"]~mva[,"a"],control = loess.control(surface = "direct"))
+#         # predict for i, predict for j (\hat{M})
+#         # subtract out \hat{M} from M
+#         m.hat <- predict(temp.loess,mva[,"a"])
+#         # weight correction by the inverse of 2(n-1) . [so that there will be n adjustments]
+#         bar.m.hat.array[,i] <- bar.m.hat.array[,i]+m.hat/(n-1)
+#         bar.m.hat.array[,j] <- bar.m.hat.array[,j]+m.hat/(n-1)
+#         print(paste(i,j))
+#       }
+#     }
+#     # correct arrays
+#     result.array <- array(0,dim=dim(array))
+#     i <- 1
+#     for (j in (i+1):n){
+#       mva <- cbind(log2(array[,i]/array[,j]),0.5*log2(array[,i]*array[,j]))
+#       colnames(mva) <- c("m","a")
+#       if (j == 2){
+#         # correct Mi the first time through
+#         result.array[,i] <- 2^(mva[,"a"]+0.5*(mva[,"m"]-bar.m.hat.array[,i]))
+#       }
+#       result.array[,j] <- 2^(mva[,"a"]-0.5*(mva[,"m"]-bar.m.hat.array[,i]))
+#     }
+#     rownames(result.array) <- rownames(array)
+#     colnames(result.array) <- colnames(array)
+#     array <- result.array
+#   }
+#   return(array)
+# }
+
+
+ma.loessnorm2 <- function(array,iterations=2,small.positive=0.1,print.status=F){
+  n <- ncol(array)
+  # strip na values.
+  array <- na.omit(array)
+  for (q in 1:iterations){
+    if (print.status){
+      print(q)
+    }
+    result.array <- array(0,dim=dim(array))
+    for (i in 1:(n-1)){
+      for (j in (i+1):n){
+        #print(paste(i,j))
+        # replace 0's or negative values with a small positive value
+        array[array[,i]<=0,i] <- small.positive
+        array[array[,j]<=0,j] <- small.positive
+        # fit lowess
+        # [we have to use loess so we can predict using predict.loess
+        mva <- cbind(log2(array[,i]/array[,j]),0.5*log2(array[,i]*array[,j]))
+        colnames(mva) <- c("m","a")
+        mva <- data.frame(mva)
+        #temp.loess <- loess(m~a,mva,control = loess.control(surface = "direct"))
+        temp.loess <- loess(m~a,mva,control=loess.control(surface="interpolate",statistics="approximate",
+                                                          trace.hat="approximate",iterations=2))
+        # predict for i, predict for j (\hat{M})
+        # subtract out \hat{M} from M
+        m.hat <- predict(temp.loess,mva[,"a"])
+        # weight correction by the inverse of 2(n-1) . [so that there will be n adjustments]
+        result.array[,i] <- result.array[,i]+2^(mva[,"a"]+0.5*(mva[,"m"]-m.hat))/(n-1)
+        result.array[,j] <- result.array[,j]+2^(mva[,"a"]-0.5*(mva[,"m"]-m.hat))/(n-1)
+      }
+    }
+    # correct arrays
+    rownames(result.array) <- rownames(array)
+    colnames(result.array) <- colnames(array)
+    array <- result.array
+  }
+  return(array)
+}
diff --git a/R/mva_plotting.R b/R/mva_plotting.R

new file mode 100644 (file)

index 0000000..f40be20
--- /dev/null
+++ b/R/mva_plotting.R
@@ -0,0 +1,112 @@
+# MvA plotting for microarray data
+
+
+# Copyright 2003 By Don Armstrong
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# $Id: mva_plotting.R,v 1.5 2006/06/10 04:14:45 don Exp $
+
+
+# mva.plot
+# Plots a pairwise M versus A plot with a loess fit line
+mva.plot.pair <- function(array,title=F,small.positive=0.1){
+  size <- ncol(array)
+  npair <- size*(size-1)/2
+  height <- min(round((npair*3/4)^0.5),3)
+  width <- min(ceiling(npair/height),3)
+  ## print(c("width",width,"height",height))
+  def.par <- par(no.readonly = TRUE)
+  use.grid <- 0
+  if (NROW(array[,1]) > 1e4) {
+    use.grid <- 1
+  }
+  if (use.grid) {
+    mva.layout <- grid.layout(width,height)
+    pushViewport(viewport(layout=mva.layout))
+  }
+  else {
+    par(mfcol=c(width,height),ps=9,cex.main=1)
+  }
+  # we could also use combn here instead
+  num.plot <- 0
+  for(a in 1:(size-1)){
+    for(b in (a+1):size){
+#      par(mfg=c(a,b-1))
+      num.plot <- num.plot + 1
+      array[array[,a]<=0,a] <- small.positive
+      array[array[,b]<=0,b] <- small.positive
+      A <- 0.5*log2(array[,a]*array[,b])
+      M <- log2(array[,a]/array[,b])
+      temp.frame <- data.frame(cbind(A,M))
+      temp.frame <- temp.frame[abs(temp.frame$M) < 7.8 & temp.frame$A < 15.8 & temp.frame$A > 0,]
+      if (use.grid) {
+        ## print(c("num.plot",num.plot))
+        ## print(c("row",floor((num.plot -1)/ width) %% height))
+        ## print(c("col",(num.plot-1) %% width))
+        if (num.plot > 1 && floor((num.plot -1)/ width) %% height == 0 && (num.plot-1) %% width == 0) {
+          popViewport(1)
+          grid.text(title,x=unit(0.0,"npc")+unit(1,"lines"),y=unit(0.5,"npc"),just="center",rot=90)
+          grid.newpage()
+          pushViewport(viewport(layout=mva.layout))
+        }
+        ## print(floor((num.plot -1)/ width) %% height+1)
+        ## print((num.plot - 1)%% width+1)
+        pushViewport(viewport(layout.pos.row=(floor((num.plot -1)/ width) %% height)+1, layout.pos.col=((num.plot - 1)%% width)+1))
+        hb <- hexbin(temp.frame,
+                     ##xlab="A",ylab="M",
+                     ##ybnds = c(-8,8),
+                     ##xbnds = c(0,16)
+                     );
+        # P <- plot(hb,type="n",
+        #           ## main=paste(colnames(array)[a],colnames(array)[b],sep=" vs. "),
+        #           xlab="",ylab="",newpage=FALSE,legend=FALSE)
+        hb.viewport <- hexViewport(hb,mar=unit(0.1,"npc"))
+        pushHexport(hb.viewport)
+        grid.hexagons(hb,style="colorscale",border=gray(.1),pen=gray(.6),minarea=.1,maxarea=1.5,colramp=BTY)
+        grid.xaxis()
+        grid.yaxis()
+        temp.loess <- loess.smooth(A,M,family="gaussian",
+                                control=loess.control(surface="interpolate",statistics="approximate",
+                                  trace.hat="approximate",iterations=2,cell=0.6))
+        panel.loess(A,M,
+                    col="red",lwd=2)
+        popViewport(1)
+        ## grid.rect()
+        grid.text(paste(colnames(array)[a],colnames(array)[b],sep=" vs. "),y=unit(0.9,"npc"),x=unit(0.5,"npc"),just="center")
+        popViewport(1)
+#        grid.lines()
+      }
+      else {
+        plot(A,M,pch=".",main=paste(colnames(array)[a],colnames(array)[b],sep=" vs. "),xlab="A",ylab="M",ylim=c(-8,8),xlim=c(0,16))
+        lines(loess.smooth(A,M,family="gaussian",
+                           control=loess.control(surface="interpolate",statistics="approximate",
+                             trace.hat="approximate",iterations=2,cell=0.6)),
+              col="blue",lwd=2)
+      }
+      ## abline(h=0)
+    }
+  }
+  if (use.grid) {
+    popViewport(1)
+    grid.text(title,x=unit(0.0,"npc")+unit(1,"lines"),y=unit(0.5,"npc"),just="center",rot=90)
+    ## grid.newpage()
+  }
+  else {
+    title(title,outer=TRUE)
+    par(def.par)
+  }
+}
diff --git a/R/power_analysis.R b/R/power_analysis.R

new file mode 100644 (file)

index 0000000..3598910
--- /dev/null
+++ b/R/power_analysis.R
@@ -0,0 +1,76 @@
+# Power analysis for microarray data
+
+
+# Copyright 2003 By Don Armstrong
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+
+
+# power.ma.partition
+# calcualate the power for data split into paritions of 200 genes.
+power.ma.partition <- function(dataset,partition=200,n=7,delta.mul=1,sig.level=0.05){
+  power.ma.temp <- dataset[names(sort(apply(dataset,1,mean))),]
+  if (partition>NROW(power.ma.temp)){
+    partition <- NROW(power.ma.temp)
+  }
+  power.ma.p.high <- NROW(power.ma.temp)
+  power.ma.return <- c()
+  for (power.ma.p.low in ceiling(seq(NROW(power.ma.temp),0,length=partition))){
+    if(power.ma.p.low == power.ma.p.high){
+      next;
+    }
+    power.ma.p.sd <- mean(apply(power.ma.temp[power.ma.p.low:power.ma.p.high,],1,sd,na.rm=T))
+    power.ma.p.mean <- mean(apply(power.ma.temp[power.ma.p.low:power.ma.p.high,],1,mean,na.rm=T))
+    power.ma.p.power <- power.t.test(n=n,sd=power.ma.p.sd,delta=power.ma.p.mean*delta.mul,sig.level=0.05)
+    power.ma.p.power <- power.ma.p.power$power
+    power.ma.return <- rbind(cbind(power.ma.p.mean,power.ma.p.power,power.ma.p.sd),power.ma.return)
+    power.ma.p.high <- power.ma.p.low -1;
+  }
+
+  return(power.ma.return)
+}
+
+# power.ma
+# calculate the power of individual genes
+power.ma <- function(dataset,n=7,delta.mul=1,sig.level=0.05){
+  power.ma.p.sd <- apply(dataset,1,sd,na.rm=T)
+  power.ma.p.mean <- apply(dataset,1,mean,na.rm=T)
+  power.ma.p.return <- cbind(power.ma.p.mean,power.ma.p.sd)
+  power.ma.p.power.calc <- function(sdmeanarray,n=7,delta.mul=1,sig.level=0.05){
+    power.ma.p.power.calc.power <- power.t.test(n=n,sd=sdmeanarray[2],delta=sdmeanarray[1]*delta.mul,sig.level=sig.level)
+    return(power.ma.p.power.calc.power$power)
+  }
+  power.ma.p.power <- apply(power.ma.p.return,1,power.ma.p.power.calc,delta.mul=delta.mul,sig.level=sig.level,n=n)
+  power.ma.p.return <- cbind(power.ma.p.return,power.ma.p.power)
+  
+  return(power.ma.p.return)
+}
+
+power.ma.sdmean <- function(dataset,n=7,delta.mul=1,sig.level=0.05){
+  power.ma.p.mean <- dataset[,1]
+  power.ma.p.sd <- dataset[,2]
+  power.ma.p.return <- cbind(power.ma.p.mean,power.ma.p.sd)
+  power.ma.p.power.calc <- function(sdmeanarray,n=7,delta.mul=1,sig.level=0.05){
+    power.ma.p.power.calc.power <- power.t.test(n=n,sd=sdmeanarray[2],delta=sdmeanarray[1]*delta.mul,sig.level=sig.level)
+    return(power.ma.p.power.calc.power$power)
+  }
+  power.ma.p.power <- apply(power.ma.p.return,1,power.ma.p.power.calc,delta.mul=delta.mul,sig.level=sig.level,n=n)
+  power.ma.p.return <- cbind(power.ma.p.return,power.ma.p.power)
+  
+  return(power.ma.p.return)
+}
+
diff --git a/R/significant_cut_util.R b/R/significant_cut_util.R

new file mode 100644 (file)

index 0000000..523d1c3
--- /dev/null
+++ b/R/significant_cut_util.R
@@ -0,0 +1,10 @@
+significant.cut.util <-
+  function(significant.cuts,experiments,cut="complete.cut") {
+    result <- NULL
+    for (experiment in experiments) {
+      result <- cbind(result,
+                      significant.cuts[[experiment]][[cut]])
+    }
+    colnames(result) <- experiments
+    return(result)
+  }
author	Don Armstrong <don@donarmstrong.com>
	Mon, 3 Mar 2014 05:12:55 +0000 (21:12 -0800)
committer	Don Armstrong <don@donarmstrong.com>
	Mon, 3 Mar 2014 05:12:55 +0000 (21:12 -0800)
DESCRIPTION		patch \| blob \| history
R/bayesian_analysis.R	[new file with mode: 0644]	patch \| blob
R/bayesian_analysis_plot.R	[new file with mode: 0644]	patch \| blob
R/ma_loess_norm.R	[new file with mode: 0644]	patch \| blob
R/mva_plotting.R	[new file with mode: 0644]	patch \| blob
R/power_analysis.R	[new file with mode: 0644]	patch \| blob
R/significant_cut_util.R	[new file with mode: 0644]	patch \| blob