# Copyright (C) 1999 Anthony D. Long, Ecology & Evolutionary Biology # University of California, Irvine (tdlong@uci.edu) # Portions Copyright 2002,2003 Don Armstrong # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # $Id: bayesian_analysis.R,v 1.5 2006/07/12 03:27:03 don Exp $ microarray.bayesian.plot <- function(data) { ## png(file = paste(file,"_%03d.png",sep=""), width = 1600, height = 1200) plotting.function <- getFunction("plot") text.function <- getFunction("text") use.grid <- 0 if (NROW(data)>1e4) use.grid <- 1 if (use.grid) { nbins <- 80 plotting.function <- function(x,y, xbins = nbins, shape = 1, xbnds = range(x), ybnds = range(y), xlab = NULL, ylab = NULL, IDs = FALSE,...) { return(plot(hexbin(x,y, xbins=xbins, shape=shape, xbnds=xbnds, xlab=xlab, ylab=ylab, IDs=IDs), colramp=BTY, ...)) } text.function <- function(x,y,text) { grid.text(text,x=unit(x,"native"),y=unit(y,"native")) } } plotting.function(data[, "controls.mean"]/1e3, data[, "experiments.mean"]/1e3, xlab = expression(paste("Mean of Control Expression (",10**3,")")), ylab = expression(paste("Mean of Experimental Expression (",10**3,")")), main = "Mean expression level comparison" ) plotting.function(data[, "controls.mean.log"], data[, "experiments.mean.log"], xlab = "Mean of Controls", ylab = "Mean of Experimentals", main = "log transformed expression level comparison" ) ## controls mean v controls sd temp.y <- data[,"controls.sd"]/1000 temp.x <- data[, "controls.mean"]/1000 lmline <- lm(temp.y~temp.x) temp <- plotting.function(data[, "controls.mean"]/1e3, data[, "controls.sd"]/1e3, xlab = expression(paste("Mean of Control Expression (",10**3,")")), ylab = expression(paste("SD of Control Expression (",10**3,")")), main = "Control SD as a function of Control Mean" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) } else { abline(lmline) } xlim <- range(data[,"controls.mean"]/1e3) ylim <- range(data[,"controls.sd"]/1e3) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## experiments mean v experiments sd temp.y <- data[,"experiments.sd"]/1000 temp.x <- data[,"experiments.mean"]/1000 temp <- plotting.function(data[, "experiments.mean"]/1e3, data[, "experiments.sd"]/1e3, xlab = expression(paste("Mean of Experimental Expression (",10**3,")")), ylab = expression(paste( "SD of Experimental Expression (",10**3,")")), main = "Experiment SD as a function of Experiment Mean" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) } else { abline(lmline) } xlim <- range(data[,"experiments.mean"]) ylim <- range(data[,"experiments.sd"]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## controls log mean v controls log sd lmline <- lm(data[,"controls.sd.log"]~data[, "controls.mean.log"]) temp <- plotting.function(data[, "controls.mean.log"], data[, "controls.sd.log"], xlab = "Mean of log(Control Expression)", ylab = "SD of log(Control Expression)", main = "log(Control SD) as a function of log(Control Mean)" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) } else { abline(lmline) } xlim <- range(data[,"controls.mean.log"]) ylim <- range(data[,"controls.sd.log"]) print(xlim) print(ylim) print(summary(lmline)) print(0.5*(xlim[2]-xlim[1])+xlim[1]) print(0.9*(ylim[2]-ylim[1])+ylim[1]) if (use.grid) print(current.vpTree()) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## experiments mean v experiments sd lmline <- lm(data[,"experiments.sd.log"]~data[, "experiments.mean.log"]) temp <- plotting.function(data[, "experiments.mean.log"], data[, "experiments.sd.log"], xlab = "Mean of log(Experiment Expression)", ylab = "SD of log(Experiment Expression)", main = "log(Experiment SD) as a function of log(Experiment Mean)" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) } else { abline(lmline) } xlim <- range(data[,"experiments.mean.log"]) ylim <- range(data[,"experiments.sd.log"]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## mean expression vs significance mean.expression <- apply(data[,c("experiments.mean","controls.mean")],1,mean) lmline <- lm(data[,"t.test.p"] ~ mean.expression) temp <- plotting.function(mean.expression, data[, "t.test.p"], xlab = "Mean Expression", ylab = "p value of T Test", main = "Significance as a function of Mean Expression" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) panel.abline(h=0.05) } else { abline(lmline) abline(h=0.05) } xlim <- range(mean.expression) ylim <- range(data[, "t.test.p"]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## log transformed mean expression vs significance mean.expression <- apply(data[,c("experiments.mean.log","controls.mean.log")],1,mean) lmline <- lm(data[, "t.test.p.log"] ~ mean.expression) temp <- plotting.function(mean.expression, data[, "t.test.p.log"], xlab = "Mean Expression", ylab = "p value of T Test log(Expression)", main = "Significance of log(Expression) as a function of Mean Expression" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) panel.abline(h=0.05) } else { abline(lmline) abline(h=0.05) } xlim <- range(mean.expression) ylim <- range(data[, "t.test.p.log"]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## Fold change vs significance lmline <- lm(data[abs(data[, "fold.change"]) < 20, "t.test.p"] ~ abs(data[abs(data[, "fold.change"]) < 20, "fold.change"])) temp <- plotting.function(abs(data[abs(data[,"fold.change"]) < 20, "fold.change"]), data[abs(data[,"fold.change"]) < 20, "t.test.p"], xlab = "Fold Change of Expression", ylab = "p value of T Test", main = "Significance as a function of Fold Change" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) panel.abline(h=0.05) } else { abline(lmline) abline(h=0.05) } xlim <- range(abs(data[abs(data[,"fold.change"]) < 20, "fold.change"])) ylim <- range(data[abs(data[,"fold.change"]) < 20, "t.test.p"]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## log transformed fold change vs significance print("log transformed fold change vs significance") temp.data <- na.omit(cbind(data[abs(data[, "fold.change"]) < 20, "t.test.p.log"], abs(data[abs(data[, "fold.change"]) < 20, "fold.change"]))) lmline <- lm(temp.data[,1]~temp.data[,2]) temp <- plotting.function(temp.data[,2],temp.data[,1], xlab = "Fold Change of Expression", ylab = "p value of T Test log(Expression)", main = "Significance of log(Expression) as a function of Fold Change" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) panel.abline(h=0.05) } else { abline(lmline) abline(h=0.05) } xlim <- range(temp.data[,2]) ylim <- range(temp.data[,1]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## bayesian vs t test temp.data <- log(na.omit(cbind(data[,"t.test.p"], data[,"p.bayes"]))) lmline <- lm(temp.data[,1]~temp.data[,2]) temp <- plotting.function(temp.data[,2],temp.data[,1], xlab = "log(p value) of Bayesian", ylab = "log(p value) of t test", main = "Correspondance of t-test and bayesian p values" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) } else { abline(lmline) } xlim <- range(temp.data[,2]) ylim <- range(temp.data[,1]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## log(bayesian) vs t test print("log(bayesian) vs t test") temp.data <- log(na.omit(cbind(data[,"t.test.p.log"], data[,"p.log.bayes"]))) lmline <- lm(temp.data[,1]~temp.data[,2]) temp <- plotting.function(temp.data[,2],temp.data[,1], xlab = "log(p value) of Bayesian (log transformed)", ylab = "log(p value) of t test (log transformed)", main = "Correspondance of t-test and bayesian p values of log transformed values" ) if (use.grid) { pushHexport(temp$plot.vp) panel.abline(lmline) } else { abline(lmline) } xlim <- range(temp.data[,2]) ylim <- range(temp.data[,1]) text.function(0.5*(xlim[2]-xlim[1])+xlim[1], 0.9*(ylim[2]-ylim[1])+ylim[1], substitute(r**2 == y, list(y=format(summary(lmline)$r.squared, digits=4))) ) if (use.grid) popViewport(1) ## histograms print("histograms") plot(hist(data[,"t.test.p"],breaks=40,plot=FALSE),main="Histogram of T Test p values") plot(hist(data[,"t.test.p.log"],breaks=40,plot=FALSE),main="Histogram of T Test p values for log transformed data") plot(hist(data[,"p.bayes"],breaks=40,plot=FALSE),main="Histogram of Bayesian p values") plot(hist(data[,"p.log.bayes"],breaks=40,plot=FALSE),main="Histogram of Bayesian p values for log transformed data") }