]> git.donarmstrong.com Git - rsem.git/blob - EBSeq/R/GeneSimu.R
Included EBSeq for downstream differential expression analysis
[rsem.git] / EBSeq / R / GeneSimu.R
1 GeneSimu<-
2 function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
3 {
4 # 2012 feb 1 paired simulation
5 if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
6 data(GeneEBresultGouldBart2)
7 MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
8 MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
9
10 MeanDVD=MeansC1/MeansC2
11
12 if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
13
14
15 # If DVD constant, use constant when generate
16 # If not, use DVDLibrary
17
18 MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
19
20 if(length(NumofGene)!=0)
21 NumofGene.raw=NumofGene*2
22
23 if(length(NumofGene)==0)
24 NumofGene.raw=length(MeanInputraw)
25
26
27 PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
28 if (length(Phiconstant)==0){
29         PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
30         PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
31         PhiInput=PhiInput.raw[PhiInputNames]
32 }
33
34 if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
35 if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
36 if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
37
38 DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
39 GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
40 names(PhiInput)=GeneNames
41 names(MeanInput)=GeneNames
42 #########
43 # data
44 #########
45 EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
46
47
48
49
50     generateDataraw=t(EEList)
51         if(length(DVDconstant)==0){
52                 DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
53                 for(j in 1:NumofGene.raw){
54                  if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
55                 if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=MeanInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
56 }
57          }
58         if(length(DVDconstant)!=0){
59         for(j in 1:NumofGene.raw){
60              if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
61              if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
62                 }
63         }
64 rownames(generateDataraw)=GeneNames
65 MeanVector=rowMeans(generateDataraw)
66 VarVector=apply(generateDataraw,1,var)
67 MOV.post=MeanVector/VarVector
68
69
70
71 ### Remove MOV=NA
72 generateData=generateDataraw
73 generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
74 #print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
75 ## DE
76 NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
77
78 if(length(NumofGene)!=0)
79     generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
80
81
82 UseName=rownames(generateData)
83 phiuse=PhiInput[rownames(generateData)]
84 meanuse=MeanInput[rownames(generateData)]
85
86
87 TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
88
89 if(OnlyData==T){
90         OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
91         names(OutName)=rownames(generateData)
92         OutData=generateData
93         rownames(OutData)=as.vector(OutName)
94         OutTrueDE=as.vector(OutName[TrueDE])
95         output=list(data=OutData, TrueDE=OutTrueDE)
96         return(output)
97         }
98 ## DESeq
99
100 cds=newCountDataSet(round(generateData),Conditions)
101 cds=estimateSizeFactors(cds)
102 Sizes=sizeFactors(cds)
103 if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
104 else  cds=estimateVarianceFunctions(cds, method="blind")
105
106 res=nbinomTest(cds, "1", "2")
107 ResAdj=res$padj
108 names(ResAdj)=res$id
109 SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
110 print(paste("DESEq found",length(SmallPValueName)))
111 print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
112
113 print("DESeq Size factors")
114 print(Sizes)
115
116 NewData=generateData
117
118
119 #source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
120 #source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
121
122 EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
123 #library(EBarrays)
124
125 #EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
126
127
128 zlist.unlist=EBresult[[5]]
129 fdr=max(.5,crit_fun(1-zlist.unlist,.05))
130 EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
131 EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
132 zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
133 print(paste("Soft EB Poi",length(EBDE.Poi)))
134 EBDE=c(EBDE, EBDE.Poi)
135 print(paste("Soft EB found",length(EBDE)))
136 print(paste("In True DE",sum(EBDE%in%TrueDE)))
137
138 EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
139 EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
140 print(paste("Hard Poi found",length(EBDE95.Poi)))
141 EBDE95=c(EBDE95, EBDE95.Poi)
142 print(paste("Hard EB found" ,length(EBDE95)))
143 print(paste("In True DE",sum(EBDE95%in%TrueDE)))
144
145 ### edgeR
146 library(edgeR,lib.loc="~/RCODE")
147 edgeRList.b2=DGEList(NewData,group=Conditions)
148 if(length(Phiconstant)==1){
149         edgeRList.b2=estimateCommonDisp(edgeRList.b2)
150         edgeRRes.b2=exactTest(edgeRList.b2)
151 }
152 if(length(Phiconstant)==0){
153         edgeRList.b2=estimateCommonDisp(edgeRList.b2)   
154         edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
155         edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
156 }
157 edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
158 edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
159 names(edgeRPvalue.b2)=rownames(NewData)
160 edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
161 print(paste("edgeR found",length(edgeRSmallpvalue)))
162 print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
163
164 ### Bayseq
165 library(baySeq, lib.loc="~/RCODE")
166 library(snow, lib.loc="~/RCODE")
167 cl <- makeCluster(4, "SOCK")
168 groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
169 CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
170 CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
171 CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
172 bayseqPost=CDPost.NBML@posteriors
173 rownames(bayseqPost)=rownames(NewData)
174 bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
175 print(paste("bayseq found",length(bayseqDE)))
176 print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
177
178
179 ### BBSeq
180 library("BBSeq",lib.loc="~/RCODE")
181 CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
182 output=free.estimate(NewData,CondM)
183 beta.free = output$betahat.free
184 p.free = output$p.free
185 psi.free = output$psi.free
186 names(p.free)=rownames(NewData)
187 # Top p free?
188 #out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
189 #p.constrained = out.model$p.model
190 p.free.adj=p.adjust(p.free, method="BH")
191
192 BBDE=names(p.free.adj)[which(p.free.adj<.05)]
193 print(paste("BBSeq found",length(BBDE)))
194 print(paste("In True DE",sum(BBDE%in%TrueDE)))
195
196
197 #########################
198 # Generate table
199 Table=matrix(rep(0,12),ncol=2)
200 colnames(Table)=c("Power","FDR")
201 rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
202
203         Length=length(TrueDE)
204         Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
205         Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
206         Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
207         Table[4,1]=sum(BBDE%in%TrueDE)/Length
208         Table[5,1]=sum(EBDE%in%TrueDE)/Length
209         Table[6,1]=sum(EBDE95%in%TrueDE)/Length
210         Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
211         Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
212         Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
213         Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
214         Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
215         Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
216         Table=round(Table,2)
217
218 ValueTable=matrix(rep(0,12),ncol=2)
219 colnames(ValueTable)=c("Power","FDR")
220 rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
221         ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
222         ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
223         ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
224         ValueTable[4,1]=sum(BBDE%in%TrueDE)
225         ValueTable[5,1]=sum(EBDE%in%TrueDE)
226         ValueTable[6,1]=sum(EBDE95%in%TrueDE)
227         ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
228         ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
229         ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
230         ValueTable[4,2]=sum(!BBDE%in%TrueDE)
231         ValueTable[5,2]=sum(!EBDE%in%TrueDE)
232         ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
233         
234 if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
235 if(length(DVDconstant)!=0) DVD=DVDconstant
236 if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
237 if(length(Phiconstant)!=0) Phi=Phiconstant
238 OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult
239 ,DESeqDE=SmallPValueName, edgeRDE=edgeRSmallpvalue, bayDE=bayseqDE, BBDE=BBDE, EBDE95=EBDE95)
240 }
241