]> git.donarmstrong.com Git - rsem.git/blob - EBSeq/R/GeneSimuAt.R
Included EBSeq for downstream differential expression analysis
[rsem.git] / EBSeq / R / GeneSimuAt.R
1 GeneSimuAt<-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
2 {
3 # 2012 feb 1 
4 # paired level simulation
5
6 data(GeneEBresultGouldBart2)
7 if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
8
9 #MeansC1=rowMeans(GeneV.norm1.NZ.b2[,1:4])
10 #MeansC2=rowMeans(GeneV.norm1.NZ.b2[,5:8])
11 MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
12 MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
13
14 MeanDVD=MeansC1/MeansC2
15
16 if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
17
18
19 # If DVD constant, use constant when generate
20 # If not, use DVDLibrary
21
22 MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
23 #MeanInputraw=rowMeans(GeneV.norm1.NZ.b2)
24 #Var1=apply(GeneV.norm1.NZ.b2[,1:4],1,var)
25 #Var2=apply(GeneV.norm1.NZ.b2[,5:8],1,var)
26 #VarInput=(Var1 + Var2)/2
27 #If NumofGene.raw=NULL, empirical # of Gene
28 #If !=NULL , Input a 9-vector
29 NumofGene.raw=length(MeanInputraw)
30
31 # here phi denotes r -- which is 1/phi' in which sigma^2=mu(1+mu phi')
32 # In negative binomial 
33 # size is 1/phi'
34 # rnbinom(100,size=100,mu=10) 
35 # var(qq)
36 #[1] 10.93687 
37 # qq=rnbinom(100,size=10,mu=10)
38 # var(qq)
39 #[1] 24.01404
40
41 #PhiInput.raw=(MeanInputraw^2) / (VarInput - MeanInputraw)
42 PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
43 if (length(Phiconstant)==0){
44         PhiLibrary=PhiInput.raw[1/(PhiInput.raw)<quantile(1/(PhiInput.raw),Phi.qt2) & 1/(PhiInput.raw)>quantile(1/(PhiInput.raw),Phi.qt1)]
45     PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
46         PhiInput=PhiInput.raw[PhiInputNames]
47
48
49 }
50
51 if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
52 if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
53 if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
54
55 # Wanna DENumbers be proportion to 2 
56 DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
57 GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
58 names(PhiInput)=GeneNames
59 names(MeanInput)=GeneNames
60
61 #########
62 # data
63 #########
64 EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j])))
65
66
67
68
69     generateDataraw=t(EEList)
70         if(length(DVDconstant)==0){
71                 DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
72                 for(j in 1:NumofGene.raw){
73                  if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
74                 if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=MeanInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
75 }
76          }
77         if(length(DVDconstant)!=0){
78         for(j in 1:NumofGene.raw){
79              if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
80              if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
81                 }
82         }
83 rownames(generateDataraw)=GeneNames
84 MeanVector=rowMeans(generateDataraw)
85 VarVector=apply(generateDataraw,1,var)
86 MOV.post=MeanVector/VarVector
87
88
89
90 ### Remove MOV=NA
91 generateData=generateDataraw
92 generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
93 print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
94 ## DE
95 NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
96
97 if(length(NumofGene)!=0)
98     generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
99
100
101 UseName=rownames(generateData)
102
103 TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
104 phiuse=PhiInput[rownames(generateData)]
105 meanuse=MeanInput[rownames(generateData)]
106
107 #ArtiNames=rownames(generateData)[(DEGeneNumbers+1):(2*DEGeneNumbers)]
108 #Noise=sample(c(1,ncol(generateData)),DEGeneNumbers,replace=T)
109 TrueDELength=length(TrueDE)
110 AtLoc=sample(c(1:length(Conditions)), TrueDELength, replace=T)
111 AtFold=sample(c(4,6,8,10),TrueDELength, replace=T)
112
113 AtNames_Level=vector("list",4)
114 names(AtNames_Level)=c(4,6,8,10)
115 for(i in 1:TrueDELength){
116 generateData[(TrueDELength+i),AtLoc[i]]=generateData[(TrueDELength+i),AtLoc[i]]*AtFold[i]
117 AtNames_Level[[as.character(AtFold[i])]]=c(AtNames_Level[[as.character(AtFold[i])]],rownames(generateData)[TrueDELength+i])
118 }
119
120
121 if(OnlyData==T){
122         OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
123         names(OutName)=rownames(generateData)
124     OutData=generateData
125     rownames(OutData)=as.vector(OutName)
126         OutAt=as.vector(OutName[AtNames_Level])
127         OutTrueDE=as.vector(OutName[TrueDE])
128     output=list(data=OutData, TrueDE=OutTrueDE,Outliers=OutAt)
129         return(output)
130         }
131 ## DESeq
132
133 cds=newCountDataSet(round(generateData),Conditions)
134 cds=estimateSizeFactors(cds)
135 Sizes=sizeFactors(cds)
136 if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
137 else  cds=estimateVarianceFunctions(cds, method="blind")
138
139 res=nbinomTest(cds, "1", "2")
140 ResAdj=res$padj
141 names(ResAdj)=res$id
142 SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
143 print(paste("DESEq found",length(SmallPValueName)))
144 print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
145
146 print("DESeq Size factors")
147 print(Sizes)
148
149 ## DESeq each group
150 ## Ours
151 NewData=generateData
152
153
154 #source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
155 #source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
156
157 EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
158
159 #EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
160
161
162 zlist.unlist=EBresult[[5]]
163 fdr=max(.5,crit_fun(1-zlist.unlist,.05))
164 EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
165 EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
166 zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
167 print(paste("Soft EB Poi",length(EBDE.Poi)))
168 EBDE=c(EBDE, EBDE.Poi)
169 print(paste("Soft EB found",length(EBDE)))
170 print(paste("In True DE",sum(EBDE%in%TrueDE)))
171
172 EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
173 EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
174 print(paste("Hard Poi found",length(EBDE95.Poi)))
175 EBDE95=c(EBDE95, EBDE95.Poi)
176 print(paste("Hard EB found" ,length(EBDE95)))
177 print(paste("In True DE",sum(EBDE95%in%TrueDE)))
178
179 ### edgeR
180 library(edgeR,lib.loc="~/RCODE")
181 edgeRList.b2=DGEList(NewData,group=Conditions)
182 if(length(Phiconstant)==1){
183         edgeRList.b2=estimateCommonDisp(edgeRList.b2)
184         edgeRRes.b2=exactTest(edgeRList.b2)
185 }
186 if(length(Phiconstant)==0){
187         edgeRList.b2=estimateCommonDisp(edgeRList.b2)   
188         edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
189         edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
190 }
191 edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
192 edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
193 names(edgeRPvalue.b2)=rownames(NewData)
194 edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
195 print(paste("edgeR found",length(edgeRSmallpvalue)))
196 print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
197
198 ### Bayseq
199 library(baySeq, lib.loc="~/RCODE")
200 library(snow, lib.loc="~/RCODE")
201 cl <- makeCluster(4, "SOCK")
202 groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
203 CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
204 CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
205 CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
206 bayseqPost=CDPost.NBML@posteriors
207 rownames(bayseqPost)=rownames(NewData)
208 bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
209 print(paste("bayseq found",length(bayseqDE)))
210 print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
211
212
213 ### BBSeq
214 library("BBSeq",lib.loc="~/RCODE")
215 CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
216 output=free.estimate(NewData,CondM)
217 beta.free = output$betahat.free
218 p.free = output$p.free
219 psi.free = output$psi.free
220 names(p.free)=rownames(NewData)
221 p.free.adj=p.adjust(p.free,method="BH")
222 # Top p free?
223 #out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
224 #p.constrained = out.model$p.model
225 BBDE=names(p.free.adj)[which(p.free.adj<.05)]
226 print(paste("BBSeq found",length(BBDE)))
227 print(paste("In True DE",sum(BBDE%in%TrueDE)))
228
229
230 #########################
231 # Generate table
232 Table=matrix(rep(0,12),ncol=2)
233 colnames(Table)=c("Power","FDR")
234 rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
235
236         Length=length(TrueDE)
237         Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
238         Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
239         Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
240         Table[4,1]=sum(BBDE%in%TrueDE)/Length
241         Table[5,1]=sum(EBDE%in%TrueDE)/Length
242         Table[6,1]=sum(EBDE95%in%TrueDE)/Length
243         Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
244         Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
245         Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
246         Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
247         Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
248         Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
249         Table=round(Table,2)
250
251 ValueTable=matrix(rep(0,12),ncol=2)
252 colnames(ValueTable)=c("Power","FDR")
253 rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
254         ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
255         ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
256         ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
257         ValueTable[4,1]=sum(BBDE%in%TrueDE)
258         ValueTable[5,1]=sum(EBDE%in%TrueDE)
259         ValueTable[6,1]=sum(EBDE95%in%TrueDE)
260         ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
261         ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
262         ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
263         ValueTable[4,2]=sum(!BBDE%in%TrueDE)
264         ValueTable[5,2]=sum(!EBDE%in%TrueDE)
265         ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
266
267
268 AtFoundTable=matrix(rep(0,24),ncol=4)
269 colnames(AtFoundTable)=paste("Level",c(1:4),sep="_")
270 rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
271 for(i in 1:4){
272         AtFoundTable[1,i]=sum(SmallPValueName%in%AtNames_Level[[i]])
273         AtFoundTable[2,i]=sum(edgeRSmallpvalue%in%AtNames_Level[[i]])
274         AtFoundTable[3,i]=sum(bayseqDE%in%AtNames_Level[[i]])
275         AtFoundTable[4,i]=sum(BBDE%in%AtNames_Level[[i]])
276         AtFoundTable[5,i]=sum(EBDE%in%AtNames_Level[[i]])
277         AtFoundTable[6,i]=sum(EBDE95%in%AtNames_Level[[i]])     
278         }
279
280         
281 if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
282 if(length(DVDconstant)!=0) DVD=DVDconstant
283 if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
284 if(length(Phiconstant)!=0) Phi=Phiconstant
285 OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult,  AtFoundTable= AtFoundTable,Outliers=AtNames_Level)
286
287
288
289 }
290
291