]> git.donarmstrong.com Git - rsem.git/blob - EBSeq/R/EBMultiTest.R
Included EBSeq for downstream differential expression analysis
[rsem.git] / EBSeq / R / EBMultiTest.R
1 EBMultiTest <-
2 function(Data,NgVector=NULL,Conditions,AllParti=NULL, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
3 {
4
5         if(is.null(NgVector))NgVector=rep(1,nrow(Data))
6         if(!is.factor(Conditions))Conditions=as.factor(Conditions)
7
8
9         #ReNameThem
10         IsoNamesIn=rownames(Data)
11         Names=paste("I",c(1:dim(Data)[1]),sep="")
12         names(IsoNamesIn)=Names
13         rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
14         names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
15         
16         # If PossibleCond==NULL, use all combinations
17         NumCond=nlevels(Conditions)
18         CondLevels=levels(Conditions)
19         #library(blockmodeling)
20         if(is.null(AllParti)){
21                 AllPartiList=sapply(1:NumCond,function(i)nkpartitions(NumCond,i))
22                 AllParti=do.call(rbind,AllPartiList)
23                 colnames(AllParti)=CondLevels
24             rownames(AllParti)=paste("Pattern",1:nrow(AllParti),sep="")
25         }
26         if(!length(sizeFactors)==ncol(Data)){
27                 rownames(sizeFactors)=rownames(Data)
28                 colnames(sizeFactors)=Conditions
29         }
30
31         
32         NoneZeroLength=nlevels(as.factor(NgVector))
33         NameList=sapply(1:NoneZeroLength,function(i)names(NgVector)[NgVector==i],simplify=F)
34         DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
35         names(DataList)=names(NameList)
36     
37         NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList)[i])
38         # Unlist 
39         DataList.unlist=do.call(rbind, DataList)
40
41         # Divide by SampleSize factor
42         
43         if(length(sizeFactors)==ncol(Data))
44         DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
45         
46         if(length(sizeFactors)!=ncol(Data))
47         DataList.unlist.dvd=DataList.unlist/sizeFactors
48         
49         # Pool or Not
50         if(Pool==T){
51         DataforPoolSP.dvd=MeanforPoolSP.dvd=vector("list",NumCond)
52         for(lv in 1:NumCond){
53                 DataforPoolSP.dvd[[lv]]=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])   
54                 MeanforPoolSP.dvd[[lv]]=rowMeans(DataforPoolSP.dvd[[lv]])
55         }
56         MeanforPool.dvd=rowMeans(DataList.unlist.dvd)
57         NumInBin=floor(dim(DataList.unlist)[1]/NumBin)
58         StartSeq=c(0:(NumBin-1))*NumInBin+1
59         EndSeq=c(StartSeq[-1]-1,dim(DataList.unlist)[1])
60         MeanforPool.dvd.Sort=sort(MeanforPool.dvd,decreasing=T) 
61         MeanforPool.dvd.Order=order(MeanforPool.dvd,decreasing=T)
62         PoolGroups=sapply(1:NumBin,function(i)(names(MeanforPool.dvd.Sort)[StartSeq[i]:EndSeq[i]]),simplify=F)
63         #FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
64         # Use GeoMean of every two-group partition
65         Parti2=nkpartitions(NumCond,2)
66         FCForPoolList=sapply(1:nrow(Parti2),function(i)rowMeans(do.call(cbind,
67                                                         MeanforPoolSP.dvd[Parti2[i,]==1]))/
68                                                         rowMeans(do.call(cbind,MeanforPoolSP.dvd[Parti2[i,]==2])),
69                                                         simplify=F)
70         FCForPoolMat=do.call(cbind,FCForPoolList)
71         FCforPool=apply(FCForPoolMat,1,function(i)exp(mean(log(i))))
72         names(FCforPool)=names(MeanforPool.dvd)
73         FC_Use=names(FCforPool)[which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],PoolLower) & FCforPool<=quantile(FCforPool[!is.na(FCforPool)],PoolUpper))]
74         PoolGroupVar=sapply(1:NumBin,function(i)(mean(apply(matrix(DataList.unlist[PoolGroups[[i]][PoolGroups[[i]]%in%FC_Use],],ncol=ncol(DataList.unlist)),1,var))))   
75         PoolGroupVarInList=sapply(1:NumBin,function(i)(rep(PoolGroupVar[i],length(PoolGroups[[i]]))),simplify=F)
76         PoolGroupVarVector=unlist(PoolGroupVarInList)
77         VarPool=PoolGroupVarVector[MeanforPool.dvd.Order]
78         names(VarPool)=names(MeanforPool.dvd)
79                 }
80
81         DataListSP=vector("list",nlevels(Conditions))
82         DataListSP.dvd=vector("list",nlevels(Conditions))
83         SizeFSP=DataListSP
84         MeanSP=DataListSP
85         VarSP=DataListSP
86         GetPSP=DataListSP
87         RSP=DataListSP
88         CISP=DataListSP
89         tauSP=DataListSP
90         
91         NumEachCondLevel=summary(Conditions)
92         if(Pool==F & is.null(CI)) CondLevelsUse=CondLevels[NumEachCondLevel>1]
93         if(Pool==T | !is.null(CI)) CondLevelsUse=CondLevels
94         NumCondUse=length(CondLevelsUse)        
95
96         for (lv in 1:nlevels(Conditions)){
97         DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
98         rownames(DataListSP[[lv]])=rownames(DataList.unlist)
99         DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
100         if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI)){
101         CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
102         tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
103         }
104         # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
105         # may input one for each 
106         if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
107         if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
108                 
109         MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
110         
111         if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
112         if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
113
114         if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI))
115                 VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
116         if( Pool==T){
117                 VarSP[[lv]]=VarPool     
118                 }
119         if(ncol(DataListSP[[lv]])!=1){
120                 VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
121                 names(VarSP[[lv]])=rownames(DataList.unlist)
122                 GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
123             RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
124         }
125         names(MeanSP[[lv]])=rownames(DataList.unlist)
126         }
127
128         # Get Empirical R
129         # POOL R???
130         MeanList=rowMeans(DataList.unlist.dvd)
131         VarList=apply(DataList.unlist.dvd, 1, var)
132         Varcbind=do.call(cbind,VarSP[CondLevels%in%CondLevelsUse])
133         PoolVarSpeedUp_MDFPoi_NoNormVarList=rowMeans(Varcbind)
134         VarrowMin=apply(Varcbind,1,min)
135         GetP=MeanList/PoolVarSpeedUp_MDFPoi_NoNormVarList
136         
137     EmpiricalRList=MeanList*GetP/(1-GetP) 
138         # sep
139         #Rcb=cbind(RSP[[1]],RSP[[2]])
140         #Rbest=apply(Rcb,1,function(i)max(i[!is.na(i) & i!=Inf]))
141         EmpiricalRList[EmpiricalRList==Inf]     =max(EmpiricalRList[EmpiricalRList!=Inf])
142         # fine
143         # 
144         GoodData=names(MeanList)[EmpiricalRList>0 &  VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
145         NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf |  is.na(VarrowMin) | is.na(EmpiricalRList)]
146         #NotIn.BestR=Rbest[NotIn.raw]
147         #NotIn.fix=NotIn.BestR[which(NotIn.BestR>0)]
148         #EmpiricalRList[names(NotIn.fix)]=NotIn.fix
149         #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
150         #GoodData=c(GoodData.raw,names(NotIn.fix))
151         #NotIn=NotIn.raw[!NotIn.raw%in%names(NotIn.fix)]
152         EmpiricalRList.NotIn=EmpiricalRList[NotIn]
153         EmpiricalRList.Good=EmpiricalRList[GoodData]
154         EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
155         if(length(sizeFactors)==ncol(Data))
156         EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)        
157         if(!length(sizeFactors)==ncol(Data))
158         EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
159
160
161         # Only Use Data has Good q's
162         DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
163         DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
164         DataListIn.unlist=do.call(rbind, DataList.In)
165         DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
166         
167         DataListSPIn=vector("list",nlevels(Conditions))
168         DataListSPNotIn=vector("list",nlevels(Conditions))
169         EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
170         for (lv in 1:nlevels(Conditions)){
171                 DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
172                 if(length(NotIn)>0)     DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
173                 rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
174                 if(length(NotIn)>0)rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
175                 EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
176         }       
177
178         NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
179         NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
180
181         #Initialize SigIn & ...
182         AlphaIn=0.5
183         BetaIn=rep(0.5,NoneZeroLength)
184         PIn=rep(1/nrow(AllParti),nrow(AllParti))
185
186         ####use while to make an infinity round?
187         UpdateAlpha=NULL
188         UpdateBeta=NULL
189         UpdateP=NULL
190         UpdatePFromZ=NULL
191     Timeperround=NULL 
192         for (times in 1:maxround){
193         temptime1=proc.time()
194                 UpdateOutput=suppressWarnings(LogNMulti(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,  
195                                                            NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength, AllParti,Conditions))
196         print(paste("iteration", times, "done",sep=" "))
197                 AlphaIn=UpdateOutput$AlphaNew
198         BetaIn=UpdateOutput$BetaNew
199         PIn=UpdateOutput$PNew
200                 PFromZ=UpdateOutput$PFromZ
201         FOut=UpdateOutput$FGood
202                 UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
203                 UpdateBeta=rbind(UpdateBeta,BetaIn)
204         UpdateP=rbind(UpdateP,PIn)
205                 UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
206                 temptime2=proc.time()
207                 Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
208                 print(paste("time" ,Timeperround[times],sep=" "))
209                 Z.output=UpdateOutput$ZEachGood
210                 Z.NA.Names=UpdateOutput$zNaNName
211                 }
212                 #Remove this } after testing!!
213                  
214 #       if (times!=1){  
215 #               if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){ 
216 #                       Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
217 #                       break
218 #        }
219 #    }
220 #}
221
222 ##########Change Names############
223 ## Only z are for Good Ones
224 ## Others are for ALL Data
225 GoodData=GoodData[!GoodData%in%Z.NA.Names]
226 IsoNamesIn.Good=as.vector(IsoNamesIn[GoodData])
227 RealName.Z.output=Z.output
228 RealName.F=FOut
229 rownames(RealName.Z.output)=IsoNamesIn.Good
230 rownames(RealName.F)=IsoNamesIn.Good
231
232 RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
233 RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
234 RealName.SPMeanList=sapply(1:NoneZeroLength,function(i)sapply(1:length(MeanSP), function(j)MeanSP[[j]][names(MeanSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
235 RealName.SPVarList=sapply(1:NoneZeroLength,function(i)sapply(1:length(VarSP), function(j)VarSP[[j]][names(VarSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
236 RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
237
238 RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
239 RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVarSpeedUp_MDFPoi_NoNormVarList[names(PoolVarSpeedUp_MDFPoi_NoNormVarList)%in%NameList[[i]]], simplify=F)
240 RealName.QList=sapply(1:NoneZeroLength,function(i)sapply(1:length(GetPSP), function(j)GetPSP[[j]][names(GetPSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
241
242
243 for (i in 1:NoneZeroLength){
244 tmp=NameList[[i]]
245 names=IsoNamesIn[tmp]
246 RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
247 RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
248         for(j in 1:NumCond){
249                 RealName.SPMeanList[[i]][[j]]=RealName.SPMeanList[[i]][[j]][NameList[[i]]]
250                 if(!is.null(RealName.QList[[i]][[j]])){
251                         RealName.QList[[i]][[j]]=RealName.QList[[i]][[j]][NameList[[i]]]
252                         RealName.SPVarList[[i]][[j]]=RealName.SPVarList[[i]][[j]][NameList[[i]]]
253                         names(RealName.QList[[i]][[j]])=names
254                         names(RealName.SPVarList[[i]][[j]])=names
255                 }
256                 names(RealName.SPMeanList[[i]][[j]])=names
257         }
258 RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
259 RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
260 RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
261
262 names(RealName.MeanList[[i]])=names
263 names(RealName.VarList[[i]])=names
264
265 names(RealName.EmpiricalRList[[i]])=names
266 names(RealName.PoolVarList[[i]])=names
267 rownames(RealName.DataList[[i]])=names
268
269 }
270
271
272 #########posterior part for other data set here later############
273 AllNA=unique(c(Z.NA.Names,NotIn))
274 AllZ=NULL
275 AllF=NULL
276 if(length(AllNA)==0){
277         AllZ=RealName.Z.output[IsoNamesIn,]
278         AllF=RealName.F[IsoNamesIn,]
279 }
280 ZEachNA=NULL
281 if (length(AllNA)>0){
282         Ng.NA=NgVector[AllNA]
283         AllNA.Ngorder=AllNA[order(Ng.NA)]
284         NumOfEachGroupNA=rep(0,NoneZeroLength)
285         NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
286         names(NumOfEachGroupNA)=c(1:NoneZeroLength)
287         NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
288         PNotIn=rep(1-Approx,length(AllNA.Ngorder))
289         MeanList.NotIn=MeanList[AllNA.Ngorder]
290         R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn) 
291         if(length(sizeFactors)==ncol(Data))
292         R.NotIn=matrix(outer(R.NotIn.raw,sizeFactors),nrow=length(AllNA.Ngorder))
293         if(!length(sizeFactors)==ncol(Data))
294         R.NotIn=matrix(R.NotIn.raw*sizeFactors[NotIn,],nrow=length(AllNA.Ngorder))
295     
296         DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
297         DataListSPNotInWithZ=vector("list",nlevels(Conditions))
298         RListSPNotInWithZ=vector("list",nlevels(Conditions))
299         for (lv in 1:nlevels(Conditions)) {
300                 DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
301                 RListSPNotInWithZ[[lv]]=matrix(R.NotIn[,Conditions==levels(Conditions)[lv]],nrow=length(AllNA.Ngorder))
302         }
303         FListNA=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
304                         function(j)f0(do.call(cbind, DataListSPNotInWithZ[AllParti[i,]==j]),AlphaIn, BetaIn,
305                 do.call(cbind,RListSPNotInWithZ[AllParti[i,]==j]), NumOfEachGroupNA, log=T)),
306                                                        simplify=F)
307         FPartiLogNA=sapply(FListNA,rowSums)
308         FMatNA=exp(FPartiLogNA)
309         
310         rownames(FMatNA)=rownames(DataListNotIn.unlistWithZ)
311         PMatNA=matrix(rep(1,nrow(DataListNotIn.unlistWithZ)),ncol=1)%*%matrix(PIn,nrow=1)
312         FmultiPNA=FMatNA*PMatNA
313     DenomNA=rowSums(FmultiPNA)
314         ZEachNA=apply(FmultiPNA,2,function(i)i/DenomNA)
315
316         rownames(ZEachNA)=IsoNamesIn[AllNA.Ngorder]
317
318         AllZ=rbind(RealName.Z.output,ZEachNA)
319         AllZ=AllZ[IsoNamesIn,]
320         
321         F.NotIn=FMatNA
322         rownames(F.NotIn)=IsoNamesIn[rownames(FMatNA)]
323         AllF=rbind(RealName.F,F.NotIn)
324         AllF=AllF[IsoNamesIn,]
325
326 }
327 colnames(AllZ)=rownames(AllParti)
328 colnames(AllF)=rownames(AllParti)
329
330 #############Result############################
331 Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ, 
332                         Z=RealName.Z.output,PoissonZ=ZEachNA, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, 
333                         VarList=RealName.VarList, QList=RealName.QList, SPMean=RealName.SPMeanList, SPEstVar=RealName.SPVarList, 
334                         PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f=AllF, AllParti=AllParti)
335 }
336