]> git.donarmstrong.com Git - rsem.git/blob - EBSeq/R/EBTest.R
Included EBSeq for downstream differential expression analysis
[rsem.git] / EBSeq / R / EBTest.R
1 EBTest <-
2 function(Data,NgVector=NULL,Vect5End=NULL,Vect3End=NULL,Conditions, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000,ApproxVal=10^-10)
3 {
4         Dataraw=Data
5         AllZeroNames=which(rowMeans(Data)==0)
6         NotAllZeroNames=which(rowMeans(Data)>0)
7         if(length(AllZeroNames)>0) print("Remove transcripts with all zero")
8         Data=Data[NotAllZeroNames,]
9         if(!is.null(NgVector))NgVector=NgVector[NotAllZeroNames]
10         if(!length(sizeFactors)==ncol(Data))sizeFactors=sizeFactors[NotAllZeroNames,]
11
12         if(is.null(NgVector))NgVector=rep(1,nrow(Data))
13
14         #Rename Them
15         IsoNamesIn=rownames(Data)
16         Names=paste("I",c(1:dim(Data)[1]),sep="")
17         names(IsoNamesIn)=Names
18         rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
19         names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
20         
21
22         if(!length(sizeFactors)==ncol(Data)){
23                 rownames(sizeFactors)=rownames(Data)
24                 colnames(sizeFactors)=Conditions
25         }
26         
27         NumOfNg=nlevels(as.factor(NgVector))
28         NameList=sapply(1:NumOfNg,function(i)Names[NgVector==i],simplify=F)
29         names(NameList)=paste("Ng",c(1:NumOfNg),sep="")
30         NotNone=NULL
31         for (i in 1:NumOfNg) {
32                 if (length(NameList[[i]])!=0) 
33                         NotNone=c(NotNone,names(NameList)[i])
34                 }
35         NameList=NameList[NotNone]
36                 
37         NoneZeroLength=length(NameList)
38         DataList=vector("list",NoneZeroLength)
39         DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
40         names(DataList)=names(NameList)
41     
42         NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList)[i])
43         # Unlist 
44         DataList.unlist=do.call(rbind, DataList)
45
46         # Divide by SampleSize factor
47         
48         if(length(sizeFactors)==ncol(Data))
49         DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
50         
51         if(length(sizeFactors)!=ncol(Data))
52         DataList.unlist.dvd=DataList.unlist/sizeFactors
53         
54         # Get FC and VarPool for pooling - Only works on 2 conditions
55         if(ncol(Data)==2){
56         DataforPoolSP.dvd1=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[1]],nrow=dim(DataList.unlist)[1]) 
57         DataforPoolSP.dvd2=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[2]],nrow=dim(DataList.unlist)[1])
58         MeanforPoolSP.dvd1=rowMeans(DataforPoolSP.dvd1)
59         MeanforPoolSP.dvd2=rowMeans(DataforPoolSP.dvd2)
60         FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
61         names(FCforPool)=rownames(Data)
62         FC_Use=which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],.25) & 
63                                                                   FCforPool<=quantile(FCforPool[!is.na(FCforPool)],.75))
64         
65         Var_FC_Use=apply( DataList.unlist.dvd[FC_Use,],1,var )
66         Mean_FC_Use=(MeanforPoolSP.dvd1[FC_Use]+MeanforPoolSP.dvd2[FC_Use])/2
67         MeanforPool=(MeanforPoolSP.dvd1+MeanforPoolSP.dvd2)/2
68         FC_Use2=which(Var_FC_Use>=Mean_FC_Use)
69         Var_FC_Use2=Var_FC_Use[FC_Use2]
70         Mean_FC_Use2=Mean_FC_Use[FC_Use2]
71         Phi=mean((Var_FC_Use2-Mean_FC_Use2)/Mean_FC_Use2^2)
72         VarEst= MeanforPool*(1+MeanforPool*Phi)
73         print(Phi)
74         }
75
76         #DataListSP Here also unlist.. Only two lists
77         DataListSP=vector("list",nlevels(Conditions))
78         DataListSP.dvd=vector("list",nlevels(Conditions))
79         SizeFSP=DataListSP
80         MeanSP=DataListSP
81         VarSP=DataListSP
82         GetPSP=DataListSP
83         RSP=DataListSP
84         CISP=DataListSP
85         tauSP=DataListSP
86         NumSampleEachCon=rep(NULL,nlevels(Conditions))
87
88         for (lv in 1:nlevels(Conditions)){
89                 DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
90                 rownames(DataListSP[[lv]])=rownames(DataList.unlist)
91                 DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
92                 NumSampleEachCon[lv]=ncol(DataListSP[[lv]])
93
94         if(ncol(DataListSP[[lv]])==1 & !is.null(CI)){
95                 CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
96                 tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
97         }
98         # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
99         # may input one for each 
100         if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
101         if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
102         
103         
104         MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
105         
106         if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
107         if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
108
109         if(ncol(DataListSP[[lv]])==1 & !is.null(CI))
110                 VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
111         if(ncol(DataListSP[[lv]])!=1){
112                 VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
113                 names(MeanSP[[lv]])=rownames(DataList.unlist)
114                 names(VarSP[[lv]])=rownames(DataList.unlist)
115                 GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
116                 RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
117         }
118 }
119         
120         
121         MeanList=rowMeans(DataList.unlist.dvd)
122         VarList=apply(DataList.unlist.dvd, 1, var)
123         if(ncol(Data)==2)PoolVar=VarEst
124         if(!ncol(Data)==2){
125                 CondWithRep=which(NumSampleEachCon>1)
126                 VarCondWithRep=do.call(cbind,VarSP[CondWithRep])
127                 PoolVar=rowMeans(VarCondWithRep)
128         }
129         GetP=MeanList/PoolVar
130         
131     EmpiricalRList=MeanList*GetP/(1-GetP) 
132         EmpiricalRList[EmpiricalRList==Inf]     =max(EmpiricalRList[EmpiricalRList!=Inf])
133         
134         if(ncol(Data)!=2){
135         Varcbind=do.call(cbind,VarSP)
136         VarrowMin=apply(Varcbind,1,min)
137         }
138
139         if(ncol(Data)==2){
140                 Varcbind=VarEst
141                 VarrowMin=VarEst
142         }
143         # 
144         # 
145         GoodData=names(MeanList)[EmpiricalRList>0 &  VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
146         NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf |  is.na(VarrowMin) | is.na(EmpiricalRList)]
147         #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
148         EmpiricalRList.NotIn=EmpiricalRList[NotIn]
149         EmpiricalRList.Good=EmpiricalRList[GoodData]
150         EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
151         if(length(sizeFactors)==ncol(Data))
152         EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)        
153         if(!length(sizeFactors)==ncol(Data))
154         EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
155
156
157         # Only Use Data has Good q's
158         DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
159         DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
160         DataListIn.unlist=do.call(rbind, DataList.In)
161         DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
162         
163         DataListSPIn=vector("list",nlevels(Conditions))
164         DataListSPNotIn=vector("list",nlevels(Conditions))
165         EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
166         for (lv in 1:nlevels(Conditions)){
167                 DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
168         if(length(NotIn)>0){    DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
169         rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
170         }
171         rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
172         EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
173 }       
174
175         NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
176         NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
177
178         #Initialize SigIn & ...
179         AlphaIn=0.5
180         BetaIn=rep(0.5,NoneZeroLength)
181         PIn=0.5
182
183         ####use while to make an infinity round?
184         UpdateAlpha=NULL
185         UpdateBeta=NULL
186         UpdateP=NULL
187         UpdatePFromZ=NULL
188     Timeperround=NULL 
189         for (times in 1:maxround){
190         temptime1=proc.time()
191                 UpdateOutput=suppressWarnings(LogN(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,  NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength))
192         print(paste("iteration", times, "done",sep=" "))
193                 AlphaIn=UpdateOutput$AlphaNew
194         BetaIn=UpdateOutput$BetaNew
195         PIn=UpdateOutput$PNew
196                 PFromZ=UpdateOutput$PFromZ
197         F0Out=UpdateOutput$F0Out
198                 F1Out=UpdateOutput$F1Out
199                 UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
200                 UpdateBeta=rbind(UpdateBeta,BetaIn)
201         UpdateP=rbind(UpdateP,PIn)
202                 UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
203                 temptime2=proc.time()
204                 Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
205                 print(paste("time" ,Timeperround[times],sep=" "))
206                 Z.output=UpdateOutput$ZNew.list[!is.na(UpdateOutput$ZNew.list)]
207                 Z.NA.Names=UpdateOutput$zNaNName
208                 }
209                 #Remove this } after testing!!
210                  
211 #       if (times!=1){  
212 #               if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){ 
213 #                       Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
214 #                       break
215 #        }
216 #    }
217 #}
218
219 ##########Change Names############
220 ## Only z are for Good Ones
221 ## Others are for ALL Data
222 GoodData=GoodData[!GoodData%in%Z.NA.Names]
223 IsoNamesIn.Good=IsoNamesIn[GoodData]
224 RealName.Z.output=Z.output
225 RealName.F0=F0Out
226 RealName.F1=F1Out
227 names(RealName.Z.output)=IsoNamesIn.Good
228 names(RealName.F0)=IsoNamesIn.Good
229 names(RealName.F1)=IsoNamesIn.Good
230
231
232 RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
233 RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
234 RealName.C1MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[1]][names(MeanSP[[1]])%in%NameList[[i]]], simplify=F)
235 RealName.C2MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[2]][names(MeanSP[[2]])%in%NameList[[i]]], simplify=F)
236 RealName.C1VarList=sapply(1:NoneZeroLength,function(i)VarSP[[1]][names(VarSP[[1]])%in%NameList[[i]]], simplify=F)
237 RealName.C2VarList=sapply(1:NoneZeroLength,function(i)VarSP[[2]][names(VarSP[[2]])%in%NameList[[i]]], simplify=F)
238 RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
239
240
241
242 RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
243 RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVar[names(PoolVar)%in%NameList[[i]]], simplify=F)
244
245
246 RealName.QList1=sapply(1:NoneZeroLength,function(i)GetPSP[[1]][names(GetPSP[[1]])%in%NameList[[i]]], simplify=F)
247 RealName.QList2=sapply(1:NoneZeroLength,function(i)GetPSP[[2]][names(GetPSP[[2]])%in%NameList[[i]]], simplify=F)
248
249
250 for (i in 1:NoneZeroLength){
251 tmp=NameList[[i]]
252 names=IsoNamesIn[tmp]
253
254 RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
255 RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
256 RealName.QList1[[i]]=RealName.QList1[[i]][NameList[[i]]]
257 RealName.QList2[[i]]=RealName.QList2[[i]][NameList[[i]]]
258 RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
259 RealName.C1MeanList[[i]]=RealName.C1MeanList[[i]][NameList[[i]]]
260 RealName.C2MeanList[[i]]=RealName.C2MeanList[[i]][NameList[[i]]]
261 RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
262 RealName.C1VarList[[i]]=RealName.C1VarList[[i]][NameList[[i]]]
263 RealName.C2VarList[[i]]=RealName.C2VarList[[i]][NameList[[i]]]
264 RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
265
266 names(RealName.MeanList[[i]])=names
267 names(RealName.VarList[[i]])=names
268 if(ncol(DataListSP[[1]])!=1){
269         names(RealName.QList1[[i]])=names
270         names(RealName.C1VarList[[i]])=names
271 }
272 if(ncol(DataListSP[[2]])!=1){
273         names(RealName.QList2[[i]])=names
274         names(RealName.C2VarList[[i]])=names
275 }
276
277 names(RealName.EmpiricalRList[[i]])=names
278 names(RealName.C1MeanList[[i]])=names
279 names(RealName.C2MeanList[[i]])=names
280 names(RealName.PoolVarList[[i]])=names
281 rownames(RealName.DataList[[i]])=names
282
283
284 }
285
286
287 #########posterior part for other data set here later############
288 AllNA=unique(c(Z.NA.Names,NotIn))
289 z.list.NotIn=NULL
290 AllF0=c(RealName.F0)
291 AllF1=c(RealName.F1)
292 AllZ=RealName.Z.output
293
294 if (length(AllNA)>0){
295         Ng.NA=NgVector[AllNA]
296         AllNA.Ngorder=AllNA[order(Ng.NA)]
297         NumOfEachGroupNA=rep(0,NoneZeroLength)
298         NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
299         names(NumOfEachGroupNA)=c(1:NoneZeroLength)
300         NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
301         PNotIn=rep(1-ApproxVal,length(AllNA.Ngorder))
302         MeanList.NotIn=MeanList[AllNA.Ngorder]
303         R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn) 
304         if(length(sizeFactors)==ncol(Data))
305         R.NotIn=outer(R.NotIn.raw,sizeFactors)
306         if(!length(sizeFactors)==ncol(Data))
307         R.NotIn=R.NotIn.raw*sizeFactors[NotIn,]
308         R.NotIn1=matrix(R.NotIn[,Conditions==levels(Conditions)[1]],nrow=nrow(R.NotIn))
309         R.NotIn2=matrix(R.NotIn[,Conditions==levels(Conditions)[2]],nrow=nrow(R.NotIn))
310     
311         DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
312         DataListSPNotInWithZ=vector("list",nlevels(Conditions))
313         for (lv in 1:nlevels(Conditions)) 
314                 DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
315                 F0=f0(DataListNotIn.unlistWithZ,  AlphaIn, BetaIn, R.NotIn, NumOfEachGroupNA, log=F)
316         F1=f1(DataListSPNotInWithZ[[1]], DataListSPNotInWithZ[[2]], AlphaIn, BetaIn, R.NotIn1,R.NotIn2, NumOfEachGroupNA, log=F)
317         z.list.NotIn=PIn*F1/(PIn*F1+(1-PIn)*F0)
318 #       names(z.list.NotIn)=IsoNamesIn.Good=IsoNamesIn[which(Names%in%NotIn)]
319         names(z.list.NotIn)=IsoNamesIn[AllNA.Ngorder]
320
321         AllZ=c(RealName.Z.output,z.list.NotIn)
322         AllZ=AllZ[IsoNamesIn]
323         AllZ[is.na(AllZ)]=0
324         F0.NotIn=F0
325         F1.NotIn=F1
326         names(F0.NotIn)=IsoNamesIn[names(F0)]
327     names(F1.NotIn)=IsoNamesIn[names(F1)]
328         AllF0=c(RealName.F0,F0.NotIn)
329         AllF1=c(RealName.F1,F1.NotIn)
330         AllF0=AllF0[IsoNamesIn]
331         AllF1=AllF1[IsoNamesIn]
332         AllF0[is.na(AllF0)]=0
333         AllF1[is.na(AllF1)]=0
334 }
335 #############Result############################
336 Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ, Z=RealName.Z.output,PoissonZ=z.list.NotIn, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, VarList=RealName.VarList, QList1=RealName.QList1, QList2=RealName.QList2, C1Mean=RealName.C1MeanList, C2Mean=RealName.C2MeanList,C1EstVar=RealName.C1VarList, C2EstVar=RealName.C2VarList, PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f0=AllF0, f1=AllF1,
337                         AllZeroIndex=AllZeroNames)
338 }
339