git.donarmstrong.com Git - rsem.git/commitdiff
Included EBSeq for downstream differential expression analysis
author Bo Li <bli@cs.wisc.edu>
Thu, 26 Apr 2012 13:50:22 +0000 (08:50 -0500)
committer Bo Li <bli@cs.wisc.edu>
Thu, 26 Apr 2012 13:50:22 +0000 (08:50 -0500)
101 files changed:
EBSeq/DESCRIPTION [new file with mode: 0644]
EBSeq/NAMESPACE [new file with mode: 0644]
EBSeq/R/CheckNg.R [new file with mode: 0644]
EBSeq/R/DenNHist.R [new file with mode: 0644]
EBSeq/R/DenNHistTable.R [new file with mode: 0644]
EBSeq/R/EBMultiTest.R [new file with mode: 0644]
EBSeq/R/EBTest.R [new file with mode: 0644]
EBSeq/R/GeneMultiSimu.R [new file with mode: 0644]
EBSeq/R/GeneSimu.R [new file with mode: 0644]
EBSeq/R/GeneSimuAt.R [new file with mode: 0644]
EBSeq/R/GetData.R [new file with mode: 0644]
EBSeq/R/GetMultiPP.R [new file with mode: 0644]
EBSeq/R/GetNg.R [new file with mode: 0644]
EBSeq/R/GetPP.R [new file with mode: 0644]
EBSeq/R/GetPatterns.R [new file with mode: 0644]
EBSeq/R/IsoSimu.R [new file with mode: 0644]
EBSeq/R/IsoSimuAt.R [new file with mode: 0644]
EBSeq/R/Likefun.R [new file with mode: 0644]
EBSeq/R/LikefunMulti.R [new file with mode: 0644]
EBSeq/R/LikefunMultiDVDP.R [new file with mode: 0644]
EBSeq/R/LikefunMultiEMP.R [new file with mode: 0644]
EBSeq/R/LogN.R [new file with mode: 0644]
EBSeq/R/LogNMulti.R [new file with mode: 0644]
EBSeq/R/LogNMultiDVDP.R [new file with mode: 0644]
EBSeq/R/LogNMultiEMP.R [new file with mode: 0644]
EBSeq/R/MedianNorm.R [new file with mode: 0644]
EBSeq/R/MergeGene.R [new file with mode: 0644]
EBSeq/R/MergeIso.R [new file with mode: 0644]
EBSeq/R/PlotFDTP.R [new file with mode: 0644]
EBSeq/R/PlotFPTP.R [new file with mode: 0644]
EBSeq/R/PlotPattern.R [new file with mode: 0644]
EBSeq/R/PlotTopCts.R [new file with mode: 0644]
EBSeq/R/PolyFitPlot.R [new file with mode: 0644]
EBSeq/R/PoolMatrix.R [new file with mode: 0644]
EBSeq/R/PostFC.R [new file with mode: 0644]
EBSeq/R/QQP.R [new file with mode: 0644]
EBSeq/R/QuantileNorm.R [new file with mode: 0644]
EBSeq/R/RankNorm.R [new file with mode: 0644]
EBSeq/R/TPFDRplot.R [new file with mode: 0644]
EBSeq/R/TopCts.R [new file with mode: 0644]
EBSeq/R/beta.mom.R [new file with mode: 0644]
EBSeq/R/crit_fun.R [new file with mode: 0644]
EBSeq/R/f0.R [new file with mode: 0644]
EBSeq/R/f1.R [new file with mode: 0644]
EBSeq/data/GeneEBresultGouldBart2.rda [new file with mode: 0644]
EBSeq/data/GeneMat.rda [new file with mode: 0644]
EBSeq/data/IsoEBresultGouldBart2.rda [new file with mode: 0644]
EBSeq/data/IsoList.rda [new file with mode: 0644]
EBSeq/data/MultiGeneMat.rda [new file with mode: 0644]
EBSeq/data/datalist [new file with mode: 0644]
EBSeq/demo/EBSeq.R [new file with mode: 0644]
EBSeq/inst/doc/EBSeq_Vignette.pdf [new file with mode: 0644]
EBSeq/man/CheckNg.Rd [new file with mode: 0644]
EBSeq/man/DenNHist.Rd [new file with mode: 0644]
EBSeq/man/DenNHistTable.Rd [new file with mode: 0644]
EBSeq/man/EBMultiTest.Rd [new file with mode: 0644]
EBSeq/man/EBSeq_NingLeng-package.Rd [new file with mode: 0644]
EBSeq/man/EBTest.Rd [new file with mode: 0644]
EBSeq/man/GeneEBresultGouldBart2.Rd [new file with mode: 0644]
EBSeq/man/GeneMultiSimu.Rd [new file with mode: 0644]
EBSeq/man/GeneSimu.Rd [new file with mode: 0644]
EBSeq/man/GeneSimuAt.Rd [new file with mode: 0644]
EBSeq/man/GetData.Rd [new file with mode: 0644]
EBSeq/man/GetMultiPP.Rd [new file with mode: 0644]
EBSeq/man/GetNg.Rd [new file with mode: 0644]
EBSeq/man/GetPP.Rd [new file with mode: 0644]
EBSeq/man/GetPatterns.Rd [new file with mode: 0644]
EBSeq/man/IsoEBresultGouldBart2.Rd [new file with mode: 0644]
EBSeq/man/IsoSimu.Rd [new file with mode: 0644]
EBSeq/man/IsoSimuAt.Rd [new file with mode: 0644]
EBSeq/man/Likefun.Rd [new file with mode: 0644]
EBSeq/man/LikefunMulti.Rd [new file with mode: 0644]
EBSeq/man/LogN.Rd [new file with mode: 0644]
EBSeq/man/LogNMulti.Rd [new file with mode: 0644]
EBSeq/man/MedianNorm.Rd [new file with mode: 0644]
EBSeq/man/MergeGene.Rd [new file with mode: 0644]
EBSeq/man/MergeIso.Rd [new file with mode: 0644]
EBSeq/man/PlotFDTP.Rd [new file with mode: 0644]
EBSeq/man/PlotFPTP.Rd [new file with mode: 0644]
EBSeq/man/PlotPattern.Rd [new file with mode: 0644]
EBSeq/man/PlotTopCts.Rd [new file with mode: 0644]
EBSeq/man/PolyFitPlot.Rd [new file with mode: 0644]
EBSeq/man/PoolMatrix.Rd [new file with mode: 0644]
EBSeq/man/PostFC.Rd [new file with mode: 0644]
EBSeq/man/QQP.Rd [new file with mode: 0644]
EBSeq/man/QuantileNorm.Rd [new file with mode: 0644]
EBSeq/man/RankNorm.Rd [new file with mode: 0644]
EBSeq/man/TPFDRplot.Rd [new file with mode: 0644]
EBSeq/man/TopCts.Rd [new file with mode: 0644]
EBSeq/man/beta.mom.Rd [new file with mode: 0644]
EBSeq/man/crit_fun.Rd [new file with mode: 0644]
EBSeq/man/f0.Rd [new file with mode: 0644]
EBSeq/man/f1.Rd [new file with mode: 0644]
README.md
WHAT_IS_NEW
calcClusteringInfo.cpp [new file with mode: 0644]
makefile
rsem-calculate-expression
rsem-for-ebseq-generate-ngvector-from-clustering-info [new file with mode: 0755]
rsem-form-counts-matrix [new file with mode: 0755]
rsem-generate-ngvector [new file with mode: 0755]

diff --git a/EBSeq/DESCRIPTION b/EBSeq/DESCRIPTION
new file mode 100644 (file)
index 0000000..5f61713
--- /dev/null
@@ -0,0 +1,12 @@
+Package: EBSeq
+Type: Package
+Title: An R package for Gene and Isoform Differential Expression Analysis on RNA-Seq Data
+Version: 1.1
+Date: 2012-4-18
+Author: Ning Leng
+Maintainer: Ning Leng <nleng@wisc.edu>
+Depends: blockmodeling
+Description: RNA-Seq differential expression analysis at both the gene and isoform level
+License: 
+LazyLoad: yes
+Packaged: 2012-04-25 05:25:10 UTC; ningleng
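The package above is driven entirely from R. As orientation for the files that follow, here is a minimal sketch of the intended two-condition workflow (EBSeq/R/MedianNorm.R and EBSeq/R/EBTest.R added in this commit); the simulated counts matrix, the 3-vs-3 design, and maxround=5 are illustrative assumptions, not part of the commit:

    # Minimal two-condition sketch; assumes the EBSeq/ package from this commit
    # has been installed (e.g. R CMD INSTALL EBSeq).
    library(EBSeq)
    set.seed(1)
    # Illustrative counts: 1000 genes x 6 samples (rows = genes/isoforms, columns = samples).
    GeneCounts=matrix(rnbinom(1000*6,mu=100,size=10),nrow=1000,
                      dimnames=list(paste("G",1:1000,sep="_"),paste("S",1:6,sep="")))
    Conditions=as.factor(rep(c("C1","C2"),each=3))   # assumed 3-vs-3 design
    Sizes=MedianNorm(GeneCounts)                     # median-of-ratios library size factors
    EBOut=EBTest(Data=GeneCounts,Conditions=Conditions,sizeFactors=Sizes,maxround=5)

The posterior probabilities returned in EBOut are picked up again in a sketch after EBSeq/R/EBTest.R below.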
diff --git a/EBSeq/NAMESPACE b/EBSeq/NAMESPACE
new file mode 100644 (file)
index 0000000..27e0f29
--- /dev/null
@@ -0,0 +1,38 @@
+export(beta.mom)
+export(CheckNg)
+export(crit_fun)
+export(DenNHist)
+export(DenNHistTable)
+export(EBTest)
+export(f0)
+export(f1)
+export(GeneSimuAt)
+export(GeneSimu)
+export(GetData)
+export(GetNg)
+export(GetPP)
+export(IsoSimuAt)
+export(IsoSimu)
+export(Likefun)
+export(LogN)
+export(MedianNorm)
+export(MergeGene)
+export(MergeIso)
+export(PlotFDTP)
+export(PlotFPTP)
+export(PlotTopCts)
+export(PolyFitPlot)
+export(PoolMatrix)
+export(PostFC)
+export(QQP)
+export(QuantileNorm)
+export(RankNorm)
+export(TopCts)
+export(TPFDRplot)
+export(EBMultiTest)
+export(GeneMultiSimu)
+export(GetMultiPP)
+export(LikefunMulti)
+export(LogNMulti)
+export(GetPatterns)
+export(PlotPattern)
diff --git a/EBSeq/R/CheckNg.R b/EBSeq/R/CheckNg.R
new file mode 100644 (file)
index 0000000..092f907
--- /dev/null
@@ -0,0 +1,21 @@
+CheckNg<-function(NewMean, NewVar,nterm, xlim, ylim){
+       Ng1=PolyFit_ENAR(NewMean[[1]],NewVar[[1]],nterm,"Mean","Variance","Ng=1",xlim, ylim)
+       sortNg1=order(NewMean[[1]])
+       Ng2=PolyFit_ENAR(unlist(NewMean[c(2,4,6,8)]),unlist(NewVar[c(2,4,6,8)]),nterm,"Mean","Variance","Ng=2",xlim, ylim)
+       sortNg2=order(unlist(NewMean[c(2,4,6,8)]))
+       Ng3=PolyFit_ENAR(unlist(NewMean[c(3,5,7,9)]),unlist(NewVar[c(3,5,7,9)]),nterm,"Mean","Variance","Ng=3",xlim, ylim)
+       sortNg3=order(unlist(NewMean[c(3,5,7,9)]))
+
+       ALL=PolyFit_ENAR(unlist(NewMean),unlist(NewVar),nterm,"Mean","Variance","",xlim, ylim)
+       lines(log10(unlist(NewMean[c(2,4,6,8)]))[sortNg2],Ng2$fit[sortNg2],col="green",lwd=2)
+       lines(log10(unlist(NewMean[c(3,5,7,9)]))[sortNg3],Ng3$fit[sortNg3],col="orange",lwd=2)
+       lines(log10(unlist(NewMean[1]))[sortNg1],Ng1$fit[sortNg1],col="pink",lwd=2)
+       legend("topleft",col=c("red","pink","green","orange"),c("all","Ng=1","Ng=2","Ng=3"),lwd=2)
+}
+
+
+
+
+
+
+
diff --git a/EBSeq/R/DenNHist.R b/EBSeq/R/DenNHist.R
new file mode 100644 (file)
index 0000000..76e5d60
--- /dev/null
@@ -0,0 +1,20 @@
+DenNHist <-
+function(QList,Alpha,Beta,name,AList="F",GroupName)
+{
+    if(!is.list(QList)) QList=list(QList)      
+       for (i in 1:length(QList)){
+               if (AList=="F") alpha.use=Alpha
+                       if(AList=="T")  alpha.use=Alpha[i]
+       hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=paste(GroupName[i],name,sep=" "),xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
+       tmpSize=length(QList[[i]][QList[[i]]<.98])
+        tmpseq=seq(0.001,1,length=1000)
+        #tmpdensity=dbeta(tmpseq,AlphaResult,BetaResult[i])
+        #points(tmpseq,tmpdensity, type="l",col="green")
+       #ll=dbeta(tmpseq,Alpha,Beta[i])
+       ll=tmpseq
+                lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
+       legend("topright",c("Data","Fitted density"),col=c("blue","green"),lwd=2)
+}
+       
+       }
+
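DenNHist above overlays the fitted Beta density on a histogram of the estimated q values, which is the package's visual check of the Beta prior fit. A sketch of calling it on the EBTest output from the earlier sketch (the single-Ng-group indexing and the labels are assumptions):

    # Fit-diagnostic sketch, assuming EBOut from the two-condition sketch above
    # and a single Ng (uncertainty) group, so QList1 has one element.
    nr=nrow(EBOut$Alpha)                      # hyperparameters from the last EM round
    DenNHist(QList=EBOut$QList1[[1]],         # estimated q's for condition 1
             Alpha=EBOut$Alpha[nr,],Beta=EBOut$Beta[nr,],
             name="qC1",GroupName="Ng1")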
diff --git a/EBSeq/R/DenNHistTable.R b/EBSeq/R/DenNHistTable.R
new file mode 100644 (file)
index 0000000..e3a6855
--- /dev/null
@@ -0,0 +1,38 @@
+DenNHistTable <-
+function(QList,Alpha,Beta,AList="F")
+{      
+       par(mfrow=c(3,4))
+       plot(1, type="n", axes=F, xlab="", ylab="", main="No 3' end  No 5' end",cex.main=1)
+       plot(1, type="n", axes=F, xlab="", ylab="",main="With 3' end No 5' end",cex.main=1)
+       plot(1, type="n", axes=F, xlab="", ylab="",main="With 5' end No 3' end",cex.main=1)
+       for (i in c(1,2,4,6,8)){
+                alpha.use=Alpha
+       hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=ifelse(i==1,"With 5' end With 3' end",""),cex.main=1, xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
+       if(i==1)mtext("Ng=1",side=4, cex=1)
+       if(i==8)mtext("Ng=2", side=4,cex=1)
+       tmpSize=length(QList[[i]][QList[[i]]<.98])
+
+        tmpseq=seq(0.001,1,length=1000)
+       ll=tmpseq
+                lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
+       legend("topright",c("Data","Fitted density"),col=c("blue","green"),lwd=2,cex=.5)
+}
+       
+       for (i in c(3,5,7,9)){
+                alpha.use=Alpha
+       hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=ifelse(i==1,"With 5' end With 3' end exons",""),xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
+       if(i==9)mtext("Ng=3", side=4,cex=1)
+
+       tmpSize=length(QList[[i]][QList[[i]]<.98])
+
+        tmpseq=seq(0.001,1,length=1000)
+       ll=tmpseq
+                lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
+       legend("topright",c("Data","Fitted density"),col=c("blue","green"),cex=.5, lwd=2)
+}
+
+
+
+
+       }
+
diff --git a/EBSeq/R/EBMultiTest.R b/EBSeq/R/EBMultiTest.R
new file mode 100644 (file)
index 0000000..ab23f87
--- /dev/null
@@ -0,0 +1,336 @@
+EBMultiTest <-
+function(Data,NgVector=NULL,Conditions,AllParti=NULL, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
+{
+
+       if(is.null(NgVector))NgVector=rep(1,nrow(Data))
+       if(!is.factor(Conditions))Conditions=as.factor(Conditions)
+
+
+       #ReNameThem
+       IsoNamesIn=rownames(Data)
+       Names=paste("I",c(1:dim(Data)[1]),sep="")
+       names(IsoNamesIn)=Names
+       rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
+       names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
+       
+       # If PossibleCond==NULL, use all combinations
+       NumCond=nlevels(Conditions)
+       CondLevels=levels(Conditions)
+       #library(blockmodeling)
+       if(is.null(AllParti)){
+               AllPartiList=sapply(1:NumCond,function(i)nkpartitions(NumCond,i))
+               AllParti=do.call(rbind,AllPartiList)
+               colnames(AllParti)=CondLevels
+           rownames(AllParti)=paste("Pattern",1:nrow(AllParti),sep="")
+       }
+       if(!length(sizeFactors)==ncol(Data)){
+               rownames(sizeFactors)=rownames(Data)
+               colnames(sizeFactors)=Conditions
+       }
+
+       
+       NoneZeroLength=nlevels(as.factor(NgVector))
+       NameList=sapply(1:NoneZeroLength,function(i)names(NgVector)[NgVector==i],simplify=F)
+       DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
+       names(DataList)=names(NameList)
+    
+       NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList[[i]])[1])
+       # Unlist 
+       DataList.unlist=do.call(rbind, DataList)
+
+       # Divide by SampleSize factor
+       
+       if(length(sizeFactors)==ncol(Data))
+       DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
+       
+       if(length(sizeFactors)!=ncol(Data))
+       DataList.unlist.dvd=DataList.unlist/sizeFactors
+       
+       # Pool or Not
+       if(Pool==T){
+       DataforPoolSP.dvd=MeanforPoolSP.dvd=vector("list",NumCond)
+       for(lv in 1:NumCond){
+               DataforPoolSP.dvd[[lv]]=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])   
+               MeanforPoolSP.dvd[[lv]]=rowMeans(DataforPoolSP.dvd[[lv]])
+       }
+       MeanforPool.dvd=rowMeans(DataList.unlist.dvd)
+       NumInBin=floor(dim(DataList.unlist)[1]/NumBin)
+       StartSeq=c(0:(NumBin-1))*NumInBin+1
+       EndSeq=c(StartSeq[-1]-1,dim(DataList.unlist)[1])
+       MeanforPool.dvd.Sort=sort(MeanforPool.dvd,decreasing=T) 
+       MeanforPool.dvd.Order=order(MeanforPool.dvd,decreasing=T)
+       PoolGroups=sapply(1:NumBin,function(i)(names(MeanforPool.dvd.Sort)[StartSeq[i]:EndSeq[i]]),simplify=F)
+       #FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
+       # Use GeoMean of every two-group partition
+       Parti2=nkpartitions(NumCond,2)
+       FCForPoolList=sapply(1:nrow(Parti2),function(i)rowMeans(do.call(cbind,
+                                                       MeanforPoolSP.dvd[Parti2[i,]==1]))/
+                                                       rowMeans(do.call(cbind,MeanforPoolSP.dvd[Parti2[i,]==2])),
+                                                       simplify=F)
+       FCForPoolMat=do.call(cbind,FCForPoolList)
+       FCforPool=apply(FCForPoolMat,1,function(i)exp(mean(log(i))))
+       names(FCforPool)=names(MeanforPool.dvd)
+       FC_Use=names(FCforPool)[which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],PoolLower) & FCforPool<=quantile(FCforPool[!is.na(FCforPool)],PoolUpper))]
+       PoolGroupVar=sapply(1:NumBin,function(i)(mean(apply(matrix(DataList.unlist[PoolGroups[[i]][PoolGroups[[i]]%in%FC_Use],],ncol=ncol(DataList.unlist)),1,var))))   
+       PoolGroupVarInList=sapply(1:NumBin,function(i)(rep(PoolGroupVar[i],length(PoolGroups[[i]]))),simplify=F)
+       PoolGroupVarVector=unlist(PoolGroupVarInList)
+       VarPool=PoolGroupVarVector[MeanforPool.dvd.Order]
+       names(VarPool)=names(MeanforPool.dvd)
+               }
+
+       DataListSP=vector("list",nlevels(Conditions))
+       DataListSP.dvd=vector("list",nlevels(Conditions))
+       SizeFSP=DataListSP
+       MeanSP=DataListSP
+       VarSP=DataListSP
+       GetPSP=DataListSP
+       RSP=DataListSP
+       CISP=DataListSP
+       tauSP=DataListSP
+       
+       NumEachCondLevel=summary(Conditions)
+       if(Pool==F & is.null(CI)) CondLevelsUse=CondLevels[NumEachCondLevel>1]
+       if(Pool==T | !is.null(CI)) CondLevelsUse=CondLevels
+       NumCondUse=length(CondLevelsUse)        
+
+       for (lv in 1:nlevels(Conditions)){
+       DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
+       rownames(DataListSP[[lv]])=rownames(DataList.unlist)
+       DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
+       if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI)){
+       CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
+       tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
+       }
+       # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
+       # may input one for each 
+       if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
+       if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
+               
+       MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
+       
+       if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
+       if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
+
+       if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI))
+               VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
+       if( Pool==T){
+               VarSP[[lv]]=VarPool     
+               }
+       if(ncol(DataListSP[[lv]])!=1){
+               VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
+               names(VarSP[[lv]])=rownames(DataList.unlist)
+               GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
+           RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
+       }
+       names(MeanSP[[lv]])=rownames(DataList.unlist)
+       }
+
+       # Get Empirical R
+       # POOL R???
+       MeanList=rowMeans(DataList.unlist.dvd)
+       VarList=apply(DataList.unlist.dvd, 1, var)
+       Varcbind=do.call(cbind,VarSP[CondLevels%in%CondLevelsUse])
+       PoolVarSpeedUp_MDFPoi_NoNormVarList=rowMeans(Varcbind)
+       VarrowMin=apply(Varcbind,1,min)
+       GetP=MeanList/PoolVarSpeedUp_MDFPoi_NoNormVarList
+       
+    EmpiricalRList=MeanList*GetP/(1-GetP) 
+       # sep
+       #Rcb=cbind(RSP[[1]],RSP[[2]])
+       #Rbest=apply(Rcb,1,function(i)max(i[!is.na(i) & i!=Inf]))
+       EmpiricalRList[EmpiricalRList==Inf]     =max(EmpiricalRList[EmpiricalRList!=Inf])
+       # fine
+       # 
+       GoodData=names(MeanList)[EmpiricalRList>0 &  VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
+       NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf |  is.na(VarrowMin) | is.na(EmpiricalRList)]
+       #NotIn.BestR=Rbest[NotIn.raw]
+       #NotIn.fix=NotIn.BestR[which(NotIn.BestR>0)]
+       #EmpiricalRList[names(NotIn.fix)]=NotIn.fix
+       #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
+       #GoodData=c(GoodData.raw,names(NotIn.fix))
+       #NotIn=NotIn.raw[!NotIn.raw%in%names(NotIn.fix)]
+       EmpiricalRList.NotIn=EmpiricalRList[NotIn]
+       EmpiricalRList.Good=EmpiricalRList[GoodData]
+       EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
+       if(length(sizeFactors)==ncol(Data))
+       EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)        
+       if(!length(sizeFactors)==ncol(Data))
+       EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
+
+
+       # Only Use Data has Good q's
+       DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
+       DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
+       DataListIn.unlist=do.call(rbind, DataList.In)
+       DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
+       
+       DataListSPIn=vector("list",nlevels(Conditions))
+       DataListSPNotIn=vector("list",nlevels(Conditions))
+       EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
+       for (lv in 1:nlevels(Conditions)){
+               DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
+               if(length(NotIn)>0)     DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
+               rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
+               if(length(NotIn)>0)rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
+               EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
+       }       
+
+       NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
+       NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
+
+       #Initialize SigIn & ...
+       AlphaIn=0.5
+       BetaIn=rep(0.5,NoneZeroLength)
+       PIn=rep(1/nrow(AllParti),nrow(AllParti))
+
+       ####use while to make an infinity round?
+       UpdateAlpha=NULL
+       UpdateBeta=NULL
+       UpdateP=NULL
+       UpdatePFromZ=NULL
+    Timeperround=NULL 
+       for (times in 1:maxround){
+       temptime1=proc.time()
+               UpdateOutput=suppressWarnings(LogNMulti(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,  
+                                                          NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength, AllParti,Conditions))
+       print(paste("iteration", times, "done",sep=" "))
+               AlphaIn=UpdateOutput$AlphaNew
+       BetaIn=UpdateOutput$BetaNew
+       PIn=UpdateOutput$PNew
+               PFromZ=UpdateOutput$PFromZ
+       FOut=UpdateOutput$FGood
+               UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
+               UpdateBeta=rbind(UpdateBeta,BetaIn)
+       UpdateP=rbind(UpdateP,PIn)
+               UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
+               temptime2=proc.time()
+               Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
+               print(paste("time" ,Timeperround[times],sep=" "))
+               Z.output=UpdateOutput$ZEachGood
+               Z.NA.Names=UpdateOutput$zNaNName
+               }
+               #Remove this } after testing!!
+                
+#      if (times!=1){  
+#              if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){ 
+#                      Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
+#                      break
+#        }
+#    }
+#}
+
+##########Change Names############
+## Only z are for Good Ones
+## Others are for ALL Data
+GoodData=GoodData[!GoodData%in%Z.NA.Names]
+IsoNamesIn.Good=as.vector(IsoNamesIn[GoodData])
+RealName.Z.output=Z.output
+RealName.F=FOut
+rownames(RealName.Z.output)=IsoNamesIn.Good
+rownames(RealName.F)=IsoNamesIn.Good
+
+RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
+RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
+RealName.SPMeanList=sapply(1:NoneZeroLength,function(i)sapply(1:length(MeanSP), function(j)MeanSP[[j]][names(MeanSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
+RealName.SPVarList=sapply(1:NoneZeroLength,function(i)sapply(1:length(VarSP), function(j)VarSP[[j]][names(VarSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
+RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
+
+RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
+RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVarSpeedUp_MDFPoi_NoNormVarList[names(PoolVarSpeedUp_MDFPoi_NoNormVarList)%in%NameList[[i]]], simplify=F)
+RealName.QList=sapply(1:NoneZeroLength,function(i)sapply(1:length(GetPSP), function(j)GetPSP[[j]][names(GetPSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
+
+
+for (i in 1:NoneZeroLength){
+tmp=NameList[[i]]
+names=IsoNamesIn[tmp]
+RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
+RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
+       for(j in 1:NumCond){
+               RealName.SPMeanList[[i]][[j]]=RealName.SPMeanList[[i]][[j]][NameList[[i]]]
+               if(!is.null(RealName.QList[[i]][[j]])){
+                       RealName.QList[[i]][[j]]=RealName.QList[[i]][[j]][NameList[[i]]]
+                       RealName.SPVarList[[i]][[j]]=RealName.SPVarList[[i]][[j]][NameList[[i]]]
+                       names(RealName.QList[[i]][[j]])=names
+                       names(RealName.SPVarList[[i]][[j]])=names
+               }
+               names(RealName.SPMeanList[[i]][[j]])=names
+       }
+RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
+RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
+RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
+
+names(RealName.MeanList[[i]])=names
+names(RealName.VarList[[i]])=names
+
+names(RealName.EmpiricalRList[[i]])=names
+names(RealName.PoolVarList[[i]])=names
+rownames(RealName.DataList[[i]])=names
+
+}
+
+
+#########posterior part for other data set here later############
+AllNA=unique(c(Z.NA.Names,NotIn))
+AllZ=NULL
+AllF=NULL
+if(length(AllNA)==0){
+       AllZ=RealName.Z.output[IsoNamesIn,]
+       AllF=RealName.F[IsoNamesIn,]
+}
+ZEachNA=NULL
+if (length(AllNA)>0){
+       Ng.NA=NgVector[AllNA]
+       AllNA.Ngorder=AllNA[order(Ng.NA)]
+       NumOfEachGroupNA=rep(0,NoneZeroLength)
+       NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
+       names(NumOfEachGroupNA)=c(1:NoneZeroLength)
+       NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
+       PNotIn=rep(1-Approx,length(AllNA.Ngorder))
+       MeanList.NotIn=MeanList[AllNA.Ngorder]
+       R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn) 
+       if(length(sizeFactors)==ncol(Data))
+       R.NotIn=matrix(outer(R.NotIn.raw,sizeFactors),nrow=length(AllNA.Ngorder))
+       if(!length(sizeFactors)==ncol(Data))
+       R.NotIn=matrix(R.NotIn.raw*sizeFactors[NotIn,],nrow=length(AllNA.Ngorder))
+    
+       DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
+       DataListSPNotInWithZ=vector("list",nlevels(Conditions))
+       RListSPNotInWithZ=vector("list",nlevels(Conditions))
+       for (lv in 1:nlevels(Conditions)) {
+               DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
+               RListSPNotInWithZ[[lv]]=matrix(R.NotIn[,Conditions==levels(Conditions)[lv]],nrow=length(AllNA.Ngorder))
+       }
+       FListNA=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
+                       function(j)f0(do.call(cbind, DataListSPNotInWithZ[AllParti[i,]==j]),AlphaIn, BetaIn,
+                do.call(cbind,RListSPNotInWithZ[AllParti[i,]==j]), NumOfEachGroupNA, log=T)),
+                                                      simplify=F)
+       FPartiLogNA=sapply(FListNA,rowSums)
+       FMatNA=exp(FPartiLogNA)
+       
+       rownames(FMatNA)=rownames(DataListNotIn.unlistWithZ)
+       PMatNA=matrix(rep(1,nrow(DataListNotIn.unlistWithZ)),ncol=1)%*%matrix(PIn,nrow=1)
+       FmultiPNA=FMatNA*PMatNA
+    DenomNA=rowSums(FmultiPNA)
+       ZEachNA=apply(FmultiPNA,2,function(i)i/DenomNA)
+
+       rownames(ZEachNA)=IsoNamesIn[AllNA.Ngorder]
+
+       AllZ=rbind(RealName.Z.output,ZEachNA)
+       AllZ=AllZ[IsoNamesIn,]
+       
+       F.NotIn=FMatNA
+       rownames(F.NotIn)=IsoNamesIn[rownames(FMatNA)]
+       AllF=rbind(RealName.F,F.NotIn)
+       AllF=AllF[IsoNamesIn,]
+
+}
+colnames(AllZ)=rownames(AllParti)
+colnames(AllF)=rownames(AllParti)
+
+#############Result############################
+Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ, 
+                       Z=RealName.Z.output,PoissonZ=ZEachNA, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, 
+                       VarList=RealName.VarList, QList=RealName.QList, SPMean=RealName.SPMeanList, SPEstVar=RealName.SPVarList, 
+                       PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f=AllF, AllParti=AllParti)
+}
+
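EBMultiTest above generalizes the two-condition test to arbitrary expression patterns over several conditions; when AllParti=NULL it enumerates every partition of the conditions itself via nkpartitions() from the blockmodeling package declared in Depends. A sketch of one call (the simulated counts and the three-condition, two-replicate design are assumptions):

    # Multi-condition sketch (illustrative data).
    MultiCounts=matrix(rnbinom(1000*6,mu=100,size=10),nrow=1000,
                       dimnames=list(paste("G",1:1000,sep="_"),paste("S",1:6,sep="")))
    Cond3=as.factor(rep(c("C1","C2","C3"),each=2))
    MultiSizes=MedianNorm(MultiCounts)
    MultiOut=EBMultiTest(Data=MultiCounts,Conditions=Cond3,AllParti=NULL,
                         sizeFactors=MultiSizes,maxround=5)
    MultiOut$AllParti       # the expression patterns that were tested
    head(MultiOut$PPDE)     # posterior probability of each pattern, per gene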
diff --git a/EBSeq/R/EBTest.R b/EBSeq/R/EBTest.R
new file mode 100644 (file)
index 0000000..17619ae
--- /dev/null
@@ -0,0 +1,339 @@
+EBTest <-
+function(Data,NgVector=NULL,Vect5End=NULL,Vect3End=NULL,Conditions, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000,ApproxVal=10^-10)
+{
+       Dataraw=Data
+       AllZeroNames=which(rowMeans(Data)==0)
+       NotAllZeroNames=which(rowMeans(Data)>0)
+       if(length(AllZeroNames)>0) print("Remove transcripts with all zero")
+       Data=Data[NotAllZeroNames,]
+       if(!is.null(NgVector))NgVector=NgVector[NotAllZeroNames]
+       if(!length(sizeFactors)==ncol(Data))sizeFactors=sizeFactors[NotAllZeroNames,]
+
+       if(is.null(NgVector))NgVector=rep(1,nrow(Data))
+
+       #Rename Them
+       IsoNamesIn=rownames(Data)
+       Names=paste("I",c(1:dim(Data)[1]),sep="")
+       names(IsoNamesIn)=Names
+       rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
+       names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
+       
+
+       if(!length(sizeFactors)==ncol(Data)){
+               rownames(sizeFactors)=rownames(Data)
+               colnames(sizeFactors)=Conditions
+       }
+       
+       NumOfNg=nlevels(as.factor(NgVector))
+       NameList=sapply(1:NumOfNg,function(i)Names[NgVector==i],simplify=F)
+       names(NameList)=paste("Ng",c(1:NumOfNg),sep="")
+       NotNone=NULL
+       for (i in 1:NumOfNg) {
+               if (length(NameList[[i]])!=0) 
+                       NotNone=c(NotNone,names(NameList)[i])
+               }
+       NameList=NameList[NotNone]
+               
+       NoneZeroLength=length(NameList)
+       DataList=vector("list",NoneZeroLength)
+       DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
+       names(DataList)=names(NameList)
+    
+       NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList[[i]])[1])
+       # Unlist 
+       DataList.unlist=do.call(rbind, DataList)
+
+       # Divide by SampleSize factor
+       
+       if(length(sizeFactors)==ncol(Data))
+       DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
+       
+       if(length(sizeFactors)!=ncol(Data))
+       DataList.unlist.dvd=DataList.unlist/sizeFactors
+       
+       # Get FC and VarPool for pooling - Only works on 2 conditions
+       if(ncol(Data)==2){
+       DataforPoolSP.dvd1=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[1]],nrow=dim(DataList.unlist)[1]) 
+       DataforPoolSP.dvd2=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[2]],nrow=dim(DataList.unlist)[1])
+       MeanforPoolSP.dvd1=rowMeans(DataforPoolSP.dvd1)
+       MeanforPoolSP.dvd2=rowMeans(DataforPoolSP.dvd2)
+       FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
+       names(FCforPool)=rownames(Data)
+       FC_Use=which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],.25) & 
+                                                                 FCforPool<=quantile(FCforPool[!is.na(FCforPool)],.75))
+       
+       Var_FC_Use=apply( DataList.unlist.dvd[FC_Use,],1,var )
+       Mean_FC_Use=(MeanforPoolSP.dvd1[FC_Use]+MeanforPoolSP.dvd2[FC_Use])/2
+       MeanforPool=(MeanforPoolSP.dvd1+MeanforPoolSP.dvd2)/2
+       FC_Use2=which(Var_FC_Use>=Mean_FC_Use)
+       Var_FC_Use2=Var_FC_Use[FC_Use2]
+       Mean_FC_Use2=Mean_FC_Use[FC_Use2]
+       Phi=mean((Var_FC_Use2-Mean_FC_Use2)/Mean_FC_Use2^2)
+       VarEst= MeanforPool*(1+MeanforPool*Phi)
+       print(Phi)
+       }
+
+       #DataListSP Here also unlist.. Only two lists
+       DataListSP=vector("list",nlevels(Conditions))
+       DataListSP.dvd=vector("list",nlevels(Conditions))
+       SizeFSP=DataListSP
+       MeanSP=DataListSP
+       VarSP=DataListSP
+       GetPSP=DataListSP
+       RSP=DataListSP
+       CISP=DataListSP
+       tauSP=DataListSP
+       NumSampleEachCon=rep(NULL,nlevels(Conditions))
+
+       for (lv in 1:nlevels(Conditions)){
+               DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
+               rownames(DataListSP[[lv]])=rownames(DataList.unlist)
+               DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
+               NumSampleEachCon[lv]=ncol(DataListSP[[lv]])
+
+       if(ncol(DataListSP[[lv]])==1 & !is.null(CI)){
+               CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
+               tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
+       }
+       # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
+       # may input one for each 
+       if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
+       if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
+       
+       
+       MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
+       
+       if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
+       if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
+
+       if(ncol(DataListSP[[lv]])==1 & !is.null(CI))
+               VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
+       if(ncol(DataListSP[[lv]])!=1){
+               VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
+               names(MeanSP[[lv]])=rownames(DataList.unlist)
+               names(VarSP[[lv]])=rownames(DataList.unlist)
+               GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
+               RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
+       }
+}
+       
+       
+       MeanList=rowMeans(DataList.unlist.dvd)
+       VarList=apply(DataList.unlist.dvd, 1, var)
+       if(ncol(Data)==2)PoolVar=VarEst
+       if(!ncol(Data)==2){
+               CondWithRep=which(NumSampleEachCon>1)
+               VarCondWithRep=do.call(cbind,VarSP[CondWithRep])
+               PoolVar=rowMeans(VarCondWithRep)
+       }
+       GetP=MeanList/PoolVar
+       
+    EmpiricalRList=MeanList*GetP/(1-GetP) 
+       EmpiricalRList[EmpiricalRList==Inf]     =max(EmpiricalRList[EmpiricalRList!=Inf])
+       
+       if(ncol(Data)!=2){
+       Varcbind=do.call(cbind,VarSP)
+       VarrowMin=apply(Varcbind,1,min)
+       }
+
+       if(ncol(Data)==2){
+               Varcbind=VarEst
+               VarrowMin=VarEst
+       }
+       # 
+       # 
+       GoodData=names(MeanList)[EmpiricalRList>0 &  VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
+       NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf |  is.na(VarrowMin) | is.na(EmpiricalRList)]
+       #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
+       EmpiricalRList.NotIn=EmpiricalRList[NotIn]
+       EmpiricalRList.Good=EmpiricalRList[GoodData]
+       EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
+       if(length(sizeFactors)==ncol(Data))
+       EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)        
+       if(!length(sizeFactors)==ncol(Data))
+       EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
+
+
+       # Only Use Data has Good q's
+       DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
+       DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
+       DataListIn.unlist=do.call(rbind, DataList.In)
+       DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
+       
+       DataListSPIn=vector("list",nlevels(Conditions))
+       DataListSPNotIn=vector("list",nlevels(Conditions))
+       EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
+       for (lv in 1:nlevels(Conditions)){
+               DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
+       if(length(NotIn)>0){    DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
+       rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
+       }
+       rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
+       EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
+}      
+
+       NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
+       NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
+
+       #Initialize SigIn & ...
+       AlphaIn=0.5
+       BetaIn=rep(0.5,NoneZeroLength)
+       PIn=0.5
+
+       ####use while to make an infinity round?
+       UpdateAlpha=NULL
+       UpdateBeta=NULL
+       UpdateP=NULL
+       UpdatePFromZ=NULL
+    Timeperround=NULL 
+       for (times in 1:maxround){
+       temptime1=proc.time()
+               UpdateOutput=suppressWarnings(LogN(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,  NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength))
+       print(paste("iteration", times, "done",sep=" "))
+               AlphaIn=UpdateOutput$AlphaNew
+       BetaIn=UpdateOutput$BetaNew
+       PIn=UpdateOutput$PNew
+               PFromZ=UpdateOutput$PFromZ
+       F0Out=UpdateOutput$F0Out
+               F1Out=UpdateOutput$F1Out
+               UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
+               UpdateBeta=rbind(UpdateBeta,BetaIn)
+       UpdateP=rbind(UpdateP,PIn)
+               UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
+               temptime2=proc.time()
+               Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
+               print(paste("time" ,Timeperround[times],sep=" "))
+               Z.output=UpdateOutput$ZNew.list[!is.na(UpdateOutput$ZNew.list)]
+               Z.NA.Names=UpdateOutput$zNaNName
+               }
+               #Remove this } after testing!!
+                
+#      if (times!=1){  
+#              if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){ 
+#                      Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
+#                      break
+#        }
+#    }
+#}
+
+##########Change Names############
+## Only z are for Good Ones
+## Others are for ALL Data
+GoodData=GoodData[!GoodData%in%Z.NA.Names]
+IsoNamesIn.Good=IsoNamesIn[GoodData]
+RealName.Z.output=Z.output
+RealName.F0=F0Out
+RealName.F1=F1Out
+names(RealName.Z.output)=IsoNamesIn.Good
+names(RealName.F0)=IsoNamesIn.Good
+names(RealName.F1)=IsoNamesIn.Good
+
+
+RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
+RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
+RealName.C1MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[1]][names(MeanSP[[1]])%in%NameList[[i]]], simplify=F)
+RealName.C2MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[2]][names(MeanSP[[2]])%in%NameList[[i]]], simplify=F)
+RealName.C1VarList=sapply(1:NoneZeroLength,function(i)VarSP[[1]][names(VarSP[[1]])%in%NameList[[i]]], simplify=F)
+RealName.C2VarList=sapply(1:NoneZeroLength,function(i)VarSP[[2]][names(VarSP[[2]])%in%NameList[[i]]], simplify=F)
+RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
+
+
+
+RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
+RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVar[names(PoolVar)%in%NameList[[i]]], simplify=F)
+
+
+RealName.QList1=sapply(1:NoneZeroLength,function(i)GetPSP[[1]][names(GetPSP[[1]])%in%NameList[[i]]], simplify=F)
+RealName.QList2=sapply(1:NoneZeroLength,function(i)GetPSP[[2]][names(GetPSP[[2]])%in%NameList[[i]]], simplify=F)
+
+
+for (i in 1:NoneZeroLength){
+tmp=NameList[[i]]
+names=IsoNamesIn[tmp]
+
+RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
+RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
+RealName.QList1[[i]]=RealName.QList1[[i]][NameList[[i]]]
+RealName.QList2[[i]]=RealName.QList2[[i]][NameList[[i]]]
+RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
+RealName.C1MeanList[[i]]=RealName.C1MeanList[[i]][NameList[[i]]]
+RealName.C2MeanList[[i]]=RealName.C2MeanList[[i]][NameList[[i]]]
+RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
+RealName.C1VarList[[i]]=RealName.C1VarList[[i]][NameList[[i]]]
+RealName.C2VarList[[i]]=RealName.C2VarList[[i]][NameList[[i]]]
+RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
+
+names(RealName.MeanList[[i]])=names
+names(RealName.VarList[[i]])=names
+if(ncol(DataListSP[[1]])!=1){
+       names(RealName.QList1[[i]])=names
+       names(RealName.C1VarList[[i]])=names
+}
+if(ncol(DataListSP[[2]])!=1){
+       names(RealName.QList2[[i]])=names
+       names(RealName.C2VarList[[i]])=names
+}
+
+names(RealName.EmpiricalRList[[i]])=names
+names(RealName.C1MeanList[[i]])=names
+names(RealName.C2MeanList[[i]])=names
+names(RealName.PoolVarList[[i]])=names
+rownames(RealName.DataList[[i]])=names
+
+
+}
+
+
+#########posterior part for other data set here later############
+AllNA=unique(c(Z.NA.Names,NotIn))
+z.list.NotIn=NULL
+AllF0=c(RealName.F0)
+AllF1=c(RealName.F1)
+AllZ=RealName.Z.output
+
+if (length(AllNA)>0){
+       Ng.NA=NgVector[AllNA]
+       AllNA.Ngorder=AllNA[order(Ng.NA)]
+       NumOfEachGroupNA=rep(0,NoneZeroLength)
+       NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
+       names(NumOfEachGroupNA)=c(1:NoneZeroLength)
+       NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
+       PNotIn=rep(1-ApproxVal,length(AllNA.Ngorder))
+       MeanList.NotIn=MeanList[AllNA.Ngorder]
+       R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn) 
+       if(length(sizeFactors)==ncol(Data))
+       R.NotIn=outer(R.NotIn.raw,sizeFactors)
+       if(!length(sizeFactors)==ncol(Data))
+       R.NotIn=R.NotIn.raw*sizeFactors[NotIn,]
+       R.NotIn1=matrix(R.NotIn[,Conditions==levels(Conditions)[1]],nrow=nrow(R.NotIn))
+       R.NotIn2=matrix(R.NotIn[,Conditions==levels(Conditions)[2]],nrow=nrow(R.NotIn))
+    
+       DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
+       DataListSPNotInWithZ=vector("list",nlevels(Conditions))
+       for (lv in 1:nlevels(Conditions)) 
+               DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
+               F0=f0(DataListNotIn.unlistWithZ,  AlphaIn, BetaIn, R.NotIn, NumOfEachGroupNA, log=F)
+       F1=f1(DataListSPNotInWithZ[[1]], DataListSPNotInWithZ[[2]], AlphaIn, BetaIn, R.NotIn1,R.NotIn2, NumOfEachGroupNA, log=F)
+       z.list.NotIn=PIn*F1/(PIn*F1+(1-PIn)*F0)
+#      names(z.list.NotIn)=IsoNamesIn.Good=IsoNamesIn[which(Names%in%NotIn)]
+       names(z.list.NotIn)=IsoNamesIn[AllNA.Ngorder]
+
+       AllZ=c(RealName.Z.output,z.list.NotIn)
+       AllZ=AllZ[IsoNamesIn]
+       AllZ[is.na(AllZ)]=0
+       F0.NotIn=F0
+       F1.NotIn=F1
+       names(F0.NotIn)=IsoNamesIn[names(F0)]
+    names(F1.NotIn)=IsoNamesIn[names(F1)]
+       AllF0=c(RealName.F0,F0.NotIn)
+       AllF1=c(RealName.F1,F1.NotIn)
+       AllF0=AllF0[IsoNamesIn]
+       AllF1=AllF1[IsoNamesIn]
+       AllF0[is.na(AllF0)]=0
+       AllF1[is.na(AllF1)]=0
+}
+#############Result############################
+Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ, Z=RealName.Z.output,PoissonZ=z.list.NotIn, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, VarList=RealName.VarList, QList1=RealName.QList1, QList2=RealName.QList2, C1Mean=RealName.C1MeanList, C2Mean=RealName.C2MeanList,C1EstVar=RealName.C1VarList, C2EstVar=RealName.C2VarList, PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f0=AllF0, f1=AllF1,
+                       AllZeroIndex=AllZeroNames)
+}
+
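EBTest above returns, among other things, PPDE (the posterior probability that each unit is differentially expressed) and PoissonZ for units that fell back to the Poisson-like approximation. The simulation code later in this commit (GeneSimu.R) thresholds PPDE either at a hard 0.95 cutoff or with the soft crit_fun threshold; a sketch of both, continuing the earlier EBTest sketch:

    # Thresholding sketch, assuming EBOut from the two-condition sketch above.
    PP=EBOut$PPDE
    soft=max(.5,crit_fun(1-PP,.05))     # soft threshold targeting 5% posterior FDR, as in GeneSimu.R
    DEsoft=names(PP)[which(PP>soft)]
    DEhard=names(PP)[which(PP>.95)]     # hard 0.95 cutoff, also used in GeneSimu.R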
diff --git a/EBSeq/R/GeneMultiSimu.R b/EBSeq/R/GeneMultiSimu.R
new file mode 100644 (file)
index 0000000..e71babd
--- /dev/null
@@ -0,0 +1,111 @@
+GeneMultiSimu<-
+function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions,AllParti, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
+{
+# 2012 feb 1 paired simulation
+if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
+data(GeneEBresultGouldBart2)
+MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
+MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
+
+MeanDVD=MeansC1/MeansC2
+
+if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
+if(!is.null(DVDconstant))DVDLibrary=DVDconstant
+
+# If DVD constant, use constant when generate
+# If not, use DVDLibrary
+
+MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
+
+if(length(NumofGene)!=0)
+NumofGene.raw=NumofGene*2
+
+if(length(NumofGene)==0)
+NumofGene.raw=length(MeanInputraw)
+
+
+PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
+if (length(Phiconstant)==0){
+       PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
+       PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
+       PhiInput=PhiInput.raw[PhiInputNames]
+}
+
+if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
+if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
+if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
+
+# length(DEGeneNumbers) should be num of patterns -1. the others EE
+PatternGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
+names(PatternGeneNumbers)=rownames(AllParti)
+EEWhich=which(rowSums(AllParti)==ncol(AllParti))
+DEGeneNumbers=PatternGeneNumbers[-EEWhich]
+
+
+OutGeneNumbers=round(NumofGene*DEGeneProp/2)*2
+names(OutGeneNumbers)=rownames(AllParti)
+OutDEGeneNumbers=OutGeneNumbers[-EEWhich]
+OutEEGeneNumbers=OutGeneNumbers[EEWhich]
+OutGenePatterns=c(unlist(sapply(1:length(OutDEGeneNumbers),
+                                                         function(i)rep(names(OutDEGeneNumbers)[i],OutDEGeneNumbers[i]),simplify=F)),
+                                 rep(names(OutEEGeneNumbers),OutEEGeneNumbers))
+
+GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
+names(PhiInput)=GeneNames
+names(MeanInput)=GeneNames
+#########
+# data
+#########
+EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
+
+generateDataraw=t(EEList)
+DVDSample=sample(DVDLibrary,sum(DEGeneNumbers),replace=T)
+
+DErawNames=vector("list",length(DEGeneNumbers))
+st=1
+for(i in 1:length(DEGeneNumbers)){
+       for(j in st:(st+DEGeneNumbers[i]-1)){
+               NumGroup=max(AllParti[names(DEGeneNumbers)[i],])
+               SampleGroup=sample(NumGroup,NumGroup)
+               DVDSampleEach=c(1,DVDSample[j]^c(1:(NumGroup-1)))
+               for(k in 1:NumGroup){
+               CondWhich=which(AllParti[names(DEGeneNumbers)[i],]==SampleGroup[k])
+               SampleChoose=which(Conditions%in%colnames(AllParti)[CondWhich])
+               generateDataraw[j,SampleChoose]=sapply(1:length(SampleChoose), function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSampleEach[k]*MeanInput[j]*NormFactor[i])),simplify=T)
+               }}
+               DErawNames[[i]]=GeneNames[st:(st+DEGeneNumbers[i]-1)]
+               st=st+DEGeneNumbers[i]
+}
+
+rownames(generateDataraw)=GeneNames
+MeanVector=rowMeans(generateDataraw)
+VarVector=apply(generateDataraw,1,var)
+MOV.post=MeanVector/VarVector
+EErawNames=GeneNames[!GeneNames%in%unlist(DErawNames)]
+
+
+### Remove MOV=NA
+generateData=generateDataraw
+generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
+InName=rownames(generateData)
+#print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
+## DE
+##################################
+FinalDEInName=sapply(1:length(DEGeneNumbers),function(i)InName[InName%in%DErawNames[[i]]][1:OutDEGeneNumbers[i]],simplify=F)
+FinalEEInName=InName[InName%in%EErawNames][1:OutEEGeneNumbers]
+FinalNames=c(unlist(FinalDEInName),FinalEEInName)
+
+generateData=generateData[FinalNames,]
+########################################
+
+UseName=rownames(generateData)
+phiuse=PhiInput[rownames(generateData)]
+meanuse=MeanInput[rownames(generateData)]
+
+OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
+names(OutName)=rownames(generateData)
+OutData=generateData
+rownames(OutData)=as.vector(OutName)
+names(OutGenePatterns)=as.vector(OutName)
+output=list(data=OutData, Patterns=OutGenePatterns)
+}
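GeneMultiSimu above simulates negative-binomial counts whose means and dispersions are resampled from the GeneEBresultGouldBart2 fit shipped in EBSeq/data/, then imposes the requested expression patterns using fold changes drawn from the empirical library. A sketch of one call (every numeric setting is a placeholder; DEGeneProp supplies one proportion per row of AllParti, with the EE pattern first):

    # Simulation sketch, reusing Cond3 and MultiOut from the EBMultiTest sketch above.
    Patterns=MultiOut$AllParti
    Sim=GeneMultiSimu(DVDqt1=.9,DVDqt2=.95,Conditions=Cond3,AllParti=Patterns,
                      NumofSample=6,NumofGene=1000,
                      DEGeneProp=c(.7,.1,.1,.05,.05),   # one entry per pattern; EE first (assumes 5 patterns for 3 conditions)
                      Phi.qt1=.25,Phi.qt2=.75)
    dim(Sim$data)           # simulated counts matrix
    table(Sim$Patterns)     # true pattern of each simulated gene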
diff --git a/EBSeq/R/GeneSimu.R b/EBSeq/R/GeneSimu.R
new file mode 100644 (file)
index 0000000..1125e32
--- /dev/null
@@ -0,0 +1,241 @@
+GeneSimu<-
+function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
+{
+# 2012 feb 1 paired simulation
+if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
+data(GeneEBresultGouldBart2)
+MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
+MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
+
+MeanDVD=MeansC1/MeansC2
+
+if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
+
+
+# If DVD constant, use constant when generate
+# If not, use DVDLibrary
+
+MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
+
+if(length(NumofGene)!=0)
+NumofGene.raw=NumofGene*2
+
+if(length(NumofGene)==0)
+NumofGene.raw=length(MeanInputraw)
+
+
+PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
+if (length(Phiconstant)==0){
+       PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
+       PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
+       PhiInput=PhiInput.raw[PhiInputNames]
+}
+
+if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
+if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
+if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
+
+DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
+GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
+names(PhiInput)=GeneNames
+names(MeanInput)=GeneNames
+#########
+# data
+#########
+EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
+
+
+
+
+    generateDataraw=t(EEList)
+       if(length(DVDconstant)==0){
+               DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
+               for(j in 1:NumofGene.raw){
+                if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
+               if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
+}
+        }
+       if(length(DVDconstant)!=0){
+        for(j in 1:NumofGene.raw){
+             if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=PhiInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
+             if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=PhiInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
+               }
+       }
+rownames(generateDataraw)=GeneNames
+MeanVector=rowMeans(generateDataraw)
+VarVector=apply(generateDataraw,1,var)
+MOV.post=MeanVector/VarVector
+
+
+
+### Remove MOV=NA
+generateData=generateDataraw
+generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
+#print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
+## DE
+NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
+
+if(length(NumofGene)!=0)
+    generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
+
+
+UseName=rownames(generateData)
+phiuse=PhiInput[rownames(generateData)]
+meanuse=MeanInput[rownames(generateData)]
+
+
+TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
+
+if(OnlyData==T){
+       OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
+       names(OutName)=rownames(generateData)
+       OutData=generateData
+       rownames(OutData)=as.vector(OutName)
+       OutTrueDE=as.vector(OutName[TrueDE])
+       output=list(data=OutData, TrueDE=OutTrueDE)
+       return(output)
+       }
+## DESeq
+
+cds=newCountDataSet(round(generateData),Conditions)
+cds=estimateSizeFactors(cds)
+Sizes=sizeFactors(cds)
+if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
+else  cds=estimateVarianceFunctions(cds, method="blind")
+
+res=nbinomTest(cds, "1", "2")
+ResAdj=res$padj
+names(ResAdj)=res$id
+SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
+print(paste("DESeq found",length(SmallPValueName)))
+print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
+
+print("DESeq Size factors")
+print(Sizes)
+
+NewData=generateData
+
+
+#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
+#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
+
+EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
+#library(EBarrays)
+
+#EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
+
+
+zlist.unlist=EBresult[[5]]
+fdr=max(.5,crit_fun(1-zlist.unlist,.05))
+EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
+EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
+zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
+print(paste("Soft EB Poi",length(EBDE.Poi)))
+EBDE=c(EBDE, EBDE.Poi)
+print(paste("Soft EB found",length(EBDE)))
+print(paste("In True DE",sum(EBDE%in%TrueDE)))
+
+EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
+EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
+print(paste("Hard Poi found",length(EBDE95.Poi)))
+EBDE95=c(EBDE95, EBDE95.Poi)
+print(paste("Hard EB found" ,length(EBDE95)))
+print(paste("In True DE",sum(EBDE95%in%TrueDE)))
+
+### edgeR
+library(edgeR,lib.loc="~/RCODE")
+edgeRList.b2=DGEList(NewData,group=Conditions)
+if(length(Phiconstant)==1){
+       edgeRList.b2=estimateCommonDisp(edgeRList.b2)
+       edgeRRes.b2=exactTest(edgeRList.b2)
+}
+if(length(Phiconstant)==0){
+       edgeRList.b2=estimateCommonDisp(edgeRList.b2)   
+       edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
+       edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
+}
+edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
+edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
+names(edgeRPvalue.b2)=rownames(NewData)
+edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
+print(paste("edgeR found",length(edgeRSmallpvalue)))
+print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
+
+### Bayseq
+library(baySeq, lib.loc="~/RCODE")
+library(snow, lib.loc="~/RCODE")
+cl <- makeCluster(4, "SOCK")
+groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
+CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
+CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
+CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
+bayseqPost=CDPost.NBML@posteriors
+rownames(bayseqPost)=rownames(NewData)
+bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
+print(paste("bayseq found",length(bayseqDE)))
+print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
+
+
+### BBSeq
+library("BBSeq",lib.loc="~/RCODE")
+CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
+output=free.estimate(NewData,CondM)
+beta.free = output$betahat.free
+p.free = output$p.free
+psi.free = output$psi.free
+names(p.free)=rownames(NewData)
+# Top p free?
+#out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
+#p.constrained = out.model$p.model
+p.free.adj=p.adjust(p.free, method="BH")
+
+BBDE=names(p.free.adj)[which(p.free.adj<.05)]
+print(paste("BBSeq found",length(BBDE)))
+print(paste("In True DE",sum(BBDE%in%TrueDE)))
+
+
+#########################
+# Generate table
+Table=matrix(rep(0,12),ncol=2)
+colnames(Table)=c("Power","FDR")
+rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
+
+       Length=length(TrueDE)
+       Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
+       Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
+       Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
+       Table[4,1]=sum(BBDE%in%TrueDE)/Length
+       Table[5,1]=sum(EBDE%in%TrueDE)/Length
+       Table[6,1]=sum(EBDE95%in%TrueDE)/Length
+       Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
+       Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
+       Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
+       Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
+       Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
+       Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
+       Table=round(Table,2)
+
+ValueTable=matrix(rep(0,12),ncol=2)
+colnames(ValueTable)=c("Power","FDR")
+rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
+       ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
+       ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
+       ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
+       ValueTable[4,1]=sum(BBDE%in%TrueDE)
+       ValueTable[5,1]=sum(EBDE%in%TrueDE)
+       ValueTable[6,1]=sum(EBDE95%in%TrueDE)
+       ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
+       ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
+       ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
+       ValueTable[4,2]=sum(!BBDE%in%TrueDE)
+       ValueTable[5,2]=sum(!EBDE%in%TrueDE)
+       ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
+       
+if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
+if(length(DVDconstant)!=0) DVD=DVDconstant
+if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
+if(length(Phiconstant)!=0) Phi=Phiconstant
+OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult
+,DESeqDE=SmallPValueName, edgeRDE=edgeRSmallpvalue, bayDE=bayseqDE, BBDE=BBDE, EBDE95=EBDE95)
+}
+
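For reference, the Power and FDR columns filled in above reduce to two set operations on each method's significant list versus the simulated truth. A minimal stand-alone sketch with hypothetical gene names:

found  <- c("G_1", "G_2", "G_7")            # hypothetical calls from one method
TrueDE <- c("G_1", "G_2", "G_3", "G_4")     # hypothetical simulated DE genes
Power <- sum(found %in% TrueDE) / length(TrueDE)   # 0.5
FDR   <- sum(!found %in% TrueDE) / length(found)   # 1/3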
diff --git a/EBSeq/R/GeneSimuAt.R b/EBSeq/R/GeneSimuAt.R
new file mode 100644 (file)
index 0000000..8b5f7a2
--- /dev/null
@@ -0,0 +1,291 @@
+GeneSimuAt<-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
+{
+# 2012 feb 1 
+# paired level simulation
+
+data(GeneEBresultGouldBart2)
+if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
+
+#MeansC1=rowMeans(GeneV.norm1.NZ.b2[,1:4])
+#MeansC2=rowMeans(GeneV.norm1.NZ.b2[,5:8])
+MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
+MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
+
+MeanDVD=MeansC1/MeansC2
+
+if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
+
+
+# If DVD constant, use constant when generate
+# If not, use DVDLibrary
+
+MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
+#MeanInputraw=rowMeans(GeneV.norm1.NZ.b2)
+#Var1=apply(GeneV.norm1.NZ.b2[,1:4],1,var)
+#Var2=apply(GeneV.norm1.NZ.b2[,5:8],1,var)
+#VarInput=(Var1 + Var2)/2
+#If NumofGene=NULL, use the empirical number of genes
+#If not NULL, subsample the data down to NumofGene genes
+NumofGene.raw=length(MeanInputraw)
+
+# Here phi denotes r, i.e. 1/phi', where sigma^2 = mu*(1 + mu*phi')
+# In the negative binomial rnbinom(),
+# the size argument is 1/phi', e.g.:
+# qq=rnbinom(100,size=100,mu=10)
+# var(qq)
+#[1] 10.93687
+# qq=rnbinom(100,size=10,mu=10)
+# var(qq)
+#[1] 24.01404
+
+#PhiInput.raw=(MeanInputraw^2) / (VarInput - MeanInputraw)
+PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
+if (length(Phiconstant)==0){
+       PhiLibrary=PhiInput.raw[1/(PhiInput.raw)<quantile(1/(PhiInput.raw),Phi.qt2) & 1/(PhiInput.raw)>quantile(1/(PhiInput.raw),Phi.qt1)]
+    PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
+       PhiInput=PhiInput.raw[PhiInputNames]
+
+
+}
+
+if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
+if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
+if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
+
+# Make the number of DE genes a multiple of 2
+DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
+GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
+names(PhiInput)=GeneNames
+names(MeanInput)=GeneNames
+
+#########
+# data
+#########
+EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j])))
+
+
+
+
+    generateDataraw=t(EEList)
+       if(length(DVDconstant)==0){
+               DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
+               for(j in 1:NumofGene.raw){
+                if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
+               if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=PhiInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
+}
+        }
+       if(length(DVDconstant)!=0){
+        for(j in 1:NumofGene.raw){
+             if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)rnbinom(1, size=PhiInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
+             if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=PhiInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
+               }
+       }
+rownames(generateDataraw)=GeneNames
+MeanVector=rowMeans(generateDataraw)
+VarVector=apply(generateDataraw,1,var)
+MOV.post=MeanVector/VarVector
+
+
+
+### Remove MOV=NA
+generateData=generateDataraw
+generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
+print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
+## DE
+NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
+
+if(length(NumofGene)!=0)
+    generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
+
+
+UseName=rownames(generateData)
+
+TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
+phiuse=PhiInput[rownames(generateData)]
+meanuse=MeanInput[rownames(generateData)]
+
+#ArtiNames=rownames(generateData)[(DEGeneNumbers+1):(2*DEGeneNumbers)]
+#Noise=sample(c(1,ncol(generateData)),DEGeneNumbers,replace=T)
+TrueDELength=length(TrueDE)
+AtLoc=sample(c(1:length(Conditions)), TrueDELength, replace=T)
+AtFold=sample(c(4,6,8,10),TrueDELength, replace=T)
+
+AtNames_Level=vector("list",4)
+names(AtNames_Level)=c(4,6,8,10)
+for(i in 1:TrueDELength){
+generateData[(TrueDELength+i),AtLoc[i]]=generateData[(TrueDELength+i),AtLoc[i]]*AtFold[i]
+AtNames_Level[[as.character(AtFold[i])]]=c(AtNames_Level[[as.character(AtFold[i])]],rownames(generateData)[TrueDELength+i])
+}
+
+
+if(OnlyData==T){
+       OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
+       names(OutName)=rownames(generateData)
+    OutData=generateData
+    rownames(OutData)=as.vector(OutName)
+       OutAt=as.vector(OutName[unlist(AtNames_Level)])
+       OutTrueDE=as.vector(OutName[TrueDE])
+    output=list(data=OutData, TrueDE=OutTrueDE,Outliers=OutAt)
+       return(output)
+       }
+## DESeq
+
+cds=newCountDataSet(round(generateData),Conditions)
+cds=estimateSizeFactors(cds)
+Sizes=sizeFactors(cds)
+if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
+else  cds=estimateVarianceFunctions(cds, method="blind")
+
+res=nbinomTest(cds, "1", "2")
+ResAdj=res$padj
+names(ResAdj)=res$id
+SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
+print(paste("DESEq found",length(SmallPValueName)))
+print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
+
+print("DESeq Size factors")
+print(Sizes)
+
+## DESeq each group
+## Ours
+NewData=generateData
+
+
+#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
+#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
+
+EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
+
+#EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
+
+
+zlist.unlist=EBresult[[5]]
+fdr=max(.5,crit_fun(1-zlist.unlist,.05))
+EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
+EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
+zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
+print(paste("Soft EB Poi",length(EBDE.Poi)))
+EBDE=c(EBDE, EBDE.Poi)
+print(paste("Soft EB found",length(EBDE)))
+print(paste("In True DE",sum(EBDE%in%TrueDE)))
+
+EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
+EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
+print(paste("Hard Poi found",length(EBDE95.Poi)))
+EBDE95=c(EBDE95, EBDE95.Poi)
+print(paste("Hard EB found" ,length(EBDE95)))
+print(paste("In True DE",sum(EBDE95%in%TrueDE)))
+
+### edgeR
+library(edgeR,lib.loc="~/RCODE")
+edgeRList.b2=DGEList(NewData,group=Conditions)
+if(length(Phiconstant)==1){
+       edgeRList.b2=estimateCommonDisp(edgeRList.b2)
+       edgeRRes.b2=exactTest(edgeRList.b2)
+}
+if(length(Phiconstant)==0){
+       edgeRList.b2=estimateCommonDisp(edgeRList.b2)   
+       edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
+       edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
+}
+edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
+edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
+names(edgeRPvalue.b2)=rownames(NewData)
+edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
+print(paste("edgeR found",length(edgeRSmallpvalue)))
+print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
+
+### Bayseq
+library(baySeq, lib.loc="~/RCODE")
+library(snow, lib.loc="~/RCODE")
+cl <- makeCluster(4, "SOCK")
+groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
+CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
+CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
+CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
+bayseqPost=CDPost.NBML@posteriors
+rownames(bayseqPost)=rownames(NewData)
+bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
+print(paste("bayseq found",length(bayseqDE)))
+print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
+
+
+### BBSeq
+library("BBSeq",lib.loc="~/RCODE")
+CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
+output=free.estimate(NewData,CondM)
+beta.free = output$betahat.free
+p.free = output$p.free
+psi.free = output$psi.free
+names(p.free)=rownames(NewData)
+p.free.adj=p.adjust(p.free,method="BH")
+# Top p free?
+#out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
+#p.constrained = out.model$p.model
+BBDE=names(p.free.adj)[which(p.free.adj<.05)]
+print(paste("BBSeq found",length(BBDE)))
+print(paste("In True DE",sum(BBDE%in%TrueDE)))
+
+
+#########################
+# Generate table
+Table=matrix(rep(0,12),ncol=2)
+colnames(Table)=c("Power","FDR")
+rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
+
+       Length=length(TrueDE)
+       Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
+       Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
+       Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
+       Table[4,1]=sum(BBDE%in%TrueDE)/Length
+       Table[5,1]=sum(EBDE%in%TrueDE)/Length
+       Table[6,1]=sum(EBDE95%in%TrueDE)/Length
+       Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
+       Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
+       Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
+       Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
+       Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
+       Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
+       Table=round(Table,2)
+
+ValueTable=matrix(rep(0,12),ncol=2)
+colnames(ValueTable)=c("Power","FDR")
+rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
+       ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
+       ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
+       ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
+       ValueTable[4,1]=sum(BBDE%in%TrueDE)
+       ValueTable[5,1]=sum(EBDE%in%TrueDE)
+       ValueTable[6,1]=sum(EBDE95%in%TrueDE)
+       ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
+       ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
+       ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
+       ValueTable[4,2]=sum(!BBDE%in%TrueDE)
+       ValueTable[5,2]=sum(!EBDE%in%TrueDE)
+       ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
+
+
+AtFoundTable=matrix(rep(0,24),ncol=4)
+colnames(AtFoundTable)=paste("Level",c(1:4),sep="_")
+rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
+for(i in 1:4){
+       AtFoundTable[1,i]=sum(SmallPValueName%in%AtNames_Level[[i]])
+       AtFoundTable[2,i]=sum(edgeRSmallpvalue%in%AtNames_Level[[i]])
+       AtFoundTable[3,i]=sum(bayseqDE%in%AtNames_Level[[i]])
+       AtFoundTable[4,i]=sum(BBDE%in%AtNames_Level[[i]])
+       AtFoundTable[5,i]=sum(EBDE%in%AtNames_Level[[i]])
+       AtFoundTable[6,i]=sum(EBDE95%in%AtNames_Level[[i]])     
+       }
+
+       
+if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
+if(length(DVDconstant)!=0) DVD=DVDconstant
+if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
+if(length(Phiconstant)!=0) Phi=Phiconstant
+OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult,  AtFoundTable= AtFoundTable,Outliers=AtNames_Level)
+
+
+
+}
+
+
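GeneSimuAt differs from GeneSimu by spiking single-sample outliers into the genes just past the DE block: for each such gene one randomly chosen sample is multiplied by a fold drawn from {4, 6, 8, 10}, and the affected names are tracked per fold level in AtNames_Level. A stand-alone sketch of that perturbation (the matrix below is hypothetical):

counts <- matrix(rnbinom(50, mu = 100, size = 5), nrow = 10)    # 10 genes x 5 samples
loc  <- sample(ncol(counts), nrow(counts), replace = TRUE)       # one perturbed sample per gene
fold <- sample(c(4, 6, 8, 10), nrow(counts), replace = TRUE)     # fold levels, as in AtFold above
for (i in seq_len(nrow(counts))) counts[i, loc[i]] <- counts[i, loc[i]] * fold[i]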
diff --git a/EBSeq/R/GetData.R b/EBSeq/R/GetData.R
new file mode 100644 (file)
index 0000000..ddccf38
--- /dev/null
@@ -0,0 +1,35 @@
+GetData <-
+function(path,Name1,Name2,type)
+{
+Data=vector("list",8)
+Filenames=NULL
+Tablenames=NULL
+for (name in 1:4)
+       {
+               if (type=="I")
+                       Filenames=c(Filenames,paste(path,Name1,name,"_isoform_nus.tab",sep=""))  
+               if (type=="G")  
+                       Filenames=c(Filenames,paste(path,Name1,name,"_gene_nus.tab",sep=""))  
+               Tablenames=c(Tablenames,paste(Name1,name,sep=""))
+       }
+for (name in 1:4)
+       {
+               if (type=="I")
+                       Filenames=c(Filenames,paste(path,Name2,name,"_isoform_nus.tab",sep=""))
+               if (type=="G")
+                       Filenames=c(Filenames,paste(path,Name2,name,"_gene_nus.tab",sep=""))
+               Tablenames=c(Tablenames,paste(Name2,name,sep=""))
+       }
+
+
+names(Data)=Tablenames
+for (file in 1:8)
+       {
+               temp=read.table(Filenames[file],header=T)
+               temp2=as.matrix(temp[-1])
+               rownames(temp2)=as.vector(as.matrix(temp[1]))
+               Data[[file]]=temp2
+       }
+       Data
+}
+
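A hedged usage sketch for GetData; the path and condition prefixes below are hypothetical, and each *_gene_nus.tab (or *_isoform_nus.tab) file is assumed to have a feature-name column followed by numeric columns, as the read.table() call above expects:

# reads <path><Name1>1..4_gene_nus.tab and <path><Name2>1..4_gene_nus.tab
Data <- GetData(path = "rsem_out/", Name1 = "control", Name2 = "treated", type = "G")
str(Data)    # a list of 8 matrices named control1..control4, treated1..treated4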
diff --git a/EBSeq/R/GetMultiPP.R b/EBSeq/R/GetMultiPP.R
new file mode 100644 (file)
index 0000000..4ae2882
--- /dev/null
@@ -0,0 +1,6 @@
+GetMultiPP <- function(EBout){
+       PP=EBout$PPDE   
+       MAP=colnames(EBout$f)[apply(EBout$f,1,which.max)]
+       AllParti=EBout$AllParti
+       out=list(PP=PP, MAP=MAP,Patterns=AllParti)
+}
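GetMultiPP condenses an EBMultiTest result into per-gene posterior probabilities, the maximum a posteriori pattern (the column of $f with the largest value), and the pattern definitions. A usage sketch, assuming MultiOut is an existing EBMultiTest output carrying the PPDE, f and AllParti fields used above:

MultiPP <- GetMultiPP(MultiOut)
head(MultiPP$PP)       # posterior probabilities per gene
table(MultiPP$MAP)     # number of genes assigned to each pattern
MultiPP$Patterns       # the candidate patterns themselves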
diff --git a/EBSeq/R/GetNg.R b/EBSeq/R/GetNg.R
new file mode 100644 (file)
index 0000000..9312f9a
--- /dev/null
@@ -0,0 +1,10 @@
+GetNg<- function(IsoformName, GeneName){
+       GeneNg = tapply(IsoformName, GeneName, length)
+       IsoformNg = GeneNg[GeneName]
+       names(IsoformNg) = IsoformName
+       GeneNgTrun=GeneNg
+       GeneNgTrun[GeneNgTrun>3]=3
+       IsoformNgTrun=IsoformNg
+       IsoformNgTrun[IsoformNgTrun>3]=3
+       out=list( GeneNg=GeneNg, GeneNgTrun=GeneNgTrun, IsoformNg=IsoformNg, IsoformNgTrun=IsoformNgTrun)
+       }
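GetNg counts how many isoforms each gene has and truncates the count at 3, which gives the Ng = 1, 2, 3-or-more grouping used for isoform-level variance modeling. A small sketch with made-up identifiers:

IsoNames  <- paste0("I", 1:6)
GeneNames <- c("G1", "G1", "G2", "G3", "G3", "G3")    # G1: 2 isoforms, G2: 1, G3: 3
Ng <- GetNg(IsoNames, GeneNames)
Ng$GeneNgTrun       # G1=2, G2=1, G3=3
Ng$IsoformNgTrun    # the same (truncated) counts, repeated per isoform and named I1..I6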
diff --git a/EBSeq/R/GetPP.R b/EBSeq/R/GetPP.R
new file mode 100644 (file)
index 0000000..0c1eeb9
--- /dev/null
@@ -0,0 +1,4 @@
+GetPP <- function(EBout){
+       #PP=c(EBout[[5]], EBout[[6]])
+       PP=EBout$PPDE   
+}
diff --git a/EBSeq/R/GetPatterns.R b/EBSeq/R/GetPatterns.R
new file mode 100644 (file)
index 0000000..436df74
--- /dev/null
@@ -0,0 +1,12 @@
+GetPatterns<-function(Conditions){
+    if(!is.factor(Conditions))Conditions=as.factor(Conditions)
+       NumCond=nlevels(Conditions)
+       CondLevels=levels(Conditions)
+    #library(blockmodeling)
+    AllPartiList=sapply(1:NumCond,function(i)nkpartitions(NumCond,i))
+    AllParti=do.call(rbind,AllPartiList)
+       colnames(AllParti)=CondLevels
+       rownames(AllParti)=paste("Pattern",1:nrow(AllParti),sep="")
+       AllParti
+
+}
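GetPatterns enumerates every way of grouping the conditions into equal-mean blocks; it relies on nkpartitions(), which appears to come from the blockmodeling package noted in the commented library() call. For three conditions the enumeration yields five patterns (the Bell number B_3):

Conditions <- rep(c("C1", "C2", "C3"), each = 2)
Patterns <- GetPatterns(Conditions)
dim(Patterns)    # 5 patterns x 3 conditions
Patterns         # rows Pattern1..Pattern5, from all means equal to all means different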
diff --git a/EBSeq/R/IsoSimu.R b/EBSeq/R/IsoSimu.R
new file mode 100644 (file)
index 0000000..4c58ed1
--- /dev/null
@@ -0,0 +1,122 @@
+IsoSimu=function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofIso=NULL, DEIsoProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL,NormFactor=NULL, OnlyData=T)
+{
+# 2012 feb 1 
+# paired simulation
+data(IsoEBresultGouldBart2)
+if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
+
+MeansC1=IsoEBresultGouldBart2$C1Mean
+MeansC2=IsoEBresultGouldBart2$C2Mean
+MeanDVD=sapply(1:9,function(i) MeansC1[[i]]/MeansC2[[i]])
+# DVD library with each group here
+if (length(DVDconstant)==0) DVDLibrary= unlist(MeanDVD)[unlist(MeanDVD)<quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt2) & unlist(MeanDVD)>quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt1)]
+
+
+
+# If DVD constant, use constant when generate
+# If not, use DVDLibrary
+
+VarInput=IsoEBresultGouldBart2$VarList
+VarInputNg=list(VarInput[[1]],unlist(VarInput[c(2,4,6,8)]),unlist(VarInput[c(3,5,7,9)]))
+#If NumofIso=NULL, use the empirical number of isoforms in each Ng group
+#If not NULL, supply a 3-vector (one count per Ng group)
+if(length(NumofIso)==0) NumofIso.raw=sapply(1:3,function(i)length(VarInputNg[[i]]))
+if(length(NumofIso)!=0) NumofIso.raw=NumofIso*2
+
+PhiInput.raw=IsoEBresultGouldBart2$RList
+PhiInput.raw.Ng=list(PhiInput.raw[[1]],unlist(PhiInput.raw[c(2,4,6,8)]),unlist(PhiInput.raw[c(3,5,7,9)]))
+
+
+if (length(Phiconstant)==0){
+       PhiLibrary=sapply(1:3,function(i)PhiInput.raw.Ng[[i]][1/PhiInput.raw.Ng[[i]]<quantile(1/PhiInput.raw.Ng[[i]],Phi.qt2) & 1/PhiInput.raw.Ng[[i]]>quantile(1/PhiInput.raw.Ng[[i]],Phi.qt1)],simplify=F)
+       PhiIndex=sapply(1:3, function(i)sample(names(PhiLibrary[[i]]),NumofIso.raw[[i]],replace=T),simplify=F)
+       PhiInputNg=sapply(1:3, function(i)PhiLibrary[[i]][PhiIndex[[i]]])
+}
+if (length(Phiconstant)!=0)PhiInputNg=sapply(1:3,function(i)rep(Phiconstant,NumofIso.raw[[i]]),simplify=F)
+
+# Make the number of DE isoforms in each Ng group a multiple of 2
+DEIsoNumbers=round(NumofIso.raw*DEIsoProp/2)*2
+IsoNames=sapply(1:3,function(i)paste("I",i,c(1:NumofIso.raw[i]),sep="_"),simplify=F)
+MeanNg=list(IsoEBresultGouldBart2$MeanList[[1]],unlist(IsoEBresultGouldBart2$MeanList[c(2,4,6,8)]),
+unlist(IsoEBresultGouldBart2$MeanList[c(3,5,7,9)]))
+MeanInputNg=sapply(1:3, function(i)MeanNg[[i]][PhiIndex[[i]]])
+
+for(i in 1:3){
+       names(MeanInputNg[[i]])=IsoNames[[i]]
+       names(PhiInputNg[[i]])=IsoNames[[i]]
+       }
+
+##############################
+# Get Ng version to every one
+##############################
+
+
+#########
+# data
+#########
+EEList=sapply(1:3,function(i) sapply(1:NumofIso.raw[[i]], function(j)sapply(1:NumofSample,function(h) rnbinom(1,mu=MeanInputNg[[i]][j]*NormFactor[h], size=PhiInputNg[[i]][j]))),simplify=F)
+
+
+generateDataraw=vector("list",3)
+MeanVector=vector("list",3)
+VarVector=vector("list",3)
+MOV.post=vector("list",3)
+
+
+for(g in 1:3){
+    generateDataraw[[g]]=t(EEList[[g]][,1:NumofIso.raw[g]])
+       if(length(DVDconstant)==0){
+               for(j in 1:NumofIso.raw[g]){
+                if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu=sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h])), simplify=T)
+               if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu= sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
+}
+        }
+       if(length(DVDconstant)!=0){
+        for(j in 1:NumofIso.raw[g]){
+             if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
+             if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
+               }
+       }
+rownames(generateDataraw[[g]])=IsoNames[[g]][1:NumofIso.raw[g]]
+MeanVector[[g]]=rowMeans(generateDataraw[[g]])
+VarVector[[g]]=apply(generateDataraw[[g]],1,var)
+MOV.post[[g]]=MeanVector[[g]]/VarVector[[g]]
+}
+
+
+### Remove MOV=NA
+generateData=generateDataraw
+for (i in 1:3) generateData[[i]]=generateData[[i]][!is.na(MOV.post[[i]]),] 
+#print(paste("NA MOV's",sum(is.na(unlist(MOV.post)))))
+NumDENow=sapply(1:3, function(i)sum(rownames(generateData[[i]])%in%rownames(generateDataraw[[i]])[1:DEIsoNumbers[i]]))
+
+if(length(NumofIso)!=0){
+           for(i in 1:3)
+               generateData[[i]]=generateData[[i]][c(sample(1:NumDENow[i],round(NumofIso[i]*DEIsoProp),replace=F),round( (dim(generateData[[i]])[1]+1-NumofIso[i]*(1-DEIsoProp)):dim(generateData[[i]])[1])),]
+}
+generateDataNg=generateData
+
+## DE
+UseName=sapply(1:3, function(i)rownames(generateData[[i]]),simplify=F)
+TrueDE=sapply(1:3, function(i)UseName[[i]][UseName[[i]] %in% rownames(generateDataraw[[i]])[1:DEIsoNumbers[i]]],simplify=F)
+TrueDE.unlist=do.call(c,TrueDE)
+
+phiuse=sapply(1:3,function(i)PhiInputNg[[i]][UseName[[i]]])
+meanuse=sapply(1:3,function(i)MeanInputNg[[i]][UseName[[i]]])
+
+#if(OnlyData==T){
+    
+OutName=sapply(1:3,function(i)paste("Iso",i,c(1:nrow(generateDataNg[[i]])),sep="_"))
+for(i in 1:3)names(OutName[[i]])=rownames(generateDataNg[[i]])
+OutData=generateDataNg
+for(i in 1:3)rownames(OutData[[i]])=as.vector(OutName[[i]])
+OutTrueDE=as.vector(unlist(OutName)[TrueDE.unlist])
+output=list(data=OutData, TrueDE=OutTrueDE)
+
+
+#output=list(data=generateDataNg, TrueDE=TrueDE.unlist)
+return(output)
+#    }
+# Now only OnlyData=T version
+}
+
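As in the gene-level simulation, counts are drawn from rnbinom() with an empirical mean and size = r, so the implied variance is mu + mu^2/r. A quick numerical check of that relationship:

mu <- 100; r <- 5
x <- rnbinom(1e5, mu = mu, size = r)
mean(x)                   # close to 100
c(var(x), mu + mu^2 / r)  # both close to 2100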
diff --git a/EBSeq/R/IsoSimuAt.R b/EBSeq/R/IsoSimuAt.R
new file mode 100644 (file)
index 0000000..479518d
--- /dev/null
@@ -0,0 +1,128 @@
+IsoSimuAt<-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofIso=NULL, DEIsoProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL,NormFactor=NULL, OnlyData=T)
+{
+#Ng paired 2012 feb 1
+if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
+data(IsoEBresultGouldBart2)
+
+MeansC1=IsoEBresultGouldBart2$C1Mean
+MeansC2=IsoEBresultGouldBart2$C2Mean
+MeanDVD=sapply(1:9,function(i) MeansC1[[i]]/MeansC2[[i]])
+if (length(DVDconstant)==0) DVDLibrary= unlist(MeanDVD)[unlist(MeanDVD)<quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt2) & unlist(MeanDVD)>quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt1)]
+
+
+
+
+VarInput=IsoEBresultGouldBart2$VarList
+VarInputNg=list(VarInput[[1]],unlist(VarInput[c(2,4,6,8)]),unlist(VarInput[c(3,5,7,9)]))
+
+if(length(NumofIso)==0) NumofIso=sapply(1:3,function(i)length(VarInputNg[[i]]))
+PhiInput.raw=IsoEBresultGouldBart2$RList
+PhiInput.raw.Ng=list(PhiInput.raw[[1]],unlist(PhiInput.raw[c(2,4,6,8)]),unlist(PhiInput.raw[c(3,5,7,9)]))
+
+
+if (length(Phiconstant)==0){
+       PhiLibrary=sapply(1:3,function(i)PhiInput.raw.Ng[[i]][1/PhiInput.raw.Ng[[i]]<quantile(1/PhiInput.raw.Ng[[i]],Phi.qt2) & 1/PhiInput.raw.Ng[[i]]>quantile(1/PhiInput.raw.Ng[[i]],Phi.qt1)],simplify=F)
+       PhiIndex=sapply(1:3, function(i)sample(names(PhiLibrary[[i]]),NumofIso[[i]],replace=T),simplify=F)
+       PhiInputNg=sapply(1:3, function(i)PhiLibrary[[i]][PhiIndex[[i]]])
+}
+if (length(Phiconstant)!=0)PhiInputNg=sapply(1:3,function(i)rep(Phiconstant,NumofIso[[i]]),simplify=F)
+
+# Make the number of DE isoforms in each Ng group a multiple of 2
+DEIsoNumbers=round(NumofIso*DEIsoProp/2)*2
+IsoNames=sapply(1:3,function(i)paste("I",i,c(1:NumofIso[i]),sep="_"),simplify=F)
+MeanNg=list(IsoEBresultGouldBart2$MeanList[[1]],unlist(IsoEBresultGouldBart2$MeanList[c(2,4,6,8)]),
+unlist(IsoEBresultGouldBart2$MeanList[c(3,5,7,9)]))
+MeanInputNg=sapply(1:3, function(i)MeanNg[[i]][PhiIndex[[i]]])
+
+for(i in 1:3){
+       names(MeanInputNg[[i]])=IsoNames[[i]]
+       names(PhiInputNg[[i]])=IsoNames[[i]]
+       }
+
+#########
+# data
+#########
+EEList=sapply(1:3,function(i) sapply(1:NumofIso[[i]], function(j)sapply(1:NumofSample,function(h) rnbinom(1,mu=MeanInputNg[[i]][j]*NormFactor[h], size=PhiInputNg[[i]][j]))),simplify=F)
+
+
+generateDataraw=vector("list",3)
+MeanVector=vector("list",3)
+VarVector=vector("list",3)
+MOV.post=vector("list",3)
+
+
+for(g in 1:3){
+    generateDataraw[[g]]=t(EEList[[g]][,1:NumofIso[g]])
+       if(length(DVDconstant)==0){
+               for(j in 1:NumofIso[g]){
+                if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)rnbinom(1, size=PhiInputNg[[g]][j], mu=sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h]), simplify=T)
+               if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) rnbinom(1, size=PhiInputNg[[g]][j], mu= sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
+}
+        }
+       if(length(DVDconstant)!=0){
+        for(j in 1:NumofIso[g]){
+             if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
+             if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
+               }
+       }
+rownames(generateDataraw[[g]])=IsoNames[[g]][1:NumofIso[g]]
+MeanVector[[g]]=rowMeans(generateDataraw[[g]])
+VarVector[[g]]=apply(generateDataraw[[g]],1,var)
+MOV.post[[g]]=MeanVector[[g]]/VarVector[[g]]
+}
+
+
+### Remove MOV=NA
+generateData=generateDataraw
+for (i in 1:3) generateData[[i]]=generateData[[i]][!is.na(MOV.post[[i]]),] 
+print(paste("NA MOV's",sum(is.na(unlist(MOV.post)))))
+#tmpmean=sapply(1:9,function(i)rowMeans(generateData[[i]]))
+#tmpvar=sapply(1:9,function(i)apply(generateData[[i]],1,var))
+#source("plot_functions.R")
+#CheckSimuNg(tmpmean,tmpvar,c(-1,5),c(-1,7))
+
+
+
+
+## DE
+UseName=sapply(1:3, function(i)rownames(generateData[[i]]),simplify=F)
+TrueDE=sapply(1:3, function(i)UseName[[i]][UseName[[i]] %in% rownames(generateData[[i]])[1:DEIsoNumbers[i]]],simplify=F)
+TrueDE.unlist=do.call(c,TrueDE)
+
+TrueDELength=sapply(TrueDE,length)
+
+AtNames_Level=vector("list",4)
+AtLoc=vector("list",3)
+AtFold=vector("list",3)
+names(AtNames_Level)=c(4,6,8,10)
+
+
+for(j in 1:3){
+AtLoc[[j]]=sample(c(1:length(Conditions)), TrueDELength[j], replace=T)
+AtFold[[j]]=sample(c(4,6,8,10),TrueDELength[j], replace=T)
+
+for(i in 1:TrueDELength[j]){
+
+generateData[[j]][(TrueDELength[j]+i),AtLoc[[j]][i]]=generateData[[j]][(TrueDELength[j]+i),AtLoc[[j]][i]]*AtFold[[j]][i]
+AtNames_Level[[as.character(AtFold[[j]][i])]]=c(AtNames_Level[[as.character(AtFold[[j]][i])]],rownames(generateData[[j]])[TrueDELength[j]+i])
+}
+}
+phiuse=sapply(1:3,function(i)PhiInputNg[[i]][UseName[[i]]])
+meanuse=sapply(1:3,function(i)MeanInputNg[[i]][UseName[[i]]])
+
+#generateDataNg=list(generateData[[1]], do.call(rbind,generateData[c(2,4,6,8)]), do.call(rbind,generateData[c(3,5,7,9)]))
+generateDataNg=generateData
+
+#if(OnlyData==T){
+
+OutName=sapply(1:3,function(i)paste("Iso",i,c(1:nrow(generateDataNg[[i]])),sep="_"))
+for(i in 1:3)names(OutName[[i]])=rownames(generateDataNg[[i]])
+OutData=generateDataNg
+for(i in 1:3)rownames(OutData[[i]])=as.vector(OutName[[i]])
+OutTrueDE=as.vector(unlist(OutName)[TrueDE.unlist])
+OutAt=as.vector(unlist(OutName)[unlist(AtNames_Level)])
+
+output=list(data=OutData, TrueDE=OutTrueDE, Outliers=OutAt)
+#      return(output)
+#    }
+       }
diff --git a/EBSeq/R/Likefun.R b/EBSeq/R/Likefun.R
new file mode 100644 (file)
index 0000000..ebe05ab
--- /dev/null
@@ -0,0 +1,26 @@
+Likefun <-
+function(ParamPool, InputPool)
+{
+
+NoneZeroLength=InputPool[[5]]
+AlphaIn=ParamPool[1]
+BetaIn=ParamPool[2:(1+NoneZeroLength)]
+PIn=ParamPool[2+NoneZeroLength]
+ZIn=InputPool[[4]]
+Input=InputPool[[3]]
+Input1=matrix(InputPool[[1]],nrow=nrow(Input))
+Input2=matrix(InputPool[[2]],nrow=nrow(Input))
+RIn=InputPool[[6]]
+RInSP1=matrix(InputPool[[7]],nrow=nrow(Input))
+RInSP2=matrix(InputPool[[8]],nrow=nrow(Input))
+NumIn=InputPool[[9]]
+##Function here
+#LikelihoodFunction<- function(NoneZeroLength){
+       F0=f0(Input, AlphaIn, BetaIn, RIn, NumIn, log=T)
+       F1=f1(Input1, Input2, AlphaIn, BetaIn, RInSP1,RInSP2, NumIn, log=T)
+               F0[F0==Inf]=min(F0[!is.na(F0) & F0!=Inf])
+               F1[F1==Inf]=min(F1[!is.na(F1) & F1!=Inf])
+
+       -sum((1-ZIn)*F0+ (1-ZIn)* log(1-PIn) + ZIn*F1 + ZIn*log(PIn))
+}
+
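Likefun returns the negative expected complete-data log-likelihood of the two-component (EE/DE) mixture, -sum((1-Z)*(F0 + log(1-p)) + Z*(F1 + log(p))), which optim() minimizes over alpha, beta and p in LogN. A stripped-down sketch of the p-dimension alone, with toy log densities and posterior weights:

F0 <- log(c(0.20, 0.01, 0.30))   # hypothetical log densities under EE
F1 <- log(c(0.05, 0.25, 0.10))   # hypothetical log densities under DE
Z  <- c(0.1, 0.9, 0.2)           # hypothetical posterior DE probabilities
negloglik <- function(p) -sum((1 - Z) * (F0 + log(1 - p)) + Z * (F1 + log(p)))
optimize(negloglik, c(1e-6, 1 - 1e-6))$minimum   # the minimizing p equals mean(Z) = 0.4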
diff --git a/EBSeq/R/LikefunMulti.R b/EBSeq/R/LikefunMulti.R
new file mode 100644 (file)
index 0000000..510cb1e
--- /dev/null
@@ -0,0 +1,28 @@
+LikefunMulti <-
+function(ParamPool, InputPool)
+{
+
+NoneZeroLength=InputPool[[4]]
+AlphaIn=ParamPool[1]
+BetaIn=ParamPool[2:(1+NoneZeroLength)]
+PIn=ParamPool[(2+NoneZeroLength):length(ParamPool)]
+PInAll=c(1-sum(PIn),PIn)
+ZIn=InputPool[[3]]
+Input=InputPool[[2]]
+InputSP=InputPool[[1]]
+RIn=InputPool[[5]]
+RInSP=InputPool[[6]]
+NumIn=InputPool[[7]]
+AllParti=InputPool[[8]]
+PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
+##Function here
+FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
+                                               function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
+                                       do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
+                                               simplify=F) 
+FPartiLog=sapply(FList,rowSums)
+#FMat=exp(FPartiLog)
+FMat=FPartiLog
+-sum(ZIn*(FMat+log(PInMat)))
+}
+
diff --git a/EBSeq/R/LikefunMultiDVDP.R b/EBSeq/R/LikefunMultiDVDP.R
new file mode 100644 (file)
index 0000000..37dcc2d
--- /dev/null
@@ -0,0 +1,28 @@
+LikefunMulti <-
+function(ParamPool, InputPool)
+{
+
+NoneZeroLength=InputPool[[4]]
+AlphaIn=ParamPool[1]
+BetaIn=ParamPool[2:(1+NoneZeroLength)]
+PInMat=InputPool[[9]]
+#PInAll=c(1-sum(PIn),PIn)
+ZIn=InputPool[[3]]
+Input=InputPool[[2]]
+InputSP=InputPool[[1]]
+RIn=InputPool[[5]]
+RInSP=InputPool[[6]]
+NumIn=InputPool[[7]]
+AllParti=InputPool[[8]]
+#PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
+##Function here
+FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
+                                               function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
+                                       do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
+                                               simplify=F) 
+FPartiLog=sapply(FList,rowSums)
+#FMat=exp(FPartiLog)
+FMat=FPartiLog
+-sum(ZIn*(FMat+log(PInMat)))
+}
+
diff --git a/EBSeq/R/LikefunMultiEMP.R b/EBSeq/R/LikefunMultiEMP.R
new file mode 100644 (file)
index 0000000..510cb1e
--- /dev/null
@@ -0,0 +1,28 @@
+LikefunMulti <-
+function(ParamPool, InputPool)
+{
+
+NoneZeroLength=InputPool[[4]]
+AlphaIn=ParamPool[1]
+BetaIn=ParamPool[2:(1+NoneZeroLength)]
+PIn=ParamPool[(2+NoneZeroLength):length(ParamPool)]
+PInAll=c(1-sum(PIn),PIn)
+ZIn=InputPool[[3]]
+Input=InputPool[[2]]
+InputSP=InputPool[[1]]
+RIn=InputPool[[5]]
+RInSP=InputPool[[6]]
+NumIn=InputPool[[7]]
+AllParti=InputPool[[8]]
+PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
+##Function here
+FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
+                                               function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
+                                       do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
+                                               simplify=F) 
+FPartiLog=sapply(FList,rowSums)
+#FMat=exp(FPartiLog)
+FMat=FPartiLog
+-sum(ZIn*(FMat+log(PInMat)))
+}
+
diff --git a/EBSeq/R/LogN.R b/EBSeq/R/LogN.R
new file mode 100644 (file)
index 0000000..7767b93
--- /dev/null
@@ -0,0 +1,45 @@
+LogN <-
+function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength)
+{
+    # Two-condition case (the loop over conditions is skipped so it may run faster; multi-condition cases are coded separately)
+
+        #For each gene (m rows of Input---m genes)
+        #Save each gene's F0, F1 for further likelihood calculation. 
+  
+        #Get F0 for EE
+        F0=f0(Input,  AlphaIn, BetaIn, EmpiricalR, NumOfEachGroup, log=F)
+        #Get F1 for DE
+        F1=f1(InputSP[[1]], InputSP[[2]], AlphaIn, BetaIn, EmpiricalRSP[[1]],EmpiricalRSP[[2]], NumOfEachGroup, log=F)
+
+        #Get z
+               #Use data.list in logfunction
+        
+               z.list=PIn*F1/(PIn*F1+(1-PIn)*F0)
+               zNaNName=names(z.list)[is.na(z.list)]
+               zGood=which(!is.na(z.list))
+               ###Update P
+        #PFromZ=sapply(1:NoneZeroLength,function(i) sum(z.list[[i]])/length(z.list[[i]]))
+        PFromZ=sum(z.list[zGood])/length(z.list[zGood])
+        F0Good=F0[zGood]
+               F1Good=F1[zGood]
+               ### MLE Part ####
+        # Since we don't want to update p and Z in this step
+        # Each Ng for one row
+               
+               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
+               
+               NumGroupVector.zGood=NumGroupVector[zGood]
+               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
+
+        StartValue=c(AlphaIn, BetaIn,PIn)
+                    
+               Result<-optim(StartValue,Likefun,InputPool=list(InputSP[[1]][zGood,],InputSP[[2]][zGood,],Input[zGood,],z.list[zGood], NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSP[[1]][zGood,], EmpiricalRSP[[2]][zGood,], NumOfEachGroup.zGood))
+        #LikeOutput=Likelihood( StartValue, Input , InputSP , PNEW.list, z.list)
+               AlphaNew= Result$par[1]
+               BetaNew=Result$par[2:(1+NoneZeroLength)]
+        PNew=Result$par[2+NoneZeroLength]
+               ##
+        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZNew.list=z.list,PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,F0Out=F0Good, F1Out=F1Good)
+        Output
+    }
+
diff --git a/EBSeq/R/LogNMulti.R b/EBSeq/R/LogNMulti.R
new file mode 100644 (file)
index 0000000..f852540
--- /dev/null
@@ -0,0 +1,54 @@
+LogNMulti <-
+function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength, AllParti, Conditions)
+{
+
+        #For each gene (m rows of Input---m genes)
+        #Save each gene's F0, F1 for further likelihood calculation. 
+               FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
+                                  function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
+                                                                        do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
+                                         simplify=F) 
+               FPartiLog=sapply(FList,rowSums)
+               FMat=exp(FPartiLog)
+               rownames(FMat)=rownames(Input)
+        #Get z
+               #Use data.list in logfunction
+        PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
+               FmultiP=FMat*PInMat
+               Denom=rowSums(FmultiP)
+               ZEach=apply(FmultiP,2,function(i)i/Denom)
+               zNaNName1=names(Denom)[is.na(Denom)]
+               # other NAs in LikeFun
+               LF=ZEach*(log(FmultiP))
+               zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
+               zNaNName=unique(c(zNaNName1,zNaNMore))
+               zGood=which(!rownames(LF)%in%zNaNName)
+               ZEachGood=ZEach[zGood,]
+               ###Update P
+        PFromZ=colSums(ZEach[zGood,])/length(zGood)
+        FGood=FMat[zGood,]
+               ### MLE Part ####
+        # Since we don't want to update p and Z in this step
+        # Each Ng for one row
+               
+               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
+               
+               NumGroupVector.zGood=NumGroupVector[zGood]
+               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
+
+        StartValue=c(AlphaIn, BetaIn,PIn[-1])
+               InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
+        EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
+
+               Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,], 
+                                        NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti))
+               AlphaNew= Result$par[1]
+               BetaNew=Result$par[2:(1+NoneZeroLength)]
+        PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
+               PNew=c(1-sum(PNewNo1),PNewNo1)
+               ##
+        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood, 
+                                       PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
+        Output
+    }
+
diff --git a/EBSeq/R/LogNMultiDVDP.R b/EBSeq/R/LogNMultiDVDP.R
new file mode 100644 (file)
index 0000000..6e85a5d
--- /dev/null
@@ -0,0 +1,57 @@
+LogNMulti <-
+function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength, AllParti, Conditions)
+{
+
+        #For each gene (m rows of Input---m genes)
+        #Save each gene's F0, F1 for further likelihood calculation. 
+               FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
+                                  function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
+                                                                        do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
+                                         simplify=F) 
+               FPartiLog=sapply(FList,rowSums)
+               FMat=exp(FPartiLog)
+               rownames(FMat)=rownames(Input)
+        #Get z
+               #Use data.list in logfunction
+        PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
+               FmultiP=FMat*PInMat
+               Denom=rowSums(FmultiP)
+               ZEach=apply(FmultiP,2,function(i)i/Denom)
+               zNaNName1=names(Denom)[is.na(Denom)]
+               # other NAs in LikeFun
+               LF=ZEach*(log(FmultiP))
+               zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
+               zNaNName=unique(c(zNaNName1,zNaNMore))
+               zGood=which(!rownames(LF)%in%zNaNName)
+               ZEachGood=ZEach[zGood,]
+               ###Update P
+        PFromZ=colSums(ZEach[zGood,])/length(zGood)
+        NewPInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PFromZ,nrow=1)
+               FGood=FMat[zGood,]
+               ### MLE Part ####
+        # Since we don't want to update p and Z in this step
+        # Each Ng for one row
+               
+               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
+               
+               NumGroupVector.zGood=NumGroupVector[zGood]
+               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
+
+        StartValue=c(AlphaIn, BetaIn)
+               InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
+        EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
+
+               Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,], 
+                                        NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti,
+                                        NewPInMat))
+               AlphaNew= Result$par[1]
+               BetaNew=Result$par[2:(1+NoneZeroLength)]
+        #PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
+               #PNew=c(1-sum(PNewNo1),PNewNo1)
+               PNew= PFromZ
+               ##
+        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood, 
+                                       PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
+        Output
+    }
+
diff --git a/EBSeq/R/LogNMultiEMP.R b/EBSeq/R/LogNMultiEMP.R
new file mode 100644 (file)
index 0000000..f852540
--- /dev/null
@@ -0,0 +1,54 @@
+LogNMulti <-
+function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength, AllParti, Conditions)
+{
+
+        #For each gene (m rows of Input---m genes)
+        #Save each gene's F0, F1 for further likelihood calculation. 
+               FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
+                                  function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
+                                                                        do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
+                                         simplify=F) 
+               FPartiLog=sapply(FList,rowSums)
+               FMat=exp(FPartiLog)
+               rownames(FMat)=rownames(Input)
+        #Get z
+               #Use data.list in logfunction
+        PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
+               FmultiP=FMat*PInMat
+               Denom=rowSums(FmultiP)
+               ZEach=apply(FmultiP,2,function(i)i/Denom)
+               zNaNName1=names(Denom)[is.na(Denom)]
+               # other NAs in LikeFun
+               LF=ZEach*(log(FmultiP))
+               zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
+               zNaNName=unique(c(zNaNName1,zNaNMore))
+               zGood=which(!rownames(LF)%in%zNaNName)
+               ZEachGood=ZEach[zGood,]
+               ###Update P
+        PFromZ=colSums(ZEach[zGood,])/length(zGood)
+        FGood=FMat[zGood,]
+               ### MLE Part ####
+        # Since we don't want to update p and Z in this step
+        # Each Ng for one row
+               
+               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
+               
+               NumGroupVector.zGood=NumGroupVector[zGood]
+               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
+
+        StartValue=c(AlphaIn, BetaIn,PIn[-1])
+               InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
+        EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
+
+               Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,], 
+                                        NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti))
+               AlphaNew= Result$par[1]
+               BetaNew=Result$par[2:(1+NoneZeroLength)]
+        PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
+               PNew=c(1-sum(PNewNo1),PNewNo1)
+               ##
+        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood, 
+                                       PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
+        Output
+    }
+
diff --git a/EBSeq/R/MedianNorm.R b/EBSeq/R/MedianNorm.R
new file mode 100644 (file)
index 0000000..44c0a4a
--- /dev/null
@@ -0,0 +1,5 @@
+MedianNorm=function(Data){
+
+    geomeans <- exp(rowMeans(log(Data)))
+       apply(Data, 2, function(cnts) median((cnts/geomeans)[geomeans >  0]))
+}
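MedianNorm is the DESeq-style median-of-ratios normalization: each library's size factor is the median, over genes, of its counts divided by the per-gene geometric mean. A small sketch with a deliberately deeper third lane:

set.seed(1)
counts <- matrix(rpois(400, lambda = 50), ncol = 4)
counts[, 3] <- counts[, 3] * 2      # pretend lane 3 was sequenced twice as deep
Sizes <- MedianNorm(counts)
Sizes / Sizes[1]                    # about c(1, 1, 2, 1)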
diff --git a/EBSeq/R/MergeGene.R b/EBSeq/R/MergeGene.R
new file mode 100644 (file)
index 0000000..4bfdaf4
--- /dev/null
@@ -0,0 +1,107 @@
+MergeGene <-
+function(GeneSIMout, Num, Path="./"){
+NumSample=ncol(GeneSIMout[[1]]$generateData)
+
+NumGene=rep(0,Num)
+for (i in 1:Num)NumGene[i]=nrow(GeneSIMout[[i]]$generateData)
+
+MinNumGene=min(NumGene)
+AproxNumDE=length(GeneSIMout[[1]]$TrueDE)
+       
+GeneMergeTable=matrix(rep(0,12),nrow=6)
+       for(i in 1:Num)GeneMergeTable=GeneMergeTable+GeneSIMout[[i]][[1]]
+       GeneMergeTable=GeneMergeTable/Num
+       GeneMergeTable=round(GeneMergeTable,2)
+                 
+       GeneMergeDVD=rep(0,2)
+         for(i in 1:Num)GeneMergeDVD=GeneMergeDVD+GeneSIMout[[i]][[3]]
+                 GeneMergeDVD=round(GeneMergeDVD/Num,2) 
+                                         
+         GeneMergePhi=matrix(rep(0,2),nrow=2)
+                 for(i in 1:Num)GeneMergePhi=GeneMergePhi+GeneSIMout[[i]][[4]]
+                         GeneMergePhi=round(GeneMergePhi/Num,2)
+## Write
+TXTname=paste(paste(Path,paste("Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".txt",sep="")
+write.table(GeneMergeTable, file=TXTname)
+
+
+####### Note everytime # DE genes and # total genes may different. (since NA issue)
+  GeneMergeFD=matrix(rep(0,5*MinNumGene),ncol=5)
+  GeneMergeFD.p=matrix(rep(0,5*MinNumGene),ncol=5)
+  GeneMergeTP.p=matrix(rep(0,5*MinNumGene),ncol=5)
+  GeneMergeFN.p=matrix(rep(0,5*MinNumGene),ncol=5)
+  GeneMergeTN.p=matrix(rep(0,5*MinNumGene),ncol=5)
+
+  GeneMergeFDR=matrix(rep(0,5*MinNumGene),ncol=5)
+  GeneMergeTPR=matrix(rep(0,5*MinNumGene),ncol=5)
+  GeneMergeFPR=matrix(rep(0,5*MinNumGene),ncol=5)
+
+
+  for(i in 1:Num){
+       # Make sure names in the same order
+       # Get FD number for each number of genes found
+    TotalNum=nrow(GeneSIMout[[i]]$generateData)
+       NumDE=length(GeneSIMout[[i]]$TrueDE)
+       EBSeqNames=names(GeneSIMout[[i]]$EBSeqPP)
+    tmpMatrix=cbind(GeneSIMout[[i]]$DESeqP[EBSeqNames],GeneSIMout[[i]]$edgeRP[EBSeqNames], exp(GeneSIMout[[i]]$BaySeqPP[EBSeqNames,2]),GeneSIMout[[i]]$BBSeqP[EBSeqNames],GeneSIMout[[i]]$EBSeqPP)
+       # Bayseq and EBseq are PP. Others are p value 
+    tmpFD=TopCts(tmpMatrix, c(0,0,1,0,1), GeneSIMout[[i]]$TrueDE[GeneSIMout[[i]]$TrueDE%in%EBSeqNames], MinNumGene)
+    # Get percentage for FP, TP, TN, FN!
+       tmpFD.p=tmpFD/TotalNum
+       # TP = Find - FD
+       tmpTP.p=(c(1:MinNumGene)-tmpFD)/TotalNum
+       # FN = TrueDE - TP
+       tmpFN.p=NumDE/TotalNum - tmpTP.p
+       # TN = TrueEE - FD
+       tmpTN.p=(TotalNum-NumDE)/TotalNum - tmpFD.p
+       
+       
+       tmpFDR=tmpFD.p/(tmpFD.p+tmpTP.p)
+       tmpFPR=tmpFD.p/(tmpFD.p+tmpTN.p)
+       tmpTPR=tmpTP.p/(tmpFN.p+tmpTP.p)
+       GeneMergeFDR=GeneMergeFDR+tmpFDR
+       GeneMergeTPR=GeneMergeTPR+tmpTPR
+       GeneMergeFPR=GeneMergeFPR+tmpFPR
+
+    GeneMergeFD.p=GeneMergeFD.p+tmpFD.p
+       GeneMergeTP.p=GeneMergeTP.p+tmpTP.p
+       GeneMergeFN.p=GeneMergeFN.p+tmpFN.p
+       GeneMergeTN.p=GeneMergeTN.p+tmpTN.p
+
+       GeneMergeFD=GeneMergeFD+tmpFD
+ }   
+  GeneMergeFD=GeneMergeFD/Num
+  GeneMergeFD.p=GeneMergeFD.p/Num
+  GeneMergeTP.p=GeneMergeTP.p/Num
+  GeneMergeFN.p=GeneMergeFN.p/Num
+  GeneMergeTN.p=GeneMergeTN.p/Num
+
+  GeneMergeFDR=GeneMergeFDR/Num
+  GeneMergeTPR=GeneMergeTPR/Num
+  GeneMergeFPR=GeneMergeFPR/Num
+
+
+PlotTopName=paste(paste(Path,paste("Top","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample, sep="_"),sep=""),".pdf",sep="")
+
+TrueDELength=length(GeneSIMout[[i]]$TrueDE[GeneSIMout[[i]]$TrueDE%in%EBSeqNames])
+pdf(PlotTopName)
+  PlotTopCts(TrueDELength,GeneMergeFD[1:TrueDELength,],c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
+dev.off()
+
+
+PlotFDName=paste(paste(Path,paste("FDTP","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
+pdf(PlotFDName)
+  PlotFDTP(MinNumGene,GeneMergeFDR, GeneMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
+dev.off()
+
+PlotFPName=paste(paste(Path,paste("FPRTP","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
+pdf(PlotFPName)
+  PlotFPTP(MinNumGene,GeneMergeFPR, GeneMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
+  dev.off()
+
+
+out=list(GeneMergeTable=GeneMergeTable, GeneMergeDVD=GeneMergeDVD, GeneMergePhi=GeneMergePhi, GeneMergeFD=GeneMergeFD)
+
+
+}
+
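The false-discovery curves averaged above come from ranking each method's genes by p-value (or 1 minus posterior probability) and counting, at every list length, how many of the top genes are not truly DE; TopCts, defined elsewhere in this package, does this for all methods at once. A minimal stand-alone version of that cumulative count with hypothetical scores:

pval   <- c(G_3 = 0.001, G_1 = 0.010, G_9 = 0.020, G_2 = 0.200)   # hypothetical ranked scores
TrueDE <- c("G_1", "G_2", "G_3")
ranked <- names(sort(pval))
cumsum(!ranked %in% TrueDE)    # false discoveries among the top 1, 2, 3, 4 genes: 0 0 1 1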
diff --git a/EBSeq/R/MergeIso.R b/EBSeq/R/MergeIso.R
new file mode 100644 (file)
index 0000000..50a0784
--- /dev/null
@@ -0,0 +1,103 @@
+MergeIso <-
+function(IsoSIMout, Num, Path="./"){
+NumSample=ncol(do.call(rbind, IsoSIMout[[1]]$generateData))
+
+NumIso=rep(0,Num)
+for (i in 1:Num)NumIso[i]=nrow(do.call(rbind, IsoSIMout[[i]]$generateData))
+
+MinNumIso=min(NumIso)
+AproxNumDE=length(unlist(IsoSIMout[[1]]$TrueDE))
+       
+IsoMergeTable=matrix(rep(0,60),nrow=10)
+       for(i in 1:Num)IsoMergeTable=IsoMergeTable+IsoSIMout[[i]][[1]]
+       IsoMergeTable=IsoMergeTable/Num
+       IsoMergeTable=round(IsoMergeTable,2)
+                 
+       IsoMergeDVD=rep(0,2)
+         for(i in 1:Num)IsoMergeDVD=IsoMergeDVD+IsoSIMout[[i]][[3]]
+                 IsoMergeDVD=round(IsoMergeDVD/Num,2) 
+                                         
+         IsoMergePhi=matrix(rep(0,18),nrow=2)
+                 for(i in 1:Num)IsoMergePhi=IsoMergePhi+IsoSIMout[[i]][[4]]
+                         IsoMergePhi=round(IsoMergePhi/Num,2)
+## Write
+TXTname=paste(paste("../IsoOutput/",paste("Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".txt",sep="")
+write.table(IsoMergeTable, file=TXTname)
+
+
+####### Note everytime # DE genes and # total genes may different. (since NA issue)
+  IsoMergeFD=matrix(rep(0,5*MinNumIso),ncol=5)
+  IsoMergeFD.p=matrix(rep(0,5*MinNumIso),ncol=5)
+  IsoMergeTP.p=matrix(rep(0,5*MinNumIso),ncol=5)
+  IsoMergeFN.p=matrix(rep(0,5*MinNumIso),ncol=5)
+  IsoMergeTN.p=matrix(rep(0,5*MinNumIso),ncol=5)
+  IsoMergeFDR=matrix(rep(0,5*MinNumIso),ncol=5)
+  IsoMergeTPR=matrix(rep(0,5*MinNumIso),ncol=5)
+  IsoMergeFPR=matrix(rep(0,5*MinNumIso),ncol=5)
+
+  for(i in 1:Num){
+       # Make sure names in the same order
+       # Get FD number for each number of genes found
+       # columns are samples 
+    TotalNum=nrow(do.call(rbind, IsoSIMout[[i]]$generateData))
+       NumDE=length(unlist(IsoSIMout[[i]]$TrueDE))
+       EBSeqNames=names(IsoSIMout[[i]]$EBSeqPP)
+    tmpMatrix=cbind(IsoSIMout[[i]]$DESeqP[EBSeqNames],IsoSIMout[[i]]$edgeRP[EBSeqNames], exp(IsoSIMout[[i]]$BaySeqPP[EBSeqNames,2]),IsoSIMout[[i]]$BBSeqP[EBSeqNames],IsoSIMout[[i]]$EBSeqPP)
+       # Bayseq and EBseq are PP. Others are p value 
+    tmpFD=TopCts(tmpMatrix, c(0,0,1,0,1), unlist(IsoSIMout[[i]]$TrueDE)[unlist(IsoSIMout[[i]]$TrueDE)%in%EBSeqNames], MinNumIso)
+    # Get percentage for FP, TP, TN, FN!
+       tmpFD.p=tmpFD/TotalNum
+       # TP = Find - FD
+       tmpTP.p=(outer(c(1:MinNumIso),rep(1,5))-tmpFD)/TotalNum
+       # FN = TrueDE - TP
+       tmpFN.p=NumDE/TotalNum - tmpTP.p
+       # TN = TrueEE - FD
+       tmpTN.p=(TotalNum-NumDE)/TotalNum - tmpFD.p
+       
+       tmpFDR=tmpFD.p/(tmpFD.p+tmpTP.p)
+       tmpFPR=tmpFD.p/(tmpFD.p+tmpTN.p)
+       tmpTPR=tmpTP.p/(tmpFN.p+tmpTP.p)
+       IsoMergeFDR=IsoMergeFDR+tmpFDR
+       IsoMergeTPR=IsoMergeTPR+tmpTPR
+       IsoMergeFPR=IsoMergeFPR+tmpFPR
+
+    IsoMergeFD.p=IsoMergeFD.p+tmpFD.p
+       IsoMergeTP.p=IsoMergeTP.p+tmpTP.p
+       IsoMergeFN.p=IsoMergeFN.p+tmpFN.p
+       IsoMergeTN.p=IsoMergeTN.p+tmpTN.p
+
+       IsoMergeFD=IsoMergeFD+tmpFD
+ }   
+  IsoMergeFD=IsoMergeFD/Num
+  IsoMergeFD.p=IsoMergeFD.p/Num
+  IsoMergeTP.p=IsoMergeTP.p/Num
+  IsoMergeFN.p=IsoMergeFN.p/Num
+  IsoMergeTN.p=IsoMergeTN.p/Num
+  IsoMergeFDR=IsoMergeFDR/Num
+  IsoMergeTPR=IsoMergeTPR/Num
+  IsoMergeFPR=IsoMergeFPR/Num
+
+PlotTopName=paste(paste(Path,paste("Top","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample, sep="_"),sep=""),".pdf",sep="")
+
+TrueDELength=length(unlist(IsoSIMout[[i]]$TrueDE)[unlist(IsoSIMout[[i]]$TrueDE)%in%EBSeqNames])
+pdf(PlotTopName)
+  PlotTopCts(TrueDELength,IsoMergeFD[1:TrueDELength,],c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
+dev.off()
+
+
+PlotFDName=paste(paste(Path,paste("FDTP","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
+pdf(PlotFDName)
+  PlotFDTP(MinNumIso,IsoMergeFDR, IsoMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
+dev.off()
+
+PlotFPName=paste(paste(Path,paste("FPRTP","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
+pdf(PlotFPName)
+  PlotFPTP(MinNumIso,IsoMergeFPR, IsoMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
+  dev.off()
+
+
+out=list(IsoMergeTable=IsoMergeTable, IsoMergeDVD=IsoMergeDVD, IsoMergePhi=IsoMergePhi, IsoMergeFD=IsoMergeFD)
+
+
+}
+
diff --git a/EBSeq/R/PlotFDTP.R b/EBSeq/R/PlotFDTP.R
new file mode 100644 (file)
index 0000000..ce029a3
--- /dev/null
@@ -0,0 +1,10 @@
+PlotFDTP <-
+function(TopNum, FDR, TPR,names)
+{
+  
+  matplot(FDR, TPR, xlim=c(0,.5), ylim=c(0,1) ,type="l",lwd=2,xlab="FDR", ylab="TPR")
+    legend("bottomright",col=1:TopNum, lwd=2, lty=1:TopNum, names)
+
+
+}
+
diff --git a/EBSeq/R/PlotFPTP.R b/EBSeq/R/PlotFPTP.R
new file mode 100644 (file)
index 0000000..ed8fdb3
--- /dev/null
@@ -0,0 +1,10 @@
+PlotFPTP <-
+function(TopNum, FPR, TPR,names)
+{
+        
+         matplot(FPR, TPR,xlim=c(0,.1), ylim=c(0,1) ,type="l",lwd=2, xlab="FPR", ylab="TPR")
+             legend("bottomright",col=1:TopNum,lwd=2, lty=1:TopNum, names)
+
+
+}
+
diff --git a/EBSeq/R/PlotPattern.R b/EBSeq/R/PlotPattern.R
new file mode 100644 (file)
index 0000000..61f93af
--- /dev/null
@@ -0,0 +1,7 @@
+PlotPattern<-function(Patterns){
+       par(oma=c(3,3,3,3))
+       PatternCol=rainbow(ncol(Patterns))
+       heatmap(Patterns,col=PatternCol,Colv=NA,Rowv=NA,scale="none")
+
+}
+
diff --git a/EBSeq/R/PlotTopCts.R b/EBSeq/R/PlotTopCts.R
new file mode 100644 (file)
index 0000000..cb502d0
--- /dev/null
@@ -0,0 +1,8 @@
+PlotTopCts <-
+function(TopNum, FD, names)
+{
+    matplot(c(1:TopNum) , FD,type="l",xlab="Top DE selected", lwd=2, log="y", ylab="FD")
+    legend("topleft",col=1:TopNum, lwd=2, lty=1:TopNum, names)
+
+}
+
diff --git a/EBSeq/R/PolyFitPlot.R b/EBSeq/R/PolyFitPlot.R
new file mode 100644 (file)
index 0000000..59fd29c
--- /dev/null
@@ -0,0 +1,44 @@
+PolyFitPlot <-
+function(X , Y , nterms , xname="Estimated Mean", yname="Estimated Var", pdfname="", xlim=c(-1,5), ylim=c(-1,7), ChangeXY=F,col="red"){
+       
+       b=rep(NA,nterms)
+       logX=matrix(rep(X, nterms),ncol=nterms, byrow=T)
+       for (i in 1:nterms)
+               logX[,i]=(log10(X))^i
+       colnames(logX)=paste("logmu^",c(1:nterms))
+       rownames(logX)=names(X)
+       NotUse=c(names(X)[X==0],names(Y)[Y==0],names(X)[rowMeans(logX)==-Inf],names(X)[rowMeans(logX)==Inf])
+       Use=names(X[!names(X)%in%NotUse])
+       Lm=lm(log10(Y[Use])~logX[Use,1:nterms])
+       b=summary(Lm)$coefficients[2:(nterms+1),1]
+       d=summary(Lm)$coefficients[1,1]
+       bvec=matrix(rep(b,length(X)),ncol=nterms,byrow=T)
+       fit=rowSums(logX*bvec)+d
+       main2=NULL
+       if (ChangeXY==T){
+               X.plot=log10(Y)
+               Y.plot=log10(X)
+               fit.X.plot=fit
+               fit.Y.plot=log10(X)
+       }
+       else{
+        X.plot=log10(X)
+        Y.plot=log10(Y)
+           fit.X.plot=log10(X)
+               fit.Y.plot=fit
+                                  }
+
+       for (i in 1:nterms)
+               main2=paste(main2,round(b[i],2),"*log(",xname,")^",i,"+")
+       main=pdfname
+       
+       smoothScatter(X.plot, Y.plot ,main=main,xlim=xlim,ylim=ylim,xlab=xname,ylab=yname,axes=F)
+       axis(1,at=seq(xlim[1],xlim[2],by=1), 10^seq(xlim[1],xlim[2],by=1))
+       axis(2,at=seq(ylim[1],ylim[2],by=2), 10^seq(ylim[1],ylim[2],by=2))
+       Sortit=order(fit.X.plot)
+       lines(fit.X.plot[Sortit],fit.Y.plot[Sortit],col=col,lwd=3)
+       output=list(b=b,d=d,lm=Lm,fit=fit,sort=Sortit)
+       names(output$b)=paste(xname,"^",c(1:length(output$b)))
+       output
+}
+
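PolyFitPlot above regresses log10(variance) on a polynomial in log10(mean), overlays the fitted curve on a smoothScatter of the data, and returns the coefficients (b, d), the lm fit, and the fitted values. A minimal sketch on simulated mean/variance pairs (the NB-style toy data below are made up, not taken from any RSEM output):

set.seed(2)
mu <- rgamma(2000, shape = 2, rate = .01)            # toy per-transcript means
v  <- mu * (1 + mu * .05) * exp(rnorm(2000, 0, .3))  # NB-like variances plus noise
names(mu) <- names(v) <- paste("T", 1:2000, sep = "_")
fit <- PolyFitPlot(X = mu, Y = v, nterms = 5)
round(fit$b, 2)   # polynomial coefficients on the log10 scale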
diff --git a/EBSeq/R/PoolMatrix.R b/EBSeq/R/PoolMatrix.R
new file mode 100644 (file)
index 0000000..4c80785
--- /dev/null
@@ -0,0 +1,25 @@
+PoolMatrix <-
+function(Data,reads,type)
+{
+poolnames=names(Data)
+poolM=NULL
+for (po in 1:8)
+       poolM=cbind(poolM,Data[[po]][,1])
+rownames(poolM)=rownames(Data[[1]])
+colnames(poolM)=poolnames
+
+#poolValue=poolM*reads
+poolValue=poolM
+for (col in 1:8)
+       poolValue[,col]=poolM[,col]*reads[col]
+poolValue=round(poolValue)
+if (type=="G")
+       {
+               poolM=cbind(Data[[1]][,2],poolM)
+               poolValue=cbind(Data[[1]][,2],poolValue)
+               colnames(poolM)=c("Groups",poolnames)
+               colnames(poolValue)=c("Groups",poolnames)
+       }
+poolOutput=list(poolM=poolM,poolValue=poolValue)
+}
+
diff --git a/EBSeq/R/PostFC.R b/EBSeq/R/PostFC.R
new file mode 100644 (file)
index 0000000..1ef2669
--- /dev/null
@@ -0,0 +1,28 @@
+PostFC=function(EBoutput) {
+       GeneRealMeanC1=unlist(EBoutput$C1Mean)
+       GeneRealMeanC2=unlist(EBoutput$C2Mean)
+       GeneRealMean=(GeneRealMeanC1+GeneRealMeanC2)/2
+
+       GeneRealFC=GeneRealMeanC1/GeneRealMeanC2
+
+       GeneR=unlist(EBoutput$RList)
+       GeneR[GeneR<=0 | is.na(GeneR)]=GeneRealMean[GeneR<=0 | is.na(GeneR)]*.99/.01
+
+       GeneAlpha=EBoutput[[1]][nrow(EBoutput[[1]]),]
+       GeneBeta=unlist(sapply(1:length(EBoutput$C1Mean),function(i)rep(EBoutput[[2]][nrow(EBoutput[[1]]),i],length(EBoutput$C1Mean[[i]]))))
+       GeneBeta=as.vector(GeneBeta)
+       # Post alpha = alpha + r_C1 * 3
+       # Post beta = beta + Mean_C1 * 3
+       # Post Mean of q in C1 P_q_C1= P_a/ (P_a + P_b)
+       # Post FC = (1-p_q_c1)/p_q_c1 /( (1-p_q_c2)/p_q_c2)
+
+       GenePostAlpha=GeneAlpha+3*GeneR
+       GenePostBetaC1=GeneBeta+3*GeneRealMeanC1
+       GenePostBetaC2=GeneBeta+3*GeneRealMeanC2
+       GenePostQC1=GenePostAlpha/(GenePostAlpha+GenePostBetaC1)
+       GenePostQC2=GenePostAlpha/(GenePostAlpha+GenePostBetaC2)
+
+       GenePostFC=((1-GenePostQC1)/(1-GenePostQC2))*(GenePostQC2/GenePostQC1)
+       Out=list(GenePostFC=GenePostFC, GeneRealFC=GeneRealFC)
+
+}
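PostFC above computes a posterior fold change that moderates the raw C1/C2 mean ratio using the fitted Beta prior; the formulas are spelled out in the comments inside the function. Re-tracing that arithmetic for a single low-count gene with made-up hyper-parameters (none of these numbers come from a real EBTest fit):

alpha <- 0.7; beta <- 1.5; r <- 1    # hypothetical prior estimates and fitted r
meanC1 <- 6; meanC2 <- 2             # toy condition means for one gene
postAlpha <- alpha + 3 * r
qC1 <- postAlpha / (postAlpha + beta + 3 * meanC1)
qC2 <- postAlpha / (postAlpha + beta + 3 * meanC2)
rawFC  <- meanC1 / meanC2                             # 3
postFC <- ((1 - qC1) / (1 - qC2)) * (qC2 / qC1)       # about 2.6, pulled toward 1
c(rawFC = rawFC, postFC = postFC)

In this toy arithmetic the moderation is strongest when the counts are small relative to the prior; for well-expressed genes the two values are nearly identical.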
diff --git a/EBSeq/R/QQP.R b/EBSeq/R/QQP.R
new file mode 100644 (file)
index 0000000..686aa92
--- /dev/null
@@ -0,0 +1,14 @@
+QQP <-
+function(QList,AlphaResult,BetaResult,name,AList="F",GroupName){
+       
+                   for (i in 1:length(BetaResult)){
+                               tmpSize=length(QList[[i]][QList[[i]]<1 & !is.na(QList[[i]])])
+                       if (AList=="F") rdpts=rbeta(tmpSize,AlphaResult,BetaResult[i])
+                               else rdpts=rbeta(tmpSize,AlphaResult[i],BetaResult[i])
+       qqplot(QList[[i]][QList[[i]]<1], rdpts,xlab="estimated q's", ylab="simulated q's from fitted beta",main=paste(name,GroupName[i],sep=" "),xlim=c(0,1),ylim=c(0,1))
+       fit=lm(sort(rdpts)~sort(QList[[i]][QList[[i]]<1  & !is.na(QList[[i]])]))
+       abline(fit,col="red")
+       
+                       }
+}
+
diff --git a/EBSeq/R/QuantileNorm.R b/EBSeq/R/QuantileNorm.R
new file mode 100644 (file)
index 0000000..a4e49bd
--- /dev/null
@@ -0,0 +1,8 @@
+
+QuantileNorm=function(Data, Quantile){
+       #SortData=apply(Data, 2, sort)
+       QtilePt=apply(Data, 2, function(i)quantile(i, Quantile))
+       Size= QtilePt * prod(QtilePt) ^ (-1/ncol(Data))
+       Size
+       }
+
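QuantileNorm above takes the chosen quantile of each lane and rescales those quantile points so that they multiply to 1, giving one size factor per lane (for example, Quantile=.75 for upper-quartile normalization). A small sketch on a made-up count matrix:

cnts <- matrix(c(10, 20,  5, 40,
                 12, 25,  6, 50,
                  8, 18,  4, 35,
                 11, 22,  5, 45,
                  9, 21,  6, 38), ncol = 4, byrow = TRUE)  # 5 toy genes x 4 lanes
Sizes <- QuantileNorm(cnts, .75)
Sizes         # one factor per lane
prod(Sizes)   # ~1 by construction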
diff --git a/EBSeq/R/RankNorm.R b/EBSeq/R/RankNorm.R
new file mode 100644 (file)
index 0000000..e3b0177
--- /dev/null
@@ -0,0 +1,11 @@
+
+RankNorm=function(Data){
+       RankData=apply(Data, 2, rank)
+       SortData=apply(Data, 2, sort)
+       SortMean=rowMeans(SortData)
+       SortMean[SortMean==0]=1
+       NormMatrix=sapply(1:ncol(Data), function(i)Data[,i]/(SortMean[RankData[,i]]))
+       NormMatrix[NormMatrix==0]=1
+       NormMatrix
+       }
+
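RankNorm above builds a lane- and transcript-specific normalization matrix: each count is divided by the mean of the counts sharing its within-lane rank, so the output has the same dimensions as the input (the EBTest help later in this diff notes that sizeFactors may be such a matrix). A toy call with tie-free counts:

set.seed(3)
cnts <- matrix(sample(1:1000, 40), ncol = 4)  # 10 toy genes x 4 lanes, no ties
NormMat <- RankNorm(cnts)
dim(NormMat)                                  # 10 x 4, same shape as the input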
diff --git a/EBSeq/R/TPFDRplot.R b/EBSeq/R/TPFDRplot.R
new file mode 100644 (file)
index 0000000..9304330
--- /dev/null
@@ -0,0 +1,39 @@
+TPFDRplot <-
+function(DESeqP, EBZ, TrueDE, main, FDR=NULL){
+       Seq=seq(0.001,0.5,by=0.001)
+       DETPR=rep(0,length(Seq))
+       EBTPR=rep(0,length(Seq))
+       DEFDR=rep(0,length(Seq))
+       EBFDR=rep(0,length(Seq))
+       DETPNum=rep(0,length(Seq))
+    EBTPNum=rep(0,length(Seq))
+    DEFDNum=rep(0,length(Seq))
+    EBFDNum=rep(0,length(Seq))
+       for (i in 1:length(Seq)){
+               DESeqOnes=names(DESeqP)[DESeqP<=Seq[i]]
+               if (length(FDR)==0) EBOnes=names(EBZ)[EBZ>=crit.fun(1-EBZ, Seq[i])]
+               else if (FDR=="H") EBOnes=names(EBZ)[EBZ>=(1-Seq[i])]
+                       else EBOnes=names(EBZ)[EBZ>=FDR[i]]
+
+               DETPNum[i]=sum(DESeqOnes%in%TrueDE)
+               EBTPNum[i]=sum(EBOnes%in%TrueDE)
+               DEFDNum[i]=sum(!DESeqOnes%in%TrueDE)
+               EBFDNum[i]=sum(!EBOnes%in%TrueDE)
+               
+               DETPR[i]=DETPNum[i]/length(TrueDE)
+               EBTPR[i]=EBTPNum[i]/length(TrueDE)
+               DEFDR[i]=DEFDNum[i]/length(TrueDE)
+               EBFDR[i]=EBFDNum[i]/length(TrueDE)
+       }
+       plot(Seq,DETPR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "TPR"),xlab="controlled FDR level", ylab="TPR",lwd=2)
+       lines(Seq,EBTPR,col="blue",lwd=2)
+       legend("bottomright",lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
+
+       plot(Seq,DEFDR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "FDR"),xlab="controlled FDR level", ylab="FDR",lwd=2)
+       lines(Seq,EBFDR,col="blue",lwd=2)
+       legend("topleft", lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
+
+
+       output=cbind( DETPR,EBTPR, DEFDR,EBFDR,DETPNum,EBTPNum,DEFDNum,EBFDNum)
+}
+
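TPFDRplot above sweeps a grid of target FDR levels, calls DE at each level from DESeq p-values and from EBSeq posterior probabilities, and plots the resulting TPR and empirical FDR curves. A hedged sketch with simulated p-values and posterior probabilities; FDR="H" is used here so the simple 1-threshold rule is applied:

set.seed(4)
allG   <- paste("G", 1:1000, sep = "_")
TrueDE <- allG[1:100]
DESeqP <- setNames(c(runif(100, 0, .05), runif(900)), allG)        # toy p-values
EBZ    <- setNames(c(runif(100, .9, 1), runif(900, 0, .9)), allG)  # toy PP(DE)
par(mfrow = c(1, 2))
res <- TPFDRplot(DESeqP, EBZ, TrueDE, main = "Toy", FDR = "H")
head(res)   # TPR/FDR and TP/FD counts at each target level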
diff --git a/EBSeq/R/TopCts.R b/EBSeq/R/TopCts.R
new file mode 100644 (file)
index 0000000..137977c
--- /dev/null
@@ -0,0 +1,23 @@
+TopCts <-
+function(pvalue, PP=NULL, TrueNames, TopNum){
+       NumOfMethods=ncol(pvalue)
+       puse=pvalue
+       if(1%in%PP)puse[,PP==1]=1-pvalue[,PP==1]
+       #puse.list=data.frame(puse)
+       FD=matrix(rep(0,NumOfMethods*TopNum),ncol=NumOfMethods)
+#      Rank=apply(puse,2,rank)
+#      for(i in 1:TopNum)
+#              FD[i,]=sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]%in%TrueNames))        
+#      FD=sapply(1:TopNum, function(i)sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]%in%TrueNames)))
+       for (s in 1:NumOfMethods){
+               tmp=puse[,s]
+               names(tmp)=rownames(puse)
+               sorttmp=sort(tmp)
+               for( c in 2:TopNum)
+                       FD[c, s]=FD[(c-1),s]+as.numeric(!names(sorttmp)[c]%in%TrueNames)
+       }
+       FD
+       #matplot(TopNum,FD,type="l",ylim=c(0,1),xlab="Top DE selected", ylab="FDR")
+       #legend("rightbottom",col=1:TopNum, lty=1:TopNum, names)
+       }
+
diff --git a/EBSeq/R/beta.mom.R b/EBSeq/R/beta.mom.R
new file mode 100644 (file)
index 0000000..269996d
--- /dev/null
@@ -0,0 +1,10 @@
+beta.mom <-
+function(qs.in){
+       xbar<-mean(qs.in)
+       s2<-var(qs.in)
+       term<-(xbar*(1-xbar))/s2
+       alpha.hat<-xbar*(term-1)
+       beta.hat<-(1-xbar)*(term-1)
+       return(c(alpha.hat,beta.hat))
+}
+
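beta.mom above is the standard method-of-moments estimator for a Beta(alpha, beta) distribution: with sample mean xbar and variance s^2, it returns alpha = xbar*(xbar*(1-xbar)/s^2 - 1) and the matching beta. A quick sanity check on simulated q's:

set.seed(5)
q <- rbeta(10000, shape1 = 2, shape2 = 5)
beta.mom(q)   # approximately c(2, 5)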
diff --git a/EBSeq/R/crit_fun.R b/EBSeq/R/crit_fun.R
new file mode 100644 (file)
index 0000000..e68ed4b
--- /dev/null
@@ -0,0 +1,15 @@
+crit_fun<-function (PPEE, thre) 
+{
+    y <- cumsum(sort(PPEE))/(1:length(PPEE))
+    mm <- y < thre
+    index <- sum(mm)
+    if (index > 0) {
+        out <- 1 - sort(PPEE)[index]
+           }           
+    if (index == 0) {
+                       out <- 1
+                                   }
+    names(out) <- NULL
+    return(out)
+}
+
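crit_fun above turns per-transcript posterior probabilities of EE into a hard posterior-probability cutoff: it sorts the PP(EE) values, finds how many transcripts can be called DE while the running mean of their PP(EE) stays below the target thre (a posterior expected FDR), and returns 1 minus the largest admissible PP(EE). A tiny worked example with made-up posterior probabilities:

PPEE <- c(t1 = .01, t2 = .02, t3 = .03, t4 = .20, t5 = .60, t6 = .90)  # toy PP(EE)
thre <- crit_fun(PPEE, 0.05)
thre                            # 0.97: call DE when PP(DE) >= 0.97
names(PPEE)[1 - PPEE >= thre]   # t1, t2, t3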
diff --git a/EBSeq/R/f0.R b/EBSeq/R/f0.R
new file mode 100644 (file)
index 0000000..a3ec550
--- /dev/null
@@ -0,0 +1,22 @@
+f0 <-
+function(Input, AlphaIn, BetaIn, EmpiricalR, NumOfGroups, log)
+{      
+                
+               BetaVect=do.call(c,sapply(1:length(BetaIn),function(i)rep(BetaIn[i],NumOfGroups[i]),simplify=F))
+               SampleNum=dim(Input)[2]
+               #Product part
+               ChooseParam1=round(Input+EmpiricalR-1)
+               roundInput=round(Input)
+               EachChoose=sapply(1:SampleNum, function(i)lchoose(ChooseParam1[,i], roundInput[,i]))
+               
+               SumEachIso=rowSums(Input)
+               param1=AlphaIn + rowSums(EmpiricalR)
+               param2=BetaVect + SumEachIso
+               LogConst=rowSums(EachChoose)+lbeta(param1, param2)-lbeta(AlphaIn, BetaVect)
+
+
+               if (log==F) FinalResult=exp(LogConst)
+               if (log==T) FinalResult=LogConst
+    FinalResult
+}
+
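f0 above evaluates the (log) NB-Beta predictive density of the observed counts for each transcript, given the hyper-parameters alpha and beta, a fitted r matrix, and the number of transcripts in each Ng group (BetaIn has one entry per group and is expanded via NumOfGroups). A toy call with a single Ng group and hypothetical hyper-parameters, just to show the expected shapes:

cnts <- matrix(c( 5,  8,  6,
                 40, 35, 50,
                  0,  1,  0,
                 12, 15,  9), ncol = 3, byrow = TRUE)  # 4 toy transcripts x 3 samples
rmat <- matrix(10, nrow = 4, ncol = 3)                 # placeholder fitted r values
f0(Input = cnts, AlphaIn = .7, BetaIn = 1.5, EmpiricalR = rmat,
   NumOfGroups = 4, log = TRUE)                        # one log-density per transcript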
diff --git a/EBSeq/R/f1.R b/EBSeq/R/f1.R
new file mode 100644 (file)
index 0000000..1f160a0
--- /dev/null
@@ -0,0 +1,10 @@
+f1 <-
+function(Input1, Input2, AlphaIn, BetaIn, EmpiricalRSP1,EmpiricalRSP2,NumOfGroup, log){
+       F0.1=f0(Input1, AlphaIn, BetaIn, EmpiricalRSP1, NumOfGroup, log=log)
+       F0.2=f0(Input2, AlphaIn, BetaIn, EmpiricalRSP2, NumOfGroup, log=log)
+       
+       if (log==F) Result=F0.1*F0.2
+       if (log==T) Result=F0.1+F0.2
+       Result
+}
+
diff --git a/EBSeq/data/GeneEBresultGouldBart2.rda b/EBSeq/data/GeneEBresultGouldBart2.rda
new file mode 100644 (file)
index 0000000..ab7963c
Binary files /dev/null and b/EBSeq/data/GeneEBresultGouldBart2.rda differ
diff --git a/EBSeq/data/GeneMat.rda b/EBSeq/data/GeneMat.rda
new file mode 100644 (file)
index 0000000..1a974cd
Binary files /dev/null and b/EBSeq/data/GeneMat.rda differ
diff --git a/EBSeq/data/IsoEBresultGouldBart2.rda b/EBSeq/data/IsoEBresultGouldBart2.rda
new file mode 100644 (file)
index 0000000..eb136d9
Binary files /dev/null and b/EBSeq/data/IsoEBresultGouldBart2.rda differ
diff --git a/EBSeq/data/IsoList.rda b/EBSeq/data/IsoList.rda
new file mode 100644 (file)
index 0000000..29fbedb
Binary files /dev/null and b/EBSeq/data/IsoList.rda differ
diff --git a/EBSeq/data/MultiGeneMat.rda b/EBSeq/data/MultiGeneMat.rda
new file mode 100644 (file)
index 0000000..b715267
Binary files /dev/null and b/EBSeq/data/MultiGeneMat.rda differ
diff --git a/EBSeq/data/datalist b/EBSeq/data/datalist
new file mode 100644 (file)
index 0000000..70188ff
--- /dev/null
@@ -0,0 +1,5 @@
+GeneEBresultGouldBart2
+GeneMat
+IsoEBresultGouldBart2
+IsoList
+MultiGeneMat
diff --git a/EBSeq/demo/EBSeq.R b/EBSeq/demo/EBSeq.R
new file mode 100644 (file)
index 0000000..4ffc4a4
--- /dev/null
@@ -0,0 +1,110 @@
+library(EBSeq)
+set.seed(13)
+
+# Section 3.1
+
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL,
+  Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000,
+  DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.1, Phi.qt2=.9,
+  Meanconstant=NULL, OnlyData=T)
+GeneData=GeneGenerate$data
+GeneTrueDENames=GeneGenerate$TrueDE
+str(GeneData)
+str(GeneTrueDENames)
+
+Sizes=MedianNorm(GeneData)
+
+EBres=EBTest(Data=GeneData, 
+  Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=Sizes, maxround=5)
+
+PP=GetPP(EBres)
+str(PP)
+DEfound=names(PP)[which(PP>=.95)]
+str(DEfound)
+sum(DEfound%in%GeneTrueDENames)
+
+QQP(QList=EBres$QList1, AlphaResult=EBres[[1]][5,1], 
+  BetaResult=EBres[[2]][5,1], name="Gene Simulation", AList="F", GroupName=NULL)
+DenNHist(QList=EBres$QList1, Alpha=EBres[[1]][5,1], Beta=EBres[[2]][5,1], 
+  name="Gene Simulation", AList="F", GroupName=NULL)
+
+# Section 3.2
+
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, 
+  Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, 
+  NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, 
+  Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
+str(IsoGenerate)
+
+IsoMat=do.call(rbind,IsoGenerate$data)
+str(IsoMat)
+
+IsoSizes=MedianNorm(IsoMat)
+
+IsoNames=rownames(IsoMat)
+str(IsoNames)
+GeneNames=paste("Gene",c(1:3000),sep="_")
+IsosGeneNames=c(GeneNames[1:1000],rep(GeneNames[1001:2000],each=2),
+  rep(GeneNames[2001:3000],each=3))
+NgList=GetNg(IsoNames, IsosGeneNames)
+IsoNgTrun=NgList$IsoformNgTrun
+IsoNgTrun[c(1:3,1001:1003,3001:3003)]
+
+IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, 
+  Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=IsoSizes, maxround=5)
+IsoPP=GetPP(IsoEBres)
+str(IsoPP)
+IsoDE=IsoPP[which(IsoPP>=.95)]
+str(IsoDE)
+sum(names(IsoDE)%in%IsoGenerate$TrueDE)
+
+par(mfrow=c(2,2))
+PolyFitValue=vector("list",3)
+for(i in 1:3)
+  PolyFitValue[[i]]=PolyFitPlot(IsoEBres$C1Mean[[i]], 
+    IsoEBres$C1EstVar[[i]],5)
+
+PolyAll=PolyFitPlot(unlist(IsoEBres$C1Mean), unlist(IsoEBres$C1EstVar),5)
+lines(log10(IsoEBres$C1Mean[[1]][PolyFitValue[[1]]$sort]), 
+  PolyFitValue[[1]]$fit[PolyFitValue[[1]]$sort],col="yellow")
+lines(log10(IsoEBres$C1Mean[[2]][PolyFitValue[[2]]$sort]), 
+  PolyFitValue[[2]]$fit[PolyFitValue[[2]]$sort],col="pink")
+lines(log10(IsoEBres$C1Mean[[3]][PolyFitValue[[3]]$sort]), 
+  PolyFitValue[[3]]$fit[PolyFitValue[[3]]$sort],col="green")
+legend("topleft",c("All Isoforms","Ng = 1","Ng = 2","Ng = 3"),
+  col=c("red","yellow","pink","green"),lty=1,lwd=3,box.lwd=2)
+
+par(mfrow=c(2,2))
+QQP(QList=IsoEBres$QList1, AlphaResult=IsoEBres[[1]][5,],
+ BetaResult=IsoEBres[[2]][5,], 
+ name="Isoforms", AList="F", GroupName=paste("Ng = ",c(1:3),sep=""))
+
+DenNHist(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], 
+  Beta=IsoEBres[[2]][5,], 
+  name="Isoforms", AList="F", GroupName=paste("Ng = ",c(1:3),sep=""))
+
+# Section 3.3
+
+Conditions=c("C1","C1","C2","C2","C3","C3")
+PosParti=GetPatterns(Conditions)
+PosParti
+
+Parti=PosParti[-3,]
+Parti
+
+MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=Parti,
+          NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
+          DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
+str(MultiData)
+
+MultiSize=MedianNorm(MultiData$data)
+MultiRes=EBMultiTest(MultiData$data,NgVector=NULL,Conditions=Conditions,
+           AllParti=Parti, sizeFactors=MultiSize, maxround=5)
+MultiPP=GetMultiPP(MultiRes)
+names(MultiPP)
+MultiPP$PP[1:10,]
+MultiPP$MAP[1:10]
+MultiPP$Patterns
+sum(MultiPP$MAP==MultiData$Patterns)
+
+# EOF
\ No newline at end of file
diff --git a/EBSeq/inst/doc/EBSeq_Vignette.pdf b/EBSeq/inst/doc/EBSeq_Vignette.pdf
new file mode 100644 (file)
index 0000000..30646df
Binary files /dev/null and b/EBSeq/inst/doc/EBSeq_Vignette.pdf differ
diff --git a/EBSeq/man/CheckNg.Rd b/EBSeq/man/CheckNg.Rd
new file mode 100644 (file)
index 0000000..9571d03
--- /dev/null
@@ -0,0 +1,66 @@
+\name{CheckNg}
+\alias{CheckNg}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+CheckNgStructure
+}
+\description{
+Plot the best polynomial fit of log variance versus log mean within each Ng group.
+}
+\usage{
+CheckNg(NewMean, NewVar,nterm, xlim, ylim)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{NewMean}{
+A list of estimated means, one component per Ng group (for example, the C1Mean component of the EB output, as in the example below).
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+4 plots will be returned. The first 3 show the data and the best fit line for the
+Ng=1, Ng=2 and Ng=3 groups.
+The 4th plot is a scatter plot of all the data together.
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+### Simulate Isoform Level Data
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.4, Phi.qt2=.6, OnlyData="Y" )
+IsoList=IsoGenerate$data
+
+# Get Vectors and Run EBSeq
+ngv=c(1,2,3,2,3,2,3,2,3)
+b3v=c(1,0,0,1,1,0,0,1,1)
+b5v=c(1,0,0,0,0,1,1,1,1)
+NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
+Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
+Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
+
+IsoData=do.call(rbind,IsoList)
+IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
+
+
+# Plot
+CheckNg(IsoEBres$C1Mean, IsoEBres$C1EstVar,5, c(-1,5),c(-1,7))
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ Ng }
diff --git a/EBSeq/man/DenNHist.Rd b/EBSeq/man/DenNHist.Rd
new file mode 100644 (file)
index 0000000..f096153
--- /dev/null
@@ -0,0 +1,107 @@
+\name{DenNHist}
+\alias{DenNHist}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+%%  ~~function to do ... ~~
+Density plot to compare the empirical q's and the simulated q's from the fitted beta distribution.
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+DenNHist(QList, Alpha, Beta, name, AList = "F", GroupName)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{QList}{
+The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
+Input could be a vector or a list of different groups of transcripts. The number of list elements should be the same as the length of Beta.
+
+}
+  \item{Alpha}{
+The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
+Input should be a number if AList is not defined.
+}
+  \item{Beta}{
+The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar. 
+Input could be a single number or a vector. The length of the input should be the same as the number of list elements in QList.
+
+}
+  \item{name}{
+The name used in the main title of each plot.
+}
+  \item{AList}{
+Whether a list of alpha's is used
+
+}
+  \item{GroupName}{
+The names of each sub-plot. The length of the input should be the same as the number of list elements in QList.
+
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+Plots will be generated. Each plot corresponds to one element of QList.
+The empirical estimates of the q's are shown as a blue histogram and the density of
+the fitted beta distribution is shown as a green line.
+The main title of each plot will be "GroupName name".
+
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+beta.mom, DenNHistTable, QQP, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
+}
+\examples{
+### Simulate Gene Level Data
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+GeneData=GeneGenerate$data
+
+# Run EBSeq
+EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=rep(1,10),maxround=5)
+# Plot
+DenNHist(QList=EBres$QList1, Alpha=EBres[[1]][5,1], Beta=EBres[[2]][5,1], name="Gene", AList="F", GroupName="")
+
+### Simulate Isoform Level Data
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="Y" )
+IsoList=IsoGenerate$data
+
+# Get Vectors and Run EBSeq
+ngv=c(1,2,3,2,3,2,3,2,3)
+b3v=c(1,0,0,1,1,0,0,1,1)
+b5v=c(1,0,0,0,0,1,1,1,1)
+NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
+Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
+Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
+
+IsoData=do.call(rbind,IsoList)
+IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
+
+# Plot
+par(mfrow=c(3,3))
+DenNHist(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], Beta=IsoEBres[[2]][5,], name="Isoform", AList="F", GroupName=paste("group",c(1:9),sep=""))
+
+
+
+
+
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ beta }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/DenNHistTable.Rd b/EBSeq/man/DenNHistTable.Rd
new file mode 100644 (file)
index 0000000..b480f98
--- /dev/null
@@ -0,0 +1,85 @@
+\name{DenNHistTable}
+\alias{DenNHistTable}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+9 Density plots to compare the empirical q's and the simulated q's from the fitted beta distribution.
+}
+\description{
+Check the beta fit of 9 different groups on isoform level data. 
+}
+\usage{
+DenNHistTable(QList, Alpha, Beta,  AList = "F")
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{QList}{
+The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
+Input should be a list of different groups of transcripts. The number of list elements should be 9.
+
+}
+
+  \item{Alpha}{
+The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
+Input should be a number if AList is not defined.
+}
+  \item{Beta}{
+The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar. 
+Input could be one single number or a vector of several numbers. The length of the input should be 9.
+
+}
+
+  \item{AList}{
+Whether a list of alpha's is used
+
+}
+
+  }
+\details{
+
+}
+\value{
+A plot containing 9 sub-plots will be generated.
+The empirical estimates of the q's are shown as a blue histogram and the density of
+the fitted beta distribution is shown as a green line.
+The main title of each sub-plot will be "GroupName name".
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+DenNHist, beta.mom, QQP, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
+}
+\examples{
+# Generate Data
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="Y" )
+IsoList=IsoGenerate$data
+
+# Get Ng Vector, 5End Vector and 3End Vector
+ngv=c(1,2,3,2,3,2,3,2,3)
+b3v=c(1,0,0,1,1,0,0,1,1)
+b5v=c(1,0,0,0,0,1,1,1,1)
+NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
+Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
+Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
+
+#Run EBSeq
+IsoData=do.call(rbind,IsoList)
+IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
+
+DenNHistTable(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], Beta=IsoEBres[[2]][5,], AList="F")
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/EBMultiTest.Rd b/EBSeq/man/EBMultiTest.Rd
new file mode 100644 (file)
index 0000000..a23b0f7
--- /dev/null
@@ -0,0 +1,130 @@
+\name{EBMultiTest}
+\alias{EBMultiTest}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Using the EM algorithm to calculate the posterior probabilities of the patterns of interest in a multiple-condition study
+}
+\description{
+Based on the assumption of the NB-Beta empirical Bayes model, the EM algorithm is used to obtain the posterior probability of each pattern of interest.
+}
+\usage{
+EBMultiTest(Data,NgVector=NULL,Conditions,AllParti=NULL, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+
+  \item{Data}{
+A data matrix containing expression values for each transcript (gene level or isoform level), in which rows are transcripts and columns are samples.
+}
+  \item{NgVector}{
+A vector containing the Ng value of each isoform. If an isoform belongs to a gene with 2 isoforms, its Ng should be 2. Ng can only be 1, 2 or 3. For gene-level data, NgVector should be all 1's. The vector length should be the same as the number of rows in Data.
+}
+  \item{Conditions}{
+A vector indicating the condition each sample belongs to.
+}
+
+\item{AllParti}{
+       A matrix indicating the patterns of interest. Columns should be conditions and rows should be patterns.
+       The matrix can be obtained with the GetPatterns function. If AllParti=NULL, all possible patterns will be used.
+}
+
+  \item{sizeFactors}{
+The normalization factors. 
+The normalization factors could be a vector with lane-specific numbers,
+or a matrix with lane- and transcript-specific numbers.
+}
+  \item{maxround}{
+Number of iterations. The suggested value is 5.
+}
+
+\item{tau}{
+The tau value from RSEM output. If the data has no replicates within condition, 
+EBSeq will use the CI of tau to capture the variation from mapping
+uncertainty and estimate the variance.
+       }
+\item{CI}{
+The CI of each tau from RSEM output    
+       }
+\item{CIthre}{
+The threshold of CI RSEM used.
+       }
+\item{Pool, NumBin}{
+When working without replicates, Pool=T should be set in the
+EBTest function to enable pooling.
+By setting NumBin = 1000, EBSeq will group genes with similar means
+together into 1,000 bins.
+Under the assumption that no more than 50\% of the genes in the data set are DE,
+genes whose FC's lie within the 25\% - 75\% quantiles of all FC's are taken as the
+candidate genes.
+For each bin, the bin-wise variance estimate is the median of the
+cross-condition variance estimates of the candidate genes within that bin.
+The cross-condition variance estimates are used for the candidate genes,
+and the bin-wise variance estimates of the host bin are used for the non-candidate genes.
+}
+
+}
+
+\details{
+For each transcript gi within a condition, the model assumes:
+X_gis|mu_gi ~ NB (r_gi0 * l_s, q_gi)
+q_gi|alpha, beta^N_g,b_gi ~ Beta (alpha, beta^N_g,b_gi)
+in which l_s is the sizeFactor of sample s.
+
+The function computes, for each transcript, the posterior probability of each
+expression pattern defined in AllParti.
+
+
+}
+\value{
+\item{Alpha }{Fitted parameter alpha of the prior beta distribution. Rows are the values for each iteration.}
+\item{Beta }{Fitted parameter beta of the prior beta distribution. Rows are the values for each iteration.}
+\item{P, PFromZ }{The Bayes estimator of being DE. Rows are the values for each iteration.}
+\item{Z, PoissonZ}{ The Posterior Probability of being DE for each transcript. (Maybe not in the same order of input)}
+\item{RList}{ The fitted values of r for each transcript.}
+\item{MeanList}{The mean of each transcript. (Cross conditions)}
+\item{VarList}{The variance of each transcript. (Cross conditions, using the expression values divided by its sizeFactors)}
+\item{QListi1}{The fitted q values of each transcript within condition 1.}
+\item{QListi2}{The fitted q values of each transcript within condition 2.}
+\item{C1Mean}{The mean of each transcript within Condition 1}
+\item{C2Mean}{The mean of each transcript within Condition 2}
+\item{C1EstVar}{The estimated variance of each transcript within Condition 1}
+\item{C2EstVar}{The estimated variance of each transcript within Condition 2}
+\item{PoolVar}{The variance of each transcript. (The pooled value of within condition EstVar)}
+\item{DataList}{A list of data grouped by Ng and bias.}
+\item{PPDE}{The posterior probability of each pattern for each transcript. (In the same order as the input)}
+\item{f}{The likelihood of the predictive distribution under each pattern for each transcript.}
+\item{AllParti}{The matrix describing the patterns.}
+}
+\references{
+}
+\author{
+Ning Leng
+}
+\note{
+}
+
+
+\seealso{
+}
+\examples{
+Conditions=c("C1","C1","C2","C2","C3","C3")
+PosParti=GetPatterns(Conditions)
+AllParti=PosParti[-3,]
+
+MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=AllParti,
+                                                                       NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
+                                                                                                                       DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
+
+MultiRes=EBMultiTest(MultiData[[1]],NgVector=NULL,Conditions=Conditions,
+                                                             AllParti=AllParti, sizeFactors=rep(1,6), maxround=5, tau=NULL,CI=NULL,
+                                                                                              CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
+MultiPP=GetMultiPP(MultiRes)
+
+sum(MultiPP$MAP==MultiData[[2]])
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/EBSeq_NingLeng-package.Rd b/EBSeq/man/EBSeq_NingLeng-package.Rd
new file mode 100644 (file)
index 0000000..ba10fde
--- /dev/null
@@ -0,0 +1,34 @@
+\name{EBSeq_NingLeng-package}
+\alias{EBSeq_NingLeng-package}
+\alias{EBSeq_NingLeng}
+\docType{package}
+\title{
+EBSeq: RNA-Seq Differential Expression Analysis on both gene and isoform level 
+}
+\description{
+A Negative Binomial-Beta model is used to analyze the RNA-Seq data,
+via an empirical Bayes method and the EM algorithm.
+}
+\details{
+\tabular{ll}{
+Package: \tab EBSeq_NingLeng\cr
+Type: \tab Package\cr
+Version: \tab 1.0\cr
+Date: \tab 2011-06-13\cr
+License: \tab What license is it under?\cr
+LazyLoad: \tab yes\cr
+}
+}
+\author{
+Ning Leng
+
+Maintainer: Ning Leng  <nleng@wisc.edu>
+}
+\references{
+}
+\keyword{ package }
+\seealso{
+
+}
+\examples{
+}
diff --git a/EBSeq/man/EBTest.Rd b/EBSeq/man/EBTest.Rd
new file mode 100644 (file)
index 0000000..5d17998
--- /dev/null
@@ -0,0 +1,139 @@
+\name{EBTest}
+\alias{EBTest}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Using the EM algorithm to calculate the posterior probabilities of being DE
+}
+\description{
+Based on the assumption of the NB-Beta empirical Bayes model, the EM algorithm is used to obtain the posterior probability of being DE.
+}
+\usage{
+EBTest(Data, NgVector=NULL, Vect5End=NULL, Vect3End=NULL, Conditions, sizeFactors, maxround,tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+
+  \item{Data}{
+A data matrix containing expression values for each transcript (gene level or isoform level), in which rows are transcripts and columns are samples.
+}
+  \item{NgVector}{
+A vector containing the Ng value of each isoform. If an isoform belongs to a gene with 2 isoforms, its Ng should be 2. Ng can only be 1, 2 or 3. For gene-level data, NgVector should be all 1's. The vector length should be the same as the number of rows in Data.
+}
+  \item{Vect5End}{
+A vector containing the 5' end information of each isoform. It should be 1 if the isoform contains the 5' end and 0 otherwise. For gene-level data, Vect5End should be all 1's. The vector length should be the same as the number of rows in Data.
+(Not recommended)
+}
+  \item{Vect3End}{
+A vector containing the 3' end information of each isoform. It should be 1 if the
+isoform contains the 3' end and 0 otherwise. For gene-level data, Vect3End should be all 1's. The vector length should be the same as the number of rows in Data.
+(Not recommended)
+}
+  \item{Conditions}{
+A vector indicating the condition each sample belongs to.
+}
+
+
+  \item{sizeFactors}{
+The normalization factors. 
+The normalization factors could be a vector with lane-specific numbers,
+or a matrix with lane- and transcript-specific numbers.
+}
+  \item{maxround}{
+Number of iterations. The suggested value is 5.
+}
+
+\item{tau}{
+The tau value from RSEM output. If the data has no replicates within condition, 
+EBSeq will use the CI of tau to capture the variation from mapping
+uncertainty and estimate the variance.
+       }
+\item{CI}{
+The CI of each tau from RSEM output    
+       }
+\item{CIthre}{
+The threshold of CI RSEM used.
+       }
+\item{Pool, NumBin}{
+When working without replicates, Pool=T should be set in the
+EBTest function to enable pooling.
+By setting NumBin = 1000, EBSeq will group genes with similar means
+together into 1,000 bins.
+Under the assumption that no more than 50\% of the genes in the data set are DE,
+genes whose FC's lie within the 25\% - 75\% quantiles of all FC's are taken as the
+candidate genes.
+For each bin, the bin-wise variance estimate is the median of the
+cross-condition variance estimates of the candidate genes within that bin.
+The cross-condition variance estimates are used for the candidate genes,
+and the bin-wise variance estimates of the host bin are used for the non-candidate genes.
+(A rough sketch of this binning idea is given after this help file.)
+}
+
+}
+
+\details{
+For each transcript gi within a condition, the model assumes:
+X_gis|mu_gi ~ NB (r_gi0 * l_s, q_gi)
+q_gi|alpha, beta^N_g,b_gi ~ Beta (alpha, beta^N_g,b_gi)
+in which l_s is the sizeFactor of sample s.
+
+The function will test:
+H0: q_giC1 = q_giC2
+H1: q_giC1 != q_giC2
+
+
+}
+\value{
+\item{Alpha }{Fitted parameter alpha of the prior beta distribution. Rows are the values for each iteration.}
+\item{Beta }{Fitted parameter beta of the prior beta distribution. Rows are the values for each iteration.}
+\item{P, PFromZ }{The Bayes estimator of being DE. Rows are the values for each iteration.}
+\item{Z, PoissonZ}{ The Posterior Probability of being DE for each transcript. (Maybe not in the same order of input)}
+\item{RList}{ The fitted values of r for each transcript.}
+\item{MeanList}{The mean of each transcript. (Cross conditions)}
+\item{VarList}{The variance of each transcript. (Cross conditions, using the expression values divided by its sizeFactors)}
+\item{QListi1}{The fitted q values of each transcript within condition 1.}
+\item{QListi2}{The fitted q values of each transcript within condition 2.}
+\item{C1Mean}{The mean of each transcript within Condition 1}
+\item{C2Mean}{The mean of each transcript within Condition 2}
+\item{C1EstVar}{The estimated variance of each transcript within Condition 1}
+\item{C2EstVar}{The estimated variance of each transcript within Condition 2}
+\item{PoolVar}{The variance of each transcript. (The pooled value of within condition EstVar)}
+\item{DataList}{A list of data grouped by Ng and bias.}
+\item{PPDE}{The posterior probability of being DE for each transcript. (In the same order as the input)}
+
+
+}
+\references{
+}
+\author{
+Ning Leng
+}
+\note{
+}
+
+
+\seealso{
+}
+\examples{
+#Simulate Gene level data
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+GeneData=GeneGenerate$data
+
+# Run EBSeq
+# sizeFactors could be obtained by MedianNorm, QuantileNorm or RankNorm
+EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=rep(1,10),maxround=5)
+
+# Isoform Level
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
+
+IsoMat=do.call(rbind,IsoGenerate$data)
+IsoNames=rownames(IsoMat)
+
+# Construct the gene names of the isoforms (as in the package demo)
+GeneNames=paste("Gene",c(1:3000),sep="_")
+IsosGeneNames=c(GeneNames[1:1000],rep(GeneNames[1001:2000],each=2),
+  rep(GeneNames[2001:3000],each=3))
+Ngvector=GetNg(IsoNames, IsosGeneNames)
+IsoNgTrun=Ngvector$IsoformNgTrun
+
+IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
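The 'Pool, NumBin' paragraph in the help file above describes how EBSeq estimates variances without replicates: bin genes by mean, take the genes with moderate fold changes as candidates, and give each non-candidate the median cross-condition variance of the candidates in its bin. A rough sketch of that idea follows; it is an illustration of the description, not the package's internal implementation, and the pseudo-count and cut() binning are simplifications of my own:

set.seed(6)
cnts  <- matrix(rpois(2 * 5000, lambda = 50), ncol = 2)   # one lane per condition
means <- rowMeans(cnts)
fc    <- (cnts[, 1] + 1) / (cnts[, 2] + 1)                # fold changes with a pseudo-count
crossVar  <- apply(cnts, 1, var)                          # cross-condition variance estimates
bins      <- cut(rank(means, ties.method = "first"), breaks = 1000)  # ~NumBin mean-based bins
candidate <- fc > quantile(fc, .25) & fc < quantile(fc, .75)         # PoolLower/PoolUpper defaults
binVar    <- tapply(crossVar[candidate], bins[candidate], median)    # bin-wise medians
pooledVar <- ifelse(candidate, crossVar, binVar[as.character(bins)]) # NA if a bin has no candidates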
diff --git a/EBSeq/man/GeneEBresultGouldBart2.Rd b/EBSeq/man/GeneEBresultGouldBart2.Rd
new file mode 100644 (file)
index 0000000..6da5305
--- /dev/null
@@ -0,0 +1,83 @@
+\name{GeneEBresultGouldBart2}
+\alias{GeneEBresultGouldBart2}
+\docType{data}
+\title{
+The EBSeq result of the empirical gene data ( Gould Lab data, bart2 )
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of the dataset. ~~
+}
+\usage{data(GeneEBresultGouldBart2)}
+\format{
+  The format is:
+List of 17
+ $ Alpha   : num [1:5, 1] 0.728 0.724 0.719 0.717 0.717
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "AlphaIn" "AlphaIn" "AlphaIn" "AlphaIn" ...
+  .. ..$ : NULL
+ $ Beta    : num [1:5, 1] 1.44 1.49 1.49 1.49 1.48
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "BetaIn" "BetaIn" "BetaIn" "BetaIn" ...
+  .. ..$ : NULL
+ $ P       : num [1:5, 1] 0.1584 0.0767 0.0534 0.046 0.0432
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "PIn" "PIn" "PIn" "PIn" ...
+  .. ..$ : NULL
+ $ PFromZ  : num [1:5, 1] 0.1585 0.0765 0.0535 0.0459 0.0432
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "PFromZ" "PFromZ" "PFromZ" "PFromZ" ...
+  .. ..$ : NULL
+ $ Z       : Named num [1:15312] 0.0036 0.00246 0.00122 0.61556 0.00394 ...
+  ..- attr(*, "names")= chr [1:15312] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027158" "ENSRNOG00000027157" ...
+ $ PoissonZ: Named num [1:4955] 6.59e-04 5.71e-04 3.80e-04 2.75e-04 2.07e-05 ...
+  ..- attr(*, "names")= chr [1:4955] "ENSRNOG00000027159" "ENSRNOG00000039120" "ENSRNOG00000039118" "ENSRNOG00000003198" ...
+ $ RList   :List of 1
+  ..$ : Named num [1:20267] 19.12 62.3 -3.09 348.78 200.03 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ MeanList:List of 1
+  ..$ : Named num [1:20267] 289.663 302.486 0.398 97.791 106.036 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ VarList :List of 1
+  ..$ : Named num [1:20267] 5792.7 1954 0.6 146.8 513.4 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ QList1  :List of 1
+  ..$ : Named num [1:20267] 0.188 0.152 NaN 0.487 1.118 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ QList2  :List of 1
+  ..$ : Named num [1:20267] 0.0389 0.1951 1.1478 1.7647 0.4149 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ C1Mean  :List of 1
+  ..$ : Named num [1:20267] 271.9 300.7 0 93.8 123.1 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ C2Mean  :List of 1
+  ..$ : Named num [1:20267] 307.414 304.298 0.796 101.798 88.953 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ C1EstVar:List of 1
+  ..$ : Named num [1:20267] 1449 1983 0 193 110 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ C2EstVar:List of 1
+  ..$ : Named num [1:20267] 7905.417 1559.46 0.694 57.687 214.39 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ PoolVar :List of 1
+  ..$ : Named num [1:20267] 4677.246 1771.219 0.347 125.211 162.247 ...
+  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
+ $ DataList:List of 1
+  ..$ Ng1: num [1:20267, 1:8] 287 251 0 87 121 181 5 195 70 5 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:20267] "I1" "I2" "I3" "I4" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+}
+\details{
+%%  ~~ If necessary, more details than the __description__ above ~~
+}
+\source{
+%%  ~~ reference to a publication or URL from which the data were obtained ~~
+}
+\seealso{
+IsoEBresultGouldBart2, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
+}
+\examples{
+data(GeneEBresultGouldBart2)
+## maybe str(GeneEBresultGouldBart2) ; plot(GeneEBresultGouldBart2) ...
+}
+\keyword{datasets}
diff --git a/EBSeq/man/GeneMultiSimu.Rd b/EBSeq/man/GeneMultiSimu.Rd
new file mode 100644 (file)
index 0000000..a00b4a7
--- /dev/null
@@ -0,0 +1,112 @@
+\name{GeneMultiSimu}
+\alias{GeneMultiSimu}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Gene Level Simulation for multiple conditions
+}
+\description{
+Simulate Gene level expression data from a Negative Binomial assumption. (Without outliers)
+}
+\usage{
+GeneMultiSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions,AllParti, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL,  NormFactor=NULL, OnlyData = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{DVDconstant}{
+Whether to use a constant fold change for all the DE genes.
+If DVDconstant=4 is set, all the DE genes will have a fold change of 4 across the two conditions.
+If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored.
+}
+  \item{DVDqt1, DVDqt2}{
+If DVDconstant is not specified, the user could use a range of empirical DVD's
+from Gould's data. The lower and upper bounds (quantiles) could be specified.
+The suggested value is c(.96, .97). The DVD for each gene will be randomly chosen within the range.
+
+}
+  \item{Conditions}{
+A vector of characters showing each sample's condition.
+}
+\item{AllParti}{
+           A matrix indicating the patterns of interest. Columns should be conditions and rows should be patterns.
+           The matrix can be obtained with the GetPatterns function. If AllParti=NULL, all possible patterns will be used.
+}
+
+  \item{NumofSample}{
+Number of samples to generate.
+}
+  \item{NumofGene}{
+Number of genes to generate.
+}
+  \item{DEGeneProp}{
+The proportion of genes to be generated from each pattern in AllParti. This should be a
+vector with one entry per pattern (for example, DEGeneProp=c(.7,.1,.1,.1) in the example below).
+}
+  \item{Phiconstant}{
+Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
+}
+  \item{Phi.qt1, Phi.qt2}{
+If Phiconstant is not specified, the user could use a range of empirical phi's from Gould's data. The lower and upper bounds (quantiles) could be specified.
+The suggested value is c(.25, .75). The phi for each gene will be randomly chosen
+within the range.
+
+}
+  \item{Meanconstant}{
+Whether to set the mean of each gene to a constant.
+}
+  \item{OnlyData}{
+Whether the user only wants the generated data matrix. If OnlyData=T, the function will return the simulated matrix
+and the names of the DE genes.
+Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
+}
+
+\item{NormFactor}{
+If NormFactor is NULL, each lane will be assigned the same library size. Otherwise NormFactor should be a
+vector of length NumofSample.
+}
+
+}
+\details{
+For each gene, we assume that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
+For DE genes, we assume that in one condition the genes have mean mu_g * DVD.
+mu, phi and DVD can be specified through the parameter settings.
+}
+\value{
+\item{data}{
+A matrix of expression values will be generated. The rows of the matrix refer to the genes and the columns of the matrix are the samples. The genes are named "G_1", "G_2", ... The first part of the genes will be the DE ones. (The number depends on the DEGeneProp parameter.)
+}
+\item{Patterns}{The pattern each gene belongs to}
+
+}
+
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+IsoSimu, IsoSimuAt, GeneSimuAt
+}
+\examples{
+Conditions=c("C1","C1","C2","C2","C3","C3")
+PosParti=GetPatterns(Conditions)
+AllParti=PosParti[-3,]
+
+MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=AllParti,
+                                                                       NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
+                                                                                                                       DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ simulation }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GeneSimu.Rd b/EBSeq/man/GeneSimu.Rd
new file mode 100644 (file)
index 0000000..a008fcc
--- /dev/null
@@ -0,0 +1,100 @@
+\name{GeneSimu}
+\alias{GeneSimu}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Gene Level Simulation 
+}
+\description{
+Simulate Gene level expression data from a Negative Binomial assumption. (Without outliers)
+}
+\usage{
+GeneSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL,  NormFactor=NULL, OnlyData = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{DVDconstant}{
+Whether to use a constant fold change for all the DE genes.
+If DVDconstant=4 is set, all the DE genes will have a fold change of 4 across the two conditions.
+If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored.
+}
+  \item{DVDqt1, DVDqt2}{
+If DVDconstant is not specified, the user could use a range of empirical DVD's
+from Gould's data. The lower and upper bounds (quantiles) could be specified.
+The suggested value is c(.96, .97). The DVD for each gene will be randomly chosen within the range.
+
+}
+  \item{Conditions}{
+A vector of characters showing each sample's condition.
+(Only the two-condition case is supported now.)
+}
+  \item{NumofSample}{
+Number of samples to generate.
+}
+  \item{NumofGene}{
+Number of genes to generate.
+}
+  \item{DEGeneProp}{
+The proportion of genes to be generated as DE. The value should be in [0, 1].
+In addition, the same proportion of genes will be generated as EE genes with an outlier.
+These genes are generated as EE first; then the count of one randomly selected sample
+is set to its original count multiplied by one of (4, 6, 8, 10).
+}
+  \item{Phiconstant}{
+Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
+}
+  \item{Phi.qt1, Phi.qt2}{
+If Phiconstant is not specified, the user could use a range of empirical phi's from Gould's data. The lower and upper bounds (quantiles) could be specified.
+The suggested value is c(.25, .75). The phi for each gene will be randomly chosen
+within the range.
+
+}
+  \item{Meanconstant}{
+Whether to set the mean of each gene to a constant.
+}
+  \item{OnlyData}{
+Whether the user only wants the generated data matrix. If OnlyData=T, the function will return the simulated matrix
+and the names of the DE genes.
+Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
+}
+
+\item{NormFactor}{
+If NormFactor is NULL, each lane will be assigned the same library size. Otherwise NormFactor should be a
+vector of length NumofSample.
+}
+
+}
+\details{
+For each gene, we assume that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
+For DE genes, we assume that in one condition the genes have mean mu_g * DVD.
+mu, phi and DVD can be specified through the parameter settings.
+}
+\value{
+\item{data}{
+A matrix of expression values will be generated. The rows of the matrix refer to the genes and the columns of the matrix are the samples. The genes are named "G_1", "G_2", ... The first part of the genes will be the DE ones. (The number depends on the DEGeneProp parameter.)
+}
+\item{TrueDE}{The names of the genes that are defined as DE.}
+}
+
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+IsoSimu, IsoSimuAt, GeneSimuAt
+}
+\examples{
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, NormFactor=rep(c(.9,1.2),5),OnlyData=T)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ simulation }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GeneSimuAt.Rd b/EBSeq/man/GeneSimuAt.Rd
new file mode 100644 (file)
index 0000000..1069ab1
--- /dev/null
@@ -0,0 +1,100 @@
+\name{GeneSimuAt}
+\alias{GeneSimuAt}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Gene Level Simulation with outliers
+}
+\description{
+Simulate Gene level expression data from a Negative Binomial assumption. (With outliers)
+}
+\usage{
+GeneSimuAt(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL,  NormFactor=NULL, OnlyData = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{DVDconstant}{
+Whether to use a constant fold change for all the DE genes.
+If DVDconstant=4 is set, all the DE genes will have a fold change of 4 across the two conditions.
+If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored.
+}
+  \item{DVDqt1, DVDqt2}{
+If DVDconstant is not specified, the user could use a range of empirical DVD's
+from Gould's data. The lower and upper bounds (quantiles) could be specified.
+The suggested value is c(.96, .97). The DVD for each gene will be randomly chosen within the range.
+
+}
+  \item{Conditions}{
+A vector of characters showing each sample's condition.
+(Only the two-condition case is supported now.)
+}
+  \item{NumofSample}{
+Number of samples to generate.
+}
+  \item{NumofGene}{
+Number of genes to generate.
+}
+  \item{DEGeneProp}{
+The proportion of genes to be generated as DE. The value should be in [0, 1].
+In addition, the same proportion of genes will be generated as EE genes with an outlier.
+These genes are generated as EE first; then the count of one randomly selected sample
+is set to its original count multiplied by one of (4, 6, 8, 10).
+}
+  \item{Phiconstant}{
+Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
+}
+  \item{Phi.qt1, Phi.qt2}{
+If Phiconstant is not specified, the user could use a range of empirical phi's from Gould's data. The lower and upper bounds (quantiles) could be specified.
+The suggested value is c(.25, .75). The phi for each gene will be randomly chosen
+within the range.
+
+}
+  \item{Meanconstant}{
+Whether to set the mean of each gene to a constant.
+}
+  \item{OnlyData}{
+Whether the user only wants the generated data matrix. If OnlyData=T, the function will return the simulated matrix
+and the names of the DE genes.
+Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
+}
+
+\item{NormFactor}{
+If NormFactor is NULL, each lane will be assigned the same library size. Otherwise NormFactor should be a
+vector of length NumofSample.
+}
+
+}
+\details{
+For each gene, we assume that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
+For DE genes, we assume that in one condition the genes have mean mu_g * DVD.
+mu, phi and DVD can be specified through the parameter settings.
+}
+\value{
+\item{data}{
+A matrix of expression values will be generated. The rows of the matrix refer to the genes and the columns of the matrix are the samples. The genes are named "G_1", "G_2", ... The first part of the genes will be the DE ones. (The number depends on the DEGeneProp parameter.)
+}
+\item{TrueDE}{The names of the genes that are defined as DE.}
+\item{Outliers}{The names of the genes that are defined as outliers at each level of (4, 6, 8, 10).}
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+IsoSimu, IsoSimuAt
+}
+\examples{
+GeneGenerate=GeneSimuAt(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, NormFactor=rep(c(.9,1.2),5),OnlyData=T)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ simulation }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GetData.Rd b/EBSeq/man/GetData.Rd
new file mode 100644 (file)
index 0000000..fbd977f
--- /dev/null
@@ -0,0 +1,94 @@
+\name{GetData}
+\alias{GetData}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Read in RSEM output of Gould data
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+GetData(path, Name1, Name2, type)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{path}{
+The path of RSEM outputs
+}
+  \item{Name1}{
+The output names of the files from Condition 1
+}
+  \item{Name2}{
+The output names of the files from Condition 2
+}
+  \item{type}{
+If type="G", read in the gene level output
+If type="I", read in the isoform level output
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+The output is the "nu values" from RSEM.
+To generate an expression matrix, the user needs to run the PoolMatrix function.
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+function(path,Name1,Name2,type)
+{
+Data=vector("list",8)
+Filenames=NULL
+Tablenames=NULL
+for (name in 1:4)
+       {
+               if (type=="I")
+                       Filenames=c(Filenames,paste(path,Name1,name,"_isoform_nus.tab",sep=""))  
+               if (type=="G")  
+                       Filenames=c(Filenames,paste(path,Name1,name,"_gene_nus.tab",sep=""))  
+               Tablenames=c(Tablenames,paste(Name1,name,sep=""))
+       }
+for (name in 1:4)
+       {
+               if (type=="I")
+                       Filenames=c(Filenames,paste(path,Name2,name,"_isoform_nus.tab",sep=""))
+               if (type=="G")
+                       Filenames=c(Filenames,paste(path,Name2,name,"_gene_nus.tab",sep=""))
+               Tablenames=c(Tablenames,paste(Name2,name,sep=""))
+       }
+
+
+names(Data)=Tablenames
+for (file in 1:8)
+       {
+               temp=read.table(Filenames[file],header=T)
+               temp2=as.matrix(temp[-1])
+               rownames(temp2)=as.vector(as.matrix(temp[1]))
+               Data[[file]]=temp2
+       }
+       Data
+  }
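+
+## A hypothetical call (the path and file-name prefixes below are illustrative, not from the package):
+## with path="./rsem_out/", Name1="cond1_rep", Name2="cond2_rep" and type="G", the function reads
+## cond1_rep1_gene_nus.tab ... cond2_rep4_gene_nus.tab.
+# DataList=GetData(path="./rsem_out/", Name1="cond1_rep", Name2="cond2_rep", type="G")
+# PooledOut=PoolMatrix(DataList, reads=TotalReads, type="S")   # TotalReads: user-supplied lane totals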
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GetMultiPP.Rd b/EBSeq/man/GetMultiPP.Rd
new file mode 100644 (file)
index 0000000..843b362
--- /dev/null
@@ -0,0 +1,47 @@
+\name{GetMultiPP}
+\alias{GetMultiPP}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Generate the Posterior Probability of each transcript.
+}
+\description{
+Generate the posterior probability of each possible pattern for each transcript, based on the EBMultiTest output.
+}
+\usage{
+GetMultiPP(EBout)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{EBout}{
+The output of EBMultiTest function.
+}
+
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+\item{PP}{The posterior probabilities of being in each pattern.}
+\item{MAP}{The most likely pattern each gene belongs to.}
+\item{Patterns}{The patterns under consideration.}
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
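+## A hedged sketch, assuming MultiOut is an existing EBMultiTest result (see EBMultiTest):
+# MultiPP=GetMultiPP(MultiOut)
+# MultiPP$PP[1:5,]    # posterior probability of each pattern
+# MultiPP$MAP[1:5]    # most likely pattern of each gene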
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ Posterior Probability }
diff --git a/EBSeq/man/GetNg.Rd b/EBSeq/man/GetNg.Rd
new file mode 100644 (file)
index 0000000..65207a4
--- /dev/null
@@ -0,0 +1,66 @@
+\name{GetNg}
+\alias{GetNg}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Generate the Ng vector
+}
+\description{
+Generate the Ng vector for the isoforms
+}
+\usage{
+GetNg(IsoformName, GeneName)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{IsoformName}{
+A vector containing the isoform names.
+}
+  \item{GeneName}{
+The gene names of the isoforms in IsoformName. (Should be in the same order.)
+  }
+
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+  \item{GeneNg}{
+  The number of isoforms each gene contains
+  }
+  \item{GeneNgTrun}{
+  The truncated Ng of each gene. (Genes containing more than 3 isoforms are assigned Ng = 3.)
+  }
+   \item{IsoformNg}{
+  The Ng of each isoform 
+  }
+    \item{IsoformNgTrun}{
+   The truncated Ng of each isoform. 
+  }
+
+
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+}
+\examples{
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
+
+IsoMat=do.call(rbind,IsoGenerate$data)
+IsoNames=rownames(IsoMat)
+
+# IsosGeneNames: the gene name of each isoform in IsoNames (user-provided, in the same order)
+Ngvector=GetNg(IsoNames, IsosGeneNames)
+IsoNgTrun=Ngvector$IsoformNgTrun
+
+IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ Ng }
diff --git a/EBSeq/man/GetPP.Rd b/EBSeq/man/GetPP.Rd
new file mode 100644 (file)
index 0000000..602080c
--- /dev/null
@@ -0,0 +1,45 @@
+\name{GetPP}
+\alias{GetPP}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Generate the Posterior Probability of each transcript.
+}
+\description{
+Generate the posterior probability of being DE for each transcript, based on the EBTest output.
+}
+\usage{
+GetPP(EBout)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{EBout}{
+The output of EBTest function.
+}
+
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+The posterior probabilities of being DE.
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
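+## A hedged sketch, assuming EBres is an existing EBTest result (see EBTest):
+# PP=GetPP(EBres)
+# head(sort(PP, decreasing=TRUE))   # transcripts with the highest posterior probability of DE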
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ Posterior Probability }
diff --git a/EBSeq/man/GetPatterns.Rd b/EBSeq/man/GetPatterns.Rd
new file mode 100644 (file)
index 0000000..8b08737
--- /dev/null
@@ -0,0 +1,46 @@
+\name{GetPatterns}
+\alias{GetPatterns}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Generate all possible patterns in a multiple condition study
+}
+\description{
+Generate all possible patterns in a multiple condition study.
+}
+\usage{
+GetPatterns(Conditions)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{Conditions}{
+The names of the Conditions in the study
+}
+
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+A matrix describing all possible patterns.
+
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
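+## A hedged sketch for a three-condition study:
+# Conditions=c("C1","C1","C2","C2","C3","C3")
+# PosParti=GetPatterns(Conditions)
+# PosParti   # each row is one possible pattern over the three conditions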
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ }
diff --git a/EBSeq/man/IsoEBresultGouldBart2.Rd b/EBSeq/man/IsoEBresultGouldBart2.Rd
new file mode 100644 (file)
index 0000000..1cdb1a8
--- /dev/null
@@ -0,0 +1,275 @@
+\name{IsoEBresultGouldBart2}
+\alias{IsoEBresultGouldBart2}
+\docType{data}
+\title{
+The EBSeq result of the empirical isoform data (Gould Lab data, bart2)
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of the dataset. ~~
+}
+\usage{data(IsoEBresultGouldBart2)}
+\format{
+  The format is:
+List of 17
+ $ Alpha   : num [1:5, 1] 0.49 0.674 0.735 0.739 0.739
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "AlphaIn" "AlphaIn" "AlphaIn" "AlphaIn" ...
+  .. ..$ : NULL
+ $ Beta    : num [1:5, 1:9] 1.03 1.3 1.4 1.41 1.41 ...
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "BetaIn" "BetaIn" "BetaIn" "BetaIn" ...
+  .. ..$ : NULL
+ $ P       : num [1:5, 1] 0.1751 0.0955 0.073 0.066 0.0642
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "PIn" "PIn" "PIn" "PIn" ...
+  .. ..$ : NULL
+ $ PFromZ  : num [1:5, 1] 0.1878 0.0937 0.0736 0.0662 0.0634
+  ..- attr(*, "dimnames")=List of 2
+  .. ..$ : chr [1:5] "PFromZ" "PFromZ" "PFromZ" "PFromZ" ...
+  .. ..$ : NULL
+ $ Z       : Named num [1:19249] 0.00494 0.00349 0.00219 0.72998 0.00593 ...
+  ..- attr(*, "names")= chr [1:19249] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000056429" "ENSRNOT00000037482" ...
+ $ PoissonZ: Named num [1:6019] 0.001395 0.00111 0.00078 0.000551 0.00111 ...
+  ..- attr(*, "names")= chr [1:6019] "ENSRNOT00000029207" "ENSRNOT00000059839" "ENSRNOT00000056154" "ENSRNOT00000059835" ...
+ $ RList   :List of 9
+  ..$ : Named num [1:15315] 19.03 62.06 -3.08 313.15 207.39 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 3.369 46.691 0.194 6.79 0.541 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 94.298 -733.445 -0.391 1.102 -3.223 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 3 7.46 6.32 -2.5 119.32 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 11.168 0.167 0.296 0.882 20.272 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 2.456 7.899 25.052 0.177 -0.579 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 5.64 45.35 -16.06 -31.73 1.76 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 260.79 1.632 0.719 2.843 0.553 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 7.43 1.85 2.14 60.4 20.51 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ MeanList:List of 9
+  ..$ : Named num [1:15315] 288.018 300.77 0.396 97.251 105.428 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 1.616 3442.78 5.275 30.388 0.253 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 988.128 128.831 0.105 0.759 0.502 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 9.19 706.27 205.21 1.52 3715.53 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 67.12 3.05 1.13 3.09 14.03 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 36.175 52.99 2224.885 0.732 0.253 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 6.71 187.77 2.14 3.97 63.38 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 217.38 3.72 31.38 93.58 7.63 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 28.6 32.2 39.2 1275.1 750.5 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ VarList :List of 9
+  ..$ : Named num [1:15315] 5729.745 1929.857 0.593 148.349 505.122 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 4.69 2.81e+05 2.23e+02 2.21e+02 5.13e-01 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 1.16e+04 1.22e+02 8.75e-02 1.98 5.80e-01 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 4.99e+01 7.80e+04 1.80e+04 7.05e-01 1.91e+05 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 580.6 74.5 7.8 18.5 26.7 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 6.10e+02 6.30e+02 2.69e+05 4.29 2.20e-01 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 15.48 1816.69 3.33 5.48 2683.89 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 805.4 27.7 2024.4 4101.5 139 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 761 1398 854 34973 31500 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ QList1  :List of 9
+  ..$ : Named num [1:15315] 0.191 0.153 NaN 0.477 1.171 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 0.648 0.0134 NaN 0.6744 NaN ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 0.0443 0.9225 1.3649 0.592 1.2961 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 0.13755 0.00968 0.0695 1.6441 0.05331 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 0.0837 0.0518 1.2634 0.1805 0.5577 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 0.0655 0.1031 0.0109 0.195 1.776 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 0.3176 0.1285 1.6778 2.0836 0.0221 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 19.8858 0.3047 0.563 0.1257 0.0614 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 0.0552 0.0491 0.0388 0.0374 0.0282 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ QList2  :List of 9
+  ..$ : Named num [1:15315] 0.0388 0.1935 1.1475 1.7041 0.4143 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 1.3629 0.0134 0.0354 0.1129 0.6811 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 3.02 1.78 NaN NaN 1.15 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 0.4159 0.0116 0.0245 8.6195 0.0227 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 1.264 NaN 0.187 0.246 0.632 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 0.0613 0.1542 0.0115 NaN NaN ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 0.7444 0.3209 0.6206 0.9042 0.0366 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 0.2969 NaN 0.0178 0.0187 0.1135 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 0.2911 0.3678 0.0834 0.0558 0.0252 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ C1Mean  :List of 9
+  ..$ : Named num [1:15315] 270.3 299 0 93.3 122.4 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 2.98 3490.34 0 27.89 0 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 991.231 129.543 0.209 1.518 0.244 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 6.25 775.11 114.76 1.7 3505.16 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 74.812 6.103 0.262 1.834 14.543 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 40.039 40.329 2394.87 1.464 0.506 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 6.29 162.86 3.14 2.93 68.98 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 201.06 7.45 13.4 80.08 12.23 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 5.5 57.4 41.7 1213.3 749.1 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ C2Mean  :List of 9
+  ..$ : Named num [1:15315] 305.699 302.587 0.792 101.229 88.447 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 0.253 3395.219 10.551 32.889 0.507 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 985.025 128.12 0 0 0.759 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 12.13 637.43 295.65 1.34 3925.9 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 59.42 0 2 4.35 13.51 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 32.3 65.6 2054.9 0 0 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 7.13 212.67 1.14 5.01 57.77 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 233.7 0 49.37 107.08 3.04 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 51.68 7.12 36.79 1336.85 751.85 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ C1EstVar:List of 9
+  ..$ : Named num [1:15315] 1413 1953 0 195 105 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 4.6 261211.7 0 41.4 0 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 2.24e+04 1.40e+02 1.53e-01 2.56 1.88e-01 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 45.4 80103.39 1651.21 1.04 65751.73 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 893.893 117.714 0.207 10.162 26.076 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 6.11e+02 3.91e+02 2.20e+05 7.51 2.85e-01 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 19.81 1267.78 1.87 1.41 3123.1 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 10.1 24.5 23.8 637 199 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 99.7 1167.7 1072.9 32440 26599 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ C2EstVar:List of 9
+  ..$ : Named num [1:15315] 7882.46 1563.99 0.69 59.4 213.5 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 1.86e-01 2.53e+05 2.98e+02 2.91e+02 7.44e-01 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 325.872 71.975 0 0 0.659 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 2.92e+01 5.51e+04 1.21e+04 1.55e-01 1.73e+05 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 47 0 10.7 17.7 21.4 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 527 426 179461 0 0 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 9.58 662.66 1.84 5.54 1579.26 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 787 0 2780 5712.2 26.8 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 177.6 19.4 441.2 23947.8 29826.6 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ PoolVar :List of 9
+  ..$ : Named num [1:15315] 4647.958 1758.495 0.345 127.452 159.023 ...
+  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
+  ..$ : Named num [1:1103] 2.39 2.57e+05 1.49e+02 1.66e+02 3.72e-01 ...
+  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
+  ..$ : Named num [1:404] 1.13e+04 1.06e+02 7.66e-02 1.28 4.23e-01 ...
+  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
+  ..$ : Named num [1:999] 3.73e+01 6.76e+04 6.87e+03 5.96e-01 1.19e+05 ...
+  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
+  ..$ : Named num [1:592] 470.45 58.86 5.44 13.94 23.73 ...
+  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
+  ..$ : Named num [1:863] 5.69e+02 4.08e+02 2.00e+05 3.75 1.42e-01 ...
+  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
+  ..$ : Named num [1:490] 14.69 965.22 1.86 3.48 2351.18 ...
+  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
+  ..$ : Named num [1:3943] 398.6 12.2 1401.9 3174.6 112.9 ...
+  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
+  ..$ : Named num [1:1559] 139 594 757 28194 28213 ...
+  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
+ $ DataList:List of 9
+  ..$ Ng1      : num [1:15315, 1:8] 287 251 0 87 121 181 5 195 70 5 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:15315] "I1" "I2" "I3" "I4" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng2No5No3: num [1:1103, 1:8] 3 3226 0 27 0 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:1103] "I14" "I15" "I16" "I66" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng3No5No3: num [1:404, 1:8] 827 153 0 3 1 0 0 0 2 19 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:404] "I138" "I190" "I191" "I214" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng2With3 : num [1:999, 1:8] 0 945 77 2 3763 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:999] "I35" "I52" "I79" "I91" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng3With3 : num [1:592, 1:8] 25 25 0 0 17 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:592] "I131" "I132" "I222" "I266" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng2With5 : num [1:863, 1:8] 36 48 1912 0 1 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:863] "I111" "I118" "I135" "I193" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng3With5 : num [1:490, 1:8] 3 212 5 2 90 5 256 66 21 23 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:490] "I43" "I213" "I336" "I556" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng2Both  : num [1:3943, 1:8] 209 11 17 101 0 432 631 0 228 878 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:3943] "I13" "I34" "I46" "I47" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+  ..$ Ng3Both  : num [1:1559, 1:8] 0 88 25 1455 506 ...
+  .. ..- attr(*, "dimnames")=List of 2
+  .. .. ..$ : chr [1:1559] "I28" "I29" "I30" "I41" ...
+  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
+}
+\details{
+%%  ~~ If necessary, more details than the __description__ above ~~
+}
+\source{
+%%  ~~ reference to a publication or URL from which the data were obtained ~~
+}
+\references{
+%%  ~~ possibly secondary sources and usages ~~
+}
+\examples{
+data(IsoEBresultGouldBart2)
+## maybe str(IsoEBresultGouldBart2) ; plot(IsoEBresultGouldBart2) ...
+}
+\keyword{datasets}
diff --git a/EBSeq/man/IsoSimu.Rd b/EBSeq/man/IsoSimu.Rd
new file mode 100644 (file)
index 0000000..c8f193d
--- /dev/null
@@ -0,0 +1,101 @@
+\name{IsoSimu}
+\alias{IsoSimu}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Isoform level simulation
+}
+\description{
+Simulate isoform level expression data from a Negative Binomial assumption. (Without outliers)
+}
+\usage{
+IsoSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofIso = NULL, DEIsoProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, NormFactor = NULL, OnlyData = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{DVDconstant}{
+Whether to use a constant fold change value for all the DE genes.
+}
+  \item{DVDqt1, DVDqt2}{
+If DVDconstant is not specified, the user can instead use a range of empirical DVDs
+from Gould's data. The lower and upper bounds (quantiles) can be specified.
+The suggested values are c(.96, .97). The DVD for each gene will be randomly chosen within the range.
+}
+  \item{Conditions}{
+A vector of characters indicating each sample's condition.
+(Only the two-condition case is supported now.)
+}
+  \item{NumofSample}{
+The number of samples the user wants to generate.
+}
+  \item{NumofIso}{
+Input should be a vector of length 3. All values should be non-negative.
+The ith value gives how many isoforms to generate for isoform group i.
+}
+  \item{DEIsoProp}{
+The proportion of isoforms to be generated as DE. The value should be in [0, 1].
+}
+  \item{Phiconstant}{
+Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
+Input should be a vector of length 3. The ith value gives the overdispersion parameter of isoform group i.
+}
+  \item{Phi.qt1, Phi.qt2}{
+If Phiconstant is not specified, the user can instead use a range of empirical phis from each group of Gould's data. The lower and upper bounds (quantiles) can be specified.
+The suggested values are c(.25, .75). The phi for each gene will be randomly chosen within the range.
+
+}
+  \item{NormFactor}{
+Whether to set the mean of each isoform to a constant.
+}
+  \item{OnlyData}{
+Whether the user only wants the generated data matrix. If OnlyData = T, the function will return the simulated matrix
+and the names of the DE genes.
+Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
+Currently only OnlyData=T is supported.
+}
+}
+\details{
+For each isoform, we assume that the expression follows a Negative Binomial distribution with mean mu_gi and variance mu_gi * (1 + mu_gi * phi_gi).
+For DE isoforms, we assume that in one condition the mean is mu_gi * DVD.
+mu, phi and DVD can be specified via the parameter settings.
+
+}
+\value{
+\item{data}{
+A list of expression values.
+Each element of the list represents one group of isoforms:
+Group1: Ng1
+Group2: Ng2
+Group3: Ng3
+The rows refer to the isoforms and the columns to the samples.
+The isoforms are named "I_GroupNumber_IsoformNumber". The first isoforms of each group are the DE ones. (Their number depends on the DEIsoProp parameter.)
+}
+\item{TrueDE}{The names of the isoforms that are defined to be DE.}
+
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+GeneSimu
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/IsoSimuAt.Rd b/EBSeq/man/IsoSimuAt.Rd
new file mode 100644 (file)
index 0000000..60fb12e
--- /dev/null
@@ -0,0 +1,107 @@
+\name{IsoSimuAt}
+\alias{IsoSimuAt}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Isoform level simulation with outliers
+}
+\description{
+Simulate isoform level expression data from a Negative Binomial assumption. (With outliers)
+}
+\usage{
+IsoSimuAt(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofIso = NULL, DEIsoProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, NormFactor = NULL, OnlyData = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{DVDconstant}{
+Whether to use a constant fold change value for all the DE genes.
+}
+  \item{DVDqt1, DVDqt2}{
+If DVDconstant is not specified, the user can instead use a range of empirical DVDs
+from Gould's data. The lower and upper bounds (quantiles) can be specified.
+The suggested values are c(.96, .97). The DVD for each gene will be randomly chosen within the range.
+}
+  \item{Conditions}{
+A vector of characters indicating each sample's condition.
+(Only the two-condition case is supported now.)
+}
+  \item{NumofSample}{
+The number of samples the user wants to generate.
+}
+  \item{NumofIso}{
+Input should be a vector of length 3. All values should be non-negative.
+The ith value gives how many isoforms to generate for isoform group i.
+}
+  \item{DEIsoProp}{
+The proportion of isoforms to be generated as DE. The value should be in [0, 1].
+In addition, the same proportion of isoforms will be generated as EE isoforms with outliers.
+These isoforms are generated as EE at first; then the count of one randomly selected sample
+is set to its original count multiplied by one of (4, 6, 8, 10).
+
+}
+  \item{Phiconstant}{
+Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
+Input should be a vector of length 3. The ith value gives the overdispersion parameter of isoform group i.
+}
+  \item{Phi.qt1, Phi.qt2}{
+If Phiconstant is not specified, the user can instead use a range of empirical phis from each group of Gould's data. The lower and upper bounds (quantiles) can be specified.
+The suggested values are c(.25, .75). The phi for each gene will be randomly chosen within the range.
+
+}
+  \item{NormFactor}{
+Whether to set the mean of each isoform to a constant.
+}
+  \item{OnlyData}{
+Whether the user only wants the generated data matrix. If OnlyData = T, the function will return the simulated matrix
+and the names of the DE genes.
+Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
+Currently only OnlyData=T is supported.
+}
+}
+\details{
+For each isoform, we assume that the expression follows a Negative Binomial distribution with mean mu_gi and variance mu_gi * (1 + mu_gi * phi_gi).
+For DE isoforms, we assume that in one condition the mean is mu_gi * DVD.
+mu, phi and DVD can be specified via the parameter settings.
+
+}
+\value{
+\item{data}{
+A list of expression values.
+Each element of the list represents one group of isoforms:
+Group1: Ng1
+Group2: Ng2
+Group3: Ng3
+The rows refer to the isoforms and the columns to the samples.
+The isoforms are named "I_GroupNumber_IsoformNumber". The first isoforms of each group are the DE ones. (Their number depends on the DEIsoProp parameter.)
+}
+\item{TrueDE}{The names of the isoforms that are defined to be DE.}
+
+\item{Outliers}{The names of the isoforms that are defined to be outliers at each multiplier level (4, 6, 8, 10).}
+}
+
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+GeneSimu, IsoSimu
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+IsoGenerate=IsoSimuAt(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/Likefun.Rd b/EBSeq/man/Likefun.Rd
new file mode 100644 (file)
index 0000000..dea5882
--- /dev/null
@@ -0,0 +1,50 @@
+\name{Likefun}
+\alias{Likefun}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Likelihood Function of the NB-Beta Model
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+Likefun(ParamPool, InputPool)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{ParamPool}{
+%%     ~~Describe \code{ParamPool} here~~
+}
+  \item{InputPool}{
+%%     ~~Describe \code{InputPool} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
diff --git a/EBSeq/man/LikefunMulti.Rd b/EBSeq/man/LikefunMulti.Rd
new file mode 100644 (file)
index 0000000..713311f
--- /dev/null
@@ -0,0 +1,50 @@
+\name{LikefunMulti}
+\alias{LikefunMulti}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Likelihood Function of the NB-Beta Model
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+LikefunMulti(ParamPool, InputPool)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{ParamPool}{
+%%     ~~Describe \code{ParamPool} here~~
+}
+  \item{InputPool}{
+%%     ~~Describe \code{InputPool} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
diff --git a/EBSeq/man/LogN.Rd b/EBSeq/man/LogN.Rd
new file mode 100644 (file)
index 0000000..3b26607
--- /dev/null
@@ -0,0 +1,73 @@
+\name{LogN}
+\alias{LogN}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+The function to run EM (one round) using optim.
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+LogN(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{Input}{
+%%     ~~Describe \code{Input} here~~
+}
+  \item{InputSP}{
+%%     ~~Describe \code{InputSP} here~~
+}
+  \item{EmpiricalR}{
+%%     ~~Describe \code{EmpiricalR} here~~
+}
+  \item{EmpiricalRSP}{
+%%     ~~Describe \code{EmpiricalRSP} here~~
+}
+  \item{NumOfEachGroup}{
+%%     ~~Describe \code{NumOfEachGroup} here~~
+}
+  \item{AlphaIn}{
+%%     ~~Describe \code{AlphaIn} here~~
+}
+  \item{BetaIn}{
+%%     ~~Describe \code{BetaIn} here~~
+}
+  \item{PIn}{
+%%     ~~Describe \code{PIn} here~~
+}
+  \item{NoneZeroLength}{
+%%     ~~Describe \code{NoneZeroLength} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/LogNMulti.Rd b/EBSeq/man/LogNMulti.Rd
new file mode 100644 (file)
index 0000000..627348c
--- /dev/null
@@ -0,0 +1,73 @@
+\name{LogNMulti}
+\alias{LogNMulti}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+The function to run EM (one round) using optim.
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+LogNMulti(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength,AllParti, Conditions)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{Input}{
+%%     ~~Describe \code{Input} here~~
+}
+  \item{InputSP}{
+%%     ~~Describe \code{InputSP} here~~
+}
+  \item{EmpiricalR}{
+%%     ~~Describe \code{EmpiricalR} here~~
+}
+  \item{EmpiricalRSP}{
+%%     ~~Describe \code{EmpiricalRSP} here~~
+}
+  \item{NumOfEachGroup}{
+%%     ~~Describe \code{NumOfEachGroup} here~~
+}
+  \item{AlphaIn}{
+%%     ~~Describe \code{AlphaIn} here~~
+}
+  \item{BetaIn}{
+%%     ~~Describe \code{BetaIn} here~~
+}
+  \item{PIn}{
+%%     ~~Describe \code{PIn} here~~
+}
+  \item{NoneZeroLength}{
+%%     ~~Describe \code{NoneZeroLength} here~~
+}
+  \item{AllParti}{
+%%     ~~Describe \code{AllParti} here~~
+}
+  \item{Conditions}{
+%%     ~~Describe \code{Conditions} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/MedianNorm.Rd b/EBSeq/man/MedianNorm.Rd
new file mode 100644 (file)
index 0000000..ef8d718
--- /dev/null
@@ -0,0 +1,53 @@
+\name{MedianNorm}
+\alias{MedianNorm}
+\title{
+Median Normalization
+}
+\description{
+The median normalization method from Anders et al. (2010).
+}
+\usage{
+MedianNorm(Data)
+}
+\arguments{
+
+  \item{Data}{
+The data matrix with transcripts in rows and lanes in columns.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+The function returns a vector containing the normalization factor for each lane.
+% ...
+}
+\references{
+Simon Anders and Wolfgang Huber: Differential expression analysis for sequence count data
+Genome Biology (2010) 11:R106 (open access)
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+GeneData=GeneGenerate$data
+
+Sizes=MedianNorm(GeneData)
+# Run EBSeq
+EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
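+
+# For intuition only, a by-hand sketch of the median-of-ratios idea from Anders and Huber (2010);
+# this is not necessarily the exact implementation used inside MedianNorm:
+# GeoMeans=exp(rowMeans(log(GeneData)))
+# Use=GeoMeans>0
+# SizesByHand=apply(GeneData[Use,]/GeoMeans[Use], 2, median)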
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/MergeGene.Rd b/EBSeq/man/MergeGene.Rd
new file mode 100644 (file)
index 0000000..77044c3
--- /dev/null
@@ -0,0 +1,61 @@
+\name{MergeGene}
+\alias{MergeGene}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Plots of gene simulation result
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+MergeGene(GeneSIMout, Num, Path = "./")
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{GeneSIMout}{
+The output of GeneSimu with OnlyData="F".
+}
+  \item{Num}{
+How many times the simulation ran.
+}
+  \item{Path}{
+The path to store the plots
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+Three plots will be generated:
+1. FPR vs TPR of each method
+2. FDR vs TPR of each method
+3. Top counts vs FDR of each method
+
+A table will also be generated which contains the FDR and TPR of each method
+(using p-value = .05 or posterior probability = .95).
+
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+MergeIso
+}
+\examples{
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="F")
+
+GeneTable=MergeGene(GeneGenerate,1,"./")
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/MergeIso.Rd b/EBSeq/man/MergeIso.Rd
new file mode 100644 (file)
index 0000000..4ace949
--- /dev/null
@@ -0,0 +1,63 @@
+\name{MergeIso}
+\alias{MergeIso}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Plots of isoform simulation result
+}
+\description{
+}
+\usage{
+MergeIso(IsoSIMout, Num, Path = "./")
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{IsoSIMout}{
+The output of IsoSimu with OnlyData="F".
+}
+  \item{Num}{
+How many times the simulation ran.
+
+}
+  \item{Path}{
+         The path to store the plots.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+Three plots will be generated:
+1. FPR vs TPR of each method
+2. FDR vs TPR of each method
+3. Top counts vs FDR of each method
+
+A table will also be generated which contains the FDR and TPR of each method.
+Each method will be run on all the data and within each group
+(using p-value = .05 or posterior probability = .95).
+
+
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+MergeGene
+}
+\examples{
+IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="F" )
+
+IsoTable=MergeIso(IsoGenerate,1,"./")
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PlotFDTP.Rd b/EBSeq/man/PlotFDTP.Rd
new file mode 100644 (file)
index 0000000..e3fff39
--- /dev/null
@@ -0,0 +1,71 @@
+\name{PlotFDTP}
+\alias{PlotFDTP}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Plot the FDR vs TPR for each method in simulation data
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+PlotFDTP(TopNum, FDR, TPR, names)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{TopNum}{
+%%     ~~Describe \code{TopNum} here~~
+}
+  \item{FDR}{
+%%     ~~Describe \code{FDR} here~~
+}
+  \item{TPR}{
+%%     ~~Describe \code{TPR} here~~
+}
+  \item{names}{
+%%     ~~Describe \code{names} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+function(TopNum, FDR, TPR,names)
+{
+  
+  matplot(FDR, TPR, xlim=c(0,.5), ylim=c(0,1) ,type="l",lwd=2,xlab="FDR", ylab="TPR")
+    legend("bottomright",col=1:TopNum, lwd=2, lty=1:TopNum, names)
+
+
+  }
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PlotFPTP.Rd b/EBSeq/man/PlotFPTP.Rd
new file mode 100644 (file)
index 0000000..8e10aba
--- /dev/null
@@ -0,0 +1,71 @@
+\name{PlotFPTP}
+\alias{PlotFPTP}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Plot the FPR vs TPR for each method in simulation data
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+PlotFPTP(TopNum, FPR, TPR, names)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{TopNum}{
+%%     ~~Describe \code{TopNum} here~~
+}
+  \item{FPR}{
+%%     ~~Describe \code{FPR} here~~
+}
+  \item{TPR}{
+%%     ~~Describe \code{TPR} here~~
+}
+  \item{names}{
+%%     ~~Describe \code{names} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+function(TopNum, FPR, TPR,names)
+{
+        
+         matplot(FPR, TPR,xlim=c(0,.1), ylim=c(0,1) ,type="l",lwd=2, xlab="FPR", ylab="TPR")
+             legend("bottomright",col=1:TopNum,lwd=2, lty=1:TopNum, names)
+
+
+  }
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PlotPattern.Rd b/EBSeq/man/PlotPattern.Rd
new file mode 100644 (file)
index 0000000..9b13845
--- /dev/null
@@ -0,0 +1,43 @@
+\name{PlotPattern}
+\alias{PlotPattern}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Visualize the patterns
+}
+\description{
+Visualize the patterns generated by GetPatterns.
+}
+\usage{
+PlotPattern(PosParti)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{PosParti}{
+The output of GetPatterns function.
+}
+
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+A heatmap to visualize the patterns of interest.
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+}
+\examples{
+Conditions=c("C1","C1","C2","C2","C3","C3")
+PosParti=GetPatterns(Conditions)
+PlotPattern(PosParti)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ }
diff --git a/EBSeq/man/PlotTopCts.Rd b/EBSeq/man/PlotTopCts.Rd
new file mode 100644 (file)
index 0000000..4c3d187
--- /dev/null
@@ -0,0 +1,66 @@
+\name{PlotTopCts}
+\alias{PlotTopCts}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Plot the number of false discoveries among the top ranked genes for each method in simulation data
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+PlotTopCts(TopNum, FD, names)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{TopNum}{
+%%     ~~Describe \code{TopNum} here~~
+}
+  \item{FD}{
+%%     ~~Describe \code{FD} here~~
+}
+  \item{names}{
+%%     ~~Describe \code{names} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+function(TopNum, FD, names)
+{
+    matplot(c(1:TopNum) , FD,type="l",xlab="Top DE selected", lwd=2, log="y", ylab="FD")
+    legend("topleft",col=1:TopNum, lwd=2, lty=1:TopNum, names)
+
+  }
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PolyFitPlot.Rd b/EBSeq/man/PolyFitPlot.Rd
new file mode 100644 (file)
index 0000000..491f3ee
--- /dev/null
@@ -0,0 +1,84 @@
+\name{PolyFitPlot}
+\alias{PolyFitPlot}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+%%  ~~function to do ... ~~
+Fit the mean-var relationship using polynomial regression
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+PolyFitPlot(X, Y, nterms, xname = "Estimated Mean", yname = "Estimated Var", pdfname = "", xlim =  c(-1,5), ylim = c(-1,7), ChangeXY = F, col = "red")
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{X}{
+The first group of values to be fitted by the polynomial regression (e.g., mean of the data).
+}
+  \item{Y}{
+The second group of values to be fitted by the polynomial regression (e.g., variance of the data). The length of Y should be the same as the length of X.
+}
+  \item{nterms}{
+How many polynomial terms should be used.
+}
+  \item{xname}{
+Name of the x axis.
+}
+  \item{yname}{
+Name of the y axis.
+}
+  \item{pdfname}{
+Name of the plot.
+}
+  \item{xlim}{
+The x limits of the plot. 
+}
+  \item{ylim}{
+The y limits of the plot.
+
+}
+  \item{ChangeXY}{
+If ChangeXY is set to TRUE, X will be treated as the dependent variable and Y will be treated as the independent one. Default is FALSE.
+}
+  \item{col}{
+Color of the fitted line.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+	The PolyFitPlot function provides a smooth scatter plot of the two variables and the best fitting polynomial regression line.
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+GeneData=GeneGenerate$data
+
+EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi(Data=GeneData,NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), maxround=5)
+
+poly=PolyFitPlot(X=EBres$MeanList[[1]], Y=EBres$PoolVar[[1]], nterms=5, xname = "mean", yname = "var", pdfname=NULL, xlim = c(0,4.5),ylim = c(-2,8), ChangeXY = F, col = "red")
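+
+## For intuition only: a polynomial regression of this kind can be written with lm() and poly();
+## the log10 transformation below is an assumption, not necessarily what PolyFitPlot uses internally.
+# X=EBres$MeanList[[1]]; Y=EBres$PoolVar[[1]]
+# fit=lm(log10(Y+1) ~ poly(log10(X+1), 5, raw=TRUE))
+# summary(fit)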
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PoolMatrix.Rd b/EBSeq/man/PoolMatrix.Rd
new file mode 100644 (file)
index 0000000..b2c46d5
--- /dev/null
@@ -0,0 +1,82 @@
+\name{PoolMatrix}
+\alias{PoolMatrix}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Generate the expression matrix from the output of GetData
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+PoolMatrix(Data, reads, type)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{Data}{
+The output from GetData function.
+}
+  \item{reads}{
+The total number of reads in each lane. Could be obtained from the RSEM outputs. 
+}
+  \item{type}{
+If type="S", the outputs will be the a matrix which transcript names in row and sample names in column.
+If type="G", the first column will be the group information. 
+
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+\item{poolM}{The matrix of nu values}
+\item{poolValue}{The matrix of expression values (nu values scaled by the total reads of each lane and rounded)}
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+GetData
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+function(Data,reads,type)
+{
+poolnames=names(Data)
+poolM=NULL
+for (po in 1:length(Data))
+       poolM=cbind(poolM,Data[[po]][,1])
+rownames(poolM)=rownames(Data[[1]])
+colnames(poolM)=poolnames
+
+#poolValue=poolM*reads
+poolValue=poolM
+for (col in 1:ncol(poolM))
+       poolValue[,col]=poolM[,col]*reads[col]
+poolValue=round(poolValue)
+if (type=="G")
+       {
+               poolM=cbind(Data[[1]][,2],poolM)
+               poolValue=cbind(Data[[1]][,2],poolValue)
+               colnames(poolM)=c("Groups",poolnames)
+               colnames(poolValue)=c("Groups",poolnames)
+       }
+poolOutput=list(poolM=poolM,poolValue=poolValue)
+  }
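+
+## A hedged usage sketch (not run): 'DataList' and 'TotalReads' are
+## hypothetical placeholders for the output of GetData and the per-lane
+## read totals taken from the RSEM output files.
+# PoolOut=PoolMatrix(Data=DataList, reads=TotalReads, type="S")
+# ExpMatrix=PoolOut$poolValue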
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PostFC.Rd b/EBSeq/man/PostFC.Rd
new file mode 100644 (file)
index 0000000..19118aa
--- /dev/null
@@ -0,0 +1,47 @@
+\name{PostFC}
+\alias{PostFC}
+\title{
+Calculate the posterior fold change for each transcript across conditions
+}
+\description{
+Calculate the posterior fold change for each transcript across two conditions, based on the output of EBTest.
+}
+\usage{
+PostFC(EBoutput)
+}
+\arguments{
+
+  \item{EBoutput}{
+The output from the function EBTest. (Currently only at the gene level.)
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
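+## A hedged sketch (not run), reusing the simulated gene-level example
+## from the QuantileNorm/RankNorm documentation; 'EBres' is assumed to be
+## the output object of EBTest.
+# GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+# GeneData=GeneGenerate$data
+# EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=QuantileNorm(GeneData,.75), maxround=5)
+# FC=PostFC(EBres)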
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/QQP.Rd b/EBSeq/man/QQP.Rd
new file mode 100644 (file)
index 0000000..6fdde4e
--- /dev/null
@@ -0,0 +1,81 @@
+\name{QQP}
+\alias{QQP}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+The QQ Plot of empirical q's and simulated q's from fitted beta distribution
+}
+\description{
+
+}
+\usage{
+QQP(QList, AlphaResult, BetaResult, name, AList="F", GroupName)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{QList}{
+The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. The input can be a vector or a list of vectors for different groups of transcripts. The number of list elements should be the same as the length of BetaResult.
+
+}
+  \item{AlphaResult}{
+The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input should be a number if AList is not defined.
+}
+  \item{BetaResult}{
+The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input could be one single number or a vector of several numbers. The length of the input should be the same as the number of lists of QList.
+}
+  \item{name}{
+The name of the plots
+}
+  \item{AList}{
+Whether a list of alpha's is used. If AList="F", a single alpha is shared by all groups.
+}
+  \item{GroupName}{
+The names of each sub-plot. The length of the input should be the same as the number of elements of QList.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+ NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi , DenNHist
+}
+\examples{
+GeneData=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+
+EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), maxround=5)
+
+QQP(QList=EBres$QList1, AlphaResult=EBres[[1]][5,1], BetaResult=EBres[[2]][5,1], name="Gene", AList="F", GroupName=NULL)
+
+## The function is currently defined as
+function(QList,AlphaResult,BetaResult,name,AList="F",GroupName){
+                   for (i in 1:length(BetaResult)){
+                               tmpSize=length(QList[[i]][QList[[i]]<1])
+                       if (AList=="F") rdpts=rbeta(tmpSize,AlphaResult,BetaResult[i])
+                               else rdpts=rbeta(tmpSize,AlphaResult[i],BetaResult[i])
+       qqplot(QList[[i]][QList[[i]]<1], rdpts, xlab="estimated q's", ylab="simulated q's from fitted beta distribution", main=paste(name, GroupName[i], sep=" "), xlim=c(0,1), ylim=c(0,1))
+                       }
+  }
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/QuantileNorm.Rd b/EBSeq/man/QuantileNorm.Rd
new file mode 100644 (file)
index 0000000..5d9ec77
--- /dev/null
@@ -0,0 +1,54 @@
+\name{QuantileNorm}
+\alias{QuantileNorm}
+\title{
+Quantile Normalization
+}
+\description{
+Estimate the normalization factor for each lane using a quantile of its expression values.
+}
+\usage{
+QuantileNorm(Data, Quantile)
+}
+\arguments{
+
+  \item{Data}{
+The data matrix with transcripts in rows and lanes in columns.
+}
+\item{Quantile}{
+The quantile the user wishes to use. Should be a number between 0 and 1.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+Use a quantile point to normalize the data.
+}
+\value{
+The function returns a vector containing the normalization factor for each lane.
+% ...
+}
+\references{}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+GeneData=GeneGenerate$data
+
+Sizes=QuantileNorm(GeneData, .75) # e.g., use the upper quartile
+# Run EBSeq
+EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/RankNorm.Rd b/EBSeq/man/RankNorm.Rd
new file mode 100644 (file)
index 0000000..5fd223e
--- /dev/null
@@ -0,0 +1,50 @@
+\name{RankNorm}
+\alias{RankNorm}
+\title{
+Rank Normalization
+}
+\description{
+}
+\usage{
+RankNorm(Data)
+}
+\arguments{
+
+  \item{Data}{
+The data matrix with transcripts in rows and lanes in columns.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+The function returns a matrix containing the normalization factor for each lane and each transcript.
+% ...
+}
+\references{
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
+GeneData=GeneGenerate$data
+
+Sizes=RankNorm(GeneData)
+# Run EBSeq
+EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/TPFDRplot.Rd b/EBSeq/man/TPFDRplot.Rd
new file mode 100644 (file)
index 0000000..15e1caa
--- /dev/null
@@ -0,0 +1,103 @@
+\name{TPFDRplot}
+\alias{TPFDRplot}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Plot the TPR and FDR of DESeq and EBSeq at a series of controlled FDR levels on simulated data
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+TPFDRplot(DESeqP, EBZ, TrueDE, main, FDR = NULL)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{DESeqP}{
+%%     ~~Describe \code{DESeqP} here~~
+}
+  \item{EBZ}{
+%%     ~~Describe \code{EBZ} here~~
+}
+  \item{TrueDE}{
+%%     ~~Describe \code{TrueDE} here~~
+}
+  \item{main}{
+%%     ~~Describe \code{main} here~~
+}
+  \item{FDR}{
+%%     ~~Describe \code{FDR} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+function(DESeqP, EBZ, TrueDE, main, FDR=NULL){
+       Seq=seq(0.001,0.5,by=0.001)
+       DETPR=rep(0,length(Seq))
+       EBTPR=rep(0,length(Seq))
+       DEFDR=rep(0,length(Seq))
+       EBFDR=rep(0,length(Seq))
+       DETPNum=rep(0,length(Seq))
+    EBTPNum=rep(0,length(Seq))
+    DEFDNum=rep(0,length(Seq))
+    EBFDNum=rep(0,length(Seq))
+       for (i in 1:length(Seq)){
+               DESeqOnes=names(DESeqP)[DESeqP<=Seq[i]]
+               if (length(FDR)==0) EBOnes=names(EBZ)[EBZ>=crit.fun(1-EBZ, Seq[i])]
+               else if (FDR=="H") EBOnes=names(EBZ)[EBZ>=(1-Seq[i])]
+                       else EBOnes=names(EBZ)[EBZ>=FDR[i]]
+
+               DETPNum[i]=sum(DESeqOnes\%in\%TrueDE)
+               EBTPNum[i]=sum(EBOnes\%in\%TrueDE)
+               DEFDNum[i]=sum(!DESeqOnes\%in\%TrueDE)
+               EBFDNum[i]=sum(!EBOnes\%in\%TrueDE)
+               
+               DETPR[i]=DETPNum[i]/length(TrueDE)
+               EBTPR[i]=EBTPNum[i]/length(TrueDE)
+               DEFDR[i]=DEFDNum[i]/length(TrueDE)
+               EBFDR[i]=EBFDNum[i]/length(TrueDE)
+       }
+       plot(Seq,DETPR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "TPR"),xlab="controlled FDR level", ylab="TPR",lwd=2)
+       lines(Seq,EBTPR,col="blue",lwd=2)
+       legend("bottomright",lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
+
+       plot(Seq,DEFDR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "FDR"),xlab="controlled FDR level", ylab="FDR",lwd=2)
+       lines(Seq,EBFDR,col="blue",lwd=2)
+       legend("topleft", lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
+
+
+       output=cbind( DETPR,EBTPR, DEFDR,EBFDR,DETPNum,EBTPNum,DEFDNum,EBFDNum)
+  }
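+
+## A hedged usage sketch (not run): 'DESeqPval' and 'EBPP' are hypothetical
+## named vectors (names = transcript names) holding DESeq p-values and EBSeq
+## posterior probabilities of being DE, and 'TrueDE' holds the names of the
+## simulated DE transcripts.
+# res=TPFDRplot(DESeqP=DESeqPval, EBZ=EBPP, TrueDE=TrueDE, main="Simulation", FDR=NULL)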
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/TopCts.Rd b/EBSeq/man/TopCts.Rd
new file mode 100644 (file)
index 0000000..bf57d8d
--- /dev/null
@@ -0,0 +1,85 @@
+\name{TopCts}
+\alias{TopCts}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Get FDR of Top Counts 
+}
+\description{
+
+}
+\usage{
+TopCts(pvalue, PP = NULL, TrueNames, TopNum)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{pvalue}{
+A matrix containing the p-values (or posterior probabilities) for each transcript and each method.
+Rows are for different transcripts and columns are for different methods.
+}
+  \item{PP}{
+The length of the PP vector should be the same as the number of columns of the pvalue matrix.
+Each value in PP is either 0 or 1.
+If the ith value of PP is 0, the ith method (the ith column of pvalue) provided p-values.
+If the ith value of PP is 1, the ith method (the ith column of pvalue) provided posterior probabilities.
+}
+  \item{TrueNames}{
+The names of the transcripts that are defined to be DE.
+}
+  \item{TopNum}{
+The number of top counts we are interested in.
+For example, if TopNum=1000, the numbers of false discoveries of each method are calculated when picking the top 1, 2, ..., 1000 ranked transcripts.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+A matrix containing the numbers of false discoveries for each method at each top count.
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%%  ~~who you are~~
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+function(pvalue, PP=NULL, TrueNames, TopNum){
+       NumOfMethods=ncol(pvalue)
+       puse=pvalue
+       if(1\%in\%PP)puse[,PP==1]=1-pvalue[,PP==1]
+       #puse.list=data.frame(puse)
+       FD=matrix(rep(0,NumOfMethods*TopNum),ncol=NumOfMethods)
+#      Rank=apply(puse,2,rank)
+#      for(i in 1:TopNum)
+#              FD[i,]=sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]\%in\%TrueNames))      
+#      FD=sapply(1:TopNum, function(i)sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]\%in\%TrueNames)))
+       for (s in 1:NumOfMethods){
+               tmp=puse[,s]
+               names(tmp)=rownames(puse)
+               sorttmp=sort(tmp)
+               for( c in 2:TopNum)
+                       FD[c, s]=FD[(c-1),s]+as.numeric(!names(sorttmp)[c]\%in\%TrueNames)
+       }
+       FD
+       #matplot(TopNum,FD,type="l",ylim=c(0,1),xlab="Top DE selected", ylab="FDR")
+       #legend("rightbottom",col=1:TopNum, lty=1:TopNum, names)
+       }
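+
+## A hedged usage sketch (not run): 'PMat' is a hypothetical matrix with
+## named transcripts in rows and two methods in columns, where the first
+## method reports p-values and the second reports posterior probabilities.
+# FD=TopCts(pvalue=PMat, PP=c(0,1), TrueNames=TrueDENames, TopNum=1000)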
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/beta.mom.Rd b/EBSeq/man/beta.mom.Rd
new file mode 100644 (file)
index 0000000..45c7aa4
--- /dev/null
@@ -0,0 +1,48 @@
+\name{beta.mom}
+\alias{beta.mom}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Fit the beta distribution by method of moments
+}
+\description{
+Fit the beta distribution by method of moments
+}
+\usage{
+beta.mom(qs.in)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{qs.in}{
+A vector containing the numbers that are assumed to follow a beta distribution.
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+  \item{alpha.hat}{The estimate of alpha}
+  \item{beta.hat}{The estimate of beta}
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+DenNHist, DenNHistTable
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+## The function is currently defined as
+tmp=rbeta(5,5,100)
+param=beta.mom(tmp)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ beta }
diff --git a/EBSeq/man/crit_fun.Rd b/EBSeq/man/crit_fun.Rd
new file mode 100644 (file)
index 0000000..99590c9
--- /dev/null
@@ -0,0 +1,48 @@
+\name{crit_fun}
+\alias{crit_fun}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Calculate the adjusted FDR threshold 
+}
+\description{
+Calculate the adjusted FDR threshold using the posterior probabilities at a target FDR
+}
+\usage{
+crit_fun(PPEE, thre)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{PPEE}{
+The posterior probabilities of being EE.
+}
+  \item{thre}{
+The target FDR.
+  }
+
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+The adjusted threshold on the posterior probabilities corresponding to the target FDR.
+}
+\references{
+}
+\author{
+Ning Leng
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
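+## A hedged usage sketch (not run), mirroring how the function is applied
+## in TPFDRplot; 'PPDE' is a hypothetical named vector of posterior
+## probabilities of being DE obtained from EBTest.
+# SoftThre=crit_fun(PPEE=1-PPDE, thre=0.05)
+# DEfound=names(PPDE)[PPDE>=SoftThre]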
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ FDR }
diff --git a/EBSeq/man/f0.Rd b/EBSeq/man/f0.Rd
new file mode 100644 (file)
index 0000000..fb0c231
--- /dev/null
@@ -0,0 +1,69 @@
+\name{f0}
+\alias{f0}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+The Predictive Distribution of being EE
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+}
+\usage{
+f0(Input, AlphaIn, BetaIn, EmpiricalR, NumOfGroups, log)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{Input}{
+%%     ~~Describe \code{Input} here~~
+}
+  \item{AlphaIn}{
+%%     ~~Describe \code{AlphaIn} here~~
+}
+  \item{BetaIn}{
+%%     ~~Describe \code{BetaIn} here~~
+}
+  \item{EmpiricalR}{
+%%     ~~Describe \code{EmpiricalR} here~~
+}
+  \item{NumOfGroups}{
+%%     ~~Describe \code{NumOfGroups} here~~
+}
+  \item{log}{
+%%     ~~Describe \code{log} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+f1
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==>  Define data, use random,
+##--   or do  help(data=index)  for the standard data sets.
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/f1.Rd b/EBSeq/man/f1.Rd
new file mode 100644 (file)
index 0000000..1bf374c
--- /dev/null
@@ -0,0 +1,71 @@
+\name{f1}
+\alias{f1}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+The Predictive Distribution of being DE
+}
+\description{
+\deqn{f_1(X_{gi}) = f_0(X_{gi}^{C1}) f_0(X_{gi}^{C2})}{f1(X_gi)=f0(X_giC1)f0(X_giC2)}
+}
+\usage{
+f1(Input1, Input2, AlphaIn, BetaIn, EmpiricalRSP1, EmpiricalRSP2, NumOfGroup, log)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{Input1}{
+%%     ~~Describe \code{Input1} here~~
+}
+  \item{Input2}{
+%%     ~~Describe \code{Input2} here~~
+}
+  \item{AlphaIn}{
+%%     ~~Describe \code{AlphaIn} here~~
+}
+  \item{BetaIn}{
+%%     ~~Describe \code{BetaIn} here~~
+}
+  \item{EmpiricalRSP1}{
+%%     ~~Describe \code{EmpiricalRSP1} here~~
+}
+  \item{EmpiricalRSP2}{
+%%     ~~Describe \code{EmpiricalRSP2} here~~
+}
+  \item{NumOfGroup}{
+%%     ~~Describe \code{NumOfGroup} here~~
+}
+  \item{log}{
+%%     ~~Describe \code{log} here~~
+}
+}
+\details{
+%%  ~~ If necessary, more details than the description above ~~
+}
+\value{
+%%  ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar, f0.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
+}
+\author{
+Ning Leng
+}
+\note{
+%%  ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+f0
+}
+\examples{
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
index f582b33661a4e858ecbfc2600bf4b5492cd35727..e85d25001d634b91efb2b9c696474865d5c3ee7d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@ Table of Contents
 * [Example](#example)
 * [Simulation](#simulation)
 * [Generate Transcript-to-Gene-Map from Trinity Output](#gen_trinity)
+* [Differential Expression Analysis](#de)
 * [Acknowledgements](#acknowledgements)
 * [License](#license)
 
@@ -169,7 +170,7 @@ For UCSC genome browser, please refer to the [UCSC custom track help page](http:
 
 For integrative genomics viewer, please refer to the [IGV home page](http://www.broadinstitute.org/software/igv/home). Note: Although IGV can generate read depth plot from the BAM file given, it cannot recognize "ZW" tag RSEM puts. Therefore IGV counts each alignment as weight 1 instead of the expected weight for the plot it generates. So we recommend to use the wiggle file generated by RSEM for read depth visualization.
 
-Here are some guidance for visualizing transcript coordinate files:
+Here is some guidance for visualizing transcript coordinate files using IGV:
 
 1) Import the transcript sequences as a genome 
 
@@ -177,7 +178,7 @@ Select File -> Import Genome, then fill in ID, Name and Fasta file. Fasta file s
 
 2) Load visualization files
 
-Select File -> Load from File, then choose one transcript coordinate visualization file generated by RSEM. IGV might require you to convert wiggle file to tdf file. You should use igvtools to perform this task. One way to perform the conversion is to use the following command
+Select File -> Load from File, then choose one transcript coordinate visualization file generated by RSEM. IGV might require you to convert wiggle file to tdf file. You should use igvtools to perform this task. One way to perform the conversion is to use the following command:
 
     igvtools tile reference_name.transcript.wig reference_name.transcript.tdf reference_name.genome   
  
@@ -279,14 +280,85 @@ For Trinity users, RSEM provides a perl script to generate transcript-to-gene-ma
 
 trinity_fasta_file: the fasta file produced by trinity, which contains all transcripts assembled.    
 map_file: transcript-to-gene-map file's name.    
+
+## <a name="de"></a> Differential Expression Analysis
+
+Popular differential expression (DE) analysis tools such as edgeR and
+DESeq do not take variance due to read mapping uncertainty into
+consideration. Because read mapping ambiguity is prevalent among
+isoforms and de novo assembled transcripts, these tools are not ideal
+for DE detection in such conditions. 
+
+**EBSeq**, an empirical Bayesian DE
+analysis tool developed at UW-Madison, can take variance due to read
+mapping ambiguity into consideration by grouping isoforms according to
+the number of isoforms of their parent genes. In addition, it is more
+robust to outliers. RSEM includes the newest version of EBSeq in the
+folder named 'EBSeq'.
+
+For more information about EBSeq (including the paper describing its
+method), please visit the <a
+href="http://www.biostat.wisc.edu/~ningleng/EBSeq_Package">EBSeq
+website</a>. You can also find a local copy of the vignette at
+'EBSeq/inst/doc/EBSeq_Vignette.pdf'.
+
+EBSeq requires the gene-isoform relationship for its isoform DE
+detection. However, for a de novo assembled transcriptome, it is hard
+to obtain an accurate gene-isoform relationship. Instead, RSEM
+provides a script, 'rsem-generate-ngvector', which clusters isoforms
+based on measures directly related to read mapping ambiguity. First,
+it calculates the 'unmappability' of each transcript. The
+'unmappability' of a transcript is the ratio between the number of k
+mers with at least one perfect match to other transcripts and the
+total number of k mers of this transcript, where k is a parameter.
+Then, the Ng vector is generated by applying the K-means algorithm to
+the 'unmappability' values with the number of clusters set to 3. This
+program will make sure the mean 'unmappability' scores of the clusters
+are in ascending order. All transcripts whose lengths are less than k
+are assigned to cluster 3. Run
+
+    rsem-generate-ngvector --help
+
+to get usage information or visit the [rsem-generate-ngvector
+documentation
+page](http://deweylab.biostat.wisc.edu/rsem/rsem-generate-ngvector.html).
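+
+A typical invocation takes the 'reference_name.transcripts.fa' file produced
+by 'rsem-prepare-reference' (the names below are placeholders):
+
+    rsem-generate-ngvector reference_name.transcripts.fa output_name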
+
+If your reference is a de novo assembled transcript set, you should
+run 'rsem-generate-ngvector' first. Then load the resulting
+'output_name.ngvec' into R. For example, you can use:
+
+    NgVec <- scan(file="output_name.ngvec", what=0, sep="\n")
+
+After that, replace 'IsoNgTrun' with 'NgVec' in the second line of
+section 3.2.5 (Page 10) of EBSeq's vignette:
+
+    IsoEBres=EBTest(Data=IsoMat, NgVector=NgVec, ...)
+
+For users' convenience, RSEM also provides a script
+'rsem-form-counts-matrix' to extract input matrix from expression
+results:
+
+    rsem-form-counts-matrix sampleA.[genes/isoforms].results sampleB.[genes/isoforms].results ... > output_name.counts.matrix
+
+The results files are required to be either all gene level results or
+all isoform level results. You can load the matrix into R by
+
+    IsoMat <- read.table(file="output_name.counts.matrix")
+
+before running function 'EBTest'.
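+
+As a hedged end-to-end sketch only (it assumes two conditions with two
+samples each; the condition labels, the quantile normalization, and the
+all-one 5'/3' bias vectors are placeholders following the gene-level
+examples in the EBSeq .Rd files, so adjust them to your own design):
+
+    IsoMat <- as.matrix(read.table(file="output_name.counts.matrix"))
+    NgVec <- scan(file="output_name.ngvec", what=0, sep="\n")
+    IsoSizes <- QuantileNorm(IsoMat, .75)
+    IsoEBres <- EBTest(Data=IsoMat, NgVector=NgVec, Vect5End=rep(1,nrow(IsoMat)), Vect3End=rep(1,nrow(IsoMat)), Conditions=as.factor(rep(c(1,2),each=2)), sizeFactors=IsoSizes, maxround=5)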
+
+Questions related to EBSeq should be sent to <a href="mailto:nleng@wisc.edu">Ning Leng</a>.
  
 ## <a name="acknowledgements"></a> Acknowledgements
 
 RSEM uses the [Boost C++](http://www.boost.org) and
-[samtools](http://samtools.sourceforge.net) libraries.
+[samtools](http://samtools.sourceforge.net) libraries. RSEM includes
+[EBSeq](http://www.biostat.wisc.edu/~ningleng/EBSeq_Package/) for
+differential expression analysis.
 
 We thank earonesty for contributing patches.
 
 ## <a name="license"></a> License
 
-RSEM is licensed under the [GNU General Public License v3](http://www.gnu.org/licenses/gpl-3.0.html).
+RSEM is licensed under the [GNU General Public License
+v3](http://www.gnu.org/licenses/gpl-3.0.html).
index 81c595c5442ea2c896f0ab40c3192b3481300070..ddcfd959f2accbb06da8a57ef93208aff7a1264d 100644 (file)
@@ -1,3 +1,11 @@
+RSEM v1.1.19
+
+- Allowed > 2^31 hits
+- Added some instructions on how to visualize transcript coordinate BAM/WIG files using IGV
+- Included EBSeq for downstream differential expression analysis
+
+--------------------------------------------------------------------------------------------
+
 RSEM v1.1.18
 
 - Added some user-friendly error messages
diff --git a/calcClusteringInfo.cpp b/calcClusteringInfo.cpp
new file mode 100644 (file)
index 0000000..2103f61
--- /dev/null
@@ -0,0 +1,149 @@
+#include<cstdio>
+#include<cctype>
+#include<cstring>
+#include<cstdlib>
+#include<cassert>
+#include<fstream>
+#include<iomanip>
+#include<string>
+#include<vector>
+#include<algorithm>
+using namespace std;
+
+typedef unsigned int INTEGER;
+
+const int STRLEN = 1005;
+
+INTEGER M;
+int k; // k-mer size
+vector<string> names;
+vector<string> seqs;
+vector<INTEGER> effL;
+
+// tid starts from 1
+struct ReadType {
+  INTEGER tid, pos;
+
+  ReadType(INTEGER tid, INTEGER pos) {
+    this->tid = tid;
+    this->pos = pos;
+  }
+
+  bool operator< (const ReadType& o) const {
+    string& a = seqs[tid];
+    string& b = seqs[o.tid];
+    for (int i = 0; i < k; i++) {
+      if (a[pos + i] != b[o.pos + i]) {
+       return a[pos + i] < b[o.pos + i];
+      }
+    }
+    return tid < o.tid;
+  }
+
+  bool seq_equal(const ReadType& o) const {
+    string& a = seqs[tid];
+    string& b = seqs[o.tid];
+    for (int i = 0; i < k; i++) 
+      if (a[pos + i] != b[o.pos + i]) return false;
+    return true;
+  }
+};
+
+vector<ReadType> cands;
+vector<double> clusteringInfo; 
+
+string convert(const string& rawseq) {
+  int size = (int)rawseq.size();
+  string seq = rawseq;
+  for (int i = 0; i < size; i++) {
+    seq[i] = toupper(rawseq[i]);
+    if (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G' && seq[i] != 'T') seq[i] = 'N';
+  }
+  return seq;
+}
+
+void loadRef(char* inpF) {
+  ifstream fin(inpF);
+  string tag, line, rawseq;
+  void *pt;
+
+  assert(fin.is_open());
+
+  names.clear(); names.push_back("");
+  seqs.clear(); seqs.push_back("");
+  
+  pt = getline(fin, line);
+  while (pt != 0 && line[0] == '>') {
+    tag = line.substr(1);
+    rawseq = "";
+    while((pt = getline(fin, line)) && line[0] != '>') {
+      rawseq += line;
+    }
+    if (rawseq.size() <= 0) {
+      printf("Warning: Fasta entry %s has an empty sequence! It is omitted!\n", tag.c_str());
+      continue;
+    }
+    names.push_back(tag);
+    seqs.push_back(convert(rawseq));
+  }
+
+  fin.close();
+
+  M = names.size() - 1;
+
+  printf("The reference is loaded.\n");
+}
+
+int main(int argc, char* argv[]) {
+  if (argc != 4) {
+    printf("Usage: rsem-for-ebseq-calculate-clustering-info k input_reference_fasta_file output_file\n");
+    exit(-1);
+  }
+
+  k = atoi(argv[1]);
+  loadRef(argv[2]);
+
+  cands.clear();
+  effL.assign(M + 1, 0);
+  for (INTEGER i = 1; i <= M; i++) {
+    // guard against unsigned underflow: transcripts shorter than k get effL = 0
+    effL[i] = (seqs[i].length() >= (size_t)k ? seqs[i].length() - k + 1 : 0);
+    for (INTEGER j = 0; j < effL[i]; j++) 
+      cands.push_back(ReadType(i, j));
+  }
+  printf("All possbile %d mers are generated.\n", k);
+
+  sort(cands.begin(), cands.end());
+  printf("All %d mers are sorted.\n", k);
+  size_t p = 0;
+  clusteringInfo.assign(M + 1, 0.0);
+
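+  // Sweep the sorted k-mer list: [p, i) spans one distinct k-mer across all
+  // transcripts, and within it [q, j) spans the copies belonging to a single
+  // transcript; that transcript accrues its copy count whenever the k-mer
+  // also occurs in another transcript (numerator < denominator).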
+  for (size_t i = 1; i <= cands.size(); i++)
+    if (i == cands.size() || !cands[p].seq_equal(cands[i])) {
+      size_t denominator = i - p;
+      size_t q = p; 
+      for (size_t j = p + 1; j <= i; j++)
+       if (j == i || cands[q].tid != cands[j].tid) {
+         size_t numerator = j - q;
+         //double prob = numerator * 1.0 / denominator;
+         //clusteringInfo[cands[q].tid] += (double)numerator * prob * (1.0 - prob);
+         if (numerator < denominator) clusteringInfo[cands[q].tid] += numerator;
+         q = j;
+       }
+      p = i;
+    }
+
+  for (INTEGER i = 1; i <= M; i++) 
+    if (effL[i] == 0) clusteringInfo[i] = -1.0;
+    else clusteringInfo[i] /= effL[i];
+
+  printf("Clustering information is calculated.\n");
+
+
+  ofstream fout(argv[3]);
+  for (INTEGER i = 1; i <= M; i++) fout<<names[i]<<"\t"<<setprecision(6)<<clusteringInfo[i]<<endl;
+  fout.close();
+
+  return 0;
+}
index e2711b532be12b82ff45a01d845597c0ed8d0ff8..c9f37a8279701589aae3b593874dab5e399bc256 100644 (file)
--- a/makefile
+++ b/makefile
@@ -1,7 +1,7 @@
 CC = g++
 CFLAGS = -Wall -c -I.
 COFLAGS = -Wall -O3 -ffast-math -c -I.
-PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth rsem-sam-validator rsem-scan-for-paired-end-reads
+PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth rsem-sam-validator rsem-scan-for-paired-end-reads rsem-for-ebseq-calculate-clustering-info
 
 
 all : $(PROGRAMS)
@@ -136,6 +136,9 @@ rsem-sam-validator : sam/bam.h sam/sam.h my_assert.h samValidator.cpp sam/libbam
 rsem-scan-for-paired-end-reads : sam/bam.h sam/sam.h my_assert.h scanForPairedEndReads.cpp sam/libbam.a
        $(CC) -O3 -Wall scanForPairedEndReads.cpp sam/libbam.a -lz -o $@
 
+rsem-for-ebseq-calculate-clustering-info : calcClusteringInfo.cpp
+       $(CC) -O3 -Wall calcClusteringInfo.cpp -o $@
+
 clean:
        rm -f *.o *~ $(PROGRAMS)
        cd sam ; ${MAKE} clean
index 2a09c1570ce7cd45a560b7587568ff106766fd75..074d895ecf173cad4d5b0866c42b463655365747 100755 (executable)
@@ -265,7 +265,7 @@ if ($genBamF) {
     else { $command .= " 0"; }
     if ($sampling) { $command .= " --sampling"; }
 }
-if ($calcCI) { $command .= " --gibbs-out"; }
+if ($calcCI || $var_opt) { $command .= " --gibbs-out"; }
 if ($quiet) { $command .= " -q"; }
 
 &runCommand($command);
diff --git a/rsem-for-ebseq-generate-ngvector-from-clustering-info b/rsem-for-ebseq-generate-ngvector-from-clustering-info
new file mode 100755 (executable)
index 0000000..312dbfa
--- /dev/null
@@ -0,0 +1,18 @@
+#!/usr/bin/env Rscript
+
+argv <- commandArgs(TRUE)
+if (length(argv) != 2) {
+  cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
+  q(status = 1)
+}
+
+data <- read.table(file = argv[1], stringsAsFactors = F)
+idx <- data[,2] >= 0
+kmr <- kmeans(data[idx, 2], 3)
+# Rank the cluster centers so that cluster labels are in ascending order of
+# mean 'unmappability' (order() would give the inverse permutation, not the ranks).
+centerRank <- rank(kmr$centers)
+
+ngvec <- rep(0, length(idx))
+ngvec[idx] <- centerRank[kmr$cluster]
+ngvec[!idx] <- 3
+
+write.table(ngvec, file = argv[2], row.names = F, col.names = F)
diff --git a/rsem-form-counts-matrix b/rsem-form-counts-matrix
new file mode 100755 (executable)
index 0000000..b5324a5
--- /dev/null
@@ -0,0 +1,40 @@
+#!/usr/bin/perl
+
+use strict;
+
+if (scalar(@ARGV) == 0) {
+    print "Usage: rsem-form-counts-matrix sampleA.[genes/isoforms].results sampleB.[genes/isoforms].results ... > output_name.counts.matrix\n";
+    print "Results files should be either all .genes.results or all .isoforms.results.\n";
+    exit(-1);
+}
+
+my $line;
+my $n = scalar(@ARGV);
+my $M = -1;
+my @matrix = ();
+
+for (my $i = 0; $i < $n; $i++) {
+    my @sample = ();
+    open(INPUT, $ARGV[$i]);
+    while ($line = <INPUT>) {
+       chomp($line); 
+       my @fields = split(/\t/, $line);
+       push(@sample, $fields[1]);
+    }
+    close(INPUT);
+    if (scalar(@sample) == 0) {
+       print STDERR "No transcript is detected! Please check if $ARGV[$i] exists.\n";
+       exit(-1);
+    }
+    if ($M < 0) { $M = scalar(@sample); }
+    elsif ($M != scalar(@sample)) { 
+       print STDERR "The numbers of transcripts in the samples are not equal!\n"; 
+       exit(-1); 
+    }
+    push(@matrix, \@sample);
+}
+
+for (my $i = 0; $i < $M; $i++) {
+    for (my $j = 0; $j < $n - 1; $j++) { print "$matrix[$j][$i]\t"; }
+    print "$matrix[$n - 1][$i]\n";
+}
diff --git a/rsem-generate-ngvector b/rsem-generate-ngvector
new file mode 100755 (executable)
index 0000000..c4d26a8
--- /dev/null
@@ -0,0 +1,116 @@
+#!/usr/bin/perl
+
+use Getopt::Long;
+use Pod::Usage;
+use File::Basename;
+use strict;
+
+my $k = 25;
+my $help = 0;
+
+GetOptions("k=i" => \$k,
+          "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
+
+pod2usage(-verbose => 2) if ($help == 1);
+pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 2);
+
+my ($fn, $dir, $suf) = fileparse($0);
+my $command = "";
+
+$command = $dir."rsem-for-ebseq-calculate-clustering-info $k $ARGV[0] $ARGV[1].ump";
+&runCommand($command);
+
+$command = $dir."rsem-for-ebseq-generate-ngvector-from-clustering-info $ARGV[1].ump $ARGV[1].ngvec";
+&runCommand($command);
+
+# command, {err_msg}
+sub runCommand {
+    print $_[0]."\n";
+    my $status = system($_[0]);
+    if ($status != 0) {
+        my $errmsg;
+        if (scalar(@_) > 1) { $errmsg = $_[1]; }
+        else { $errmsg = "\"$command\" failed! Please check whether you provided the correct parameters/options for the pipeline!"; }
+        print $errmsg."\n";
+        exit(-1);
+    }
+    print "\n";
+}
+
+__END__
+
+=head1 NAME
+
+rsem-generate-ngvector
+
+=head1 SYNOPSIS
+
+=over
+
+ rsem-generate-ngvector [options] input_fasta_file output_name
+
+=back
+
+=head1 ARGUMENTS
+
+=over
+
+=item B<input_fasta_file>
+
+The fasta file containing all reference transcripts. The transcripts must be in the same order as those in expression value files. Thus, 'reference_name.transcripts.fa' generated by 'rsem-prepare-reference' should be used.   
+
+=item B<output_name>
+
+The name of all output files. The Ng vector will be stored as 'output_name.ngvec'.
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item B<-k> <int>
+
+k mer length. See description section. (Default: 25)
+
+=item B<-h/--help>
+
+Show help information.
+
+=back
+
+=head1 DESCRIPTION
+
+This program generates the Ng vector required by EBSeq for isoform level differential expression analysis based on reference sequences only. EBSeq can take variance due to read mapping ambiguity into consideration by grouping isoforms according to the number of isoforms of their parent genes. However, for a de novo assembled transcriptome, it is hard to obtain an accurate gene-isoform relationship. Instead, this program groups isoforms using measures of read mapping ambiguity directly. First, it calculates the 'unmappability' of each transcript. The 'unmappability' of a transcript is the ratio between the number of k mers with at least one perfect match to other transcripts and the total number of k mers of this transcript, where k is a parameter. Then, the Ng vector is generated by applying the K-means algorithm to the 'unmappability' values with the number of clusters set to 3. 'rsem-generate-ngvector' will make sure the mean 'unmappability' scores of the clusters are in ascending order. All transcripts whose lengths are less than k are assigned to cluster 3.
+
+If your reference is a de novo assembled transcript set, you should run 'rsem-generate-ngvector' first. Then load the resulting 'output_name.ngvec' into R. For example, you can use:
+
+ NgVec <- scan(file="output_name.ngvec", what=0, sep="\n")
+
+After that, replace 'IsoNgTrun' with 'NgVec' in the second line of section 3.2.5 (Page 10) of EBSeq's vignette:
+
+ IsoEBres=EBTest(Data=IsoMat, NgVector=NgVec, ...)
+
+This program only needs to run once per RSEM reference. 
+
+=head1 OUTPUT
+
+=over
+
+=item B<output_name.ump>
+
+'unmappability' scores for each transcript. This file contains two columns: the first column is the transcript name and the second column is its 'unmappability' score.
+
+=item B<output_name.ngvec>
+
+Ng vector generated by this program.
+
+=back
+
+=head1 EXAMPLES
+
+Suppose the reference sequences file is '/ref/mouse_125/mouse_125.transcripts.fa' and we set the output_name as 'mouse_125':
+
+ rsem-generate-ngvector /ref/mouse_125/mouse_125.transcripts.fa mouse_125
+
+=cut