bool notgood = (b->core.flag & 0x0004) || (b2->core.flag & 0x0004);
if (!notgood) {
- //swap if b is mate 2
- if (b->core.flag & 0x0080) {
- assert(b2->core.flag & 0x0040);
- bam1_t *tmp = b;
- b = b2; b2 = tmp;
- }
+ //swap if b is mate 2
+ if (b->core.flag & 0x0080) {
+ assert(b2->core.flag & 0x0040);
+ bam1_t *tmp = b;
+ b = b2; b2 = tmp;
+ }
- hit = wrapper.getNextHit();
- assert(hit != NULL);
+ hit = wrapper.getNextHit();
+ assert(hit != NULL);
- assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
- assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
+ assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
+ assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
- convert(b, hit->getConPrb());
- convert(b2, hit->getConPrb());
+ convert(b, hit->getConPrb());
+ convert(b2, hit->getConPrb());
- b->core.mpos = b2->core.pos;
- b2->core.mpos = b->core.pos;
+ b->core.mpos = b2->core.pos;
+ b2->core.mpos = b->core.pos;
}
/*
class Buffer {
public:
- Buffer(int nMB, int nSamples, int cvlen, const char* tmpF) {
+ // in_mem_arr must be allocated memory before the Buffer is constructed
+ Buffer(int nMB, int nSamples, int vlen, float* in_mem_arr, const char* tmpF) {
cpos = 0;
- size = bufsize_type(nMB) * 1024 * 1024 / FLOATSIZE / cvlen;
+ size = bufsize_type(nMB) * 1024 * 1024 / FLOATSIZE / vlen;
if (size > (bufsize_type)nSamples) size = nSamples;
general_assert(size > 0, "Memory allocated for credibility intervals is not enough!");
- size *= cvlen;
+ size *= vlen;
buffer = new float[size];
ftmpOut.open(tmpF, std::ios::binary);
fr = to = 0;
this->nSamples = nSamples;
- this->cvlen = cvlen;
+ this->vlen = vlen;
+ this->in_mem_arr = in_mem_arr;
}
~Buffer() {
ftmpOut.close();
}
- void write(int n, float **vecs) {
+ void write(float value, float *vec) {
pthread_assert(pthread_mutex_lock(&lock), "pthread_mutex_lock", "Error occurred while acquiring the lock!");
- for (int i = 0; i < n; i++) {
- if (size - cpos < bufsize_type(cvlen)) flushToTempFile();
- memcpy(buffer + cpos, vecs[i], FLOATSIZE * cvlen);
- cpos += cvlen;
- ++to;
- }
+ if (size - cpos < bufsize_type(vlen)) flushToTempFile();
+ in_mem_arr[to] = value;
+ memcpy(buffer + cpos, vec, FLOATSIZE * vlen);
+ cpos += vlen;
+ ++to;
pthread_assert(pthread_mutex_unlock(&lock), "pthread_mutex_unlock", "Error occurred while releasing the lock!");
}
bufsize_type size, cpos; // cpos : current position
float *buffer;
+ float *in_mem_arr;
std::ofstream ftmpOut;
pthread_mutex_t lock;
int fr, to; // each flush, sample fr .. to - 1
- int nSamples, cvlen;
+ int nSamples, vlen; // vlen : vector length
void flushToTempFile() {
std::streampos gap1 = std::streampos(fr) * FLOATSIZE;
float *p = NULL;
ftmpOut.seekp(0, std::ios::beg);
- for (int i = 0; i < cvlen; i++) {
+ for (int i = 0; i < vlen; i++) {
p = buffer + i;
ftmpOut.seekp(gap1, std::ios::cur);
for (int j = fr; j < to; j++) {
ftmpOut.write((char*)p, FLOATSIZE);
- p += cvlen;
+ p += vlen;
}
ftmpOut.seekp(gap2, std::ios::cur);
}
+++ /dev/null
-Package: EBSeq
-Type: Package
-Title: A R package for Gene and Isoform Differential Expression Analysis On RNA-Seq Data
-Version: 1.1
-Date: 2012-4-18
-Author: Ning Leng
-Maintainer: Ning Leng <nleng@wisc.edu>
-Depends:blockmodeling
-Description: RNA-Seq Differential Expression Analysis on both gene and isoform level
-License:
-LazyLoad: yes
-Packaged: 2012-04-25 05:25:10 UTC; ningleng
+++ /dev/null
-export(beta.mom)
-export(CheckNg)
-export(crit_fun)
-export(DenNHist)
-export(DenNHistTable)
-export(EBTest)
-export(f0)
-export(f1)
-export(GeneSimuAt)
-export(GeneSimu)
-export(GetData)
-export(GetNg)
-export(GetPP)
-export(IsoSimuAt)
-export(IsoSimu)
-export(Likefun)
-export(LogN)
-export(MedianNorm)
-export(MergeGene)
-export(MergeIso)
-export(PlotFDTP)
-export(PlotFPTP)
-export(PlotTopCts)
-export(PolyFitPlot)
-export(PoolMatrix)
-export(PostFC)
-export(QQP)
-export(QuantileNorm)
-export(RankNorm)
-export(TopCts)
-export(TPFDRplot)
-export(EBMultiTest)
-export(GeneMultiSimu)
-export(GetMultiPP)
-export(LikefunMulti)
-export(LogNMulti)
-export(GetPatterns)
-export(PlotPattern)
+++ /dev/null
-CheckNg<-function(NewMean, NewVar,nterm, xlim, ylim){
- Ng=1=PolyFit_ENAR(NewMean[[1]],NewVar[[1]],nterm,"Mean","Variance","Ng=1",xlim, ylim)
- sortNg1=order(NewMean[[1]])
- Ng=2=PolyFit_ENAR(unlist(NewMean[c(2,4,6,8)]),unlist(NewVar[c(2,4,6,8)]),nterm,"Mean","Variance","Ng=2",xlim, ylim)
- sortNg2=order(unlist(NewMean[c(2,4,6,8)]))
- Ng=3=PolyFit_ENAR(unlist(NewMean[c(3,5,7,9)]),unlist(NewVar[c(3,5,7,9)]),nterm,"Mean","Variance","Ng=3",xlim, ylim)
- sortNg3=order(unlist(NewMean[c(3,5,7,9)]))
-
- ALL=PolyFit_ENAR(unlist(NewMean),unlist(NewVar),nterm,"Mean","Variance","",xlim, ylim)
- lines(log10(unlist(NewMean[c(2,4,6,8)]))[sortNg2],Ng=2$fit[sortNg2],col="green",lwd=2)
- lines(log10(unlist(NewMean[c(3,5,7,9)]))[sortNg3],Ng=3$fit[sortNg3],col="orange",lwd=2)
- lines(log10(unlist(NewMean[1]))[sortNg1],Ng=1$fit[sortNg1],col="pink",lwd=2)
- legend("topleft",col=c("red","pink","green","orange"),c("all","Ng=1","Ng=2","Ng=3"),lwd=2)
-}
-
-
-
-
-
-
-
+++ /dev/null
-DenNHist <-
-function(QList,Alpha,Beta,name,AList="F",GroupName)
-{
- if(!is.list(QList)) QList=list(QList)
- for (i in 1:length(QList)){
- if (AList=="F") alpha.use=Alpha
- if(AList=="T") alpha.use=Alpha[i]
- hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=paste(GroupName[i],name,sep=" "),xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
- tmpSize=length(QList[[i]][QList[[i]]<.98])
- tmpseq=seq(0.001,1,length=1000)
- #tmpdensity=dbeta(tmpseq,AlphaResult,BetaResult[i])
- #points(tmpseq,tmpdensity, type="l",col="green")
- #ll=dbeta(tmpseq,Alpha,Beta[i])
- ll=tmpseq
- lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
- legend("topright",c("Data","Fitted density"),col=c("blue","green"),lwd=2)
-}
-
- }
-
+++ /dev/null
-DenNHistTable <-
-function(QList,Alpha,Beta,AList="F")
-{
- par(mfrow=c(3,4))
- plot(1, type="n", axes=F, xlab="", ylab="", main="No 3' end No 5' end",cex.main=1)
- plot(1, type="n", axes=F, xlab="", ylab="",main="With 3' end No 5' end",cex.main=1)
- plot(1, type="n", axes=F, xlab="", ylab="",main="With 5' end No 3' end",cex.main=1)
- for (i in c(1,2,4,6,8)){
- alpha.use=Alpha
- hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=ifelse(i==1,"With 5' end With 3' end",""),cex.main=1, xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
- if(i==1)mtext("Ng=1",side=4, cex=1)
- if(i==8)mtext("Ng=2", side=4,cex=1)
- tmpSize=length(QList[[i]][QList[[i]]<.98])
-
- tmpseq=seq(0.001,1,length=1000)
- ll=tmpseq
- lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
- legend("topright",c("Data","Fitted density"),col=c("blue","green"),lwd=2,cex=.5)
-}
-
- for (i in c(3,5,7,9)){
- alpha.use=Alpha
- hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=ifelse(i==1,"With 5' end With 3' end exons",""),xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
- if(i==9)mtext("Ng=3", side=4,cex=1)
-
- tmpSize=length(QList[[i]][QList[[i]]<.98])
-
- tmpseq=seq(0.001,1,length=1000)
- ll=tmpseq
- lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
- legend("topright",c("Data","Fitted density"),col=c("blue","green"),cex=.5, lwd=2)
-}
-
-
-
-
- }
-
+++ /dev/null
-EBMultiTest <-
-function(Data,NgVector=NULL,Conditions,AllParti=NULL, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
-{
-
- if(is.null(NgVector))NgVector=rep(1,nrow(Data))
- if(!is.factor(Conditions))Conditions=as.factor(Conditions)
-
-
- #ReNameThem
- IsoNamesIn=rownames(Data)
- Names=paste("I",c(1:dim(Data)[1]),sep="")
- names(IsoNamesIn)=Names
- rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
- names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
-
- # If PossibleCond==NULL, use all combinations
- NumCond=nlevels(Conditions)
- CondLevels=levels(Conditions)
- #library(blockmodeling)
- if(is.null(AllParti)){
- AllPartiList=sapply(1:NumCond,function(i)nkpartitions(NumCond,i))
- AllParti=do.call(rbind,AllPartiList)
- colnames(AllParti)=CondLevels
- rownames(AllParti)=paste("Pattern",1:nrow(AllParti),sep="")
- }
- if(!length(sizeFactors)==ncol(Data)){
- rownames(sizeFactors)=rownames(Data)
- colnames(sizeFactors)=Conditions
- }
-
-
- NoneZeroLength=nlevels(as.factor(NgVector))
- NameList=sapply(1:NoneZeroLength,function(i)names(NgVector)[NgVector==i],simplify=F)
- DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
- names(DataList)=names(NameList)
-
- NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList)[i])
- # Unlist
- DataList.unlist=do.call(rbind, DataList)
-
- # Divide by SampleSize factor
-
- if(length(sizeFactors)==ncol(Data))
- DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
-
- if(length(sizeFactors)!=ncol(Data))
- DataList.unlist.dvd=DataList.unlist/sizeFactors
-
- # Pool or Not
- if(Pool==T){
- DataforPoolSP.dvd=MeanforPoolSP.dvd=vector("list",NumCond)
- for(lv in 1:NumCond){
- DataforPoolSP.dvd[[lv]]=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
- MeanforPoolSP.dvd[[lv]]=rowMeans(DataforPoolSP.dvd[[lv]])
- }
- MeanforPool.dvd=rowMeans(DataList.unlist.dvd)
- NumInBin=floor(dim(DataList.unlist)[1]/NumBin)
- StartSeq=c(0:(NumBin-1))*NumInBin+1
- EndSeq=c(StartSeq[-1]-1,dim(DataList.unlist)[1])
- MeanforPool.dvd.Sort=sort(MeanforPool.dvd,decreasing=T)
- MeanforPool.dvd.Order=order(MeanforPool.dvd,decreasing=T)
- PoolGroups=sapply(1:NumBin,function(i)(names(MeanforPool.dvd.Sort)[StartSeq[i]:EndSeq[i]]),simplify=F)
- #FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
- # Use GeoMean of every two-group partition
- Parti2=nkpartitions(NumCond,2)
- FCForPoolList=sapply(1:nrow(Parti2),function(i)rowMeans(do.call(cbind,
- MeanforPoolSP.dvd[Parti2[i,]==1]))/
- rowMeans(do.call(cbind,MeanforPoolSP.dvd[Parti2[i,]==2])),
- simplify=F)
- FCForPoolMat=do.call(cbind,FCForPoolList)
- FCforPool=apply(FCForPoolMat,1,function(i)exp(mean(log(i))))
- names(FCforPool)=names(MeanforPool.dvd)
- FC_Use=names(FCforPool)[which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],PoolLower) & FCforPool<=quantile(FCforPool[!is.na(FCforPool)],PoolUpper))]
- PoolGroupVar=sapply(1:NumBin,function(i)(mean(apply(matrix(DataList.unlist[PoolGroups[[i]][PoolGroups[[i]]%in%FC_Use],],ncol=ncol(DataList.unlist)),1,var))))
- PoolGroupVarInList=sapply(1:NumBin,function(i)(rep(PoolGroupVar[i],length(PoolGroups[[i]]))),simplify=F)
- PoolGroupVarVector=unlist(PoolGroupVarInList)
- VarPool=PoolGroupVarVector[MeanforPool.dvd.Order]
- names(VarPool)=names(MeanforPool.dvd)
- }
-
- DataListSP=vector("list",nlevels(Conditions))
- DataListSP.dvd=vector("list",nlevels(Conditions))
- SizeFSP=DataListSP
- MeanSP=DataListSP
- VarSP=DataListSP
- GetPSP=DataListSP
- RSP=DataListSP
- CISP=DataListSP
- tauSP=DataListSP
-
- NumEachCondLevel=summary(Conditions)
- if(Pool==F & is.null(CI)) CondLevelsUse=CondLevels[NumEachCondLevel>1]
- if(Pool==T | !is.null(CI)) CondLevelsUse=CondLevels
- NumCondUse=length(CondLevelsUse)
-
- for (lv in 1:nlevels(Conditions)){
- DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
- rownames(DataListSP[[lv]])=rownames(DataList.unlist)
- DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
- if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI)){
- CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
- tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
- }
- # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
- # may input one for each
- if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
- if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
-
- MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
-
- if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
- if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
-
- if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI))
- VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
- if( Pool==T){
- VarSP[[lv]]=VarPool
- }
- if(ncol(DataListSP[[lv]])!=1){
- VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
- names(VarSP[[lv]])=rownames(DataList.unlist)
- GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
- RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
- }
- names(MeanSP[[lv]])=rownames(DataList.unlist)
- }
-
- # Get Empirical R
- # POOL R???
- MeanList=rowMeans(DataList.unlist.dvd)
- VarList=apply(DataList.unlist.dvd, 1, var)
- Varcbind=do.call(cbind,VarSP[CondLevels%in%CondLevelsUse])
- PoolVarSpeedUp_MDFPoi_NoNormVarList=rowMeans(Varcbind)
- VarrowMin=apply(Varcbind,1,min)
- GetP=MeanList/PoolVarSpeedUp_MDFPoi_NoNormVarList
-
- EmpiricalRList=MeanList*GetP/(1-GetP)
- # sep
- #Rcb=cbind(RSP[[1]],RSP[[2]])
- #Rbest=apply(Rcb,1,function(i)max(i[!is.na(i) & i!=Inf]))
- EmpiricalRList[EmpiricalRList==Inf] =max(EmpiricalRList[EmpiricalRList!=Inf])
- # fine
- #
- GoodData=names(MeanList)[EmpiricalRList>0 & VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
- NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf | is.na(VarrowMin) | is.na(EmpiricalRList)]
- #NotIn.BestR=Rbest[NotIn.raw]
- #NotIn.fix=NotIn.BestR[which(NotIn.BestR>0)]
- #EmpiricalRList[names(NotIn.fix)]=NotIn.fix
- #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
- #GoodData=c(GoodData.raw,names(NotIn.fix))
- #NotIn=NotIn.raw[!NotIn.raw%in%names(NotIn.fix)]
- EmpiricalRList.NotIn=EmpiricalRList[NotIn]
- EmpiricalRList.Good=EmpiricalRList[GoodData]
- EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
- if(length(sizeFactors)==ncol(Data))
- EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)
- if(!length(sizeFactors)==ncol(Data))
- EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
-
-
- # Only Use Data has Good q's
- DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
- DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
- DataListIn.unlist=do.call(rbind, DataList.In)
- DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
-
- DataListSPIn=vector("list",nlevels(Conditions))
- DataListSPNotIn=vector("list",nlevels(Conditions))
- EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
- for (lv in 1:nlevels(Conditions)){
- DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
- if(length(NotIn)>0) DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
- rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
- if(length(NotIn)>0)rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
- EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
- }
-
- NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
- NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
-
- #Initialize SigIn & ...
- AlphaIn=0.5
- BetaIn=rep(0.5,NoneZeroLength)
- PIn=rep(1/nrow(AllParti),nrow(AllParti))
-
- ####use while to make an infinity round?
- UpdateAlpha=NULL
- UpdateBeta=NULL
- UpdateP=NULL
- UpdatePFromZ=NULL
- Timeperround=NULL
- for (times in 1:maxround){
- temptime1=proc.time()
- UpdateOutput=suppressWarnings(LogNMulti(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,
- NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength, AllParti,Conditions))
- print(paste("iteration", times, "done",sep=" "))
- AlphaIn=UpdateOutput$AlphaNew
- BetaIn=UpdateOutput$BetaNew
- PIn=UpdateOutput$PNew
- PFromZ=UpdateOutput$PFromZ
- FOut=UpdateOutput$FGood
- UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
- UpdateBeta=rbind(UpdateBeta,BetaIn)
- UpdateP=rbind(UpdateP,PIn)
- UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
- temptime2=proc.time()
- Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
- print(paste("time" ,Timeperround[times],sep=" "))
- Z.output=UpdateOutput$ZEachGood
- Z.NA.Names=UpdateOutput$zNaNName
- }
- #Remove this } after testing!!
-
-# if (times!=1){
-# if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){
-# Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
-# break
-# }
-# }
-#}
-
-##########Change Names############
-## Only z are for Good Ones
-## Others are for ALL Data
-GoodData=GoodData[!GoodData%in%Z.NA.Names]
-IsoNamesIn.Good=as.vector(IsoNamesIn[GoodData])
-RealName.Z.output=Z.output
-RealName.F=FOut
-rownames(RealName.Z.output)=IsoNamesIn.Good
-rownames(RealName.F)=IsoNamesIn.Good
-
-RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
-RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
-RealName.SPMeanList=sapply(1:NoneZeroLength,function(i)sapply(1:length(MeanSP), function(j)MeanSP[[j]][names(MeanSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
-RealName.SPVarList=sapply(1:NoneZeroLength,function(i)sapply(1:length(VarSP), function(j)VarSP[[j]][names(VarSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
-RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
-
-RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
-RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVarSpeedUp_MDFPoi_NoNormVarList[names(PoolVarSpeedUp_MDFPoi_NoNormVarList)%in%NameList[[i]]], simplify=F)
-RealName.QList=sapply(1:NoneZeroLength,function(i)sapply(1:length(GetPSP), function(j)GetPSP[[j]][names(GetPSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
-
-
-for (i in 1:NoneZeroLength){
-tmp=NameList[[i]]
-names=IsoNamesIn[tmp]
-RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
-RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
- for(j in 1:NumCond){
- RealName.SPMeanList[[i]][[j]]=RealName.SPMeanList[[i]][[j]][NameList[[i]]]
- if(!is.null(RealName.QList[[i]][[j]])){
- RealName.QList[[i]][[j]]=RealName.QList[[i]][[j]][NameList[[i]]]
- RealName.SPVarList[[i]][[j]]=RealName.SPVarList[[i]][[j]][NameList[[i]]]
- names(RealName.QList[[i]][[j]])=names
- names(RealName.SPVarList[[i]][[j]])=names
- }
- names(RealName.SPMeanList[[i]][[j]])=names
- }
-RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
-RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
-RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
-
-names(RealName.MeanList[[i]])=names
-names(RealName.VarList[[i]])=names
-
-names(RealName.EmpiricalRList[[i]])=names
-names(RealName.PoolVarList[[i]])=names
-rownames(RealName.DataList[[i]])=names
-
-}
-
-
-#########posterior part for other data set here later############
-AllNA=unique(c(Z.NA.Names,NotIn))
-AllZ=NULL
-AllF=NULL
-if(length(AllNA)==0){
- AllZ=RealName.Z.output[IsoNamesIn,]
- AllF=RealName.F[IsoNamesIn,]
-}
-ZEachNA=NULL
-if (length(AllNA)>0){
- Ng.NA=NgVector[AllNA]
- AllNA.Ngorder=AllNA[order(Ng.NA)]
- NumOfEachGroupNA=rep(0,NoneZeroLength)
- NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
- names(NumOfEachGroupNA)=c(1:NoneZeroLength)
- NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
- PNotIn=rep(1-Approx,length(AllNA.Ngorder))
- MeanList.NotIn=MeanList[AllNA.Ngorder]
- R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn)
- if(length(sizeFactors)==ncol(Data))
- R.NotIn=matrix(outer(R.NotIn.raw,sizeFactors),nrow=length(AllNA.Ngorder))
- if(!length(sizeFactors)==ncol(Data))
- R.NotIn=matrix(R.NotIn.raw*sizeFactors[NotIn,],nrow=length(AllNA.Ngorder))
-
- DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
- DataListSPNotInWithZ=vector("list",nlevels(Conditions))
- RListSPNotInWithZ=vector("list",nlevels(Conditions))
- for (lv in 1:nlevels(Conditions)) {
- DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
- RListSPNotInWithZ[[lv]]=matrix(R.NotIn[,Conditions==levels(Conditions)[lv]],nrow=length(AllNA.Ngorder))
- }
- FListNA=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
- function(j)f0(do.call(cbind, DataListSPNotInWithZ[AllParti[i,]==j]),AlphaIn, BetaIn,
- do.call(cbind,RListSPNotInWithZ[AllParti[i,]==j]), NumOfEachGroupNA, log=T)),
- simplify=F)
- FPartiLogNA=sapply(FListNA,rowSums)
- FMatNA=exp(FPartiLogNA)
-
- rownames(FMatNA)=rownames(DataListNotIn.unlistWithZ)
- PMatNA=matrix(rep(1,nrow(DataListNotIn.unlistWithZ)),ncol=1)%*%matrix(PIn,nrow=1)
- FmultiPNA=FMatNA*PMatNA
- DenomNA=rowSums(FmultiPNA)
- ZEachNA=apply(FmultiPNA,2,function(i)i/DenomNA)
-
- rownames(ZEachNA)=IsoNamesIn[AllNA.Ngorder]
-
- AllZ=rbind(RealName.Z.output,ZEachNA)
- AllZ=AllZ[IsoNamesIn,]
-
- F.NotIn=FMatNA
- rownames(F.NotIn)=IsoNamesIn[rownames(FMatNA)]
- AllF=rbind(RealName.F,F.NotIn)
- AllF=AllF[IsoNamesIn,]
-
-}
-colnames(AllZ)=rownames(AllParti)
-colnames(AllF)=rownames(AllParti)
-
-#############Result############################
-Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ,
- Z=RealName.Z.output,PoissonZ=ZEachNA, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList,
- VarList=RealName.VarList, QList=RealName.QList, SPMean=RealName.SPMeanList, SPEstVar=RealName.SPVarList,
- PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f=AllF, AllParti=AllParti)
-}
-
+++ /dev/null
-EBTest <-
-function(Data,NgVector=NULL,Vect5End=NULL,Vect3End=NULL,Conditions, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000,ApproxVal=10^-10)
-{
- Dataraw=Data
- AllZeroNames=which(rowMeans(Data)==0)
- NotAllZeroNames=which(rowMeans(Data)>0)
- if(length(AllZeroNames)>0) print("Remove transcripts with all zero")
- Data=Data[NotAllZeroNames,]
- if(!is.null(NgVector))NgVector=NgVector[NotAllZeroNames]
- if(!length(sizeFactors)==ncol(Data))sizeFactors=sizeFactors[NotAllZeroNames,]
-
- if(is.null(NgVector))NgVector=rep(1,nrow(Data))
-
- #Rename Them
- IsoNamesIn=rownames(Data)
- Names=paste("I",c(1:dim(Data)[1]),sep="")
- names(IsoNamesIn)=Names
- rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
- names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
-
-
- if(!length(sizeFactors)==ncol(Data)){
- rownames(sizeFactors)=rownames(Data)
- colnames(sizeFactors)=Conditions
- }
-
- NumOfNg=nlevels(as.factor(NgVector))
- NameList=sapply(1:NumOfNg,function(i)Names[NgVector==i],simplify=F)
- names(NameList)=paste("Ng",c(1:NumOfNg),sep="")
- NotNone=NULL
- for (i in 1:NumOfNg) {
- if (length(NameList[[i]])!=0)
- NotNone=c(NotNone,names(NameList)[i])
- }
- NameList=NameList[NotNone]
-
- NoneZeroLength=length(NameList)
- DataList=vector("list",NoneZeroLength)
- DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
- names(DataList)=names(NameList)
-
- NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList)[i])
- # Unlist
- DataList.unlist=do.call(rbind, DataList)
-
- # Divide by SampleSize factor
-
- if(length(sizeFactors)==ncol(Data))
- DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
-
- if(length(sizeFactors)!=ncol(Data))
- DataList.unlist.dvd=DataList.unlist/sizeFactors
-
- # Get FC and VarPool for pooling - Only works on 2 conditions
- if(ncol(Data)==2){
- DataforPoolSP.dvd1=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[1]],nrow=dim(DataList.unlist)[1])
- DataforPoolSP.dvd2=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[2]],nrow=dim(DataList.unlist)[1])
- MeanforPoolSP.dvd1=rowMeans(DataforPoolSP.dvd1)
- MeanforPoolSP.dvd2=rowMeans(DataforPoolSP.dvd2)
- FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
- names(FCforPool)=rownames(Data)
- FC_Use=which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],.25) &
- FCforPool<=quantile(FCforPool[!is.na(FCforPool)],.75))
-
- Var_FC_Use=apply( DataList.unlist.dvd[FC_Use,],1,var )
- Mean_FC_Use=(MeanforPoolSP.dvd1[FC_Use]+MeanforPoolSP.dvd2[FC_Use])/2
- MeanforPool=(MeanforPoolSP.dvd1+MeanforPoolSP.dvd2)/2
- FC_Use2=which(Var_FC_Use>=Mean_FC_Use)
- Var_FC_Use2=Var_FC_Use[FC_Use2]
- Mean_FC_Use2=Mean_FC_Use[FC_Use2]
- Phi=mean((Var_FC_Use2-Mean_FC_Use2)/Mean_FC_Use2^2)
- VarEst= MeanforPool*(1+MeanforPool*Phi)
- print(Phi)
- }
-
- #DataListSP Here also unlist.. Only two lists
- DataListSP=vector("list",nlevels(Conditions))
- DataListSP.dvd=vector("list",nlevels(Conditions))
- SizeFSP=DataListSP
- MeanSP=DataListSP
- VarSP=DataListSP
- GetPSP=DataListSP
- RSP=DataListSP
- CISP=DataListSP
- tauSP=DataListSP
- NumSampleEachCon=rep(NULL,nlevels(Conditions))
-
- for (lv in 1:nlevels(Conditions)){
- DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
- rownames(DataListSP[[lv]])=rownames(DataList.unlist)
- DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
- NumSampleEachCon[lv]=ncol(DataListSP[[lv]])
-
- if(ncol(DataListSP[[lv]])==1 & !is.null(CI)){
- CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
- tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
- }
- # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
- # may input one for each
- if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
- if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
-
-
- MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
-
- if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
- if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
-
- if(ncol(DataListSP[[lv]])==1 & !is.null(CI))
- VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
- if(ncol(DataListSP[[lv]])!=1){
- VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
- names(MeanSP[[lv]])=rownames(DataList.unlist)
- names(VarSP[[lv]])=rownames(DataList.unlist)
- GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
- RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
- }
-}
-
-
- MeanList=rowMeans(DataList.unlist.dvd)
- VarList=apply(DataList.unlist.dvd, 1, var)
- if(ncol(Data)==2)PoolVar=VarEst
- if(!ncol(Data)==2){
- CondWithRep=which(NumSampleEachCon>1)
- VarCondWithRep=do.call(cbind,VarSP[CondWithRep])
- PoolVar=rowMeans(VarCondWithRep)
- }
- GetP=MeanList/PoolVar
-
- EmpiricalRList=MeanList*GetP/(1-GetP)
- EmpiricalRList[EmpiricalRList==Inf] =max(EmpiricalRList[EmpiricalRList!=Inf])
-
- if(ncol(Data)!=2){
- Varcbind=do.call(cbind,VarSP)
- VarrowMin=apply(Varcbind,1,min)
- }
-
- if(ncol(Data)==2){
- Varcbind=VarEst
- VarrowMin=VarEst
- }
- #
- #
- GoodData=names(MeanList)[EmpiricalRList>0 & VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
- NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf | is.na(VarrowMin) | is.na(EmpiricalRList)]
- #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
- EmpiricalRList.NotIn=EmpiricalRList[NotIn]
- EmpiricalRList.Good=EmpiricalRList[GoodData]
- EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
- if(length(sizeFactors)==ncol(Data))
- EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)
- if(!length(sizeFactors)==ncol(Data))
- EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
-
-
- # Only Use Data has Good q's
- DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
- DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
- DataListIn.unlist=do.call(rbind, DataList.In)
- DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
-
- DataListSPIn=vector("list",nlevels(Conditions))
- DataListSPNotIn=vector("list",nlevels(Conditions))
- EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
- for (lv in 1:nlevels(Conditions)){
- DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
- if(length(NotIn)>0){ DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
- rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
- }
- rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
- EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
-}
-
- NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
- NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
-
- #Initialize SigIn & ...
- AlphaIn=0.5
- BetaIn=rep(0.5,NoneZeroLength)
- PIn=0.5
-
- ####use while to make an infinity round?
- UpdateAlpha=NULL
- UpdateBeta=NULL
- UpdateP=NULL
- UpdatePFromZ=NULL
- Timeperround=NULL
- for (times in 1:maxround){
- temptime1=proc.time()
- UpdateOutput=suppressWarnings(LogN(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP, NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength))
- print(paste("iteration", times, "done",sep=" "))
- AlphaIn=UpdateOutput$AlphaNew
- BetaIn=UpdateOutput$BetaNew
- PIn=UpdateOutput$PNew
- PFromZ=UpdateOutput$PFromZ
- F0Out=UpdateOutput$F0Out
- F1Out=UpdateOutput$F1Out
- UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
- UpdateBeta=rbind(UpdateBeta,BetaIn)
- UpdateP=rbind(UpdateP,PIn)
- UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
- temptime2=proc.time()
- Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
- print(paste("time" ,Timeperround[times],sep=" "))
- Z.output=UpdateOutput$ZNew.list[!is.na(UpdateOutput$ZNew.list)]
- Z.NA.Names=UpdateOutput$zNaNName
- }
- #Remove this } after testing!!
-
-# if (times!=1){
-# if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){
-# Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
-# break
-# }
-# }
-#}
-
-##########Change Names############
-## Only z are for Good Ones
-## Others are for ALL Data
-GoodData=GoodData[!GoodData%in%Z.NA.Names]
-IsoNamesIn.Good=IsoNamesIn[GoodData]
-RealName.Z.output=Z.output
-RealName.F0=F0Out
-RealName.F1=F1Out
-names(RealName.Z.output)=IsoNamesIn.Good
-names(RealName.F0)=IsoNamesIn.Good
-names(RealName.F1)=IsoNamesIn.Good
-
-
-RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
-RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
-RealName.C1MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[1]][names(MeanSP[[1]])%in%NameList[[i]]], simplify=F)
-RealName.C2MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[2]][names(MeanSP[[2]])%in%NameList[[i]]], simplify=F)
-RealName.C1VarList=sapply(1:NoneZeroLength,function(i)VarSP[[1]][names(VarSP[[1]])%in%NameList[[i]]], simplify=F)
-RealName.C2VarList=sapply(1:NoneZeroLength,function(i)VarSP[[2]][names(VarSP[[2]])%in%NameList[[i]]], simplify=F)
-RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
-
-
-
-RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
-RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVar[names(PoolVar)%in%NameList[[i]]], simplify=F)
-
-
-RealName.QList1=sapply(1:NoneZeroLength,function(i)GetPSP[[1]][names(GetPSP[[1]])%in%NameList[[i]]], simplify=F)
-RealName.QList2=sapply(1:NoneZeroLength,function(i)GetPSP[[2]][names(GetPSP[[2]])%in%NameList[[i]]], simplify=F)
-
-
-for (i in 1:NoneZeroLength){
-tmp=NameList[[i]]
-names=IsoNamesIn[tmp]
-
-RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
-RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
-RealName.QList1[[i]]=RealName.QList1[[i]][NameList[[i]]]
-RealName.QList2[[i]]=RealName.QList2[[i]][NameList[[i]]]
-RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
-RealName.C1MeanList[[i]]=RealName.C1MeanList[[i]][NameList[[i]]]
-RealName.C2MeanList[[i]]=RealName.C2MeanList[[i]][NameList[[i]]]
-RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
-RealName.C1VarList[[i]]=RealName.C1VarList[[i]][NameList[[i]]]
-RealName.C2VarList[[i]]=RealName.C2VarList[[i]][NameList[[i]]]
-RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
-
-names(RealName.MeanList[[i]])=names
-names(RealName.VarList[[i]])=names
-if(ncol(DataListSP[[1]])!=1){
- names(RealName.QList1[[i]])=names
- names(RealName.C1VarList[[i]])=names
-}
-if(ncol(DataListSP[[2]])!=1){
- names(RealName.QList2[[i]])=names
- names(RealName.C2VarList[[i]])=names
-}
-
-names(RealName.EmpiricalRList[[i]])=names
-names(RealName.C1MeanList[[i]])=names
-names(RealName.C2MeanList[[i]])=names
-names(RealName.PoolVarList[[i]])=names
-rownames(RealName.DataList[[i]])=names
-
-
-}
-
-
-#########posterior part for other data set here later############
-AllNA=unique(c(Z.NA.Names,NotIn))
-z.list.NotIn=NULL
-AllF0=c(RealName.F0)
-AllF1=c(RealName.F1)
-AllZ=RealName.Z.output
-
-if (length(AllNA)>0){
- Ng.NA=NgVector[AllNA]
- AllNA.Ngorder=AllNA[order(Ng.NA)]
- NumOfEachGroupNA=rep(0,NoneZeroLength)
- NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
- names(NumOfEachGroupNA)=c(1:NoneZeroLength)
- NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
- PNotIn=rep(1-ApproxVal,length(AllNA.Ngorder))
- MeanList.NotIn=MeanList[AllNA.Ngorder]
- R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn)
- if(length(sizeFactors)==ncol(Data))
- R.NotIn=outer(R.NotIn.raw,sizeFactors)
- if(!length(sizeFactors)==ncol(Data))
- R.NotIn=R.NotIn.raw*sizeFactors[NotIn,]
- R.NotIn1=matrix(R.NotIn[,Conditions==levels(Conditions)[1]],nrow=nrow(R.NotIn))
- R.NotIn2=matrix(R.NotIn[,Conditions==levels(Conditions)[2]],nrow=nrow(R.NotIn))
-
- DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
- DataListSPNotInWithZ=vector("list",nlevels(Conditions))
- for (lv in 1:nlevels(Conditions))
- DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
- F0=f0(DataListNotIn.unlistWithZ, AlphaIn, BetaIn, R.NotIn, NumOfEachGroupNA, log=F)
- F1=f1(DataListSPNotInWithZ[[1]], DataListSPNotInWithZ[[2]], AlphaIn, BetaIn, R.NotIn1,R.NotIn2, NumOfEachGroupNA, log=F)
- z.list.NotIn=PIn*F1/(PIn*F1+(1-PIn)*F0)
-# names(z.list.NotIn)=IsoNamesIn.Good=IsoNamesIn[which(Names%in%NotIn)]
- names(z.list.NotIn)=IsoNamesIn[AllNA.Ngorder]
-
- AllZ=c(RealName.Z.output,z.list.NotIn)
- AllZ=AllZ[IsoNamesIn]
- AllZ[is.na(AllZ)]=0
- F0.NotIn=F0
- F1.NotIn=F1
- names(F0.NotIn)=IsoNamesIn[names(F0)]
- names(F1.NotIn)=IsoNamesIn[names(F1)]
- AllF0=c(RealName.F0,F0.NotIn)
- AllF1=c(RealName.F1,F1.NotIn)
- AllF0=AllF0[IsoNamesIn]
- AllF1=AllF1[IsoNamesIn]
- AllF0[is.na(AllF0)]=0
- AllF1[is.na(AllF1)]=0
-}
-#############Result############################
-Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ, Z=RealName.Z.output,PoissonZ=z.list.NotIn, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, VarList=RealName.VarList, QList1=RealName.QList1, QList2=RealName.QList2, C1Mean=RealName.C1MeanList, C2Mean=RealName.C2MeanList,C1EstVar=RealName.C1VarList, C2EstVar=RealName.C2VarList, PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f0=AllF0, f1=AllF1,
- AllZeroIndex=AllZeroNames)
-}
-
+++ /dev/null
-GeneMultiSimu<-
-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions,AllParti, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1 paired simulation
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-data(GeneEBresultGouldBart2)
-MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
-MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
-
-MeanDVD=MeansC1/MeansC2
-
-if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
-if(!is.null(DVDconstant))DVDLibrary=DVDconstant
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
-
-if(length(NumofGene)!=0)
-NumofGene.raw=NumofGene*2
-
-if(length(NumofGene)==0)
-NumofGene.raw=length(MeanInputraw)
-
-
-PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
-if (length(Phiconstant)==0){
- PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
- PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
- PhiInput=PhiInput.raw[PhiInputNames]
-}
-
-if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
-if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
-if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
-
-# length(DEGeneNumbers) should be num of patterns -1. the others EE
-PatternGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
-names(PatternGeneNumbers)=rownames(AllParti)
-EEWhich=which(rowSums(AllParti)==ncol(AllParti))
-DEGeneNumbers=PatternGeneNumbers[-EEWhich]
-
-
-OutGeneNumbers=round(NumofGene*DEGeneProp/2)*2
-names(OutGeneNumbers)=rownames(AllParti)
-OutDEGeneNumbers=OutGeneNumbers[-EEWhich]
-OutEEGeneNumbers=OutGeneNumbers[EEWhich]
-OutGenePatterns=c(unlist(sapply(1:length(OutDEGeneNumbers),
- function(i)rep(names(OutDEGeneNumbers)[i],OutDEGeneNumbers[i]),simplify=F)),
- rep(names(OutEEGeneNumbers),OutEEGeneNumbers))
-
-GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
-names(PhiInput)=GeneNames
-names(MeanInput)=GeneNames
-#########
-# data
-#########
-EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
-
-generateDataraw=t(EEList)
-DVDSample=sample(DVDLibrary,sum(DEGeneNumbers),replace=T)
-
-DErawNames=vector("list",length(DEGeneNumbers))
-st=1
-for(i in 1:length(DEGeneNumbers)){
- for(j in st:(st+DEGeneNumbers[i]-1)){
- NumGroup=max(AllParti[names(DEGeneNumbers)[i],])
- SampleGroup=sample(NumGroup,NumGroup)
- DVDSampleEach=c(1,DVDSample[j]^c(1:(NumGroup-1)))
- for(k in 1:NumGroup){
- CondWhich=which(AllParti[names(DEGeneNumbers)[i],]==SampleGroup[k])
- SampleChoose=which(Conditions%in%colnames(AllParti)[CondWhich])
- generateDataraw[j,SampleChoose]=sapply(1:length(SampleChoose), function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSampleEach[k]*MeanInput[j]*NormFactor[i])),simplify=T)
- }}
- DErawNames[[i]]=GeneNames[st:(st+DEGeneNumbers[i]-1)]
- st=st+DEGeneNumbers[i]
-}
-
-rownames(generateDataraw)=GeneNames
-MeanVector=rowMeans(generateDataraw)
-VarVector=apply(generateDataraw,1,var)
-MOV.post=MeanVector/VarVector
-EErawNames=GeneNames[!GeneNames%in%unlist(DErawNames)]
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,]
-InName=rownames(generateData)
-#print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
-## DE
-##################################
-FinalDEInName=sapply(1:length(DEGeneNumbers),function(i)InName[InName%in%DErawNames[[i]]][1:OutDEGeneNumbers[i]],simplify=F)
-FinalEEInName=InName[InName%in%EErawNames][1:OutEEGeneNumbers]
-FinalNames=c(unlist(FinalDEInName),FinalEEInName)
-
-generateData=generateData[FinalNames,]
-########################################
-
-UseName=rownames(generateData)
-phiuse=PhiInput[rownames(generateData)]
-meanuse=MeanInput[rownames(generateData)]
-
-OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
-names(OutName)=rownames(generateData)
-OutData=generateData
-rownames(OutData)=as.vector(OutName)
-names(OutGenePatterns)=as.vector(OutName)
-output=list(data=OutData, Patterns=OutGenePatterns)
-}
+++ /dev/null
-GeneSimu<-
-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1 paired simulation
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-data(GeneEBresultGouldBart2)
-MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
-MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
-
-MeanDVD=MeansC1/MeansC2
-
-if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
-
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
-
-if(length(NumofGene)!=0)
-NumofGene.raw=NumofGene*2
-
-if(length(NumofGene)==0)
-NumofGene.raw=length(MeanInputraw)
-
-
-PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
-if (length(Phiconstant)==0){
- PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
- PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
- PhiInput=PhiInput.raw[PhiInputNames]
-}
-
-if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
-if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
-if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
-
-DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
-GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
-names(PhiInput)=GeneNames
-names(MeanInput)=GeneNames
-#########
-# data
-#########
-EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
-
-
-
-
- generateDataraw=t(EEList)
- if(length(DVDconstant)==0){
- DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
- for(j in 1:NumofGene.raw){
- if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
- if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=MeanInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
-}
- }
- if(length(DVDconstant)!=0){
- for(j in 1:NumofGene.raw){
- if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
- if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
- }
- }
-rownames(generateDataraw)=GeneNames
-MeanVector=rowMeans(generateDataraw)
-VarVector=apply(generateDataraw,1,var)
-MOV.post=MeanVector/VarVector
-
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,]
-#print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
-## DE
-NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
-
-if(length(NumofGene)!=0)
- generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
-
-
-UseName=rownames(generateData)
-phiuse=PhiInput[rownames(generateData)]
-meanuse=MeanInput[rownames(generateData)]
-
-
-TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
-
-if(OnlyData==T){
- OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
- names(OutName)=rownames(generateData)
- OutData=generateData
- rownames(OutData)=as.vector(OutName)
- OutTrueDE=as.vector(OutName[TrueDE])
- output=list(data=OutData, TrueDE=OutTrueDE)
- return(output)
- }
-## DESeq
-
-cds=newCountDataSet(round(generateData),Conditions)
-cds=estimateSizeFactors(cds)
-Sizes=sizeFactors(cds)
-if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
-else cds=estimateVarianceFunctions(cds, method="blind")
-
-res=nbinomTest(cds, "1", "2")
-ResAdj=res$padj
-names(ResAdj)=res$id
-SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
-print(paste("DESEq found",length(SmallPValueName)))
-print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
-
-print("DESeq Size factors")
-print(Sizes)
-
-NewData=generateData
-
-
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
-
-EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-#library(EBarrays)
-
-#EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-
-
-zlist.unlist=EBresult[[5]]
-fdr=max(.5,crit_fun(1-zlist.unlist,.05))
-EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
-EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
-zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
-print(paste("Soft EB Poi",length(EBDE.Poi)))
-EBDE=c(EBDE, EBDE.Poi)
-print(paste("Soft EB found",length(EBDE)))
-print(paste("In True DE",sum(EBDE%in%TrueDE)))
-
-EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
-EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
-print(paste("Hard Poi found",length(EBDE95.Poi)))
-EBDE95=c(EBDE95, EBDE95.Poi)
-print(paste("Hard EB found" ,length(EBDE95)))
-print(paste("In True DE",sum(EBDE95%in%TrueDE)))
-
-### edgeR
-library(edgeR,lib.loc="~/RCODE")
-edgeRList.b2=DGEList(NewData,group=Conditions)
-if(length(Phiconstant)==1){
- edgeRList.b2=estimateCommonDisp(edgeRList.b2)
- edgeRRes.b2=exactTest(edgeRList.b2)
-}
-if(length(Phiconstant)==0){
- edgeRList.b2=estimateCommonDisp(edgeRList.b2)
- edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
- edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
-}
-edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
-edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
-names(edgeRPvalue.b2)=rownames(NewData)
-edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
-print(paste("edgeR found",length(edgeRSmallpvalue)))
-print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
-
-### Bayseq
-library(baySeq, lib.loc="~/RCODE")
-library(snow, lib.loc="~/RCODE")
-cl <- makeCluster(4, "SOCK")
-groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
-CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
-CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
-CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
-bayseqPost=CDPost.NBML@posteriors
-rownames(bayseqPost)=rownames(NewData)
-bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
-print(paste("bayseq found",length(bayseqDE)))
-print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
-
-
-### BBSeq
-library("BBSeq",lib.loc="~/RCODE")
-CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
-output=free.estimate(NewData,CondM)
-beta.free = output$betahat.free
-p.free = output$p.free
-psi.free = output$psi.free
-names(p.free)=rownames(NewData)
-# Top p free?
-#out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
-#p.constrained = out.model$p.model
-p.free.adj=p.adjust(p.free, method="BH")
-
-BBDE=names(p.free.adj)[which(p.free.adj<.05)]
-print(paste("BBSeq found",length(BBDE)))
-print(paste("In True DE",sum(BBDE%in%TrueDE)))
-
-
-#########################
-# Generate table
-Table=matrix(rep(0,12),ncol=2)
-colnames(Table)=c("Power","FDR")
-rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-
- Length=length(TrueDE)
- Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
- Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
- Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
- Table[4,1]=sum(BBDE%in%TrueDE)/Length
- Table[5,1]=sum(EBDE%in%TrueDE)/Length
- Table[6,1]=sum(EBDE95%in%TrueDE)/Length
- Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
- Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
- Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
- Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
- Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
- Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
- Table=round(Table,2)
-
-ValueTable=matrix(rep(0,12),ncol=2)
-colnames(ValueTable)=c("Power","FDR")
-rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
- ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
- ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
- ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
- ValueTable[4,1]=sum(BBDE%in%TrueDE)
- ValueTable[5,1]=sum(EBDE%in%TrueDE)
- ValueTable[6,1]=sum(EBDE95%in%TrueDE)
- ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
- ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
- ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
- ValueTable[4,2]=sum(!BBDE%in%TrueDE)
- ValueTable[5,2]=sum(!EBDE%in%TrueDE)
- ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
-
-if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
-if(length(DVDconstant)!=0) DVD=DVDconstant
-if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
-if(length(Phiconstant)!=0) Phi=Phiconstant
-OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult
-,DESeqDE=SmallPValueName, edgeRDE=edgeRSmallpvalue, bayDE=bayseqDE, BBDE=BBDE, EBDE95=EBDE95)
-}
-
+++ /dev/null
-GeneSimuAt<-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1
-# paired level simulation
-
-data(GeneEBresultGouldBart2)
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-
-#MeansC1=rowMeans(GeneV.norm1.NZ.b2[,1:4])
-#MeansC2=rowMeans(GeneV.norm1.NZ.b2[,5:8])
-MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
-MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
-
-MeanDVD=MeansC1/MeansC2
-
-if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
-
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
-#MeanInputraw=rowMeans(GeneV.norm1.NZ.b2)
-#Var1=apply(GeneV.norm1.NZ.b2[,1:4],1,var)
-#Var2=apply(GeneV.norm1.NZ.b2[,5:8],1,var)
-#VarInput=(Var1 + Var2)/2
-#If NumofGene.raw=NULL, empirical # of Gene
-#If !=NULL , Input a 9-vector
-NumofGene.raw=length(MeanInputraw)
-
-# here phi denotes r -- which is 1/phi' in which sigma^2=mu(1+mu phi')
-# In negative binomial
-# size is 1/phi'
-# rnbinom(100,size=100,mu=10)
-# var(qq)
-#[1] 10.93687
-# qq=rnbinom(100,size=10,mu=10)
-# var(qq)
-#[1] 24.01404
-
-#PhiInput.raw=(MeanInputraw^2) / (VarInput - MeanInputraw)
-PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
-if (length(Phiconstant)==0){
- PhiLibrary=PhiInput.raw[1/(PhiInput.raw)<quantile(1/(PhiInput.raw),Phi.qt2) & 1/(PhiInput.raw)>quantile(1/(PhiInput.raw),Phi.qt1)]
- PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
- PhiInput=PhiInput.raw[PhiInputNames]
-
-
-}
-
-if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
-if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
-if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
-
-# Wanna DENumbers be proportion to 2
-DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
-GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
-names(PhiInput)=GeneNames
-names(MeanInput)=GeneNames
-
-#########
-# data
-#########
-EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j])))
-
-
-
-
- generateDataraw=t(EEList)
- if(length(DVDconstant)==0){
- DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
- for(j in 1:NumofGene.raw){
- if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
- if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=MeanInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
-}
- }
- if(length(DVDconstant)!=0){
- for(j in 1:NumofGene.raw){
- if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
- if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
- }
- }
-rownames(generateDataraw)=GeneNames
-MeanVector=rowMeans(generateDataraw)
-VarVector=apply(generateDataraw,1,var)
-MOV.post=MeanVector/VarVector
-
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,]
-print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
-## DE
-NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
-
-if(length(NumofGene)!=0)
- generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
-
-
-UseName=rownames(generateData)
-
-TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
-phiuse=PhiInput[rownames(generateData)]
-meanuse=MeanInput[rownames(generateData)]
-
-#ArtiNames=rownames(generateData)[(DEGeneNumbers+1):(2*DEGeneNumbers)]
-#Noise=sample(c(1,ncol(generateData)),DEGeneNumbers,replace=T)
-TrueDELength=length(TrueDE)
-AtLoc=sample(c(1:length(Conditions)), TrueDELength, replace=T)
-AtFold=sample(c(4,6,8,10),TrueDELength, replace=T)
-
-AtNames_Level=vector("list",4)
-names(AtNames_Level)=c(4,6,8,10)
-for(i in 1:TrueDELength){
-generateData[(TrueDELength+i),AtLoc[i]]=generateData[(TrueDELength+i),AtLoc[i]]*AtFold[i]
-AtNames_Level[[as.character(AtFold[i])]]=c(AtNames_Level[[as.character(AtFold[i])]],rownames(generateData)[TrueDELength+i])
-}
-
-
-if(OnlyData==T){
- OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
- names(OutName)=rownames(generateData)
- OutData=generateData
- rownames(OutData)=as.vector(OutName)
- OutAt=as.vector(OutName[AtNames_Level])
- OutTrueDE=as.vector(OutName[TrueDE])
- output=list(data=OutData, TrueDE=OutTrueDE,Outliers=OutAt)
- return(output)
- }
-## DESeq
-
-cds=newCountDataSet(round(generateData),Conditions)
-cds=estimateSizeFactors(cds)
-Sizes=sizeFactors(cds)
-if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
-else cds=estimateVarianceFunctions(cds, method="blind")
-
-res=nbinomTest(cds, "1", "2")
-ResAdj=res$padj
-names(ResAdj)=res$id
-SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
-print(paste("DESEq found",length(SmallPValueName)))
-print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
-
-print("DESeq Size factors")
-print(Sizes)
-
-## DESeq each group
-## Ours
-NewData=generateData
-
-
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
-
-EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-
-#EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-
-
-zlist.unlist=EBresult[[5]]
-fdr=max(.5,crit_fun(1-zlist.unlist,.05))
-EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
-EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
-zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
-print(paste("Soft EB Poi",length(EBDE.Poi)))
-EBDE=c(EBDE, EBDE.Poi)
-print(paste("Soft EB found",length(EBDE)))
-print(paste("In True DE",sum(EBDE%in%TrueDE)))
-
-EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
-EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
-print(paste("Hard Poi found",length(EBDE95.Poi)))
-EBDE95=c(EBDE95, EBDE95.Poi)
-print(paste("Hard EB found" ,length(EBDE95)))
-print(paste("In True DE",sum(EBDE95%in%TrueDE)))
-
-### edgeR
-library(edgeR,lib.loc="~/RCODE")
-edgeRList.b2=DGEList(NewData,group=Conditions)
-if(length(Phiconstant)==1){
- edgeRList.b2=estimateCommonDisp(edgeRList.b2)
- edgeRRes.b2=exactTest(edgeRList.b2)
-}
-if(length(Phiconstant)==0){
- edgeRList.b2=estimateCommonDisp(edgeRList.b2)
- edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
- edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
-}
-edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
-edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
-names(edgeRPvalue.b2)=rownames(NewData)
-edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
-print(paste("edgeR found",length(edgeRSmallpvalue)))
-print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
-
-### Bayseq
-library(baySeq, lib.loc="~/RCODE")
-library(snow, lib.loc="~/RCODE")
-cl <- makeCluster(4, "SOCK")
-groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
-CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
-CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
-CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
-bayseqPost=CDPost.NBML@posteriors
-rownames(bayseqPost)=rownames(NewData)
-bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
-print(paste("bayseq found",length(bayseqDE)))
-print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
-
-
-### BBSeq
-library("BBSeq",lib.loc="~/RCODE")
-CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
-output=free.estimate(NewData,CondM)
-beta.free = output$betahat.free
-p.free = output$p.free
-psi.free = output$psi.free
-names(p.free)=rownames(NewData)
-p.free.adj=p.adjust(p.free,method="BH")
-# Top p free?
-#out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
-#p.constrained = out.model$p.model
-BBDE=names(p.free.adj)[which(p.free.adj<.05)]
-print(paste("BBSeq found",length(BBDE)))
-print(paste("In True DE",sum(BBDE%in%TrueDE)))
-
-
-#########################
-# Generate table
-Table=matrix(rep(0,12),ncol=2)
-colnames(Table)=c("Power","FDR")
-rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-
- Length=length(TrueDE)
- Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
- Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
- Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
- Table[4,1]=sum(BBDE%in%TrueDE)/Length
- Table[5,1]=sum(EBDE%in%TrueDE)/Length
- Table[6,1]=sum(EBDE95%in%TrueDE)/Length
- Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
- Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
- Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
- Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
- Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
- Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
- Table=round(Table,2)
-
-ValueTable=matrix(rep(0,12),ncol=2)
-colnames(ValueTable)=c("Power","FDR")
-rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
- ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
- ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
- ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
- ValueTable[4,1]=sum(BBDE%in%TrueDE)
- ValueTable[5,1]=sum(EBDE%in%TrueDE)
- ValueTable[6,1]=sum(EBDE95%in%TrueDE)
- ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
- ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
- ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
- ValueTable[4,2]=sum(!BBDE%in%TrueDE)
- ValueTable[5,2]=sum(!EBDE%in%TrueDE)
- ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
-
-
-AtFoundTable=matrix(rep(0,24),ncol=4)
-colnames(AtFoundTable)=paste("Level",c(1:4),sep="_")
-rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-for(i in 1:4){
- AtFoundTable[1,i]=sum(SmallPValueName%in%AtNames_Level[[i]])
- AtFoundTable[2,i]=sum(edgeRSmallpvalue%in%AtNames_Level[[i]])
- AtFoundTable[3,i]=sum(bayseqDE%in%AtNames_Level[[i]])
- AtFoundTable[4,i]=sum(BBDE%in%AtNames_Level[[i]])
- AtFoundTable[5,i]=sum(EBDE%in%AtNames_Level[[i]])
- AtFoundTable[6,i]=sum(EBDE95%in%AtNames_Level[[i]])
- }
-
-
-if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
-if(length(DVDconstant)!=0) DVD=DVDconstant
-if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
-if(length(Phiconstant)!=0) Phi=Phiconstant
-OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult, AtFoundTable= AtFoundTable,Outliers=AtNames_Level)
-
-
-
-}
-
-
+++ /dev/null
-GetData <-
-function(path,Name1,Name2,type)
-{
-Data=vector("list",8)
-Filenames=NULL
-Tablenames=NULL
-for (name in 1:4)
- {
- if (type=="I")
- Filenames=c(Filenames,paste(path,Name1,name,"_isoform_nus.tab",sep=""))
- if (type=="G")
- Filenames=c(Filenames,paste(path,Name1,name,"_gene_nus.tab",sep=""))
- Tablenames=c(Tablenames,paste(Name1,name,sep=""))
- }
-for (name in 1:4)
- {
- if (type=="I")
- Filenames=c(Filenames,paste(path,Name2,name,"_isoform_nus.tab",sep=""))
- if (type=="G")
- Filenames=c(Filenames,paste(path,Name2,name,"_gene_nus.tab",sep=""))
- Tablenames=c(Tablenames,paste(Name2,name,sep=""))
- }
-
-
-names(Data)=Tablenames
-for (file in 1:8)
- {
- temp=read.table(Filenames[file],header=T)
- temp2=as.matrix(temp[-1])
- rownames(temp2)=as.vector(as.matrix(temp[1]))
- Data[[file]]=temp2
- }
- Data
-}
-
+++ /dev/null
-GetMultiPP <- function(EBout){
- PP=EBout$PPDE
- MAP=colnames(EBout$f)[apply(EBout$f,1,which.max)]
- AllParti=EBout$AllParti
- out=list(PP=PP, MAP=MAP,Patterns=AllParti)
-}
+++ /dev/null
-GetNg<- function(IsoformName, GeneName){
- GeneNg = tapply(IsoformName, GeneName, length)
- IsoformNg = GeneNg[GeneName]
- names(IsoformNg) = IsoformName
- GeneNgTrun=GeneNg
- GeneNgTrun[GeneNgTrun>3]=3
- IsoformNgTrun=IsoformNg
- IsoformNgTrun[IsoformNgTrun>3]=3
- out=list( GeneNg=GeneNg, GeneNgTrun=GeneNgTrun, IsoformNg=IsoformNg, IsoformNgTrun=IsoformNgTrun)
- }
+++ /dev/null
-GetPP <- function(EBout){
- #PP=c(EBout[[5]], EBout[[6]])
- PP=EBout$PPDE
-}
+++ /dev/null
-GetPatterns<-function(Conditions){
- if(!is.factor(Conditions))Conditions=as.factor(Conditions)
- NumCond=nlevels(Conditions)
- CondLevels=levels(Conditions)
- #library(blockmodeling)
- AllPartiList=sapply(1:NumCond,function(i)nkpartitions(NumCond,i))
- AllParti=do.call(rbind,AllPartiList)
- colnames(AllParti)=CondLevels
- rownames(AllParti)=paste("Pattern",1:nrow(AllParti),sep="")
- AllParti
-
-}
+++ /dev/null
-IsoSimu=function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofIso=NULL, DEIsoProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1
-# paired simulation
-data(IsoEBresultGouldBart2)
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-
-MeansC1=IsoEBresultGouldBart2$C1Mean
-MeansC2=IsoEBresultGouldBart2$C2Mean
-MeanDVD=sapply(1:9,function(i) MeansC1[[i]]/MeansC2[[i]])
-# DVD library with each group here
-if (length(DVDconstant)==0) DVDLibrary= unlist(MeanDVD)[unlist(MeanDVD)<quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt2) & unlist(MeanDVD)>quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt1)]
-
-
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-VarInput=IsoEBresultGouldBart2$VarList
-VarInputNg=list(VarInput[[1]],unlist(VarInput[c(2,4,6,8)]),unlist(VarInput[c(3,5,7,9)]))
-#If NumofIso=NULL, empirical # of Iso
-#If !=NULL , Input a 9-vector
-if(length(NumofIso)==0) NumofIso.raw=sapply(1:3,function(i)length(VarInputNg[[i]]))
-if(length(NumofIso)!=0) NumofIso.raw=NumofIso*2
-
-PhiInput.raw=IsoEBresultGouldBart2$RList
-PhiInput.raw.Ng=list(PhiInput.raw[[1]],unlist(PhiInput.raw[c(2,4,6,8)]),unlist(PhiInput.raw[c(3,5,7,9)]))
-
-
-if (length(Phiconstant)==0){
- PhiLibrary=sapply(1:3,function(i)PhiInput.raw.Ng[[i]][1/PhiInput.raw.Ng[[i]]<quantile(1/PhiInput.raw.Ng[[i]],Phi.qt2) & 1/PhiInput.raw.Ng[[i]]>quantile(1/PhiInput.raw.Ng[[i]],Phi.qt1)],simplify=F)
- PhiIndex=sapply(1:3, function(i)sample(names(PhiLibrary[[i]]),NumofIso.raw[[i]],replace=T),simplify=F)
- PhiInputNg=sapply(1:3, function(i)PhiLibrary[[i]][PhiIndex[[i]]])
-}
-if (length(Phiconstant)!=0)PhiInputNg=sapply(1:3,function(i)rep(Phiconstant,NumofIso.raw[[i]]),simplify=F)
-
-# Wanna DENumbers be proportion to 2
-DEIsoNumbers=round(NumofIso.raw*DEIsoProp/2)*2
-IsoNames=sapply(1:3,function(i)paste("I",i,c(1:NumofIso.raw[i]),sep="_"),simplify=F)
-MeanNg=list(IsoEBresultGouldBart2$MeanList[[1]],unlist(IsoEBresultGouldBart2$MeanList[c(2,4,6,8)]),
-unlist(IsoEBresultGouldBart2$MeanList[c(3,5,7,9)]))
-MeanInputNg=sapply(1:3, function(i)MeanNg[[i]][PhiIndex[[i]]])
-
-for(i in 1:3){
- names(MeanInputNg[[i]])=IsoNames[[i]]
- names(PhiInputNg[[i]])=IsoNames[[i]]
- }
-
-##############################
-# Get Ng version to every one
-##############################
-
-
-#########
-# data
-#########
-EEList=sapply(1:3,function(i) sapply(1:NumofIso.raw[[i]], function(j)sapply(1:NumofSample,function(h) rnbinom(1,mu=MeanInputNg[[i]][j]*NormFactor[h], size=PhiInputNg[[i]][j]))),simplify=F)
-
-
-generateDataraw=vector("list",3)
-MeanVector=vector("list",3)
-VarVector=vector("list",3)
-MOV.post=vector("list",3)
-
-
-for(g in 1:3){
- generateDataraw[[g]]=t(EEList[[g]][,1:NumofIso.raw[g]])
- if(length(DVDconstant)==0){
- for(j in 1:NumofIso.raw[g]){
- if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu=sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h])), simplify=T)
- if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) suppressWarnings(rnbinom(1, size=MeanInputNg[[g]][j], mu= sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
-}
- }
- if(length(DVDconstant)!=0){
- for(j in 1:NumofIso.raw[g]){
- if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)suppressWarnings(rnbinom(1, DVDconstant*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
- if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) wuppressWarnings(rnbinom(1, DVDconstant*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
- }
- }
-rownames(generateDataraw[[g]])=IsoNames[[g]][1:NumofIso.raw[g]]
-MeanVector[[g]]=rowMeans(generateDataraw[[g]])
-VarVector[[g]]=apply(generateDataraw[[g]],1,var)
-MOV.post[[g]]=MeanVector[[g]]/VarVector[[g]]
-}
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-for (i in 1:3) generateData[[i]]=generateData[[i]][!is.na(MOV.post[[i]]),]
-#print(paste("NA MOV's",sum(is.na(unlist(MOV.post)))))
-NumDENow=sapply(1:3, function(i)sum(rownames(generateData[[i]])%in%rownames(generateDataraw[[i]])[1:DEIsoNumbers[i]]))
-
-if(length(NumofIso)!=0){
- for(i in 1:3)
- generateData[[i]]=generateData[[i]][c(sample(1:NumDENow[i],round(NumofIso[i]*DEIsoProp),replace=F),round( (dim(generateData[[i]])[1]+1-NumofIso[i]*(1-DEIsoProp)):dim(generateData[[i]])[1])),]
-}
-generateDataNg=generateData
-
-## DE
-UseName=sapply(1:3, function(i)rownames(generateData[[i]]),simplify=F)
-TrueDE=sapply(1:3, function(i)UseName[[i]][UseName[[i]] %in% rownames(generateDataraw[[i]])[1:DEIsoNumbers[i]]],simplify=F)
-TrueDE.unlist=do.call(c,TrueDE)
-
-phiuse=sapply(1:3,function(i)PhiInputNg[[i]][UseName[[i]]])
-meanuse=sapply(1:3,function(i)MeanInputNg[[i]][UseName[[i]]])
-
-#if(OnlyData==T){
-
-OutName=sapply(1:3,function(i)paste("Iso",i,c(1:nrow(generateDataNg[[i]])),sep="_"))
-for(i in 1:3)names(OutName[[i]])=rownames(generateDataNg[[i]])
-OutData=generateDataNg
-for(i in 1:3)rownames(OutData[[i]])=as.vector(OutName[[i]])
-OutTrueDE=as.vector(unlist(OutName)[TrueDE.unlist])
-output=list(data=OutData, TrueDE=OutTrueDE)
-
-
-#output=list(data=generateDataNg, TrueDE=TrueDE.unlist)
-return(output)
-# }
-# Now only OnlyData=T version
-}
-
+++ /dev/null
-IsoSimuAt<-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofIso=NULL, DEIsoProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL,NormFactor=NULL, OnlyData=T)
-{
-#Ng paired 2012 feb 1
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-data(IsoEBresultGouldBart2)
-
-MeansC1=IsoEBresultGouldBart2$C1Mean
-MeansC2=IsoEBresultGouldBart2$C2Mean
-MeanDVD=sapply(1:9,function(i) MeansC1[[i]]/MeansC2[[i]])
-if (length(DVDconstant)==0) DVDLibrary= unlist(MeanDVD)[unlist(MeanDVD)<quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt2) & unlist(MeanDVD)>quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt1)]
-
-
-
-
-VarInput=IsoEBresultGouldBart2$VarList
-VarInputNg=list(VarInput[[1]],unlist(VarInput[c(2,4,6,8)]),unlist(VarInput[c(3,5,7,9)]))
-
-if(length(NumofIso)==0) NumofIso=sapply(1:3,function(i)length(VarInputNg[[i]]))
-PhiInput.raw=IsoEBresultGouldBart2$RList
-PhiInput.raw.Ng=list(PhiInput.raw[[1]],unlist(PhiInput.raw[c(2,4,6,8)]),unlist(PhiInput.raw[c(3,5,7,9)]))
-
-
-if (length(Phiconstant)==0){
- PhiLibrary=sapply(1:3,function(i)PhiInput.raw.Ng[[i]][1/PhiInput.raw.Ng[[i]]<quantile(1/PhiInput.raw.Ng[[i]],Phi.qt2) & 1/PhiInput.raw.Ng[[i]]>quantile(1/PhiInput.raw.Ng[[i]],Phi.qt1)],simplify=F)
- PhiIndex=sapply(1:3, function(i)sample(names(PhiLibrary[[i]]),NumofIso[[i]],replace=T),simplify=F)
- PhiInputNg=sapply(1:3, function(i)PhiLibrary[[i]][PhiIndex[[i]]])
-}
-if (length(Phiconstant)!=0)PhiInputNg=sapply(1:3,function(i)rep(Phiconstant,NumofIso[[i]]),simplify=F)
-
-# Wanna DENumbers be proportion to 2
-DEIsoNumbers=round(NumofIso*DEIsoProp/2)*2
-IsoNames=sapply(1:3,function(i)paste("I",i,c(1:NumofIso[i]),sep="_"),simplify=F)
-MeanNg=list(IsoEBresultGouldBart2$MeanList[[1]],unlist(IsoEBresultGouldBart2$MeanList[c(2,4,6,8)]),
-unlist(IsoEBresultGouldBart2$MeanList[c(3,5,7,9)]))
-MeanInputNg=sapply(1:3, function(i)MeanNg[[i]][PhiIndex[[i]]])
-
-for(i in 1:3){
- names(MeanInputNg[[i]])=IsoNames[[i]]
- names(PhiInputNg[[i]])=IsoNames[[i]]
- }
-
-#########
-# data
-#########
-EEList=sapply(1:3,function(i) sapply(1:NumofIso[[i]], function(j)sapply(1:NumofSample,function(h) rnbinom(1,mu=MeanInputNg[[i]][j]*NormFactor[h], size=PhiInputNg[[i]][j]))),simplify=F)
-
-
-generateDataraw=vector("list",3)
-MeanVector=vector("list",3)
-VarVector=vector("list",3)
-MOV.post=vector("list",3)
-
-
-for(g in 1:3){
- generateDataraw[[g]]=t(EEList[[g]][,1:NumofIso[g]])
- if(length(DVDconstant)==0){
- for(j in 1:NumofIso[g]){
- if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)rnbinom(1, size=PhiInputNg[[g]][j], mu=sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h]), simplify=T)
- if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) rnbinom(1, size=MeanInputNg[[g]][j], mu= sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
-}
- }
- if(length(DVDconstant)!=0){
- for(j in 1:NumofIso[g]){
- if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)rnbinom(1, DVDconstant*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
- if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) rnbinom(1, DVDconstant*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
- }
- }
-rownames(generateDataraw[[g]])=IsoNames[[g]][1:NumofIso[g]]
-MeanVector[[g]]=rowMeans(generateDataraw[[g]])
-VarVector[[g]]=apply(generateDataraw[[g]],1,var)
-MOV.post[[g]]=MeanVector[[g]]/VarVector[[g]]
-}
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-for (i in 1:3) generateData[[i]]=generateData[[i]][!is.na(MOV.post[[i]]),]
-print(paste("NA MOV's",sum(is.na(unlist(MOV.post)))))
-#tmpmean=sapply(1:9,function(i)rowMeans(generateData[[i]]))
-#tmpvar=sapply(1:9,function(i)apply(generateData[[i]],1,var))
-#source("plot_functions.R")
-#CheckSimuNg(tmpmean,tmpvar,c(-1,5),c(-1,7))
-
-
-
-
-## DE
-UseName=sapply(1:3, function(i)rownames(generateData[[i]]),simplify=F)
-TrueDE=sapply(1:3, function(i)UseName[[i]][UseName[[i]] %in% rownames(generateData[[i]])[1:DEIsoNumbers[i]]],simplify=F)
-TrueDE.unlist=do.call(c,TrueDE)
-
-TrueDELength=sapply(TrueDE,length)
-
-AtNames_Level=vector("list",4)
-AtLoc=vector("list",3)
-AtFold=vector("list",3)
-names(AtNames_Level)=c(4,6,8,10)
-
-
-for(j in 1:3){
-AtLoc[[j]]=sample(c(1:length(Conditions)), TrueDELength[j], replace=T)
-AtFold[[j]]=sample(c(4,6,8,10),TrueDELength[j], replace=T)
-
-for(i in 1:TrueDELength[j]){
-
-generateData[[j]][(TrueDELength[j]+i),AtLoc[[j]][i]]=generateData[[j]][(TrueDELength[j]+i),AtLoc[[j]][i]]*AtFold[[j]][i]
-AtNames_Level[[as.character(AtFold[[j]][i])]]=c(AtNames_Level[[as.character(AtFold[[j]][i])]],rownames(generateData[[j]])[TrueDELength[j]+i])
-}
-}
-phiuse=sapply(1:3,function(i)PhiInputNg[[i]][UseName[[i]]])
-meanuse=sapply(1:3,function(i)MeanInputNg[[i]][UseName[[i]]])
-
-#generateDataNg=list(generateData[[1]], do.call(rbind,generateData[c(2,4,6,8)]), do.call(rbind,generateData[c(3,5,7,9)]))
-generateDataNg=generateData
-
-#if(OnlyData==T){
-
-OutName=sapply(1:3,function(i)paste("Iso",i,c(1:nrow(generateDataNg[[i]])),sep="_"))
-for(i in 1:3)names(OutName[[i]])=rownames(generateDataNg[[i]])
-OutData=generateDataNg
-for(i in 1:3)rownames(OutData[[i]])=as.vector(OutName[[i]])
-OutTrueDE=as.vector(unlist(OutName)[TrueDE.unlist])
-OutAt=as.vector(unlist(OutName)[AtNames <- Level])
-
-output=list(data=OutData, TrueDE=OutTrueDE, Outliers=OutAt)
-# return(output)
-# }
- }
+++ /dev/null
-Likefun <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[5]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PIn=ParamPool[2+NoneZeroLength]
-ZIn=InputPool[[4]]
-Input=InputPool[[3]]
-Input1=matrix(InputPool[[1]],nrow=nrow(Input))
-Input2=matrix(InputPool[[2]],nrow=nrow(Input))
-RIn=InputPool[[6]]
-RInSP1=matrix(InputPool[[7]],nrow=nrow(Input))
-RInSP2=matrix(InputPool[[8]],nrow=nrow(Input))
-NumIn=InputPool[[9]]
-##Function here
-#LikelihoodFunction<- function(NoneZeroLength){
- F0=f0(Input, AlphaIn, BetaIn, RIn, NumIn, log=T)
- F1=f1(Input1, Input2, AlphaIn, BetaIn, RInSP1,RInSP2, NumIn, log=T)
- F0[F0==Inf]=min(!is.na(F0[F0!=Inf]))
- F1[F1==Inf]=min(!is.na(F1[F1!=Inf]))
-
- -sum((1-ZIn)*F0+ (1-ZIn)* log(1-PIn) + ZIn*F1 + ZIn*log(PIn))
-}
-
+++ /dev/null
-LikefunMulti <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[4]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PIn=ParamPool[(2+NoneZeroLength):length(ParamPool)]
-PInAll=c(1-sum(PIn),PIn)
-ZIn=InputPool[[3]]
-Input=InputPool[[2]]
-InputSP=InputPool[[1]]
-RIn=InputPool[[5]]
-RInSP=InputPool[[6]]
-NumIn=InputPool[[7]]
-AllParti=InputPool[[8]]
-PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
-##Function here
-FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
- function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn,
- do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
- simplify=F)
-FPartiLog=sapply(FList,rowSums)
-#FMat=exp(FPartiLog)
-FMat=FPartiLog
--sum(ZIn*(FMat+log(PInMat)))
-}
-
+++ /dev/null
-LikefunMulti <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[4]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PInMat=InputPool[[9]]
-#PInAll=c(1-sum(PIn),PIn)
-ZIn=InputPool[[3]]
-Input=InputPool[[2]]
-InputSP=InputPool[[1]]
-RIn=InputPool[[5]]
-RInSP=InputPool[[6]]
-NumIn=InputPool[[7]]
-AllParti=InputPool[[8]]
-#PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
-##Function here
-FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
- function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn,
- do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
- simplify=F)
-FPartiLog=sapply(FList,rowSums)
-#FMat=exp(FPartiLog)
-FMat=FPartiLog
--sum(ZIn*(FMat+log(PInMat)))
-}
-
+++ /dev/null
-LikefunMulti <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[4]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PIn=ParamPool[(2+NoneZeroLength):length(ParamPool)]
-PInAll=c(1-sum(PIn),PIn)
-ZIn=InputPool[[3]]
-Input=InputPool[[2]]
-InputSP=InputPool[[1]]
-RIn=InputPool[[5]]
-RInSP=InputPool[[6]]
-NumIn=InputPool[[7]]
-AllParti=InputPool[[8]]
-PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
-##Function here
-FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
- function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn,
- do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
- simplify=F)
-FPartiLog=sapply(FList,rowSums)
-#FMat=exp(FPartiLog)
-FMat=FPartiLog
--sum(ZIn*(FMat+log(PInMat)))
-}
-
+++ /dev/null
-LogN <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength)
-{
- #2 condition case (skip the loop then maybe run faster? Code multi condition cases later)
-
- #For each gene (m rows of Input---m genes)
- #Save each gene's F0, F1 for further likelihood calculation.
-
- #Get F0 for EE
- F0=f0(Input, AlphaIn, BetaIn, EmpiricalR, NumOfEachGroup, log=F)
- #Get F1 for DE
- F1=f1(InputSP[[1]], InputSP[[2]], AlphaIn, BetaIn, EmpiricalRSP[[1]],EmpiricalRSP[[2]], NumOfEachGroup, log=F)
-
- #Get z
- #Use data.list in logfunction
-
- z.list=PIn*F1/(PIn*F1+(1-PIn)*F0)
- zNaNName=names(z.list)[is.na(z.list)]
- zGood=which(!is.na(z.list))
- ###Update P
- #PFromZ=sapply(1:NoneZeroLength,function(i) sum(z.list[[i]])/length(z.list[[i]]))
- PFromZ=sum(z.list[zGood])/length(z.list[zGood])
- F0Good=F0[zGood]
- F1Good=F1[zGood]
- ### MLE Part ####
- # Since we dont wanna update p and Z in this step
- # Each Ng for one row
-
- NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-
- NumGroupVector.zGood=NumGroupVector[zGood]
- NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
- StartValue=c(AlphaIn, BetaIn,PIn)
-
- Result<-optim(StartValue,Likefun,InputPool=list(InputSP[[1]][zGood,],InputSP[[2]][zGood,],Input[zGood,],z.list[zGood], NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSP[[1]][zGood,], EmpiricalRSP[[2]][zGood,], NumOfEachGroup.zGood))
- #LikeOutput=Likelihood( StartValue, Input , InputSP , PNEW.list, z.list)
- AlphaNew= Result$par[1]
- BetaNew=Result$par[2:(1+NoneZeroLength)]
- PNew=Result$par[2+NoneZeroLength]
- ##
- Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZNew.list=z.list,PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,F0Out=F0Good, F1Out=F1Good)
- Output
- }
-
+++ /dev/null
-LogNMulti <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength, AllParti, Conditions)
-{
-
- #For each gene (m rows of Input---m genes)
- #Save each gene's F0, F1 for further likelihood calculation.
- FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
- function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn,
- do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
- simplify=F)
- FPartiLog=sapply(FList,rowSums)
- FMat=exp(FPartiLog)
- rownames(FMat)=rownames(Input)
- #Get z
- #Use data.list in logfunction
- PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
- FmultiP=FMat*PInMat
- Denom=rowSums(FmultiP)
- ZEach=apply(FmultiP,2,function(i)i/Denom)
- zNaNName1=names(Denom)[is.na(Denom)]
- # other NAs in LikeFun
- LF=ZEach*(log(FmultiP))
- zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
- zNaNName=unique(c(zNaNName1,zNaNMore))
- zGood=which(!rownames(LF)%in%zNaNName)
- ZEachGood=ZEach[zGood,]
- ###Update P
- PFromZ=colSums(ZEach[zGood,])/length(zGood)
- FGood=FMat[zGood,]
- ### MLE Part ####
- # Since we dont wanna update p and Z in this step
- # Each Ng for one row
-
- NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-
- NumGroupVector.zGood=NumGroupVector[zGood]
- NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
- StartValue=c(AlphaIn, BetaIn,PIn[-1])
- InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
- EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
-
- Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,],
- NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti))
- AlphaNew= Result$par[1]
- BetaNew=Result$par[2:(1+NoneZeroLength)]
- PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
- PNew=c(1-sum(PNewNo1),PNewNo1)
- ##
- Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood,
- PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
- Output
- }
-
+++ /dev/null
-LogNMulti <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength, AllParti, Conditions)
-{
-
- #For each gene (m rows of Input---m genes)
- #Save each gene's F0, F1 for further likelihood calculation.
- FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
- function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn,
- do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
- simplify=F)
- FPartiLog=sapply(FList,rowSums)
- FMat=exp(FPartiLog)
- rownames(FMat)=rownames(Input)
- #Get z
- #Use data.list in logfunction
- PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
- FmultiP=FMat*PInMat
- Denom=rowSums(FmultiP)
- ZEach=apply(FmultiP,2,function(i)i/Denom)
- zNaNName1=names(Denom)[is.na(Denom)]
- # other NAs in LikeFun
- LF=ZEach*(log(FmultiP))
- zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
- zNaNName=unique(c(zNaNName1,zNaNMore))
- zGood=which(!rownames(LF)%in%zNaNName)
- ZEachGood=ZEach[zGood,]
- ###Update P
- PFromZ=colSums(ZEach[zGood,])/length(zGood)
- NewPInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PFromZ,nrow=1)
- FGood=FMat[zGood,]
- ### MLE Part ####
- # Since we dont wanna update p and Z in this step
- # Each Ng for one row
-
- NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-
- NumGroupVector.zGood=NumGroupVector[zGood]
- NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
- StartValue=c(AlphaIn, BetaIn)
- InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
- EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
-
- Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,],
- NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti,
- NewPInMat))
- AlphaNew= Result$par[1]
- BetaNew=Result$par[2:(1+NoneZeroLength)]
- #PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
- #PNew=c(1-sum(PNewNo1),PNewNo1)
- PNew= PFromZ
- ##
- Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood,
- PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
- Output
- }
-
+++ /dev/null
-LogNMulti <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength, AllParti, Conditions)
-{
-
- #For each gene (m rows of Input---m genes)
- #Save each gene's F0, F1 for further likelihood calculation.
- FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
- function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn,
- do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
- simplify=F)
- FPartiLog=sapply(FList,rowSums)
- FMat=exp(FPartiLog)
- rownames(FMat)=rownames(Input)
- #Get z
- #Use data.list in logfunction
- PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
- FmultiP=FMat*PInMat
- Denom=rowSums(FmultiP)
- ZEach=apply(FmultiP,2,function(i)i/Denom)
- zNaNName1=names(Denom)[is.na(Denom)]
- # other NAs in LikeFun
- LF=ZEach*(log(FmultiP))
- zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
- zNaNName=unique(c(zNaNName1,zNaNMore))
- zGood=which(!rownames(LF)%in%zNaNName)
- ZEachGood=ZEach[zGood,]
- ###Update P
- PFromZ=colSums(ZEach[zGood,])/length(zGood)
- FGood=FMat[zGood,]
- ### MLE Part ####
- # Since we dont wanna update p and Z in this step
- # Each Ng for one row
-
- NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-
- NumGroupVector.zGood=NumGroupVector[zGood]
- NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
- StartValue=c(AlphaIn, BetaIn,PIn[-1])
- InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
- EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
-
- Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,],
- NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti))
- AlphaNew= Result$par[1]
- BetaNew=Result$par[2:(1+NoneZeroLength)]
- PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
- PNew=c(1-sum(PNewNo1),PNewNo1)
- ##
- Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood,
- PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
- Output
- }
-
+++ /dev/null
-MedianNorm=function(Data){
-
- geomeans <- exp(rowMeans(log(Data)))
- apply(Data, 2, function(cnts) median((cnts/geomeans)[geomeans > 0]))
-}
+++ /dev/null
-MergeGene <-
-function(GeneSIMout, Num, Path="./"){
-NumSample=ncol(GeneSIMout[[i]]$generateData)
-
-NumGene=rep(0,Num)
-for (i in 1:Num)NumGene[i]=nrow(GeneSIMout[[i]]$generateData)
-
-MinNumGene=min(NumGene)
-AproxNumDE=length(GeneSIMout[[1]]$TrueDE)
-
-GeneMergeTable=matrix(rep(0,12),nrow=6)
- for(i in 1:Num)GeneMergeTable=GeneMergeTable+GeneSIMout[[i]][[1]]
- GeneMergeTable=GeneMergeTable/Num
- GeneMergeTable=round(GeneMergeTable,2)
-
- GeneMergeDVD=rep(0,2)
- for(i in 1:Num)GeneMergeDVD=GeneMergeDVD+GeneSIMout[[i]][[3]]
- GeneMergeDVD=round(GeneMergeDVD/Num,2)
-
- GeneMergePhi=matrix(rep(0,2),nrow=2)
- for(i in 1:Num)GeneMergePhi=GeneMergePhi+GeneSIMout[[i]][[4]]
- GeneMergePhi=round(GeneMergePhi/Num,2)
-## Write
-TXTname=paste(paste(Path,paste("Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".txt",sep="")
-write.table(GeneMergeTable, file=TXTname)
-
-
-####### Note everytime # DE genes and # total genes may different. (since NA issue)
- GeneMergeFD=matrix(rep(0,5*MinNumGene),ncol=5)
- GeneMergeFD.p=matrix(rep(0,5*MinNumGene),ncol=5)
- GeneMergeTP.p=matrix(rep(0,5*MinNumGene),ncol=5)
- GeneMergeFN.p=matrix(rep(0,5*MinNumGene),ncol=5)
- GeneMergeTN.p=matrix(rep(0,5*MinNumGene),ncol=5)
-
- GeneMergeFDR=matrix(rep(0,5*MinNumGene),ncol=5)
- GeneMergeTPR=matrix(rep(0,5*MinNumGene),ncol=5)
- GeneMergeFPR=matrix(rep(0,5*MinNumGene),ncol=5)
-
-
- for(i in 1:Num){
- # Make sure names in the same order
- # Get FD number for each number of genes found
- TotalNum=nrow(GeneSIMout[[i]]$generateData)
- NumDE=length(GeneSIMout[[i]]$TrueDE)
- EBSeqNames=names(GeneSIMout[[i]]$EBSeqPP)
- tmpMatrix=cbind(GeneSIMout[[i]]$DESeqP[EBSeqNames],GeneSIMout[[i]]$edgeRP[EBSeqNames], exp(GeneSIMout[[i]]$BaySeqPP[EBSeqNames,2]),GeneSIMout[[i]]$BBSeqP[EBSeqNames],GeneSIMout[[i]]$EBSeqPP)
- # Bayseq and EBseq are PP. Others are p value
- tmpFD=TopCts(tmpMatrix, c(0,0,1,0,1), GeneSIMout[[i]]$TrueDE[GeneSIMout[[i]]$TrueDE%in%EBSeqNames], MinNumGene)
- # Get percentage for FP, TP, TN, FN!
- tmpFD.p=tmpFD/TotalNum
- # TP = Find - FD
- tmpTP.p=(c(1:MinNumGene)-tmpFD)/TotalNum
- # FN = TrueDE - TP
- tmpFN.p=NumDE/TotalNum - tmpTP.p
- # TN = TrueEE - FD
- tmpTN.p=(TotalNum-NumDE)/TotalNum - tmpFD.p
-
-
- tmpFDR=tmpFD.p/(tmpFD.p+tmpTP.p)
- tmpFPR=tmpFD.p/(tmpFD.p+tmpTN.p)
- tmpTPR=tmpTP.p/(tmpFN.p+tmpTP.p)
- GeneMergeFDR=GeneMergeFDR+tmpFDR
- GeneMergeTPR=GeneMergeTPR+tmpTPR
- GeneMergeFPR=GeneMergeFPR+tmpFPR
-
- GeneMergeFD.p=GeneMergeFD.p+tmpFD.p
- GeneMergeTP.p=GeneMergeTP.p+tmpTP.p
- GeneMergeFN.p=GeneMergeFN.p+tmpFN.p
- GeneMergeTN.p=GeneMergeTN.p+tmpTN.p
-
- GeneMergeFD=GeneMergeFD+tmpFD
- }
- GeneMergeFD=GeneMergeFD/Num
- GeneMergeFD.p=GeneMergeFD.p/Num
- GeneMergeTP.p=GeneMergeTP.p/Num
- GeneMergeFN.p=GeneMergeFN.p/Num
- GeneMergeTN.p=GeneMergeTN.p/Num
-
- GeneMergeFDR=GeneMergeFDR/Num
- GeneMergeTPR=GeneMergeTPR/Num
- GeneMergeFPR=GeneMergeFPR/Num
-
-
-PlotTopName=paste(paste(Path,paste("Top","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample, sep="_"),sep=""),".pdf",sep="")
-
-TrueDELength=length(GeneSIMout[[i]]$TrueDE[GeneSIMout[[i]]$TrueDE%in%EBSeqNames])
-pdf(PlotTopName)
- PlotTopCts(TrueDELength,GeneMergeFD[1:TrueDELength,],c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-
-PlotFDName=paste(paste(Path,paste("FDTP","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFDName)
- PlotFDTP(MinNumGene,GeneMergeFDR, GeneMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-PlotFPName=paste(paste(Path,paste("FPRTP","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFPName)
- PlotFPTP(MinNumGene,GeneMergeFPR, GeneMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
- dev.off()
-
-
-out=list(GeneMergeTable=GeneMergeTable, GeneMergeDVD=GeneMergeDVD, GeneMergePhi=GeneMergePhi, GeneMergeFD=GeneMergeFD)
-
-
-}
-
+++ /dev/null
-MergeIso <-
-function(IsoSIMout, Num, Path="./"){
-NumSample=ncol(do.call(rbind, IsoSIMout[[i]]$generateData))
-
-NumIso=rep(0,Num)
-for (i in 1:Num)NumIso[i]=nrow(do.call(rbind, IsoSIMout[[i]]$generateData))
-
-MinNumIso=min(NumIso)
-AproxNumDE=length(unlist(IsoSIMout[[1]]$TrueDE))
-
-IsoMergeTable=matrix(rep(0,60),nrow=10)
- for(i in 1:Num)IsoMergeTable=IsoMergeTable+IsoSIMout[[i]][[1]]
- IsoMergeTable=IsoMergeTable/Num
- IsoMergeTable=round(IsoMergeTable,2)
-
- IsoMergeDVD=rep(0,2)
- for(i in 1:Num)IsoMergeDVD=IsoMergeDVD+IsoSIMout[[i]][[3]]
- IsoMergeDVD=round(IsoMergeDVD/Num,2)
-
- IsoMergePhi=matrix(rep(0,18),nrow=2)
- for(i in 1:Num)IsoMergePhi=IsoMergePhi+IsoSIMout[[i]][[4]]
- IsoMergePhi=round(IsoMergePhi/Num,2)
-## Write
-TXTname=paste(paste("../IsoOutput/",paste("Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".txt",sep="")
-write.table(IsoMergeTable, file=TXTname)
-
-
-####### Note everytime # DE genes and # total genes may different. (since NA issue)
- IsoMergeFD=matrix(rep(0,5*MinNumIso),ncol=5)
- IsoMergeFD.p=matrix(rep(0,5*MinNumIso),ncol=5)
- IsoMergeTP.p=matrix(rep(0,5*MinNumIso),ncol=5)
- IsoMergeFN.p=matrix(rep(0,5*MinNumIso),ncol=5)
- IsoMergeTN.p=matrix(rep(0,5*MinNumIso),ncol=5)
- IsoMergeFDR=matrix(rep(0,5*MinNumIso),ncol=5)
- IsoMergeTPR=matrix(rep(0,5*MinNumIso),ncol=5)
- IsoMergeFPR=matrix(rep(0,5*MinNumIso),ncol=5)
-
- for(i in 1:Num){
- # Make sure names in the same order
- # Get FD number for each number of genes found
- # columns are samples
- TotalNum=nrow(do.call(rbind, IsoSIMout[[i]]$generateData))
- NumDE=length(unlist(IsoSIMout[[i]]$TrueDE))
- EBSeqNames=names(IsoSIMout[[i]]$EBSeqPP)
- tmpMatrix=cbind(IsoSIMout[[i]]$DESeqP[EBSeqNames],IsoSIMout[[i]]$edgeRP[EBSeqNames], exp(IsoSIMout[[i]]$BaySeqPP[EBSeqNames,2]),IsoSIMout[[i]]$BBSeqP[EBSeqNames],IsoSIMout[[i]]$EBSeqPP)
- # Bayseq and EBseq are PP. Others are p value
- tmpFD=TopCts(tmpMatrix, c(0,0,1,0,1), unlist(IsoSIMout[[i]]$TrueDE)[unlist(IsoSIMout[[i]]$TrueDE)%in%EBSeqNames], MinNumIso)
- # Get percentage for FP, TP, TN, FN!
- tmpFD.p=tmpFD/TotalNum
- # TP = Find - FD
- tmpTP.p=(outer(c(1:MinNumIso),rep(1,5))-tmpFD)/TotalNum
- # FN = TrueDE - TP
- tmpFN.p=NumDE/TotalNum - tmpTP.p
- # TN = TrueEE - FD
- tmpTN.p=(TotalNum-NumDE)/TotalNum - tmpFD.p
-
- tmpFDR=tmpFD.p/(tmpFD.p+tmpTP.p)
- tmpFPR=tmpFD.p/(tmpFD.p+tmpTN.p)
- tmpTPR=tmpTP.p/(tmpFN.p+tmpTP.p)
- IsoMergeFDR=IsoMergeFDR+tmpFDR
- IsoMergeTPR=IsoMergeTPR+tmpTPR
- IsoMergeFPR=IsoMergeFPR+tmpFPR
-
- IsoMergeFD.p=IsoMergeFD.p+tmpFD.p
- IsoMergeTP.p=IsoMergeTP.p+tmpTP.p
- IsoMergeFN.p=IsoMergeFN.p+tmpFN.p
- IsoMergeTN.p=IsoMergeTN.p+tmpTN.p
-
- IsoMergeFD=IsoMergeFD+tmpFD
- }
- IsoMergeFD=IsoMergeFD/Num
- IsoMergeFD.p=IsoMergeFD.p/Num
- IsoMergeTP.p=IsoMergeTP.p/Num
- IsoMergeFN.p=IsoMergeFN.p/Num
- IsoMergeTN.p=IsoMergeTN.p/Num
- IsoMergeFDR=IsoMergeFDR/Num
- IsoMergeTPR=IsoMergeTPR/Num
- IsoMergeFPR=IsoMergeFPR/Num
-
-PlotTopName=paste(paste(Path,paste("Top","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample, sep="_"),sep=""),".pdf",sep="")
-
-TrueDELength=length(unlist(IsoSIMout[[i]]$TrueDE)[unlist(IsoSIMout[[i]]$TrueDE)%in%EBSeqNames])
-pdf(PlotTopName)
- PlotTopCts(TrueDELength,IsoMergeFD[1:TrueDELength,],c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-
-PlotFDName=paste(paste(Path,paste("FDTP","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFDName)
- PlotFDTP(MinNumIso,IsoMergeFDR, IsoMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-PlotFPName=paste(paste(Path,paste("FPRTP","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFPName)
- PlotFPTP(MinNumIso,IsoMergeFPR, IsoMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
- dev.off()
-
-
-out=list(IsoMergeTable=IsoMergeTable, IsoMergeDVD=IsoMergeDVD, IsoMergePhi=IsoMergePhi, IsoMergeFD=IsoMergeFD)
-
-
-}
-
+++ /dev/null
-PlotFDTP <-
-function(TopNum, FDR, TPR,names)
-{
-
- matplot(FDR, TPR, xlim=c(0,.5), ylim=c(0,1) ,type="l",lwd=2,xlab="FDR", ylab="TPR")
- legend("bottomright",col=1:TopNum, lwd=2, lty=1:TopNum, names)
-
-
-}
-
+++ /dev/null
PlotFPTP <-
function(TopNum, FPR, TPR,names)
{
    # Plot TPR against FPR curves for several DE-detection methods.
    #
    # Args mirror PlotFDTP: TopNum is retained for backward compatibility;
    # FPR/TPR are one-column-per-method matrices; names labels the methods.
    nMethods = length(names)
    matplot(FPR, TPR,xlim=c(0,.1), ylim=c(0,1) ,type="l",lwd=2, xlab="FPR", ylab="TPR")
    # BUG FIX: legend was indexed 1:TopNum instead of by method count, so its
    # colors/linetypes could disagree with the plotted lines.
    legend("bottomright",col=1:nMethods,lwd=2, lty=1:nMethods, names)
}
-
+++ /dev/null
# Render the condition-pattern matrix as a raw heatmap (no clustering,
# no scaling), one rainbow color per condition column.
PlotPattern <- function(Patterns) {
    par(oma = c(3, 3, 3, 3))
    heatmap(Patterns,
            col = rainbow(ncol(Patterns)),
            Colv = NA, Rowv = NA, scale = "none")
}
-
+++ /dev/null
PlotTopCts <-
function(TopNum, FD, names)
{
    # Plot cumulative false-discovery counts among the top-ranked transcripts.
    #
    # Args:
    #   TopNum: number of top-ranked transcripts (x-axis extent).
    #   FD: TopNum x nMethods matrix of cumulative FD counts (see TopCts).
    #   names: method names, one per column of FD.
    # NOTE(review): log="y" will warn/drop points while FD counts are 0 —
    # confirm that is acceptable for the intended figures.
    nMethods = length(names)
    matplot(c(1:TopNum) , FD,type="l",xlab="Top DE selected", lwd=2, log="y", ylab="FD")
    # BUG FIX: legend was indexed 1:TopNum (top-gene count) rather than by the
    # number of methods, mismatching matplot's per-column colors/linetypes.
    legend("topleft",col=1:nMethods, lwd=2, lty=1:nMethods, names)
}
-
+++ /dev/null
# Fit log10(Y) as a degree-`nterms` polynomial in log10(X) and draw a
# smoothed scatter of the data with the fitted curve overlaid.
# Entries with X==0 or Y==0 (log undefined) are excluded from the fit but
# still receive fitted values. Returns (invisibly printable) list with the
# coefficients b, intercept d, the lm fit, fitted values, and sort order.
PolyFitPlot <-
function(X , Y , nterms , xname="Estimated Mean", yname="Estimated Var", pdfname="", xlim=c(-1,5), ylim=c(-1,7), ChangeXY=F,col="red"){

    b=rep(NA,nterms)
    # Design matrix: column i will hold log10(X)^i (initial fill is
    # immediately overwritten by the loop below).
    logX=matrix(rep(X, nterms),ncol=nterms, byrow=T)
    for (i in 1:nterms)
        logX[,i]=(log10(X))^i
    colnames(logX)=paste("logmu^",c(1:nterms))
    rownames(logX)=names(X)
    # Names of entries unusable for the regression: zeros and +/-Inf rows.
    NotUse=c(names(X)[X==0],names(Y)[Y==0],names(X)[rowMeans(logX)==-Inf],names(X)[rowMeans(logX)==Inf])
    Use=names(X[!names(X)%in%NotUse])
    Lm=lm(log10(Y[Use])~logX[Use,1:nterms])
    b=summary(Lm)$coefficients[2:(nterms+1),1]   # polynomial coefficients
    d=summary(Lm)$coefficients[1,1]              # intercept
    # Fitted values for ALL entries, including ones excluded from the fit.
    bvec=matrix(rep(b,length(X)),ncol=nterms,byrow=T)
    fit=rowSums(logX*bvec)+d
    main2=NULL
    if (ChangeXY==T){
        # Swapped axes: Y on x, X on y (fitted curve swapped accordingly).
        X.plot=log10(Y)
        Y.plot=log10(X)
        fit.X.plot=fit
        fit.Y.plot=log10(X)
    }
    else{
        X.plot=log10(X)
        Y.plot=log10(Y)
        fit.X.plot=log10(X)
        fit.Y.plot=fit
    }

    # main2 assembles a readable formula string; note the plot title actually
    # used below is pdfname, not main2.
    for (i in 1:nterms)
        main2=paste(main2,round(b[i],2),"*log(",xname,")^",i,"+")
    main=pdfname

    smoothScatter(X.plot, Y.plot ,main=main,xlim=xlim,ylim=ylim,xlab=xname,ylab=yname,axes=F)
    # Axes are drawn on the log10 scale but labeled with the raw 10^x values.
    axis(1,at=seq(xlim[1],xlim[2],by=1), 10^seq(xlim[1],xlim[2],by=1))
    axis(2,at=seq(ylim[1],ylim[2],by=2), 10^seq(ylim[1],ylim[2],by=2))
    Sortit=order(fit.X.plot)
    lines(fit.X.plot[Sortit],fit.Y.plot[Sortit],col=col,lwd=3)
    output=list(b=b,d=d,lm=Lm,fit=fit,sort=Sortit)
    names(output$b)=paste(xname,"^",c(1:length(output$b)))
    output
}
-
+++ /dev/null
PoolMatrix <-
function(Data,reads,type)
{
    # Pool a list of per-lane result matrices into two aligned matrices:
    #   poolM     - the first column of every lane, side by side
    #   poolValue - poolM scaled to counts by each lane's read total (rounded)
    # When type == "G", group labels (column 2 of the first lane) are
    # prepended to both matrices.
    #
    # BUG FIX: the lane count was hard-coded to 8; it is now length(Data),
    # so any number of lanes works (identical behavior for 8 lanes).
    nlanes=length(Data)
    poolnames=names(Data)
    poolM=NULL
    for (po in 1:nlanes)
        poolM=cbind(poolM,Data[[po]][,1])
    rownames(poolM)=rownames(Data[[1]])
    colnames(poolM)=poolnames

    #poolValue=poolM*reads
    poolValue=poolM
    for (col in 1:nlanes)
        poolValue[,col]=poolM[,col]*reads[col]
    poolValue=round(poolValue)
    if (type=="G")
    {
        poolM=cbind(Data[[1]][,2],poolM)
        poolValue=cbind(Data[[1]][,2],poolValue)
        colnames(poolM)=c("Groups",poolnames)
        colnames(poolValue)=c("Groups",poolnames)
    }
    # Assignment as the final expression: the list is returned (invisibly).
    poolOutput=list(poolM=poolM,poolValue=poolValue)
}
-
+++ /dev/null
# Posterior fold change between conditions from an EBTest result object.
# Returns (invisibly) a list with the posterior FC (shrunken via the fitted
# Beta prior) and the raw FC of the condition means.
PostFC=function(EBoutput) {
    # Within-condition means, flattened across the Ng groups.
    GeneRealMeanC1=unlist(EBoutput$C1Mean)
    GeneRealMeanC2=unlist(EBoutput$C2Mean)
    GeneRealMean=(GeneRealMeanC1+GeneRealMeanC2)/2

    # Raw ("real") fold change of the estimated means.
    GeneRealFC=GeneRealMeanC1/GeneRealMeanC2

    GeneR=unlist(EBoutput$RList)
    # Replace non-positive / missing fitted r values with a surrogate derived
    # from the mean (equivalent to assuming q = .01).
    GeneR[GeneR<=0 | is.na(GeneR)]=GeneRealMean[GeneR<=0 | is.na(GeneR)]*.99/.01

    # Hyper-parameters from the last EM iteration; beta is replicated so each
    # transcript carries the beta of its Ng group.
    GeneAlpha=EBoutput[[1]][nrow(EBoutput[[1]]),]
    GeneBeta=unlist(sapply(1:length(EBoutput$C1Mean),function(i)rep(EBoutput[[2]][nrow(EBoutput[[1]]),i],length(EBoutput$C1Mean[[i]]))))
    GeneBeta=as.vector(GeneBeta)
    # Post alpha = alpha + r_C1 * 3
    # Post beta = beta + Mean_C1 * 3
    # Post Mean of q in C1 P_q_C1= P_a/ (P_a + P_b)
    # Post FC = (1-p_q_c1)/p_q_c1 /( (1-p_q_c2)/p_q_c2)
    # NOTE(review): the factor 3 looks like a hard-coded replicate count per
    # condition -- confirm against the callers before reusing this function.
    GenePostAlpha=GeneAlpha+3*GeneR
    GenePostBetaC1=GeneBeta+3*GeneRealMeanC1
    GenePostBetaC2=GeneBeta+3*GeneRealMeanC2
    GenePostQC1=GenePostAlpha/(GenePostAlpha+GenePostBetaC1)
    GenePostQC2=GenePostAlpha/(GenePostAlpha+GenePostBetaC2)

    GenePostFC=((1-GenePostQC1)/(1-GenePostQC2))*(GenePostQC2/GenePostQC1)
    # Assignment as the final expression: list returned invisibly.
    Out=list(GenePostFC=GenePostFC, GeneRealFC=GeneRealFC)

}
+++ /dev/null
# Q-Q plots comparing the empirical q estimates in QList against random
# draws from the fitted Beta(alpha, beta), one plot per group. AList="F"
# means a single shared alpha; otherwise alpha varies per group.
QQP <-
function(QList,AlphaResult,BetaResult,name,AList="F",GroupName){

    for (i in 1:length(BetaResult)){
        # Count of usable q's: non-NA values strictly below 1.
        tmpSize=length(QList[[i]][QList[[i]]<1 & !is.na(QList[[i]])])
        if (AList=="F") rdpts=rbeta(tmpSize,AlphaResult,BetaResult[i])
        else rdpts=rbeta(tmpSize,AlphaResult[i],BetaResult[i])
        # NOTE(review): the x-vector below keeps NA entries (no !is.na filter),
        # so its length can differ from tmpSize when NAs are present -- verify.
        qqplot(QList[[i]][QList[[i]]<1], rdpts,xlab="estimated q's", ylab="simulated q's from fitted beta",main=paste(name,GroupName[i],sep=" "),xlim=c(0,1),ylim=c(0,1))
        # Reference line: LS fit of sorted simulated vs sorted empirical q's.
        fit=lm(sort(rdpts)~sort(QList[[i]][QList[[i]]<1 & !is.na(QList[[i]])]))
        abline(fit,col="red")

    }
}
-
+++ /dev/null
-
# Quantile normalization factors: each lane's size factor is its `Quantile`
# quantile, rescaled so that the factors have geometric mean 1.
QuantileNorm <- function(Data, Quantile) {
    lane.qtile <- apply(Data, 2, quantile, probs = Quantile)
    lane.qtile * prod(lane.qtile)^(-1 / ncol(Data))
}
-
+++ /dev/null
-
# Rank normalization: divide each value by the mean expression of its rank
# position (computed across lanes), mapping zeros to 1 afterwards.
# NOTE(review): tied values produce fractional ranks, which R truncates when
# used as indices into the reference vector -- confirm ties are acceptable.
RankNorm <- function(Data) {
    ranks <- apply(Data, 2, rank)
    ref <- rowMeans(apply(Data, 2, sort))
    ref[ref == 0] <- 1
    out <- sapply(seq_len(ncol(Data)), function(j) Data[, j] / ref[ranks[, j]])
    out[out == 0] <- 1
    out
}
-
+++ /dev/null
TPFDRplot <-
function(DESeqP, EBZ, TrueDE, main, FDR=NULL){
    # Compare DESeq (p-values) and EBSeq (posterior probabilities of DE)
    # across a grid of target FDR levels, plotting the TPR and FDR curves.
    #
    # Args:
    #   DESeqP: named DESeq p-values.
    #   EBZ:    named EBSeq posterior probabilities of being DE.
    #   TrueDE: names of the truly DE transcripts.
    #   main:   title prefix for the two plots.
    #   FDR:    NULL to derive EBSeq cutoffs via crit.fun, "H" for hard
    #           (1 - level) cutoffs, or a vector of precomputed cutoffs.
    #
    # Returns a matrix of the TPR/FDR curves and the raw TP/FD counts.
    # NOTE(review): *FDR below is computed as FD / length(TrueDE), i.e.
    # normalized by the number of true DE genes rather than the number of
    # calls -- kept as-is for compatibility, but confirm the intent.
    Seq=seq(0.001,0.5,by=0.001)
    DETPR=rep(0,length(Seq))
    EBTPR=rep(0,length(Seq))
    DEFDR=rep(0,length(Seq))
    EBFDR=rep(0,length(Seq))
    DETPNum=rep(0,length(Seq))
    EBTPNum=rep(0,length(Seq))
    DEFDNum=rep(0,length(Seq))
    EBFDNum=rep(0,length(Seq))
    for (i in 1:length(Seq)){
        DESeqOnes=names(DESeqP)[DESeqP<=Seq[i]]
        if (length(FDR)==0) EBOnes=names(EBZ)[EBZ>=crit.fun(1-EBZ, Seq[i])]
        else if (FDR=="H") EBOnes=names(EBZ)[EBZ>=(1-Seq[i])]
        else EBOnes=names(EBZ)[EBZ>=FDR[i]]

        DETPNum[i]=sum(DESeqOnes%in%TrueDE)
        EBTPNum[i]=sum(EBOnes%in%TrueDE)
        DEFDNum[i]=sum(!DESeqOnes%in%TrueDE)
        EBFDNum[i]=sum(!EBOnes%in%TrueDE)

        DETPR[i]=DETPNum[i]/length(TrueDE)
        EBTPR[i]=EBTPNum[i]/length(TrueDE)
        DEFDR[i]=DEFDNum[i]/length(TrueDE)
        EBFDR[i]=EBFDNum[i]/length(TrueDE)
    }
    plot(Seq,DETPR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "TPR"),xlab="controled FDR level", ylab="TPR",lwd=2)
    lines(Seq,EBTPR,col="blue",lwd=2)
    legend("bottomright",lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))

    # BUG FIX: this is the FDR panel; its y-axis was mislabeled "TPR".
    plot(Seq,DEFDR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "FDR"),xlab="controled FDR level", ylab="FDR",lwd=2)
    lines(Seq,EBFDR,col="blue",lwd=2)
    legend("topleft", lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))


    output=cbind( DETPR,EBTPR, DEFDR,EBFDR,DETPNum,EBTPNum,DEFDNum,EBFDNum)
}
-
+++ /dev/null
# Cumulative false-discovery counts among the top-ranked transcripts for
# each method. `pvalue` is a transcripts x methods score matrix; columns
# flagged with 1 in PP hold posterior probabilities (larger = more DE) and
# are flipped so that smaller always means more DE. Returns a
# TopNum x NumOfMethods matrix of cumulative FD counts.
TopCts <-
function(pvalue, PP=NULL, TrueNames, TopNum){
    NumOfMethods=ncol(pvalue)
    puse=pvalue
    # Flip posterior-probability columns into p-value-like scores.
    if(1%in%PP)puse[,PP==1]=1-pvalue[,PP==1]
    #puse.list=data.frame(puse)
    FD=matrix(rep(0,NumOfMethods*TopNum),ncol=NumOfMethods)
# Rank=apply(puse,2,rank)
# for(i in 1:TopNum)
# FD[i,]=sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]%in%TrueNames))
# FD=sapply(1:TopNum, function(i)sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]%in%TrueNames)))
    for (s in 1:NumOfMethods){
        tmp=puse[,s]
        names(tmp)=rownames(puse)
        sorttmp=sort(tmp)
        # NOTE(review): the loop starts at 2, so FD[1,s] stays 0 even when the
        # single top-ranked transcript is a false discovery -- confirm intended.
        for( c in 2:TopNum)
            FD[c, s]=FD[(c-1),s]+as.numeric(!names(sorttmp)[c]%in%TrueNames)
    }
    FD
    #matplot(TopNum,FD,type="l",ylim=c(0,1),xlab="Top DE selected", ylab="FDR")
    #legend("rightbottom",col=1:TopNum, lty=1:TopNum, names)
}
-
+++ /dev/null
# Method-of-moments estimates c(alpha, beta) for a Beta distribution
# fitted to the sample `qs.in`.
beta.mom <- function(qs.in) {
    m <- mean(qs.in)
    v <- var(qs.in)
    common <- m * (1 - m) / v - 1
    c(m * common, (1 - m) * common)
}
-
+++ /dev/null
# Soft FDR threshold: given posterior probabilities of equal expression
# (PPEE) and a target FDR `thre`, return the PP(DE) cutoff such that the
# running mean PPEE among the calls stays below the target; 1 if nothing
# can be called.
crit_fun <- function(PPEE, thre)
{
    sorted <- sort(PPEE)
    running.fdr <- cumsum(sorted) / seq_along(sorted)
    n.call <- sum(running.fdr < thre)
    out <- if (n.call > 0) 1 - sorted[n.call] else 1
    names(out) <- NULL
    out
}
-
+++ /dev/null
# Predictive density f0 of the observed counts under the NB-Beta model
# (q integrated out). Rows of Input are transcripts, columns are samples.
# BetaIn holds one beta per Ng group; NumOfGroups gives group sizes.
# Returns the density (or its log when log=T) per transcript.
f0 <-
function(Input, AlphaIn, BetaIn, EmpiricalR, NumOfGroups, log)
{

    # Expand per-group betas so each transcript row carries its own beta.
    BetaVect=do.call(c,sapply(1:length(BetaIn),function(i)rep(BetaIn[i],NumOfGroups[i]),simplify=F))
    SampleNum=dim(Input)[2]
    #Product part
    # NB normalizing constants choose(x + r - 1, x), per sample, computed on
    # rounded counts in log space for numerical stability.
    ChooseParam1=round(Input+EmpiricalR-1)
    roundInput=round(Input)
    EachChoose=sapply(1:SampleNum, function(i)lchoose(ChooseParam1[,i], roundInput[,i]))

    SumEachIso=rowSums(Input)
    param1=AlphaIn + rowSums(EmpiricalR)
    param2=BetaVect + SumEachIso
    # Beta-function ratio arising from integrating q out of the NB-Beta model.
    LogConst=rowSums(EachChoose)+lbeta(param1, param2)-lbeta(AlphaIn, BetaVect)


    if (log==F) FinalResult=exp(LogConst)
    if (log==T) FinalResult=LogConst
    FinalResult
}
-
+++ /dev/null
# Predictive density under DE: the two conditions are independent, so f1 is
# the product (or, in log space, the sum) of the within-condition f0's.
f1 <- function(Input1, Input2, AlphaIn, BetaIn, EmpiricalRSP1, EmpiricalRSP2, NumOfGroup, log) {
    dens1 <- f0(Input1, AlphaIn, BetaIn, EmpiricalRSP1, NumOfGroup, log = log)
    dens2 <- f0(Input2, AlphaIn, BetaIn, EmpiricalRSP2, NumOfGroup, log = log)
    if (log) dens1 + dens2 else dens1 * dens2
}
-
--- /dev/null
+#include<cstdio>
+#include<cctype>
+#include<cstring>
+#include<cstdlib>
+#include<cassert>
+#include<fstream>
+#include<iomanip>
+#include<string>
+#include<vector>
+#include<algorithm>
+using namespace std;
+
// Basic unsigned index type for transcript ids and positions.
typedef unsigned int INTEGER;

const int STRLEN = 1005; // reserved buffer length (not used below)

INTEGER M;             // number of reference transcripts (1-based)
int k; // k-mer size
vector<string> names;  // names[1..M]: transcript names; index 0 is a dummy
vector<string> seqs;   // seqs[1..M]: uppercased A/C/G/T/N sequences
vector<INTEGER> effL;  // effL[1..M]: number of k-mer start positions

// tid starts from 1
// One candidate k-mer occurrence, identified by transcript id and start
// position; the characters themselves live in the global `seqs`.
struct ReadType {
	INTEGER tid, pos;

	ReadType(INTEGER tid, INTEGER pos) {
		this->tid = tid;
		this->pos = pos;
	}

	// Lexicographic order on the k characters, breaking ties by tid, so a
	// sort groups identical k-mers together and, within a group, k-mers
	// from the same transcript.
	bool operator< (const ReadType& o) const {
		string& a = seqs[tid];
		string& b = seqs[o.tid];
		for (int i = 0; i < k; i++) {
			if (a[pos + i] != b[o.pos + i]) {
				return a[pos + i] < b[o.pos + i];
			}
		}
		return tid < o.tid;
	}

	// True iff the two k-mers consist of identical characters.
	bool seq_equal(const ReadType& o) const {
		string& a = seqs[tid];
		string& b = seqs[o.tid];
		for (int i = 0; i < k; i++)
			if (a[pos + i] != b[o.pos + i]) return false;
		return true;
	}
};

vector<ReadType> cands;        // every k-mer occurrence across all transcripts
vector<double> clusteringInfo; // per-transcript clustering statistic
+
// Return an uppercased copy of rawseq in which every character that is not
// one of A/C/G/T is replaced by 'N'.
//
// BUG FIX: toupper() has undefined behavior for negative char values (chars
// outside ASCII on platforms where char is signed); cast through unsigned
// char before the call.
std::string convert(const std::string& rawseq) {
	int size = (int)rawseq.size();
	std::string seq = rawseq;
	for (int i = 0; i < size; i++) {
		seq[i] = toupper((unsigned char)rawseq[i]);
		if (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G' && seq[i] != 'T') seq[i] = 'N';
	}
	return seq;
}
+
+void loadRef(char* inpF) {
+ ifstream fin(inpF);
+ string tag, line, rawseq;
+ void *pt;
+
+ assert(fin.is_open());
+
+ names.clear(); names.push_back("");
+ seqs.clear(); seqs.push_back("");
+
+ pt = getline(fin, line);
+ while (pt != 0 && line[0] == '>') {
+ tag = line.substr(1);
+ rawseq = "";
+ while((pt = getline(fin, line)) && line[0] != '>') {
+ rawseq += line;
+ }
+ if (rawseq.size() <= 0) {
+ printf("Warning: Fasta entry %s has an empty sequence! It is omitted!\n", tag.c_str());
+ continue;
+ }
+ names.push_back(tag);
+ seqs.push_back(convert(rawseq));
+ }
+
+ fin.close();
+
+ M = names.size() - 1;
+
+ printf("The reference is loaded.\n");
+}
+
+int main(int argc, char* argv[]) {
+ if (argc != 4) {
+ printf("Usage: rsem-for-ebseq-calculate-clustering-info k input_reference_fasta_file output_file\n");
+ exit(-1);
+ }
+
+ k = atoi(argv[1]);
+ loadRef(argv[2]);
+
+ cands.clear();
+ effL.assign(M + 1, 0);
+ for (INTEGER i = 1; i <= M; i++) {
+ effL[i] = seqs[i].length() - k + 1;
+ if (effL[i] <= 0) effL[i] = 0; // effL should be non-negative
+ for (INTEGER j = 0; j < effL[i]; j++)
+ cands.push_back(ReadType(i, j));
+ }
+ printf("All possbile %d mers are generated.\n", k);
+
+ sort(cands.begin(), cands.end());
+ printf("All %d mers are sorted.\n", k);
+
+ size_t p = 0;
+ clusteringInfo.assign(M + 1, 0.0);
+
+ for (size_t i = 1; i <= cands.size(); i++)
+ if (i == cands.size() || !cands[p].seq_equal(cands[i])) {
+ size_t denominator = i - p;
+ size_t q = p;
+ for (size_t j = p + 1; j <= i; j++)
+ if (j == i || cands[q].tid != cands[j].tid) {
+ size_t numerator = j - q;
+ //double prob = numerator * 1.0 / denominator;
+ //clusteringInfo[cands[q].tid] += (double)numerator * prob * (1.0 - prob);
+ if (numerator < denominator) clusteringInfo[cands[q].tid] += numerator;
+ q = j;
+ }
+ p = i;
+ }
+
+ for (INTEGER i = 1; i <= M; i++)
+ if (effL[i] == 0) clusteringInfo[i] = -1.0;
+ else clusteringInfo[i] /= effL[i];
+
+ printf("Clustering information is calculated.\n");
+
+
+ ofstream fout(argv[3]);
+ for (INTEGER i = 1; i <= M; i++) fout<<names[i]<<"\t"<<setprecision(6)<<clusteringInfo[i]<<endl;
+ fout.close();
+
+ return 0;
+}
+++ /dev/null
-GeneEBresultGouldBart2
-GeneMat
-IsoEBresultGouldBart2
-IsoList
-MultiGeneMat
+++ /dev/null
# Worked example accompanying the EBSeq vignette: simulate gene-level and
# isoform-level data, run EBTest/EBMultiTest, and inspect the model fits.
# NOTE: set.seed(13) makes the whole script depend on executing every
# statement in order; do not reorder calls that consume random numbers.
library(EBSeq)
set.seed(13)

# Section 3.1
# Gene-level simulation: 10,000 genes, 10 samples in two conditions,
# 10% truly DE.

GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL,
	Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000,
	DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.1, Phi.qt2=.9,
	Meanconstant=NULL, OnlyData=T)
GeneData=GeneGenerate$data
GeneTrueDENames=GeneGenerate$TrueDE
str(GeneData)
str(GeneTrueDENames)

# Median-by-ratio library size factors.
Sizes=MedianNorm(GeneData)

EBres=EBTest(Data=GeneData,
	Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=Sizes, maxround=5)

# Posterior probabilities of DE; call DE at PP >= .95 and count true hits.
PP=GetPP(EBres)
str(PP)
DEfound=names(PP)[which(PP>=.95)]
str(DEfound)
sum(DEfound%in%GeneTrueDENames)

# Model-fit diagnostics for the final (5th) EM iteration.
QQP(QList=EBres$QList1, AlphaResult=EBres[[1]][5,1],
	BetaResult=EBres[[2]][5,1], name="Gene Simulation", AList="F", GroupName=NULL)
DenNHist(QList=EBres$QList1, Alpha=EBres[[1]][5,1], Beta=EBres[[2]][5,1],
	name="Gene Simulation", AList="F", GroupName=NULL)

# Section 3.2
# Isoform-level simulation with Ng = 1/2/3 isoform groups.

IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98,
	Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10,
	NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL,
	Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
str(IsoGenerate)

IsoMat=do.call(rbind,IsoGenerate$data)
str(IsoMat)

IsoSizes=MedianNorm(IsoMat)

# Build the isoform -> gene mapping and the (truncated) Ng vector.
IsoNames=rownames(IsoMat)
str(IsoNames)
GeneNames=paste("Gene",c(1:3000),sep="_")
IsosGeneNames=c(GeneNames[1:1000],rep(GeneNames[1001:2000],each=2),
	rep(GeneNames[2001:3000],each=3))
NgList=GetNg(IsoNames, IsosGeneNames)
IsoNgTrun=NgList$IsoformNgTrun
IsoNgTrun[c(1:3,1001:1003,3001:3003)]

IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun,
	Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=IsoSizes, maxround=5)
IsoPP=GetPP(IsoEBres)
str(IsoPP)
IsoDE=IsoPP[which(IsoPP>=.95)]
str(IsoDE)
sum(names(IsoDE)%in%IsoGenerate$TrueDE)

# Mean-variance fits, per Ng group and pooled, overlaid on one plot.
par(mfrow=c(2,2))
PolyFitValue=vector("list",3)
for(i in 1:3)
	PolyFitValue[[i]]=PolyFitPlot(IsoEBres$C1Mean[[i]],
		IsoEBres$C1EstVar[[i]],5)

PolyAll=PolyFitPlot(unlist(IsoEBres$C1Mean), unlist(IsoEBres$C1EstVar),5)
lines(log10(IsoEBres$C1Mean[[1]][PolyFitValue[[1]]$sort]),
	PolyFitValue[[1]]$fit[PolyFitValue[[1]]$sort],col="yellow")
lines(log10(IsoEBres$C1Mean[[2]][PolyFitValue[[2]]$sort]),
	PolyFitValue[[2]]$fit[PolyFitValue[[2]]$sort],col="pink")
lines(log10(IsoEBres$C1Mean[[3]][PolyFitValue[[3]]$sort]),
	PolyFitValue[[3]]$fit[PolyFitValue[[3]]$sort],col="green")
legend("topleft",c("All Isoforms","Ng = 1","Ng = 2","Ng = 3"),
	col=c("red","yellow","pink","green"),lty=1,lwd=3,box.lwd=2)

# Beta-fit diagnostics per Ng group.
par(mfrow=c(2,2))
QQP(QList=IsoEBres$QList1, AlphaResult=IsoEBres[[1]][5,],
	BetaResult=IsoEBres[[2]][5,],
	name="Isoforms", AList="F", GroupName=paste("Ng = ",c(1:3),sep=""))

DenNHist(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,],
	Beta=IsoEBres[[2]][5,],
	name="Isoforms", AList="F", GroupName=paste("Ng = ",c(1:3),sep=""))

# Section 3.3
# Multiple-condition analysis over a chosen subset of expression patterns.

Conditions=c("C1","C1","C2","C2","C3","C3")
PosParti=GetPatterns(Conditions)
PosParti

# Drop pattern 3 from the set of candidate patterns.
Parti=PosParti[-3,]
Parti

MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=Parti,
	NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
	DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
str(MultiData)

MultiSize=MedianNorm(MultiData$data)
MultiRes=EBMultiTest(MultiData$data,NgVector=NULL,Conditions=Conditions,
	AllParti=Parti, sizeFactors=MultiSize, maxround=5)
MultiPP=GetMultiPP(MultiRes)
names(MultiPP)
MultiPP$PP[1:10,]
MultiPP$MAP[1:10]
MultiPP$Patterns
sum(MultiPP$MAP==MultiData$Patterns)

# EOF
\ No newline at end of file
--- /dev/null
# Build the EBSeq support components shipped with RSEM:
#  - the blockmodeling and EBSeq R packages, installed locally, and
#  - the rsem-for-ebseq-calculate-clustering-info C++ helper.
CC = g++
PROGRAMS = blockmodeling EBSeq rsem-for-ebseq-calculate-clustering-info

all : $(PROGRAMS)

# Install the R package into the current directory (-l ".") so RSEM can load
# it from a local library path instead of the system-wide R library.
blockmodeling : blockmodeling_0.1.8.tar.gz
	R CMD INSTALL -l "." blockmodeling_0.1.8.tar.gz

# EBSeq depends on blockmodeling, hence the ordering prerequisite.
EBSeq : blockmodeling EBSeq_1.1.3.tar.gz
	R CMD INSTALL -l "." EBSeq_1.1.3.tar.gz

rsem-for-ebseq-calculate-clustering-info : calcClusteringInfo.cpp
	$(CC) -O3 -Wall calcClusteringInfo.cpp -o $@

# NOTE(review): PROGRAMS includes the installed R package *directories*, so
# `make clean` removes them (rm -rf) as well as the compiled helper.
clean :
	rm -rf $(PROGRAMS)
+++ /dev/null
-\name{CheckNg}
-\alias{CheckNg}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-CheckNgStructure
-}
-\description{
-Provide the best polynomial fit of log variance and log mean in each Ng group.
-}
-\usage{
-CheckNg(NewMean, NewVar,nterm, xlim, ylim)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{NewMean}{
-A list contains
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-4 plots will be returned. The first 3 are the data and the best fit line from
-Ng=1, Ng=2 and Ng=3 group.
-The 4th plot is the scatter plot of all the data.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-### Simulate Isoform Level Data
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.4, Phi.qt2=.6, OnlyData="Y" )
-IsoList=IsoGenerate$data
-
-# Get Vectors and Run EBSeq
-ngv=c(1,2,3,2,3,2,3,2,3)
-b3v=c(1,0,0,1,1,0,0,1,1)
-b5v=c(1,0,0,0,0,1,1,1,1)
-NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
-Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
-Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
-
-IsoData=do.call(rbind,IsoList)
-IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-
-# Plot
-CheckNg(IsoEBres$C1Mean, IsoEBres$C1EstVar,5, c(-1,5),c(-1,7))
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Ng }
+++ /dev/null
-\name{DenNHist}
-\alias{DenNHist}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-%% ~~function to do ... ~~
-Density plot to compare the empirical q's and the simulated q's from the fitted beta distribution.
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-DenNHist(QList, Alpha, Beta, name, AList = "F", GroupName)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{QList}{
-The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-. Input could be a vector or a list of different groups of transcripts. The number of lists here should be the same as the length of Beta.
-
-}
- \item{Alpha}{
-The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeed
-Up_MDFPoi_NoNormVar. Input should be a number if AList is not defined.
-}
- \item{Beta}{
-The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
-Input could be one single number or a vector of several numbers. The length of the input should be the same as the number of lists of QList.
-
-}
- \item{name}{
-The name of the plots
-}
- \item{AList}{
-Whether a list of alpha's are used
-
-}
- \item{GroupName}{
-The names of each sub-plot. The length of the input
-should be the same as the number of lists of QList.
-
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-Plots will be generated. Each plot represents a sub-list of the QList.
-The empirical estimation of q's will be represented as blue histogram and the density of
-the fitted beta distribution will be represented as the green line.
-The main title of the plot will be "GroupName name".
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-beta.mom, DenNHistTable, QQP, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\examples{
-### Simulate Gene Level Data
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-# Run EBSeq
-EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=rep(1,10),maxround=5)
-# Plot
-DenNHist(QList=EBres$QList1, Alpha=EBres[[1]][5,1], Beta=EBres[[2]][5,1], name="Gene", AList="F", GroupName="")
-
-### Simulate Isoform Level Data
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="Y" )
-IsoList=IsoGenerate$data
-
-# Get Vectors and Run EBSeq
-ngv=c(1,2,3,2,3,2,3,2,3)
-b3v=c(1,0,0,1,1,0,0,1,1)
-b5v=c(1,0,0,0,0,1,1,1,1)
-NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
-Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
-Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
-
-IsoData=do.call(rbind,IsoList)
-IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-# Plot
-par(mfrow=c(3,3))
-DenNHist(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], Beta=IsoEBres[[2]][5,], name="Isoform", AList="F", GroupName=paste("group",c(1:9),sep=""))
-
-
-
-
-
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ beta }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{DenNHistTable}
-\alias{DenNHistTable}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-9 Density plots to compare the empirical q's and the simulated q's from the fitted beta distribution.
-}
-\description{
-Check the beta fit of 9 different groups on isoform level data.
-}
-\usage{
-DenNHistTable(QList, Alpha, Beta, AList = "F")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{QList}{
-The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-. Input should be a list of different groups of transcripts. The number of lists here should be 9.
-
-}
-
- \item{Alpha}{
-The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeed
-Up_MDFPoi_NoNormVar. Input should be a number if AList is not defined.
-}
- \item{Beta}{
-The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
-Input could be one single number or a vector of several numbers. The length of the input should be 9.
-
-}
-
- \item{AList}{
-Whether a list of alpha's are used
-
-}
-
- }
-\details{
-
-}
-\value{
-A plot containing 9 sub-plots will be generated.
-The empirical estimation of q's will be represented as blue histogram and the density of
-the fitted beta distribution will be represented as the green line.
-The main title of the plot will be "GroupName name"
-}
-\references{
-DenNHist, beta.mom, QQP, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-# Generate Data
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="Y" )
-IsoList=IsoGenerate$data
-
-# Get Ng Vector, 5End Vector and 3End Vector
-ngv=c(1,2,3,2,3,2,3,2,3)
-b3v=c(1,0,0,1,1,0,0,1,1)
-b5v=c(1,0,0,0,0,1,1,1,1)
-NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
-Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
-Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
-
-#Run EBSeq
-IsoData=do.call(rbind,IsoList)
-IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-DenNHistTable(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], Beta=IsoEBres[[2]][5,], AList="F")
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{EBMultiTest}
-\alias{EBMultiTest}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Using EM algorithm to calculate the posterior probabilities of interested patterns in multiple condition study
-}
-\description{
-Base on the assumption of NB-Beta Empirical Bayes model, the EM algorithm is used to get the posterior probability of interested patterns.
-}
-\usage{
-EBMultiTest(Data,NgVector=NULL,Conditions,AllParti=NULL, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-
- \item{Data}{
-A data matrix contains expression values for each transcript .(Gene level or Isoform level.). In which rows should be transcripts and columns should be samples.
-}
- \item{NgVector}{
-A vector contains the Ng value of each isoform. If the isoform is in a gene with 2 isoforms, Ng should be 2. Ng could be only 1, 2 or 3. If it's gene level data, Ngvector should all be 1. The vector length should be the same as the number of rows in Data.
-}
- \item{Conditions}{
-A vector indicates the condition each sample belongs to.
-}
-
-\item{AllParti}{
- A matrix indicates the interested patterns. Columns shoule be conditions and rows should be patterns.
- The matrix could be obtained by the GetPatterns function. If AllParti=NULL, all possible patterns will be used.
-}
-
- \item{sizeFactors}{
-The normalization factors.
-The normalization factors could be a vector with lane-specific numbers.
-Or it could be a matrix with lane and transcript specific numbers.
-}
- \item{maxround}{
-Number of iterations. The suggested value is 5.
-}
-
-\item{tau}{
-The tau value from RSEM output. If the data has no replicates within condition,
-EBSeq will use the CI of tau to capture the variation from mapping
-uncertainty and estimate the variance.
- }
-\item{CI}{
-The CI of each tau from RSEM output
- }
-\item{CIthre}{
-The threshold of CI RSEM used.
- }
-\item{Pool, NumBin}{
-Working without replicates, we should define the Pool=T in the
- EBTest function to enable pooling.
-By defining NumBin = 1000, EBSeq will group the genes with similar means
-together into 1,000 bins.
-With the assumption that no more than 50\% genes are DE in the data set,
-We take genes whose FC are in the 25\% - 75\% quantile of the FC's as the
-candidate genes.
-For each bin, the bin-wise variance estimation would be the median of the
-cross condition variance estimations of the candidate genes within that bin.
-We use the cross condition variance estimations for the candidate genes
-and the bin-wise variance estimations of the host bin for the non-candidate genes.
-}
-
-}
-
-\details{
-For each transcript gi within condition, the model assumes:
-X_gis|mu_gi ~ NB (r_gi0 * l_s, q_gi)
-q_gi|alpha, beta^N_g,b_gi ~ Beta (alpha, beta^N_g,b_gi)
-In which the l_s is the sizeFactors of sample s.
-
-The function will test:
-H0: q_giC1 = q_giC2
-H1: q_giC1 != q_giC2
-
-
-}
-\value{
-\item{Alpha }{Fitted parameter alpha of the prior beta distribution. Rows are the values for each iteration.}
-\item{Beta }{Fitted parameter beta of the prior beta distribution. Rows are the values for each iteration.}
-\item{P, PFromZ }{ The bayes estimator of being DE.Rows are the values for each iteration.}
-\item{Z, PoissonZ}{ The Posterior Probability of being DE for each transcript. (Maybe not in the same order of input)}
-\item{RList}{ The fitted values of r for each transcript.}
-\item{MeanList}{The mean of each transcript. (Cross conditions)}
-\item{VarList}{The variance of each transcript. (Cross conditions, using the expression values devided by it's sizeFactors)}
-\item{QListi1}{The fitted q values of each transcript within condition 1.}
-\item{QListi2}{The fitted q values of each transcript within condition 2.}
-\item{C1Mean}{The mean of each transcript within Condition 1}
-\item{C2Mean}{The mean of each transcript within Condition 2}
-\item{C1EstVar}{The estimated variance of each transcript within Condition 1}
-\item{C2EstVar}{The estimated variance of each transcript within Condition 2}
-\item{PoolVar}{The variance of each transcript. (The pooled value of within condition EstVar)}
-\item{DataList}{A List of data that grouped with Ng and bias.}
-\item{PPDE}{The Posterior Probability of being each pattern for each transcript. (The same order of input)}
-\item{f}{The likelihood of predictive distribution of being each pattern for each transcript. }
-\item{AllParti}{The matrix describe the patterns}
-}
-\references{
-}
-\author{
-Ning Leng
-}
-\note{
-}
-
-
-\seealso{
-}
-\examples{
-Conditions=c("C1","C1","C2","C2","C3","C3")
-PosParti=GetPatterns(Conditions)
-AllParti=PosParti[-3,]
-
-MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=AllParti,
- NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
- DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
-
-MultiRes=EBMultiTest(MultiData[[1]],NgVector=NULL,Conditions=Conditions,
- AllParti=AllParti, sizeFactors=rep(1,6), maxround=5, tau=NULL,CI=NULL,
- CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
-MultiPP=GetMultiPP(MultiRes)
-
-sum(MultiPP$MAP==MultiData[[2]])
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{EBSeq_NingLeng-package}
-\alias{EBSeq_NingLeng-package}
-\alias{EBSeq_NingLeng}
-\docType{package}
-\title{
-EBSeq: RNA-Seq Differential Expression Analysis on both gene and isoform level
-}
-\description{
-A Negative Binomial - beta model was built to analyze the RNASeq data.
-We used the empirical Bayes method and the EM algorithm.
-}
-\details{
-\tabular{ll}{
-Package: \tab EBSeq_NingLeng\cr
-Type: \tab Package\cr
-Version: \tab 1.0\cr
-Date: \tab 2011-06-13\cr
-License: \tab What license is it under?\cr
-LazyLoad: \tab yes\cr
-}
-}
-\author{
-Ning Leng
-
-Maintainer: Ning Leng <nleng@wisc.edu>
-}
-\references{
-}
-\keyword{ package }
-\seealso{
-
-}
-\examples{
-}
+++ /dev/null
-\name{EBTest}
-\alias{EBTest}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Using EM algorithm to calculate the posterior probabilities of being DE
-}
-\description{
-Base on the assumption of NB-Beta Empirical Bayes model, the EM algorithm is used to get the posterior probability of being DE.
-}
-\usage{
-EBTest(Data, NgVector=NULL, Vect5End=NULL, Vect3End=NULL, Conditions, sizeFactors, maxround,tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-
- \item{Data}{
-A data matrix contains expression values for each transcript .(Gene level or Isoform level.). In which rows should be transcripts and columns should be samples.
-}
- \item{NgVector}{
-A vector contains the Ng value of each isoform. If the isoform is in a gene with 2 isoforms, Ng should be 2. Ng could be only 1, 2 or 3. If it's gene level data, Ngvector should all be 1. The vector length should be the same as the number of rows in Data.
-}
- \item{Vect5End}{
-A vector contains the 5' end information of each isoform. It should be 1 if the isoform contains 5' end and otherwise should be 0. If it's gene level data, Vect5End should all be 1. The vector length should be the same as the number of rows in Data.
-(Not recommended)
-}
- \item{Vect3End}{
-A vector contains the 3' end information of each isoform. It should be 1 if the
-isoform contains 3' end and otherwise should be 0. If it's gene level data, Vect3End should all be 1. The vector length should be the same as the number of rows in Data.
-(Not recommended)
-}
- \item{Conditions}{
-A vector indicates the condition each sample belongs to.
-}
-
-
- \item{sizeFactors}{
-The normalization factors.
-The normalization factors could be a vector with lane specitic numbers.
-Or it could be a matrix with lane and transcript specific numbers.
-}
- \item{maxround}{
-Number of iterations. The suggested value is 5.
-}
-
-\item{tau}{
-The tau value from RSEM output. If the data has no replicates within condition,
-EBSeq will use the CI of tau to capture the variation from mapping
-uncertainty and estimate the variance.
- }
-\item{CI}{
-The CI of each tau from RSEM output
- }
-\item{CIthre}{
-The threshold of CI RSEM used.
- }
-\item{Pool, NumBin}{
-Working without replicates, we should define the Pool=T in the
- EBTest function to enable pooling.
-By defining NumBin = 1000, EBSeq will group the genes with similar means
-together into 1,000 bins.
-With the assumption that no more than 50\% genes are DE in the data set,
-We take genes whose FC are in the 25\% - 75\% quantile of the FC's as the
-candidate genes.
-For each bin, the bin-wise variance estimation would be the median of the
-cross condition variance estimations of the candidate genes within that bin.
-We use the cross condition variance estimations for the candidate genes
-and the bin-wise variance estimations of the host bin for the non-candidate genes.
-}
-
-}
-
-\details{
-For each transcript gi within condition, the model assumes:
-X_gis|mu_gi ~ NB (r_gi0 * l_s, q_gi)
-q_gi|alpha, beta^N_g,b_gi ~ Beta (alpha, neta^N_g,b_gi)
-In which the l_s is the sizeFactors of sample s.
-
-The function will test:
-H0: q_giC1 = q_giC2
-H1: q_giC1 != q_giC2
-
-
-}
-\value{
-\item{Alpha }{Fitted parameter alpha of the prior beta distribution. Rows are the values for each iteration.}
-\item{Beta }{Fitted parameter beta of the prior beta distribution. Rows are the values for each iteration.}
-\item{P, PFromZ }{ The bayes estimator of being DE.Rows are the values for each iteration.}
-\item{Z, PoissonZ}{ The Posterior Probability of being DE for each transcript. (Maybe not in the same order of input)}
-\item{RList}{ The fitted values of r for each transcript.}
-\item{MeanList}{The mean of each transcript. (Cross conditions)}
-\item{VarList}{The variance of each transcript. (Cross conditions, using the expression values devided by it's sizeFactors)}
-\item{QListi1}{The fitted q values of each transcript within condition 1.}
-\item{QListi2}{The fitted q values of each transcript within condition 2.}
-\item{C1Mean}{The mean of each transcript within Condition 1}
-\item{C2Mean}{The mean of each transcript within Condition 2}
-\item{C1EstVar}{The estimated variance of each transcript within Condition 1}
-\item{C2EstVar}{The estimated variance of each transcript within Condition 2}
-\item{PoolVar}{The variance of each transcript. (The pooled value of within condition EstVar)}
-\item{DataList}{A List of data that grouped with Ng and bias.}
-\item{PPDE}{The Posterior Probability of being DE for each transcript. (The same order of input)}
-
-
-}
-\references{
-}
-\author{
-Ning Leng
-}
-\note{
-}
-
-
-\seealso{
-}
-\examples{
-#Simulate Gene level data
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-# Run EBSeq
-# sizeFactors could be obtained by MedianNorm, QuantileNorm or RankNorm
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=rep(1,10),maxround=5)
-
-# Isoform Level
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-IsoMat=do.call(rbind,IsoGenerate$data)
-IsoNames=rownames(IsoMat)
-
-Ngvector=GetNg(IsoNames, IsosGeneNames)
-IsoNgTrun=Ngvector$IsoformNgTrun
-
-IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{GeneEBresultGouldBart2}
-\alias{GeneEBresultGouldBart2}
-\docType{data}
-\title{
-The EBSeq result of the empirical gene data ( Gould Lab data, bart2 )
-}
-\description{
-%% ~~ A concise (1-5 lines) description of the dataset. ~~
-}
-\usage{data(GeneEBresultGouldBart2)}
-\format{
- The format is:
-List of 17
- $ Alpha : num [1:5, 1] 0.728 0.724 0.719 0.717 0.717
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "AlphaIn" "AlphaIn" "AlphaIn" "AlphaIn" ...
- .. ..$ : NULL
- $ Beta : num [1:5, 1] 1.44 1.49 1.49 1.49 1.48
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "BetaIn" "BetaIn" "BetaIn" "BetaIn" ...
- .. ..$ : NULL
- $ P : num [1:5, 1] 0.1584 0.0767 0.0534 0.046 0.0432
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "PIn" "PIn" "PIn" "PIn" ...
- .. ..$ : NULL
- $ PFromZ : num [1:5, 1] 0.1585 0.0765 0.0535 0.0459 0.0432
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "PFromZ" "PFromZ" "PFromZ" "PFromZ" ...
- .. ..$ : NULL
- $ Z : Named num [1:15312] 0.0036 0.00246 0.00122 0.61556 0.00394 ...
- ..- attr(*, "names")= chr [1:15312] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027158" "ENSRNOG00000027157" ...
- $ PoissonZ: Named num [1:4955] 6.59e-04 5.71e-04 3.80e-04 2.75e-04 2.07e-05 ...
- ..- attr(*, "names")= chr [1:4955] "ENSRNOG00000027159" "ENSRNOG00000039120" "ENSRNOG00000039118" "ENSRNOG00000003198" ...
- $ RList :List of 1
- ..$ : Named num [1:20267] 19.12 62.3 -3.09 348.78 200.03 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ MeanList:List of 1
- ..$ : Named num [1:20267] 289.663 302.486 0.398 97.791 106.036 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ VarList :List of 1
- ..$ : Named num [1:20267] 5792.7 1954 0.6 146.8 513.4 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ QList1 :List of 1
- ..$ : Named num [1:20267] 0.188 0.152 NaN 0.487 1.118 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ QList2 :List of 1
- ..$ : Named num [1:20267] 0.0389 0.1951 1.1478 1.7647 0.4149 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C1Mean :List of 1
- ..$ : Named num [1:20267] 271.9 300.7 0 93.8 123.1 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C2Mean :List of 1
- ..$ : Named num [1:20267] 307.414 304.298 0.796 101.798 88.953 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C1EstVar:List of 1
- ..$ : Named num [1:20267] 1449 1983 0 193 110 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C2EstVar:List of 1
- ..$ : Named num [1:20267] 7905.417 1559.46 0.694 57.687 214.39 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ PoolVar :List of 1
- ..$ : Named num [1:20267] 4677.246 1771.219 0.347 125.211 162.247 ...
- .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ DataList:List of 1
- ..$ Ng1: num [1:20267, 1:8] 287 251 0 87 121 181 5 195 70 5 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:20267] "I1" "I2" "I3" "I4" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-}
-\details{
-%% ~~ If necessary, more details than the __description__ above ~~
-}
-\source{
-%% ~~ reference to a publication or URL from which the data were obtained ~~
-}
-\seealso{
-IsoEBresultGouldBart2, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\examples{
-data(GeneEBresultGouldBart2)
-## maybe str(GeneEBresultGouldBart2) ; plot(GeneEBresultGouldBart2) ...
-}
-\keyword{datasets}
+++ /dev/null
-\name{GeneMultiSimu}
-\alias{GeneMultiSimu}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Gene Level Simulation for multiple conditions
-}
-\description{
-Simulate Gene level expression data from a Negative Binomial assumption. (Without outliers)
-}
-\usage{
-GeneMultiSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions,AllParti, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL, NormFactor=NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{DVDconstant}{
-Whether want to use constant fold change value for all the DE genes.
-If set DVDconstant=4, all the DE genes will have fold change of 4 across two condtions.
-If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored.
-}
- \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user could use a range of empirical DVD's f
-rom Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.96, .97). DVD for each gene will be randomly choosed within the range.
-
-}
- \item{Conditions}{
-A vector of charecters to show each sample's condition.
-(Only the two-condition case is supported now)
-}
-\item{AllParti}{
- A matrix indicates the interested patterns. Columns shoule be conditions and rows should be patterns.
- The matrix could be obtained by the GetPatterns function. If AllParti=NULL, all possible patterns will be used.
-}
-
- \item{NumofSample}{
-Number of samples to generte.
-}
- \item{NumofGene}{
-Number of genes to generate.
-}
- \item{DEGeneProp}{
-The proportion of genes to be generated as DE. The value should be in [0, 1].
-Besides, the same proportion of genes will be generated as EE genes with outlier.
-The genes will be generated as EE at first, then the count of one of the samples
-(randomly selected) will be setted as its original counts multiplied by one of (4, 6, 8, 10).
-}
- \item{Phiconstant}{
-Whether set the disperse parameter phi to be a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-}
- \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user could use a range of empirical phi's from Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.25, .75). phi for each gene will be randomly choosed w
-ithin the range.
-
-}
- \item{Meanconstant}{
-Wether set the mean of each gene to be a constant.
-}
- \item{OnlyData}{
-Wether the user only want the generated data matrix. If OnlyData=T, the function will return the simulated matrix
-and the name of the DE genes.
-Otherwise the funtion will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-}
-
-\item{NormFactor}{
-If NormFactor is NULL, each lane will be set to be with the same library size. Otherwise NormFactor should be a
-vector with length NumofSample.
-}
-
-}
-\details{
-For each gene, we assumed that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
-For DE genes, we assumed that in one condition the genes are with mean mu_g * DVD.
-mu, phi and DVD could be specified by the parameter settings.
-}
-\value{
-\item{data}{
-A matrix of expression values will be generated. The rows of the matrix refer to the genes and the columns of the matrix are the samples. The genes are named "G_1", "G_2", ... The first part of the genes will be the DE ones. (The number depends on the DEGeneProp parameter.)
-}
-\item{Patterns}{The pattern each gene belongs to}
-
-}
-
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-IsoSimu, IsoSimuAt, GeneSimuAt
-}
-\examples{
-Conditions=c("C1","C1","C2","C2","C3","C3")
-PosParti=GetPatterns(Conditions)
-AllParti=PosParti[-3,]
-
-MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=AllParti,
- NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
- DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ simulation }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{GeneSimu}
-\alias{GeneSimu}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Gene Level Simulation
-}
-\description{
-Simulate Gene level expression data from a Negative Binomial assumption. (Without outliers)
-}
-\usage{
-GeneSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL, NormFactor=NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{DVDconstant}{
-Whether want to use constant fold change value for all the DE genes.
-If set DVDconstant=4, all the DE genes will have fold change of 4 across two condtions.
-If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored.
-}
- \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user could use a range of empirical DVD's f
-rom Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.96, .97). DVD for each gene will be randomly choosed within the range.
-
-}
- \item{Conditions}{
-A vector of charecters to show each sample's condition.
-(Only the two-condition case is supported now)
-}
- \item{NumofSample}{
-Number of samples to generte.
-}
- \item{NumofGene}{
-Number of genes to generate.
-}
- \item{DEGeneProp}{
-The proportion of genes to be generated as DE. The value should be in [0, 1].
-Besides, the same proportion of genes will be generated as EE genes with outlier.
-The genes will be generated as EE at first, then the count of one of the samples
-(randomly selected) will be setted as its original counts multiplied by one of (4, 6, 8, 10).
-}
- \item{Phiconstant}{
-Whether set the disperse parameter phi to be a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-}
- \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user could use a range of empirical phi's from Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.25, .75). phi for each gene will be randomly choosed w
-ithin the range.
-
-}
- \item{Meanconstant}{
-Wether set the mean of each gene to be a constant.
-}
- \item{OnlyData}{
-Wether the user only want the generated data matrix. If OnlyData=T, the function will return the simulated matrix
-and the name of the DE genes.
-Otherwise the funtion will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-}
-
-\item{NormFactor}{
-If NormFactor is NULL, each lane will be set to be with the same library size. Otherwise NormFactor should be a
-vector with length NumofSample.
-}
-
-}
-\details{
-For each gene, we assumed that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
-For DE genes, we assumed that in one condition the genes are with mean mu_g * DVD.
-mu, phi and DVD could be specified by the parameter settings.
-}
-\value{
-\item{data}{
-A matrix of expression values will be generated. The rows of the matrix refer to the genes and the columns of the matrix are the samples. The genes are named "G_1", "G_2", ... The first part of the genes will be the DE ones. (The number depends on the DEGeneProp parameter.)
-}
-\item{TrueDE}{The names of the genes who are defined to be DE.}
-}
-
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-IsoSimu, IsoSimuAt, GeneSimuAt
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, NormFactor=rep(c(.9,1.2),5),OnlyData=T)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ simulation }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{GeneSimuAt}
-\alias{GeneSimuAt}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Gene Level Simulation with outliers
-}
-\description{
-Simulate Gene level expression data from a Negative Binomial assumption. (With outliers)
-}
-\usage{
-GeneSimuAt(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL, NormFactor=NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{DVDconstant}{
-Whether want to use constant fold change value for all the DE genes.
-If set DVDconstant=4, all the DE genes will have fold change of 4 across two condtions.
-If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored.
-}
- \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user could use a range of empirical DVD's f
-rom Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.96, .97). DVD for each gene will be randomly choosed within the range.
-
-}
- \item{Conditions}{
-A vector of charecters to show each sample's condition.
-(Only the two-condition case is supported now)
-}
- \item{NumofSample}{
-Number of samples to generte.
-}
- \item{NumofGene}{
-Number of genes to generate.
-}
- \item{DEGeneProp}{
-The proportion of genes to be generated as DE. The value should be in [0, 1].
-Besides, the same proportion of genes will be generated as EE genes with outlier.
-The genes will be generated as EE at first, then the count of one of the samples
-(randomly selected) will be setted as its original counts multiplied by one of (4, 6, 8, 10).
-}
- \item{Phiconstant}{
-Whether set the disperse parameter phi to be a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-}
- \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user could use a range of empirical phi's from Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.25, .75). phi for each gene will be randomly choosed w
-ithin the range.
-
-}
- \item{Meanconstant}{
-Wether set the mean of each gene to be a constant.
-}
- \item{OnlyData}{
-Wether the user only want the generated data matrix. If OnlyData=T, the function will return the simulated matrix
-and the name of the DE genes.
-Otherwise the funtion will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-}
-
-\item{NormFactor}{
-If NormFactor is NULL, each lane will be set to be with the same library size. Otherwise NormFactor should be a
-vector with length NumofSample.
-}
-
-}
-\details{
-For each gene, we assumed that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
-For DE genes, we assumed that in one condition the genes are with mean mu_g * DVD.
-mu, phi and DVD could be specified by the parameter settings.
-}
-\value{
-\item{data}{
-A matrix of expression values will be generated. The rows of the matrix refer to the genes and the columns of the matrix are the samples. The genes are named "G_1", "G_2", ... The first part of the genes will be the DE ones. (The number depends on the DEGeneProp parameter.)
-}
-\item{TrueDE}{The names of the genes who are defined to be DE.}
-\item{Outliers}{The names of the genes who are defined to be outliers at each level of (4,6,8,10).}
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-IsoSimu, IsoSimuAt
-}
-\examples{
-GeneGenerate=GeneSimuAt(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, NormFactor=rep(c(.9,1.2),5),OnlyData=T)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ simulation }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{GetData}
-\alias{GetData}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Read in RSEM output of Gould data
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-GetData(path, Name1, Name2, type)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{path}{
-The path of RSEM outputs
-}
- \item{Name1}{
-The output names of the files from Condition 1
-}
- \item{Name2}{
-The output names of the files from Condition 2
-}
- \item{type}{
-If type="G", read in the gene level output
-If type="I", read in the isoform level output
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-The output is the "nu values" from RSEM.
-To generate a expression matrix, the user need to run the PoolMatrix function.
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-function(path,Name1,Name2,type)
-{
-Data=vector("list",8)
-Filenames=NULL
-Tablenames=NULL
-for (name in 1:4)
- {
- if (type=="I")
- Filenames=c(Filenames,paste(path,Name1,name,"_isoform_nus.tab",sep=""))
- if (type=="G")
- Filenames=c(Filenames,paste(path,Name1,name,"_gene_nus.tab",sep=""))
- Tablenames=c(Tablenames,paste(Name1,name,sep=""))
- }
-for (name in 1:4)
- {
- if (type=="I")
- Filenames=c(Filenames,paste(path,Name2,name,"_isoform_nus.tab",sep=""))
- if (type=="G")
- Filenames=c(Filenames,paste(path,Name2,name,"_gene_nus.tab",sep=""))
- Tablenames=c(Tablenames,paste(Name2,name,sep=""))
- }
-
-
-names(Data)=Tablenames
-for (file in 1:8)
- {
- temp=read.table(Filenames[file],header=T)
- temp2=as.matrix(temp[-1])
- rownames(temp2)=as.vector(as.matrix(temp[1]))
- Data[[file]]=temp2
- }
- Data
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{GetMultiPP}
-\alias{GetMultiPP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the Posterior Probability of each transcript.
-}
-\description{
-Generate the Posterior Probability of being each pattern of each transcript based on the EBMultiTest output.
-}
-\usage{
-GetMultiPP(EBout)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{EBout}{
-The output of EBMultiTest function.
-}
-
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-\item{PP}{The poster probabilities of being each pattern.}
-\item{MAP}{The most likely pattern each gene belongs to}
-\item{Patterns}{The Patterns}
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Posterior Probability }
+++ /dev/null
-\name{GetNg}
-\alias{GetNg}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the Ng vector
-}
-\description{
-Generate the Ng vector for the isoforms
-}
-\usage{
-GetNg(IsoformName, GeneName)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{IsoformName}{
-A vector contains the isoform names
-}
- \item{GeneName}{
-The gene names of the isoforms in IsoformNames. (Should be in the same order)
- }
-
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
- \item{GeneNg}{
- The number of isoforms each gene contains
- }
- \item{GeneNgTrun}{
- The truncated Ng of each gene. (The genes contain more than 3 isoforms are with Ng 3.)
- }
- \item{IsoformNg}{
- The Ng of each isoform
- }
- \item{IsoformNgTrun}{
- The truncated Ng of each isoform.
- }
-
-
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-IsoMat=do.call(rbind,IsoGenerate$data)
-IsoNames=rownames(IsoMat)
-
-Ngvector=GetNg(IsoNames, IsosGeneNames)
-
-IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Ng }
+++ /dev/null
-\name{GetPP}
-\alias{GetPP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the Posterior Probability of each transcript.
-}
-\description{
-Generate the Posterior Probability of being DE of each transcript based on the EBTest output.
-}
-\usage{
-GetPP(EBout)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{EBout}{
-The output of EBTest function.
-}
-
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-The poster probabilities of being DE.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Posterior Probability }
+++ /dev/null
-\name{GetPatterns}
-\alias{GetPatterns}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate all possible patterns in multiple condtion study
-}
-\description{
-Generate all possible patterns in multiple condtion study
-}
-\usage{
-GetPatterns(Conditions)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{Conditions}{
-The names of the Conditions in the study
-}
-
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-A matrix describe all possible patterns.
-
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ }
+++ /dev/null
-\name{IsoEBresultGouldBart2}
-\alias{IsoEBresultGouldBart2}
-\docType{data}
-\title{
-The EBSeq result of the empirical isoform data ( Gould Lab data, bart2 )
-}
-\description{
-%% ~~ A concise (1-5 lines) description of the dataset. ~~
-}
-\usage{data(IsoEBresultGouldBart2)}
-\format{
- The format is:
-List of 17
- $ Alpha : num [1:5, 1] 0.49 0.674 0.735 0.739 0.739
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "AlphaIn" "AlphaIn" "AlphaIn" "AlphaIn" ...
- .. ..$ : NULL
- $ Beta : num [1:5, 1:9] 1.03 1.3 1.4 1.41 1.41 ...
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "BetaIn" "BetaIn" "BetaIn" "BetaIn" ...
- .. ..$ : NULL
- $ P : num [1:5, 1] 0.1751 0.0955 0.073 0.066 0.0642
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "PIn" "PIn" "PIn" "PIn" ...
- .. ..$ : NULL
- $ PFromZ : num [1:5, 1] 0.1878 0.0937 0.0736 0.0662 0.0634
- ..- attr(*, "dimnames")=List of 2
- .. ..$ : chr [1:5] "PFromZ" "PFromZ" "PFromZ" "PFromZ" ...
- .. ..$ : NULL
- $ Z : Named num [1:19249] 0.00494 0.00349 0.00219 0.72998 0.00593 ...
- ..- attr(*, "names")= chr [1:19249] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000056429" "ENSRNOT00000037482" ...
- $ PoissonZ: Named num [1:6019] 0.001395 0.00111 0.00078 0.000551 0.00111 ...
- ..- attr(*, "names")= chr [1:6019] "ENSRNOT00000029207" "ENSRNOT00000059839" "ENSRNOT00000056154" "ENSRNOT00000059835" ...
- $ RList :List of 9
- ..$ : Named num [1:15315] 19.03 62.06 -3.08 313.15 207.39 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 3.369 46.691 0.194 6.79 0.541 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 94.298 -733.445 -0.391 1.102 -3.223 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 3 7.46 6.32 -2.5 119.32 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 11.168 0.167 0.296 0.882 20.272 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 2.456 7.899 25.052 0.177 -0.579 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 5.64 45.35 -16.06 -31.73 1.76 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 260.79 1.632 0.719 2.843 0.553 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 7.43 1.85 2.14 60.4 20.51 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ MeanList:List of 9
- ..$ : Named num [1:15315] 288.018 300.77 0.396 97.251 105.428 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 1.616 3442.78 5.275 30.388 0.253 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 988.128 128.831 0.105 0.759 0.502 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 9.19 706.27 205.21 1.52 3715.53 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 67.12 3.05 1.13 3.09 14.03 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 36.175 52.99 2224.885 0.732 0.253 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 6.71 187.77 2.14 3.97 63.38 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 217.38 3.72 31.38 93.58 7.63 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 28.6 32.2 39.2 1275.1 750.5 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ VarList :List of 9
- ..$ : Named num [1:15315] 5729.745 1929.857 0.593 148.349 505.122 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 4.69 2.81e+05 2.23e+02 2.21e+02 5.13e-01 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 1.16e+04 1.22e+02 8.75e-02 1.98 5.80e-01 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 4.99e+01 7.80e+04 1.80e+04 7.05e-01 1.91e+05 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 580.6 74.5 7.8 18.5 26.7 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 6.10e+02 6.30e+02 2.69e+05 4.29 2.20e-01 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 15.48 1816.69 3.33 5.48 2683.89 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 805.4 27.7 2024.4 4101.5 139 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 761 1398 854 34973 31500 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ QList1 :List of 9
- ..$ : Named num [1:15315] 0.191 0.153 NaN 0.477 1.171 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 0.648 0.0134 NaN 0.6744 NaN ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 0.0443 0.9225 1.3649 0.592 1.2961 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 0.13755 0.00968 0.0695 1.6441 0.05331 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 0.0837 0.0518 1.2634 0.1805 0.5577 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 0.0655 0.1031 0.0109 0.195 1.776 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 0.3176 0.1285 1.6778 2.0836 0.0221 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 19.8858 0.3047 0.563 0.1257 0.0614 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 0.0552 0.0491 0.0388 0.0374 0.0282 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ QList2 :List of 9
- ..$ : Named num [1:15315] 0.0388 0.1935 1.1475 1.7041 0.4143 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 1.3629 0.0134 0.0354 0.1129 0.6811 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 3.02 1.78 NaN NaN 1.15 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 0.4159 0.0116 0.0245 8.6195 0.0227 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 1.264 NaN 0.187 0.246 0.632 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 0.0613 0.1542 0.0115 NaN NaN ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 0.7444 0.3209 0.6206 0.9042 0.0366 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 0.2969 NaN 0.0178 0.0187 0.1135 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 0.2911 0.3678 0.0834 0.0558 0.0252 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C1Mean :List of 9
- ..$ : Named num [1:15315] 270.3 299 0 93.3 122.4 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 2.98 3490.34 0 27.89 0 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 991.231 129.543 0.209 1.518 0.244 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 6.25 775.11 114.76 1.7 3505.16 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 74.812 6.103 0.262 1.834 14.543 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 40.039 40.329 2394.87 1.464 0.506 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 6.29 162.86 3.14 2.93 68.98 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 201.06 7.45 13.4 80.08 12.23 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 5.5 57.4 41.7 1213.3 749.1 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C2Mean :List of 9
- ..$ : Named num [1:15315] 305.699 302.587 0.792 101.229 88.447 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 0.253 3395.219 10.551 32.889 0.507 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 985.025 128.12 0 0 0.759 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 12.13 637.43 295.65 1.34 3925.9 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 59.42 0 2 4.35 13.51 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 32.3 65.6 2054.9 0 0 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 7.13 212.67 1.14 5.01 57.77 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 233.7 0 49.37 107.08 3.04 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 51.68 7.12 36.79 1336.85 751.85 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C1EstVar:List of 9
- ..$ : Named num [1:15315] 1413 1953 0 195 105 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 4.6 261211.7 0 41.4 0 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 2.24e+04 1.40e+02 1.53e-01 2.56 1.88e-01 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 45.4 80103.39 1651.21 1.04 65751.73 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 893.893 117.714 0.207 10.162 26.076 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 6.11e+02 3.91e+02 2.20e+05 7.51 2.85e-01 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 19.81 1267.78 1.87 1.41 3123.1 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 10.1 24.5 23.8 637 199 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 99.7 1167.7 1072.9 32440 26599 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C2EstVar:List of 9
- ..$ : Named num [1:15315] 7882.46 1563.99 0.69 59.4 213.5 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 1.86e-01 2.53e+05 2.98e+02 2.91e+02 7.44e-01 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 325.872 71.975 0 0 0.659 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 2.92e+01 5.51e+04 1.21e+04 1.55e-01 1.73e+05 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 47 0 10.7 17.7 21.4 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 527 426 179461 0 0 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 9.58 662.66 1.84 5.54 1579.26 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 787 0 2780 5712.2 26.8 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 177.6 19.4 441.2 23947.8 29826.6 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ PoolVar :List of 9
- ..$ : Named num [1:15315] 4647.958 1758.495 0.345 127.452 159.023 ...
- .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
- ..$ : Named num [1:1103] 2.39 2.57e+05 1.49e+02 1.66e+02 3.72e-01 ...
- .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
- ..$ : Named num [1:404] 1.13e+04 1.06e+02 7.66e-02 1.28 4.23e-01 ...
- .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
- ..$ : Named num [1:999] 3.73e+01 6.76e+04 6.87e+03 5.96e-01 1.19e+05 ...
- .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
- ..$ : Named num [1:592] 470.45 58.86 5.44 13.94 23.73 ...
- .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
- ..$ : Named num [1:863] 5.69e+02 4.08e+02 2.00e+05 3.75 1.42e-01 ...
- .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
- ..$ : Named num [1:490] 14.69 965.22 1.86 3.48 2351.18 ...
- .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
- ..$ : Named num [1:3943] 398.6 12.2 1401.9 3174.6 112.9 ...
- .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
- ..$ : Named num [1:1559] 139 594 757 28194 28213 ...
- .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ DataList:List of 9
- ..$ Ng1 : num [1:15315, 1:8] 287 251 0 87 121 181 5 195 70 5 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:15315] "I1" "I2" "I3" "I4" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng2No5No3: num [1:1103, 1:8] 3 3226 0 27 0 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:1103] "I14" "I15" "I16" "I66" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng3No5No3: num [1:404, 1:8] 827 153 0 3 1 0 0 0 2 19 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:404] "I138" "I190" "I191" "I214" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng2With3 : num [1:999, 1:8] 0 945 77 2 3763 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:999] "I35" "I52" "I79" "I91" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng3With3 : num [1:592, 1:8] 25 25 0 0 17 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:592] "I131" "I132" "I222" "I266" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng2With5 : num [1:863, 1:8] 36 48 1912 0 1 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:863] "I111" "I118" "I135" "I193" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng3With5 : num [1:490, 1:8] 3 212 5 2 90 5 256 66 21 23 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:490] "I43" "I213" "I336" "I556" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng2Both : num [1:3943, 1:8] 209 11 17 101 0 432 631 0 228 878 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:3943] "I13" "I34" "I46" "I47" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
- ..$ Ng3Both : num [1:1559, 1:8] 0 88 25 1455 506 ...
- .. ..- attr(*, "dimnames")=List of 2
- .. .. ..$ : chr [1:1559] "I28" "I29" "I30" "I41" ...
- .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-}
-\details{
-%% ~~ If necessary, more details than the __description__ above ~~
-}
-\source{
-%% ~~ reference to a publication or URL from which the data were obtained ~~
-}
-\references{
-%% ~~ possibly secondary sources and usages ~~
-}
-\examples{
-data(IsoEBresultGouldBart2)
-## maybe str(IsoEBresultGouldBart2) ; plot(IsoEBresultGouldBart2) ...
-}
-\keyword{datasets}
+++ /dev/null
-\name{IsoSimu}
-\alias{IsoSimu}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Isoform level simulation
-}
-\description{
-Simulate isoform level expression data from a Negative Binomial assumption. (Without outliers)
-}
-\usage{
-IsoSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofIso = NULL, DEIsoProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, NormFactor = NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{DVDconstant}{
-Whether want to use constant fold change value for all the DE genes.
-}
- \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user could use a range of empirical DVD's f
-rom Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.96, .97). DVD for each gene will be randomly choosed within the range.
-}
- \item{Conditions}{
-A vector of charecters to show each sample's condition.
-(Only the two-condition case is supported now)
-}
- \item{NumofSample}{
-Number of samples the user want to generate.
-}
- \item{NumofIso}{
-Input should be a vector with length 3. All values should be non-negative.
-The ith value represents how many isoforms the user want to generate for isoform group i.
-}
- \item{DEIsoProp}{
-The proportion of isoforms to be generated as DE. The value should be in [0, 1].
-}
- \item{Phiconstant}{
-Whether set the disperse parameter phi to be a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-Input should be a vector with length 3. The ith value indicates the overdisperse parameter of isoform group i.
-}
- \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user could use a range of empirical phi's from each group of Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.25, .75). phi for each gene will be randomly choosed w
-ithin the range.
-
-}
- \item{NormFactor}{
-Wether set the mean of each isoform to be a constant.
-}
- \item{OnlyData}{
-Wether the user only want the generated data matrix. If OnlyData = T, the function will return the simulated matrix
-and the name of the DE genes.
-Otherwise the funtion will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-Currently only OnlyData=T is supported
-}
-}
-\details{
-For each isoform, we assumed that the expression follows a Negative Binomial distribution with mean mu_gi and variance mu_gi * (1 + mu_gi * phi_gi).
-For DE genes, we assumed that in one condition the genes are with mean mu_gi * DVD.
-mu, phi and DVD could be specified by the parameter settings.
-
-}
-\value{
-\item{data}{
-A list of expression values will be generated.
-Each list represents a group of isoforms.
-Group1: Ng1
-Group2: Ng2
-Group3: Ng3
-The rows refer to the isoforms and the columns are the samples.
-The isoforms are named "I_GroupNumber_IsoformNumber". The first part of the isoforms of each group will be the DE ones. (The number depends on the DEIsoProp parameter.)
-}
-\item{TrueDE}{The names of the isoforms who are defined to be DE.}
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-GeneSimu
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{IsoSimuAt}
-\alias{IsoSimuAt}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Isoform level simulation with outliers
-}
-\description{
-Simulate isoform level expression data from a Negative Binomial assumption. (With outliers)
-}
-\usage{
-IsoSimuAt(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofIso = NULL, DEIsoProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, NormFactor = NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{DVDconstant}{
-Whether want to use constant fold change value for all the DE genes.
-}
- \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user could use a range of empirical DVD's f
-rom Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.96, .97). DVD for each gene will be randomly choosed within the range.
-}
- \item{Conditions}{
-A vector of charecters to show each sample's condition.
-(Only the two-condition case is supported now)
-}
- \item{NumofSample}{
-Number of samples the user want to generate.
-}
- \item{NumofIso}{
-Input should be a vector with length 3. All values should be non-negative.
-The ith value represents how many isoforms the user want to generate for isoform group i.
-}
- \item{DEIsoProp}{
-The proportion of isoforms to be generated as DE. The value should be in [0, 1].
-Besides, the same proportion of isoforms will be generated as EE isoforms with outlier.
-The genes will be generated as EE at first, then the count of one of the samples
-(randomly selected) will be setted as its original counts multiplied by one of (4, 6, 8, 10).
-
-}
- \item{Phiconstant}{
-Whether set the disperse parameter phi to be a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-Input should be a vector with length 3. The ith value indicates the overdisperse parameter of isoform group i.
-}
- \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user could use a range of empirical phi's from each group of Gould' data. The lower and upper bound ( quantile) could be specified.
-The suggested value is c(.25, .75). phi for each gene will be randomly choosed w
-ithin the range.
-
-}
- \item{NormFactor}{
-Wether set the mean of each isoform to be a constant.
-}
- \item{OnlyData}{
-Wether the user only want the generated data matrix. If OnlyData = T, the function will return the simulated matrix
-and the name of the DE genes.
-Otherwise the funtion will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-Currently only OnlyData=T is supported
-}
-}
-\details{
-For each isoform, we assumed that the expression follows a Negative Binomial distribution with mean mu_gi and variance mu_gi * (1 + mu_gi * phi_gi).
-For DE genes, we assumed that in one condition the genes are with mean mu_gi * DVD.
-mu, phi and DVD could be specified by the parameter settings.
-
-}
-\value{
-\item{data}{
-A list of expression values will be generated.
-Each list represents a group of isoforms.
-Group1: Ng1
-Group2: Ng2
-Group3: Ng3
-The rows refer to the isoforms and the columns are the samples.
-The isoforms are named "I_GroupNumber_IsoformNumber". The first part of the isoforms of each group will be the DE ones. (The number depends on the DEIsoProp parameter.)
-}
-\item{TrueDE}{The names of the isoforms who are defined to be DE.}
-
-\item{Outliers}{The names of the genes who are defined to be outliers at each level of (4,6,8,10).}
-}
-
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-GeneSimu, IsoSimu
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-IsoGenerate=IsoSimuAt(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{Likefun}
-\alias{Likefun}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Likelihood Function of the NB-Beta Model
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-Likefun(ParamPool, InputPool)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{ParamPool}{
-%% ~~Describe \code{ParamPool} here~~
-}
- \item{InputPool}{
-%% ~~Describe \code{InputPool} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
+++ /dev/null
-\name{LikefunMulti}
-\alias{LikefunMulti}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Likelihood Function of the NB-Beta Model
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-LikefunMulti(ParamPool, InputPool)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{ParamPool}{
-%% ~~Describe \code{ParamPool} here~~
-}
- \item{InputPool}{
-%% ~~Describe \code{InputPool} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
+++ /dev/null
-\name{LogN}
-\alias{LogN}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The function to run EM (one round) using optim.
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-LogN(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{Input}{
-%% ~~Describe \code{Input} here~~
-}
- \item{InputSP}{
-%% ~~Describe \code{InputSP} here~~
-}
- \item{EmpiricalR}{
-%% ~~Describe \code{EmpiricalR} here~~
-}
- \item{EmpiricalRSP}{
-%% ~~Describe \code{EmpiricalRSP} here~~
-}
- \item{NumOfEachGroup}{
-%% ~~Describe \code{NumOfEachGroup} here~~
-}
- \item{AlphaIn}{
-%% ~~Describe \code{AlphaIn} here~~
-}
- \item{BetaIn}{
-%% ~~Describe \code{BetaIn} here~~
-}
- \item{PIn}{
-%% ~~Describe \code{PIn} here~~
-}
- \item{NoneZeroLength}{
-%% ~~Describe \code{NoneZeroLength} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{LogNMulti}
-\alias{LogNMulti}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The function to run EM (one round) using optim.
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-LogNMulti(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength,AllParti, Conditions)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{Input}{
-%% ~~Describe \code{Input} here~~
-}
- \item{InputSP}{
-%% ~~Describe \code{InputSP} here~~
-}
- \item{EmpiricalR}{
-%% ~~Describe \code{EmpiricalR} here~~
-}
- \item{EmpiricalRSP}{
-%% ~~Describe \code{EmpiricalRSP} here~~
-}
- \item{NumOfEachGroup}{
-%% ~~Describe \code{NumOfEachGroup} here~~
-}
- \item{AlphaIn}{
-%% ~~Describe \code{AlphaIn} here~~
-}
- \item{BetaIn}{
-%% ~~Describe \code{BetaIn} here~~
-}
- \item{PIn}{
-%% ~~Describe \code{PIn} here~~
-}
- \item{NoneZeroLength}{
-%% ~~Describe \code{NoneZeroLength} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{MedianNorm}
-\alias{MedianNorm}
-\title{
-Median Normalization
-}
-\description{
-The median normalization from Anders et. al.2010
-}
-\usage{
-MedianNorm(Data)
-}
-\arguments{
-
- \item{Data}{
-The data matrix with transcripts in rows and lanes in columns.
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-The function will return a vector contains the normalization factor for each lane.
-% ...
-}
-\references{
-Simon Anders and Wolfgang Huber: Differential expression analysis for sequence count data
-Genome Biology (2010) 11:R106 (open access)
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-Sizes=MedianNorm(GeneData)
-# Run EBSeq
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{MergeGene}
-\alias{MergeGene}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plots of gene simulation result
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-MergeGene(GeneSIMout, Num, Path = "./")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{GeneSIMout}{
-The output of GeneSimu with OnlyData="F".
-}
- \item{Num}{
-How many times the simulation ran.
-}
- \item{Path}{
-The path to store the plots
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-3 plots will be generated.
-1 FPR vs TPR of each method
-2 FDR vs TPR of each method
-2 Top counts vs FDR of each method
-
-A table will be generated which contains the FDR and TPR of each method.
-(Using p-value=.05 or Posterior Probability=.95).
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-IsoMerge
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="F")
-
-GeneTable=MergeGene(GeneGenerate,1,"./")
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{MergeIso}
-\alias{MergeIso}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Isoforms of gene simulation result
-}
-\description{
-}
-\usage{
-MergeIso(IsoSIMout, Num, Path = "./")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{IsoSIMout}{
-The output of IsoSimu with OnlyData="F".
-}
- \item{Num}{
-How many times the simulation ran.
-
-}
- \item{Path}{
- The path to store the plots.
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-3 plots will be generated.
-1 FPR vs TPR of each method
-2 FDR vs TPR of each method
-2 Top counts vs FDR of each method
-
-A table will be generated which contains the FDR and TPR of each method.
-Each method will be ran on all the data and within group.
-(Using p-value=.05 or Posterior Probability=.95).
-
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-MergeGene
-}
-\examples{
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="F" )
-
-IsoTable=MergeIso(IsoGenerate,1,"./")
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{PlotFDTP}
-\alias{PlotFDTP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot the FDR vs TPR for each method in simulation data
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PlotFDTP(TopNum, FDR, TPR, names)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{TopNum}{
-%% ~~Describe \code{TopNum} here~~
-}
- \item{FDR}{
-%% ~~Describe \code{FDR} here~~
-}
- \item{TPR}{
-%% ~~Describe \code{TPR} here~~
-}
- \item{names}{
-%% ~~Describe \code{names} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-function(TopNum, FDR, TPR,names)
-{
-
- matplot(FDR, TPR, xlim=c(0,.5), ylim=c(0,1) ,type="l",lwd=2,xlab="FDR", ylab="TPR")
- legend("bottomright",col=1:TopNum, lwd=2, lty=1:TopNum, names)
-
-
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{PlotFPTP}
-\alias{PlotFPTP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot the FPR vs TPR for each method in simulation data
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PlotFPTP(TopNum, FPR, TPR, names)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{TopNum}{
-%% ~~Describe \code{TopNum} here~~
-}
- \item{FPR}{
-%% ~~Describe \code{FPR} here~~
-}
- \item{TPR}{
-%% ~~Describe \code{TPR} here~~
-}
- \item{names}{
-%% ~~Describe \code{names} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-function(TopNum, FPR, TPR,names)
-{
-
- matplot(FPR, TPR,xlim=c(0,.1), ylim=c(0,1) ,type="l",lwd=2, xlab="FPR", ylab="TPR")
- legend("bottomright",col=1:TopNum,lwd=2, lty=1:TopNum, names)
-
-
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{PlotPattern}
-\alias{PlotPattern}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Visualize the patterns
-}
-\description{
-visualize the patterns
-}
-\usage{
-PlotPattern(PosParti)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{PosParti}{
-The output of GetPatterns function.
-}
-
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-A heatmap to visualize the patterns of interest.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-Conditions=c("C1","C1","C2","C2","C3","C3")
-PosParti=GetPatterns(Conditions)
-PlotPattern(PosParti)
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ }
+++ /dev/null
-\name{PlotTopCts}
-\alias{PlotTopCts}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot 3 plots for simulation data
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PlotTopCts(TopNum, FD, names)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{TopNum}{
-%% ~~Describe \code{TopNum} here~~
-}
- \item{FD}{
-%% ~~Describe \code{FD} here~~
-}
- \item{names}{
-%% ~~Describe \code{names} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-function(TopNum, FD, names)
-{
- matplot(c(1:TopNum) , FD,type="l",xlab="Top DE selected", lwd=2, log="y", ylab="FD")
- legend("topleft",col=1:TopNum, lwd=2, lty=1:TopNum, names)
-
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{PolyFitPlot}
-\alias{PolyFitPlot}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-%% ~~function to do ... ~~
-Fit the mean-var relationship using polynomial regression
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PolyFitPlot(X, Y, nterms, xname = "Estimated Mean", yname = "Estimated Var", pdfname = "", xlim = c(-1,5), ylim = c(-1,7), ChangeXY = F, col = "red")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{X}{
-The first group of values want to be fitted by the polynomial regression. ( e.g Mean of the data. )
-}
- \item{Y}{
-The second group of values want to be fitted by the polynomial regression. ( e.g. variance of the data.) The length of Y should be the same as the length of X.
-}
- \item{nterms}{
-How many polynomial terms want to be used.
-}
- \item{xname}{
-Name of the x axis.
-}
- \item{yname}{
-Name of the y axis.
-}
- \item{pdfname}{
-Name of the plot.
-}
- \item{xlim}{
-The x limits of the plot.
-}
- \item{ylim}{
-The y limits of the plot.
-
-}
- \item{ChangeXY}{
-If ChangeXY is setted to be TRUE, X will be treated as the dependent variable and Y will be treated as the independent one. Default is FALSE.
-}
- \item{col}{
-Color of the fitted line.
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
- The PolyFitPlot function provides a smooth scatter plot of two variables and their best fitting line of polynomial regression.
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneData=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-
-EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi(Data=GeneData,NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), maxround=5)
-
-poly=PolyFitPlot(X=EBres$MeanList[[1]], Y=EBres$PoolVar[[1]], nterms=5, xname = "mean", yname = "var", pdfname=NULL, xlim = c(0,4.5),ylim = c(-2,8), ChangeXY = F, col = "red")
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{PoolMatrix}
-\alias{PoolMatrix}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the expression matrix from the output of GetData
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PoolMatrix(Data, reads, type)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{Data}{
-The output from GetData function.
-}
- \item{reads}{
-The total number of reads in each lane. Could be obtained from the RSEM outputs.
-}
- \item{type}{
-If type="S", the outputs will be the a matrix which transcript names in row and sample names in column.
-If type="G", the first column will be the group information.
-
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-\item{PoolM}{The matrix of nu values}
-\item{PoolValue}{The matrix of expression values}
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-GetData
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-function(Data,reads,type)
-{
-poolnames=names(Data)
-poolM=NULL
-for (po in 1:8)
- poolM=cbind(poolM,Data[[po]][,1])
-rownames(poolM)=rownames(Data[[1]])
-colnames(poolM)=poolnames
-
-#poolValue=poolM*reads
-poolValue=poolM
-for (col in 1:8)
- poolValue[,col]=poolM[,col]*reads[col]
-poolValue=round(poolValue)
-if (type=="G")
- {
- poolM=cbind(Data[[1]][,2],poolM)
- poolValue=cbind(Data[[1]][,2],poolValue)
- colnames(poolM)=c("Groups",poolnames)
- colnames(poolValue)=c("Groups",poolnames)
- }
-poolOutput=list(poolM=poolM,poolValue=poolValue)
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{PostFC}
-\alias{PostFC}
-\title{
-Calculate the posterior fold change for each transcript across conditions
-}
-\description{
-}
-\usage{
-PostFC(EBoutput)
-}
-\arguments{
-
- \item{EBoutput}{
-The ourput from function EBTest. (Currently only at gene level)
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{QQP}
-\alias{QQP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The QQ Plot of empirical q's and simulated q's from fitted beta distribution
-}
-\description{
-
-}
-\usage{
-QQP(QList, AlphaResult, BetaResult, name, AList="F", GroupName)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{QList}{
-The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input could be a vector or a list of different groups of transcripts. The number of lists here should be the same as the length of BetaResult.
-
-}
- \item{AlphaResult}{
-The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input should be a number if AList is not defined.
-}
- \item{BetaResult}{
-The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input could be one single number or a vector of several numbers. The length of the input should be the same as the number of lists of QList.
-}
- \item{name}{
-The name of the plots
-}
- \item{AList}{
-Whether a list of alpha's are used
-}
- \item{GroupName}{
-The names of each sub plot. The l
-ength of the input should be the same as the number of lists of QList.
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
- NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi , DenNHist
-}
-\examples{
-GeneData=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-
-EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), maxround=5)
-
-QQP(QList=EBres$QList1, AlphaResult=EBres[[1]][5,1], BetaResult=EBres[[2]][5,1], name="Gene", AList="F", GroupName=NULL)
-
-## The function is currently defined as
-function(QList,AlphaResult,BetaResult,name,AList="F",GroupName){
- for (i in 1:length(BetaResult)){
- tmpSize=length(QList[[i]][QList[[i]]<1])
- if (AList=="F") rdpts=rbeta(tmpSize,AlphaResult,BetaResult[i])
- else rdpts=rbeta(tmpSize,AlphaResult[i],BetaResult[i])
- qqplot(QList[[i]][QList[[i]]<1], rdpts, xlab="estimated q's", "simulated q's from fitted beta distribution",main=paste(names(name,GroupName[i],sep=" "),xlim=c(0,1),ylim=c(0,1))
- }
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{QuantileNorm}
-\alias{QuantileNorm}
-\title{
-Quantile Normalization
-}
-\description{
-The Quantile normalization
-}
-\usage{
-QuantileNorm(Data, Quantile)
-}
-\arguments{
-
- \item{Data}{
-The data matrix with transcripts in rows and lanes in columns.
-}
-\item{Quantile}{
-The quantile the user wishs to use. Should be a number between 0 and 1.
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-Use a quantile point to normalize the data.
-}
-\value{
-The function will return a vector contains the normalization factor for each lane.
-% ...
-}
-\references{}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-Sizes=QuantileNorm(GeneData)
-# Run EBSeq
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{RankNorm}
-\alias{RankNorm}
-\title{
-Rank Normalization
-}
-\description{
-}
-\usage{
-RankNorm(Data)
-}
-\arguments{
-
- \item{Data}{
-The data matrix with transcripts in rows and lanes in columns.
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-The function will return a matrix contains the normalization factor for each lane and each transcript.
-% ...
-}
-\references{
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-Sizes=RankNorm(GeneData)
-# Run EBSeq
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{TPFDRplot}
-\alias{TPFDRplot}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot the number of top counts vs FDR for each method in simulation data
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-TPFDRplot(DESeqP, EBZ, TrueDE, main, FDR = NULL)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{DESeqP}{
-%% ~~Describe \code{DESeqP} here~~
-}
- \item{EBZ}{
-%% ~~Describe \code{EBZ} here~~
-}
- \item{TrueDE}{
-%% ~~Describe \code{TrueDE} here~~
-}
- \item{main}{
-%% ~~Describe \code{main} here~~
-}
- \item{FDR}{
-%% ~~Describe \code{FDR} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-function(DESeqP, EBZ, TrueDE, main, FDR=NULL){
- Seq=seq(0.001,0.5,by=0.001)
- DETPR=rep(0,length(Seq))
- EBTPR=rep(0,length(Seq))
- DEFDR=rep(0,length(Seq))
- EBFDR=rep(0,length(Seq))
- DETPNum=rep(0,length(Seq))
- EBTPNum=rep(0,length(Seq))
- DEFDNum=rep(0,length(Seq))
- EBFDNum=rep(0,length(Seq))
- for (i in 1:length(Seq)){
- DESeqOnes=names(DESeqP)[DESeqP<=Seq[i]]
- if (length(FDR)==0) EBOnes=names(EBZ)[EBZ>=crit.fun(1-EBZ, Seq[i])]
- else if (FDR=="H") EBOnes=names(EBZ)[EBZ>=(1-Seq[i])]
- else EBOnes=names(EBZ)[EBZ>=FDR[i]]
-
- DETPNum[i]=sum(DESeqOnes\%in\%TrueDE)
- EBTPNum[i]=sum(EBOnes\%in\%TrueDE)
- DEFDNum[i]=sum(!DESeqOnes\%in\%TrueDE)
- EBFDNum[i]=sum(!EBOnes\%in\%TrueDE)
-
- DETPR[i]=DETPNum[i]/length(TrueDE)
- EBTPR[i]=EBTPNum[i]/length(TrueDE)
- DEFDR[i]=DEFDNum[i]/length(TrueDE)
- EBFDR[i]=EBFDNum[i]/length(TrueDE)
- }
- plot(Seq,DETPR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "TPR"),xlab="controled FDR level", ylab="TPR",lwd=2)
- lines(Seq,EBTPR,col="blue",lwd=2)
- legend("bottomright",lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
-
- plot(Seq,DEFDR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "FDR"),xlab="controled FDR level", ylab="TPR",lwd=2)
- lines(Seq,EBFDR,col="blue",lwd=2)
- legend("topleft", lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
-
-
- output=cbind( DETPR,EBTPR, DEFDR,EBFDR,DETPNum,EBTPNum,DEFDNum,EBFDNum)
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{TopCts}
-\alias{TopCts}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Get FDR of Top Counts
-}
-\description{
-
-}
-\usage{
-TopCts(pvalue, PP = NULL, TrueNames, TopNum)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{pvalue}{
-A matrix contains the p values (Posterior probabilities) for each transcript and each method.
-Rows are for different methods and columns are for different transcripts.
-}
- \item{PP}{
-The length of PP vector should be the same as the number of columns in pvalue matrix.
-The value in PP either 0 or 1.
-If the ith value of PP is 0, it means the ith method (the ith row of pvalue) provided p-values.
-If the ith value of PP is 1, it means the ith method (the ith row of pvalue) provided posterior probabilities.
-}
- \item{TrueNames}{
-The names of the transcripts who defined to be DE.
-}
- \item{TopNum}{
-The number of top counts we are interested in.
-For example, if TopNum=1000, we'll calculate the FDR's of each method if we pick the top 1, 2, ... 1000 genes.
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-A metrix contains the FDR's.
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%% ~~who you are~~
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-function(pvalue, PP=NULL, TrueNames, TopNum){
- NumOfMethods=ncol(pvalue)
- puse=pvalue
- if(1\%in\%PP)puse[,PP==1]=1-pvalue[,PP==1]
- #puse.list=data.frame(puse)
- FD=matrix(rep(0,NumOfMethods*TopNum),ncol=NumOfMethods)
-# Rank=apply(puse,2,rank)
-# for(i in 1:TopNum)
-# FD[i,]=sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]\%in\%TrueNames))
-# FD=sapply(1:TopNum, function(i)sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]\%in\%TrueNames)))
- for (s in 1:NumOfMethods){
- tmp=puse[,s]
- names(tmp)=rownames(puse)
- sorttmp=sort(tmp)
- for( c in 2:TopNum)
- FD[c, s]=FD[(c-1),s]+as.numeric(!names(sorttmp)[c]\%in\%TrueNames)
- }
- FD
- #matplot(TopNum,FD,type="l",ylim=c(0,1),xlab="Top DE selected", ylab="FDR")
- #legend("rightbottom",col=1:TopNum, lty=1:TopNum, names)
- }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{beta.mom}
-\alias{beta.mom}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Fit the beta distribution by method of moments
-}
-\description{
-Fit the beta distribution by method of moments
-}
-\usage{
-beta.mom(qs.in)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{qs.in}{
-A vector contains the numbers that are assumed to follow a beta distribution
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
- \item{alpha.hat }{Return the estimation of alpha}
- \item{beta.hat}{Return the estimation of beta}
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-DenNHist, DenNHistTable
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-tmp=rbeta(5,5,100)
-param=beta.mom(tmp)
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ beta }
+++ /dev/null
-\name{crit_fun}
-\alias{crit_fun}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Calculate the adjusted FDR threshold
-}
-\description{
-Calculate the adjusted FDR threshold using the posterior probabilities at a target FDR
-}
-\usage{
-crit_fun(PPEE, thre)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{PPEE}{
-The posterior probabilities of being EE.
-}
- \item{thre}{
-The target FDR.
- }
-
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-The adjusted FDR threshold of target FDR.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-## The function is currently defined as
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ FDR }
+++ /dev/null
-\name{f0}
-\alias{f0}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The Predictive Distribution of being EE
-}
-\description{
-%% ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-f0(Input, AlphaIn, BetaIn, EmpiricalR, NumOfGroups, log)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{Input}{
-%% ~~Describe \code{Input} here~~
-}
- \item{AlphaIn}{
-%% ~~Describe \code{AlphaIn} here~~
-}
- \item{BetaIn}{
-%% ~~Describe \code{BetaIn} here~~
-}
- \item{EmpiricalR}{
-%% ~~Describe \code{EmpiricalR} here~~
-}
- \item{NumOfGroups}{
-%% ~~Describe \code{NumOfGroups} here~~
-}
- \item{log}{
-%% ~~Describe \code{log} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-f1
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==> Define data, use random,
-##-- or do help(data=index) for the standard data sets.
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+++ /dev/null
-\name{f1}
-\alias{f1}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-f1(Input, AlphaIn, BetaIn, EmpiricalR, NumOfGroups, log)
-}
-\description{
-f1(X_gi)=f0(X_giC1)f0(X_giC2)
-}
-\usage{
-f1(Input1, Input2, AlphaIn, BetaIn, EmpiricalRSP1, EmpiricalRSP2, NumOfGroup, log)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{Input1}{
-%% ~~Describe \code{Input1} here~~
-}
- \item{Input2}{
-%% ~~Describe \code{Input2} here~~
-}
- \item{AlphaIn}{
-%% ~~Describe \code{AlphaIn} here~~
-}
- \item{BetaIn}{
-%% ~~Describe \code{BetaIn} here~~
-}
- \item{EmpiricalRSP1}{
-%% ~~Describe \code{EmpiricalRSP1} here~~
-}
- \item{EmpiricalRSP2}{
-%% ~~Describe \code{EmpiricalRSP2} here~~
-}
- \item{NumOfGroup}{
-%% ~~Describe \code{NumOfGroup} here~~
-}
- \item{log}{
-%% ~~Describe \code{log} here~~
-}
-}
-\details{
-%% ~~ If necessary, more details than the description above ~~
-}
-\value{
-%% ~Describe the value returned
-%% If it is a LIST, use
-%% \item{comp1 }{Description of 'comp1'}
-%% \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar, f0.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\author{
-Ning Leng
-}
-\note{
-%% ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-f0
-}
-\examples{
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
--- /dev/null
+#!/usr/bin/env Rscript
+
+argv <- commandArgs(TRUE)
+if (length(argv) != 2) {
+ cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
+ q(status = 1)
+}
+
+data <- read.table(file = argv[1], stringsAsFactors = F)
+idx <- data[,2] >= 0
+kmr <- kmeans(data[idx, 2], 3)
+order <- order(kmr$centers)
+
+ngvec <- rep(0, length(idx))
+ngvec[idx] <- order[kmr$cluster]
+ngvec[!idx] <- 3
+
+write.table(ngvec, file = argv[2], row.names = F, col.names = F)
template<class ModelType>
void calcExpectedEffectiveLengths(ModelType& model) {
- int lb, ub, span;
- double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
+ int lb, ub, span;
+ double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
- model.getGLD().copyTo(pdf, cdf, lb, ub, span);
- clen = new double[span + 1];
- clen[0] = 0.0;
- for (int i = 1; i <= span; i++) {
- clen[i] = clen[i - 1] + pdf[i] * (lb + i);
- }
-
- eel.clear();
- eel.resize(M + 1, 0.0);
- for (int i = 1; i <= M; i++) {
- int totLen = refs.getRef(i).getTotLen();
- int fullLen = refs.getRef(i).getFullLen();
- int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
- int pos2 = max(min(totLen, ub) - lb, 0);
-
- if (pos2 == 0) { eel[i] = 0.0; continue; }
+ model.getGLD().copyTo(pdf, cdf, lb, ub, span);
+ clen = new double[span + 1];
+ clen[0] = 0.0;
+ for (int i = 1; i <= span; i++) {
+ clen[i] = clen[i - 1] + pdf[i] * (lb + i);
+ }
+
+ eel.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++) {
+ int totLen = refs.getRef(i).getTotLen();
+ int fullLen = refs.getRef(i).getFullLen();
+ int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
+ int pos2 = max(min(totLen, ub) - lb, 0);
+
+ if (pos2 == 0) { eel[i] = 0.0; continue; }
- eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
- assert(eel[i] >= 0);
- if (eel[i] < MINEEL) { eel[i] = 0.0; }
- }
+ eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
+ assert(eel[i] >= 0);
+ if (eel[i] < MINEEL) { eel[i] = 0.0; }
+ }
- delete[] pdf;
- delete[] cdf;
- delete[] clen;
+ delete[] pdf;
+ delete[] cdf;
+ delete[] clen;
+}
+
+// Correct theta by the masking weights (mw) and renormalize so it sums to 1.
+// theta: per-transcript fractions (index 0 is the noise term); modified in place.
+// eel:   expected effective lengths; transcripts with eel[i] < EPSILON are zeroed.
+// mw:    masking weights; transcripts with mw[i] < EPSILON are zeroed.
+void polishTheta(vector<double>& theta, const vector<double>& eel, const double* mw) {
+ double sum = 0.0;
+
+ /* The reason that the mw value is 1 for the noise gene is:
+ * currently, all masked positions are for poly(A) sites, which in theory should be filtered out.
+ * So theta0 does not contain reads from any masked position.
+ */
+
+ for (int i = 0; i <= M; i++) {
+ // for i == 0, mw[i] == 1, so the noise term is never zeroed here
+ if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
+ theta[i] = 0.0;
+ continue;
+ }
+ theta[i] = theta[i] / mw[i];
+ sum += theta[i];
+ }
+ // currently OK, since no transcript should be masked totally; only the poly(A) tail related part will be masked
+ general_assert(sum >= EPSILON, "No effective length is no less than " + ftos(MINEEL, 6) + "!");
+ for (int i = 0; i <= M; i++) theta[i] /= sum;
+}
+
+// Convert theta fractions into TPM and FPKM expression values.
+// theta: per-transcript fractions (index 0 is the noise term, ignored here).
+// eel:   expected effective lengths; entries with eel[i] < EPSILON stay 0.
+// tpm, fpkm: output vectors, resized to M + 1.
+void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
+ double denom;
+ vector<double> frac;
+
+ //calculate fraction of count over all mappable reads
+ denom = 0.0;
+ frac.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) {
+ frac[i] = theta[i];
+ denom += frac[i];
+ }
+ general_assert(denom > 0, "No alignable reads?!");
+ for (int i = 1; i <= M; i++) frac[i] /= denom;
+
+ //calculate FPKM (frac * 1e9 / effective length)
+ fpkm.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+ //calculate TPM by rescaling FPKM to sum to 1e6
+ tpm.assign(M + 1, 0.0);
+ denom = 0.0;
+ for (int i = 1; i <= M; i++) denom += fpkm[i];
+ for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;
}
template<class ModelType>
void writeResults(ModelType& model, double* counts) {
- double denom;
char outF[STRLEN];
FILE *fo;
sprintf(modelF, "%s.model", statName);
model.write(modelF);
- //calculate tau values
- double *tau = new double[M + 1];
- memset(tau, 0, sizeof(double) * (M + 1));
+ vector<int> tlens;
+ vector<double> fpkm, tpm, isopct;
+ vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
- denom = 0.0;
- for (int i = 1; i <= M; i++)
- if (eel[i] >= EPSILON) {
- tau[i] = theta[i] / eel[i];
- denom += tau[i];
- }
+ calcExpressionValues(theta, eel, tpm, fpkm);
- general_assert(denom > 0, "No alignable reads?!");
+ //calculate IsoPct, etc.
+ isopct.assign(M + 1, 0.0);
+ tlens.assign(M + 1, 0);
- for (int i = 1; i <= M; i++) {
- tau[i] /= denom;
+ glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
+ gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
+
+ for (int i = 0; i < m; i++) {
+ int b = gi.spAt(i), e = gi.spAt(i + 1);
+ for (int j = b; j < e; j++) {
+ const Transcript& transcript = transcripts.getTranscriptAt(j);
+ tlens[j] = transcript.getLength();
+
+ glens[i] += tlens[j] * tpm[j];
+ gene_eels[i] += eel[j] * tpm[j];
+ gene_counts[i] += counts[j];
+ gene_tpm[i] += tpm[j];
+ gene_fpkm[i] += fpkm[j];
+ }
+
+ if (gene_tpm[i] < EPSILON) continue;
+
+ for (int j = b; j < e; j++)
+ isopct[j] = tpm[j] / gene_tpm[i];
+ glens[i] /= gene_tpm[i];
+ gene_eels[i] /= gene_tpm[i];
}
//isoform level results
const Transcript& transcript = transcripts.getTranscriptAt(i);
fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < M ? '\t' : '\n'));
}
- for (int i = 1; i <= M; i++)
- fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
- for (int i = 1; i <= M; i++)
- fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
for (int i = 1; i <= M; i++) {
const Transcript& transcript = transcripts.getTranscriptAt(i);
fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
}
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%d%c", tlens[i], (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.2f%c", eel[i], (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.2f%c", tpm[i], (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.2f%c", fpkm[i], (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
fclose(fo);
//gene level results
sprintf(outF, "%s.gene_res", imdName);
fo = fopen(outF, "w");
for (int i = 0; i < m; i++) {
- const string& gene_id = transcripts.getTranscriptAt(gi.spAt(i)).getGeneID();
- fprintf(fo, "%s%c", gene_id.c_str(), (i < m - 1 ? '\t' : '\n'));
- }
- for (int i = 0; i < m; i++) {
- double sumC = 0.0; // sum of counts
- int b = gi.spAt(i), e = gi.spAt(i + 1);
- for (int j = b; j < e; j++) sumC += counts[j];
- fprintf(fo, "%.2f%c", sumC, (i < m - 1 ? '\t' : '\n'));
- }
- for (int i = 0; i < m; i++) {
- double sumT = 0.0; // sum of tau values
- int b = gi.spAt(i), e = gi.spAt(i + 1);
- for (int j = b; j < e; j++) sumT += tau[j];
- fprintf(fo, "%.15g%c", sumT, (i < m - 1 ? '\t' : '\n'));
+ const Transcript& transcript = transcripts.getTranscriptAt(gi.spAt(i));
+ fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < m - 1 ? '\t' : '\n'));
}
for (int i = 0; i < m; i++) {
int b = gi.spAt(i), e = gi.spAt(i + 1);
fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : (i < m - 1 ? '\t' :'\n')));
}
}
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", glens[i], (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", gene_eels[i], (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
fclose(fo);
- delete[] tau;
-
if (verbose) { printf("Expression Results are written!\n"); }
}
fout.close();
}
- sprintf(thetaF, "%s.theta", statName);
- fo = fopen(thetaF, "w");
- fprintf(fo, "%d\n", M + 1);
-
- // output theta'
- for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
- fprintf(fo, "%.15g\n", theta[M]);
-
- //calculate expected effective lengths for each isoform
- calcExpectedEffectiveLengths<ModelType>(model);
-
- //correct theta vector
- sum = theta[0];
- for (int i = 1; i <= M; i++)
- if (eel[i] < EPSILON) { theta[i] = 0.0; }
- else sum += theta[i];
-
- general_assert(sum >= EPSILON, "No Expected Effective Length is no less than" + ftos(MINEEL, 6) + "?!");
-
- for (int i = 0; i <= M; i++) theta[i] /= sum;
-
//calculate expected weights and counts using learned parameters
+ //just use the raw theta learned from the data, do not correct for eel or mw
updateModel = false; calcExpectedWeights = true;
for (int i = 0; i <= M; i++) probv[i] = theta[i];
for (int i = 0; i < nThreads; i++) {
/* destroy attribute */
pthread_attr_destroy(&attr);
- //convert theta' to theta
- double *mw = model.getMW();
- sum = 0.0;
- for (int i = 0; i <= M; i++) {
- theta[i] = (mw[i] < EPSILON ? 0.0 : theta[i] / mw[i]);
- sum += theta[i];
- }
- assert(sum >= EPSILON);
- for (int i = 0; i <= M; i++) theta[i] /= sum;
+
+ sprintf(thetaF, "%s.theta", statName);
+ fo = fopen(thetaF, "w");
+ fprintf(fo, "%d\n", M + 1);
+
+ // output theta'
+ for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
+ fprintf(fo, "%.15g\n", theta[M]);
+
+ //calculate expected effective lengths for each isoform
+ calcExpectedEffectiveLengths<ModelType>(model);
+ polishTheta(theta, eel, model.getMW());
// output theta
for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
FILE *fo;
engine_type *engine;
double *pme_c, *pve_c; //posterior mean and variance vectors on counts
- double *pme_theta;
+ double *pme_tpm, *pme_fpkm;
};
vector<HIT_INT_TYPE> s;
vector<Item> hits;
-vector<double> theta;
+vector<double> eel;
+double *mw;
vector<double> pme_c, pve_c; //global posterior mean and variance vectors on counts
-vector<double> pme_theta, eel;
+vector<double> pme_tpm, pme_fpkm;
bool var_opt;
bool quiet;
gi.load(groupF);
m = gi.getm();
- //load thetaF
- sprintf(thetaF, "%s.theta",statName);
- fin.open(thetaF);
- general_assert(fin.is_open(), "Cannot open " + cstrtos(thetaF) + "!");
- fin>>tmpVal;
- general_assert(tmpVal == M + 1, "Number of transcripts is not consistent in " + cstrtos(refF) + " and " + cstrtos(thetaF) + "!");
- theta.assign(M + 1, 0);
- for (int i = 0; i <= M; i++) fin>>theta[i];
- fin.close();
-
//load ofgF;
sprintf(ofgF, "%s.ofg", imdName);
fin.open(ofgF);
if (verbose) { printf("Loading Data is finished!\n"); }
}
+// Compute the expected effective length of each transcript from the model's
+// global length distribution (GLD) and store it in the global 'eel' vector
+// (size M + 1). Entries below MINEEL are zeroed.
+template<class ModelType>
+void calcExpectedEffectiveLengths(ModelType& model) {
+ int lb, ub, span;
+ double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i} pdf[j]*(lb+j)
+
+ model.getGLD().copyTo(pdf, cdf, lb, ub, span);
+ clen = new double[span + 1];
+ clen[0] = 0.0;
+ for (int i = 1; i <= span; i++) {
+ clen[i] = clen[i - 1] + pdf[i] * (lb + i);
+ }
+
+ eel.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++) {
+ int totLen = refs.getRef(i).getTotLen();
+ int fullLen = refs.getRef(i).getFullLen();
+ // clamp the usable length window [pos1, pos2] to the distribution's support
+ int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
+ int pos2 = max(min(totLen, ub) - lb, 0);
+
+ if (pos2 == 0) { eel[i] = 0.0; continue; }
+
+ eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
+ assert(eel[i] >= 0);
+ if (eel[i] < MINEEL) { eel[i] = 0.0; }
+ }
+
+ delete[] pdf;
+ delete[] cdf;
+ delete[] clen;
+}
+
+// Read the model file, compute expected effective lengths (fills the global
+// 'eel'), and snapshot the model's masking weights into the global 'mw'
+// buffer. Precondition: mw must already be allocated with size M + 1.
+template<class ModelType>
+void init_model_related(char* modelF) {
+ ModelType model;
+ model.read(modelF);
+
+ calcExpectedEffectiveLengths<ModelType>(model);
+ memcpy(mw, model.getMW(), sizeof(double) * (M + 1)); // otherwise, after exiting this procedure, mw becomes undefined
+}
+
// assign threads
void init() {
int quotient, left;
memset(paramsArray[i].pme_c, 0, sizeof(double) * (M + 1));
paramsArray[i].pve_c = new double[M + 1];
memset(paramsArray[i].pve_c, 0, sizeof(double) * (M + 1));
- paramsArray[i].pme_theta = new double[M + 1];
- memset(paramsArray[i].pme_theta, 0, sizeof(double) * (M + 1));
+ paramsArray[i].pme_tpm = new double[M + 1];
+ memset(paramsArray[i].pme_tpm, 0, sizeof(double) * (M + 1));
+ paramsArray[i].pme_fpkm = new double[M + 1];
+ memset(paramsArray[i].pme_fpkm, 0, sizeof(double) * (M + 1));
}
/* set thread attribute to be joinable */
fprintf(fo, "%d\n", counts[M]);
}
+// Correct theta by the masking weights (mw) and renormalize so it sums to 1.
+// theta: per-transcript fractions (index 0 is the noise term); modified in place.
+// eel:   expected effective lengths; transcripts with eel[i] < EPSILON are zeroed.
+// mw:    masking weights; transcripts with mw[i] < EPSILON are zeroed.
+void polishTheta(vector<double>& theta, const vector<double>& eel, const double* mw) {
+ double sum = 0.0;
+
+ /* The reason that the mw value is 1 for the noise gene is:
+ * currently, all masked positions are for poly(A) sites, which in theory should be filtered out.
+ * So theta0 does not contain reads from any masked position.
+ */
+
+ for (int i = 0; i <= M; i++) {
+ // for i == 0, mw[i] == 1, so the noise term is never zeroed here
+ if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
+ theta[i] = 0.0;
+ continue;
+ }
+ theta[i] = theta[i] / mw[i];
+ sum += theta[i];
+ }
+ // currently OK, since no transcript should be masked totally; only the poly(A) tail related part will be masked
+ general_assert(sum >= EPSILON, "No effective length is no less than " + ftos(MINEEL, 6) + "!");
+ for (int i = 0; i <= M; i++) theta[i] /= sum;
+}
+
+// Convert theta fractions into TPM and FPKM expression values.
+// theta: per-transcript fractions (index 0 is the noise term, ignored here).
+// eel:   expected effective lengths; entries with eel[i] < EPSILON stay 0.
+// tpm, fpkm: output vectors, resized to M + 1.
+void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
+ double denom;
+ vector<double> frac;
+
+ //calculate fraction of count over all mappable reads
+ denom = 0.0;
+ frac.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) {
+ frac[i] = theta[i];
+ denom += frac[i];
+ }
+ general_assert(denom > 0, "No alignable reads?!");
+ for (int i = 1; i <= M; i++) frac[i] /= denom;
+
+ //calculate FPKM (frac * 1e9 / effective length)
+ fpkm.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+ //calculate TPM by rescaling FPKM to sum to 1e6
+ tpm.assign(M + 1, 0.0);
+ denom = 0.0;
+ for (int i = 1; i <= M; i++) denom += fpkm[i];
+ for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;
+}
+
void* Gibbs(void* arg) {
int CHAINLEN;
HIT_INT_TYPE len, fr, to;
Params *params = (Params*)arg;
- vector<double> theta;
+ vector<double> theta, tpm, fpkm;
vector<int> z, counts;
vector<double> arr;
if (ROUND > BURNIN) {
if ((ROUND - BURNIN - 1) % GAP == 0) {
writeCountVector(params->fo, counts);
+ for (int i = 0; i <= M; i++) theta[i] = counts[i] / totc;
+ polishTheta(theta, eel, mw);
+ calcExpressionValues(theta, eel, tpm, fpkm);
for (int i = 0; i <= M; i++) {
params->pme_c[i] += counts[i] - 1;
params->pve_c[i] += (counts[i] - 1) * (counts[i] - 1);
- params->pme_theta[i] += counts[i] / totc;
+ params->pme_tpm[i] += tpm[i];
+ params->pme_fpkm[i] += fpkm[i];
}
}
}
pme_c.assign(M + 1, 0);
pve_c.assign(M + 1, 0);
- pme_theta.assign(M + 1, 0);
+ pme_tpm.assign(M + 1, 0);
+ pme_fpkm.assign(M + 1, 0);
for (int i = 0; i < nThreads; i++) {
fclose(paramsArray[i].fo);
delete paramsArray[i].engine;
for (int j = 0; j <= M; j++) {
pme_c[j] += paramsArray[i].pme_c[j];
pve_c[j] += paramsArray[i].pve_c[j];
- pme_theta[j] += paramsArray[i].pme_theta[j];
+ pme_tpm[j] += paramsArray[i].pme_tpm[j];
+ pme_fpkm[j] += paramsArray[i].pme_fpkm[j];
}
delete[] paramsArray[i].pme_c;
delete[] paramsArray[i].pve_c;
- delete[] paramsArray[i].pme_theta;
+ delete[] paramsArray[i].pme_tpm;
+ delete[] paramsArray[i].pme_fpkm;
}
delete[] paramsArray;
for (int i = 0; i <= M; i++) {
pme_c[i] /= NSAMPLES;
pve_c[i] = (pve_c[i] - NSAMPLES * pme_c[i] * pme_c[i]) / (NSAMPLES - 1);
- pme_theta[i] /= NSAMPLES;
- }
-}
-
-template<class ModelType>
-void calcExpectedEffectiveLengths(ModelType& model) {
- int lb, ub, span;
- double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = \sigma_{j=1}^{i}pdf[i]*(lb+i)
-
- model.getGLD().copyTo(pdf, cdf, lb, ub, span);
- clen = new double[span + 1];
- clen[0] = 0.0;
- for (int i = 1; i <= span; i++) {
- clen[i] = clen[i - 1] + pdf[i] * (lb + i);
- }
-
- eel.assign(M + 1, 0.0);
- for (int i = 1; i <= M; i++) {
- int totLen = refs.getRef(i).getTotLen();
- int fullLen = refs.getRef(i).getFullLen();
- int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
- int pos2 = max(min(totLen, ub) - lb, 0);
-
- if (pos2 == 0) { eel[i] = 0.0; continue; }
-
- eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
- assert(eel[i] >= 0);
- if (eel[i] < MINEEL) { eel[i] = 0.0; }
+ pme_tpm[i] /= NSAMPLES;
+ pme_fpkm[i] /= NSAMPLES;
}
-
- delete[] pdf;
- delete[] cdf;
- delete[] clen;
}
-template<class ModelType>
-void writeEstimatedParameters(char* modelF, char* imdName) {
- ModelType model;
- double denom;
+void writeResults(char* imdName) {
char outF[STRLEN];
FILE *fo;
- model.read(modelF);
-
- calcExpectedEffectiveLengths<ModelType>(model);
-
- denom = pme_theta[0];
- for (int i = 1; i <= M; i++)
- if (eel[i] < EPSILON) pme_theta[i] = 0.0;
- else denom += pme_theta[i];
-
- general_assert(denom >= EPSILON, "No Expected Effective Length is no less than " + ftos(MINEEL, 6) + "?!");
-
- for (int i = 0; i <= M; i++) pme_theta[i] /= denom;
-
- denom = 0.0;
- double *mw = model.getMW();
- for (int i = 0; i <= M; i++) {
- pme_theta[i] = (mw[i] < EPSILON ? 0.0 : pme_theta[i] / mw[i]);
- denom += pme_theta[i];
- }
- assert(denom >= EPSILON);
- for (int i = 0; i <= M; i++) pme_theta[i] /= denom;
+ vector<double> isopct;
+ vector<double> gene_counts, gene_tpm, gene_fpkm;
- //calculate tau values
- double *tau = new double[M + 1];
- memset(tau, 0, sizeof(double) * (M + 1));
+ //calculate IsoPct, etc.
+ isopct.assign(M + 1, 0.0);
+ gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
- denom = 0.0;
- for (int i = 1; i <= M; i++)
- if (eel[i] > EPSILON) {
- tau[i] = pme_theta[i] / eel[i];
- denom += tau[i];
- }
-
- general_assert(denom >= EPSILON, "No alignable reads?!");
-
- for (int i = 1; i <= M; i++) {
- tau[i] /= denom;
+ for (int i = 0; i < m; i++) {
+ int b = gi.spAt(i), e = gi.spAt(i + 1);
+ for (int j = b; j < e; j++) {
+ gene_counts[i] += pme_c[j];
+ gene_tpm[i] += pme_tpm[j];
+ gene_fpkm[i] += pme_fpkm[j];
+ }
+ if (gene_tpm[i] < EPSILON) continue;
+ for (int j = b; j < e; j++)
+ isopct[j] = pme_tpm[j] / gene_tpm[i];
}
//isoform level results
for (int i = 1; i <= M; i++)
fprintf(fo, "%.2f%c", pme_c[i], (i < M ? '\t' : '\n'));
for (int i = 1; i <= M; i++)
- fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
-
+ fprintf(fo, "%.2f%c", pme_tpm[i], (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.2f%c", pme_fpkm[i], (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
fclose(fo);
//gene level results
fo = fopen(outF, "a");
general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
- for (int i = 0; i < m; i++) {
- double sumC = 0.0; // sum of pme counts
- int b = gi.spAt(i), e = gi.spAt(i + 1);
- for (int j = b; j < e; j++) {
- sumC += pme_c[j];
- }
- fprintf(fo, "%.15g%c", sumC, (i < m - 1 ? '\t' : '\n'));
- }
- for (int i = 0; i < m; i++) {
- double sumT = 0.0; // sum of tau values
- int b = gi.spAt(i), e = gi.spAt(i + 1);
- for (int j = b; j < e; j++) {
- sumT += tau[j];
- }
- fprintf(fo, "%.15g%c", sumT, (i < m - 1 ? '\t' : '\n'));
- }
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
fclose(fo);
- delete[] tau;
-
if (verbose) { printf("Gibbs based expression values are written!\n"); }
}
NSAMPLES = atoi(argv[5]);
GAP = atoi(argv[6]);
- load_data(argv[1], statName, imdName);
-
nThreads = 1;
var_opt = false;
quiet = false;
printf("Warning: Number of samples is less than number of threads! Change the number of threads to %d!\n", nThreads);
}
+ load_data(argv[1], statName, imdName);
+
+ sprintf(modelF, "%s.model", statName);
+ FILE *fi = fopen(modelF, "r");
+ general_assert(fi != NULL, "Cannot open " + cstrtos(modelF) + "!");
+ assert(fscanf(fi, "%d", &model_type) == 1);
+ fclose(fi);
+
+ mw = new double[M + 1]; // make an extra copy
+
+ switch(model_type) {
+ case 0 : init_model_related<SingleModel>(modelF); break;
+ case 1 : init_model_related<SingleQModel>(modelF); break;
+ case 2 : init_model_related<PairedEndModel>(modelF); break;
+ case 3 : init_model_related<PairedEndQModel>(modelF); break;
+ }
+
if (verbose) printf("Gibbs started!\n");
init();
release();
if (verbose) printf("Gibbs finished!\n");
-
- sprintf(modelF, "%s.model", statName);
- FILE *fi = fopen(modelF, "r");
- general_assert(fi != NULL, "Cannot open " + cstrtos(modelF) + "!");
- assert(fscanf(fi, "%d", &model_type) == 1);
- fclose(fi);
-
- switch(model_type) {
- case 0 : writeEstimatedParameters<SingleModel>(modelF, imdName); break;
- case 1 : writeEstimatedParameters<SingleQModel>(modelF, imdName); break;
- case 2 : writeEstimatedParameters<PairedEndModel>(modelF, imdName); break;
- case 3 : writeEstimatedParameters<PairedEndQModel>(modelF, imdName); break;
- }
+
+ writeResults(imdName);
if (var_opt) {
char varF[STRLEN];
fclose(fo);
}
+ delete[] mw; // delete the copy (allocated with new double[M + 1], so array delete is required)
+
return 0;
}
#include<algorithm>
#include<sstream>
#include<iostream>
+#include<vector>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
- void startSimulation(simul*, double*);
+ void startSimulation(simul*, const std::vector<double>&);
bool simulate(READ_INT_TYPE, PairedEndRead&, int&);
void finishSimulation();
//Use it after function 'read' or 'estimateFromReads'
- double* getMW() {
+ const double* getMW() {
assert(mw != NULL);
return mw;
}
fclose(fo);
}
-void PairedEndModel::startSimulation(simul* sampler, double* theta) {
+void PairedEndModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
this->sampler = sampler;
theta_cdf = new double[M + 1];
#include<algorithm>
#include<sstream>
#include<iostream>
+#include<vector>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
- void startSimulation(simul*, double*);
+ void startSimulation(simul*, const std::vector<double>&);
bool simulate(READ_INT_TYPE, PairedEndReadQ&, int&);
void finishSimulation();
//Use it after function 'read' or 'estimateFromReads'
- double* getMW() {
+ const double* getMW() {
assert(mw != NULL);
return mw;
}
fclose(fo);
}
-void PairedEndQModel::startSimulation(simul* sampler, double* theta) {
+void PairedEndQModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
this->sampler = sampler;
theta_cdf = new double[M + 1];
return (bam_aux2i(p) > 0 ? 2 : 0);
}
-
//For paired-end reads, do not print out type 2 reads
inline int SamParser::getReadType(const bam1_t* b, const bam1_t* b2) {
- if ((b->core.flag & 0x0002) && (b2->core.flag & 0x0002)) return 1;
+ if (!(b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) return 1;
+
+ if (!strcmp(rtTag, "")) return 0;
+
+ uint8_t *p = bam_aux_get(b, rtTag);
+ if (p != NULL && bam_aux2i(p) > 0) return 2;
+
+ p = bam_aux_get(b2, rtTag);
+ if (p != NULL && bam_aux2i(p) > 0) return 2;
return 0;
}
#include<algorithm>
#include<sstream>
#include<iostream>
+#include<vector>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
- void startSimulation(simul*, double*);
+ void startSimulation(simul*, const std::vector<double>&);
bool simulate(READ_INT_TYPE, SingleRead&, int&);
void finishSimulation();
- double* getMW() {
+ const double* getMW() {
assert(mw != NULL);
return mw;
}
fclose(fo);
}
-void SingleModel::startSimulation(simul* sampler, double* theta) {
+void SingleModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
this->sampler = sampler;
theta_cdf = new double[M + 1];
#include<algorithm>
#include<sstream>
#include<iostream>
+#include<vector>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
- void startSimulation(simul*, double*);
+ void startSimulation(simul*, const std::vector<double>&);
bool simulate(READ_INT_TYPE, SingleReadQ&, int&);
void finishSimulation();
//Use it after function 'read' or 'estimateFromReads'
- double* getMW() {
+ const double* getMW() {
assert(mw != NULL);
return mw;
}
fclose(fo);
}
-void SingleQModel::startSimulation(simul* sampler, double* theta) {
+void SingleQModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
this->sampler = sampler;
theta_cdf = new double[M + 1];
#include "GroupInfo.h"
#include "Buffer.h"
+
using namespace std;
struct Params {
int no;
FILE *fi;
engine_type *engine;
- double *mw;
+ const double *mw;
};
struct CIType {
double confidence;
int nCV, nSpC, nSamples; // nCV: number of count vectors; nSpC: number of theta vectors sampled per count vector; nSamples: nCV * nSpC
int nThreads;
-int cvlen;
+
+float *l_bars;
char cvsF[STRLEN], tmpF[STRLEN], command[STRLEN];
-CIType *iso_tau, *gene_tau;
+CIType *iso_tpm, *gene_tpm, *iso_fpkm, *gene_fpkm;
int M, m;
Refs refs;
}
void* sample_theta_from_c(void* arg) {
-
int *cvec;
double *theta;
+ float *tpm;
gamma_dist **gammas;
gamma_generator **rgs;
Params *params = (Params*)arg;
FILE *fi = params->fi;
- double *mw = params->mw;
+ const double *mw = params->mw;
- cvec = new int[cvlen];
- theta = new double[cvlen];
- gammas = new gamma_dist*[cvlen];
- rgs = new gamma_generator*[cvlen];
-
- float **vecs = new float*[nSpC];
- for (int i = 0; i < nSpC; i++) vecs[i] = new float[cvlen];
+ cvec = new int[M + 1];
+ theta = new double[M + 1];
+ gammas = new gamma_dist*[M + 1];
+ rgs = new gamma_generator*[M + 1];
+ tpm = new float[M + 1];
+ float l_bar; // the mean transcript length over the sample
int cnt = 0;
while (fscanf(fi, "%d", &cvec[0]) == 1) {
- for (int j = 1; j < cvlen; j++) assert(fscanf(fi, "%d", &cvec[j]) == 1);
+ for (int j = 1; j <= M; j++) assert(fscanf(fi, "%d", &cvec[j]) == 1);
++cnt;
- for (int j = 0; j < cvlen; j++) {
+ for (int j = 0; j <= M; j++) {
gammas[j] = new gamma_dist(cvec[j]);
rgs[j] = new gamma_generator(*(params->engine), *gammas[j]);
}
for (int i = 0; i < nSpC; i++) {
double sum = 0.0;
- for (int j = 0; j < cvlen; j++) {
- theta[j] = ((j == 0 || eel[j] >= EPSILON) ? (*rgs[j])() : 0.0);
+ for (int j = 0; j <= M; j++) {
+ theta[j] = ((j == 0 || (eel[j] >= EPSILON && mw[j] >= EPSILON)) ? (*rgs[j])() / mw[j] : 0.0); // explicit parens: && binds tighter than ||; grouping unchanged
sum += theta[j];
}
assert(sum >= EPSILON);
- for (int j = 0; j < cvlen; j++) theta[j] /= sum;
+ for (int j = 0; j <= M; j++) theta[j] /= sum;
sum = 0.0;
- for (int j = 0; j < cvlen; j++) {
- theta[j] = (mw[j] < EPSILON ? 0.0 : theta[j] / mw[j]);
- sum += theta[j];
- }
- assert(sum >= EPSILON);
- for (int j = 0; j < cvlen; j++) theta[j] /= sum;
-
-
- sum = 0.0;
- vecs[i][0] = theta[0];
- for (int j = 1; j < cvlen; j++)
+ tpm[0] = 0.0;
+ for (int j = 1; j <= M; j++)
if (eel[j] >= EPSILON) {
- vecs[i][j] = theta[j] / eel[j];
- sum += vecs[i][j];
+ tpm[j] = theta[j] / eel[j];
+ sum += tpm[j];
}
else assert(theta[j] < EPSILON);
-
assert(sum >= EPSILON);
- for (int j = 1; j < cvlen; j++) vecs[i][j] /= sum;
+ l_bar = 0.0; // store mean effective length of the sample
+ for (int j = 1; j <= M; j++) { tpm[j] /= sum; l_bar += tpm[j] * eel[j]; tpm[j] *= 1e6; }
+ buffer->write(l_bar, tpm + 1); // omit the first element in tpm
}
- buffer->write(nSpC, vecs);
-
- for (int j = 0; j < cvlen; j++) {
+ for (int j = 0; j <= M; j++) {
delete gammas[j];
delete rgs[j];
}
delete[] theta;
delete[] gammas;
delete[] rgs;
-
- for (int i = 0; i < nSpC; i++) delete[] vecs[i];
- delete[] vecs;
+ delete[] tpm;
return NULL;
}
model.read(modelF);
calcExpectedEffectiveLengths<ModelType>(model);
-
int num_threads = min(nThreads, nCV);
- buffer = new Buffer(nMB, nSamples, cvlen, tmpF);
+ buffer = new Buffer(nMB, nSamples, M, l_bars, tmpF);
paramsArray = new Params[num_threads];
threads = new pthread_t[num_threads];
}
void* calcCI_batch(void* arg) {
- float *itsamples, *gtsamples;
+ float *itsamples, *gtsamples, *ifsamples, *gfsamples;
ifstream fin;
CIParams *ciParams = (CIParams*)arg;
itsamples = new float[nSamples];
gtsamples = new float[nSamples];
+ ifsamples = new float[nSamples];
+ gfsamples = new float[nSamples];
fin.open(tmpF, ios::binary);
- streampos pos = streampos(gi.spAt(ciParams->start_gene_id)) * nSamples * FLOATSIZE;
+ // minus 1 here for that theta0 is not written!
+ streampos pos = streampos(gi.spAt(ciParams->start_gene_id) - 1) * nSamples * FLOATSIZE;
fin.seekg(pos, ios::beg);
int cnt = 0;
for (int i = ciParams->start_gene_id; i < ciParams->end_gene_id; i++) {
int b = gi.spAt(i), e = gi.spAt(i + 1);
memset(gtsamples, 0, FLOATSIZE * nSamples);
+ memset(gfsamples, 0, FLOATSIZE * nSamples);
for (int j = b; j < e; j++) {
for (int k = 0; k < nSamples; k++) {
fin.read((char*)(&itsamples[k]), FLOATSIZE);
gtsamples[k] += itsamples[k];
+ ifsamples[k] = 1e3 / l_bars[k] * itsamples[k];
+ gfsamples[k] += ifsamples[k];
}
- calcCI(nSamples, itsamples, iso_tau[j].lb, iso_tau[j].ub);
+ calcCI(nSamples, itsamples, iso_tpm[j].lb, iso_tpm[j].ub);
+ calcCI(nSamples, ifsamples, iso_fpkm[j].lb, iso_fpkm[j].ub);
+ }
+
+ if (e - b > 1) {
+ calcCI(nSamples, gtsamples, gene_tpm[i].lb, gene_tpm[i].ub);
+ calcCI(nSamples, gfsamples, gene_fpkm[i].lb, gene_fpkm[i].ub);
+ }
+ else {
+ gene_tpm[i].lb = iso_tpm[b].lb; gene_tpm[i].ub = iso_tpm[b].ub;
+ gene_fpkm[i].lb = iso_fpkm[b].lb; gene_fpkm[i].ub = iso_fpkm[b].ub;
}
- calcCI(nSamples, gtsamples, gene_tau[i].lb, gene_tau[i].ub);
++cnt;
if (verbose && cnt % 1000 == 0) { printf("In thread %d, %d genes are processed for CI calculation!\n", ciParams->no, cnt); }
char outF[STRLEN];
int num_threads = nThreads;
- iso_tau = new CIType[M + 1];
- gene_tau = new CIType[m];
+ iso_tpm = new CIType[M + 1];
+ gene_tpm = new CIType[m];
+ iso_fpkm = new CIType[M + 1];
+ gene_fpkm = new CIType[m];
assert(M > 0);
int quotient = M / num_threads;
sprintf(outF, "%s.iso_res", imdName);
fo = fopen(outF, "a");
for (int i = 1; i <= M; i++)
- fprintf(fo, "%.6g%c", iso_tau[i].lb, (i < M ? '\t' : '\n'));
+ fprintf(fo, "%.6g%c", iso_tpm[i].lb, (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.6g%c", iso_tpm[i].ub, (i < M ? '\t' : '\n'));
+ for (int i = 1; i <= M; i++)
+ fprintf(fo, "%.6g%c", iso_fpkm[i].lb, (i < M ? '\t' : '\n'));
for (int i = 1; i <= M; i++)
- fprintf(fo, "%.6g%c", iso_tau[i].ub, (i < M ? '\t' : '\n'));
+ fprintf(fo, "%.6g%c", iso_fpkm[i].ub, (i < M ? '\t' : '\n'));
fclose(fo);
//gene level results
sprintf(outF, "%s.gene_res", imdName);
fo = fopen(outF, "a");
for (int i = 0; i < m; i++)
- fprintf(fo, "%.6g%c", gene_tau[i].lb, (i < m - 1 ? '\t' : '\n'));
+ fprintf(fo, "%.6g%c", gene_tpm[i].lb, (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.6g%c", gene_tpm[i].ub, (i < m - 1 ? '\t' : '\n'));
+ for (int i = 0; i < m; i++)
+ fprintf(fo, "%.6g%c", gene_fpkm[i].lb, (i < m - 1 ? '\t' : '\n'));
for (int i = 0; i < m; i++)
- fprintf(fo, "%.6g%c", gene_tau[i].ub, (i < m - 1 ? '\t' : '\n'));
+ fprintf(fo, "%.6g%c", gene_fpkm[i].ub, (i < m - 1 ? '\t' : '\n'));
fclose(fo);
- delete[] iso_tau;
- delete[] gene_tau;
+ delete[] iso_tpm;
+ delete[] gene_tpm;
+ delete[] iso_fpkm;
+ delete[] gene_fpkm;
if (verbose) { printf("All credibility intervals are calculated!\n"); }
}
m = gi.getm();
nSamples = nCV * nSpC;
- cvlen = M + 1;
- assert(nSamples > 0 && cvlen > 1); // for Buffter.h: (bufsize_type)nSamples
+ assert(nSamples > 0 && M > 0); // for Buffer.h: (bufsize_type)nSamples
+ l_bars = new float[nSamples];
sprintf(tmpF, "%s.tmp", imdName);
sprintf(cvsF, "%s.countvectors", imdName);
// Phase II
calculate_credibility_intervals(imdName);
+ delete[] l_bars; // allocated with new float[nSamples], so array delete is required
/*
sprintf(command, "rm -f %s", tmpF);
int status = system(command);
+++ /dev/null
-#include<cstdio>
-#include<cctype>
-#include<cstring>
-#include<cstdlib>
-#include<cassert>
-#include<fstream>
-#include<iomanip>
-#include<string>
-#include<vector>
-#include<algorithm>
-using namespace std;
-
-typedef unsigned int INTEGER;
-
-const int STRLEN = 1005;
-
-INTEGER M;
-int k; // k-mer size
-vector<string> names;
-vector<string> seqs;
-vector<INTEGER> effL;
-
-// tid starts from 1
-struct ReadType {
- INTEGER tid, pos;
-
- ReadType(INTEGER tid, INTEGER pos) {
- this->tid = tid;
- this->pos = pos;
- }
-
- bool operator< (const ReadType& o) const {
- string& a = seqs[tid];
- string& b = seqs[o.tid];
- for (int i = 0; i < k; i++) {
- if (a[pos + i] != b[o.pos + i]) {
- return a[pos + i] < b[o.pos + i];
- }
- }
- return tid < o.tid;
- }
-
- bool seq_equal(const ReadType& o) const {
- string& a = seqs[tid];
- string& b = seqs[o.tid];
- for (int i = 0; i < k; i++)
- if (a[pos + i] != b[o.pos + i]) return false;
- return true;
- }
-};
-
-vector<ReadType> cands;
-vector<double> clusteringInfo;
-
-string convert(const string& rawseq) {
- int size = (int)rawseq.size();
- string seq = rawseq;
- for (int i = 0; i < size; i++) {
- seq[i] = toupper(rawseq[i]);
- if (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G' && seq[i] != 'T') seq[i] = 'N';
- }
- return seq;
-}
-
-void loadRef(char* inpF) {
- ifstream fin(inpF);
- string tag, line, rawseq;
- void *pt;
-
- assert(fin.is_open());
-
- names.clear(); names.push_back("");
- seqs.clear(); seqs.push_back("");
-
- pt = getline(fin, line);
- while (pt != 0 && line[0] == '>') {
- tag = line.substr(1);
- rawseq = "";
- while((pt = getline(fin, line)) && line[0] != '>') {
- rawseq += line;
- }
- if (rawseq.size() <= 0) {
- printf("Warning: Fasta entry %s has an empty sequence! It is omitted!\n", tag.c_str());
- continue;
- }
- names.push_back(tag);
- seqs.push_back(convert(rawseq));
- }
-
- fin.close();
-
- M = names.size() - 1;
-
- printf("The reference is loaded.\n");
-}
-
-int main(int argc, char* argv[]) {
- if (argc != 4) {
- printf("Usage: rsem-for-ebseq-calculate-clustering-info k input_reference_fasta_file output_file\n");
- exit(-1);
- }
-
- k = atoi(argv[1]);
- loadRef(argv[2]);
-
- cands.clear();
- effL.assign(M + 1, 0);
- for (INTEGER i = 1; i <= M; i++) {
- effL[i] = seqs[i].length() - k + 1;
- if (effL[i] <= 0) effL[i] = 0; // effL should be non-negative
- for (INTEGER j = 0; j < effL[i]; j++)
- cands.push_back(ReadType(i, j));
- }
- printf("All possbile %d mers are generated.\n", k);
-
- sort(cands.begin(), cands.end());
- printf("All %d mers are sorted.\n", k);
-
- size_t p = 0;
- clusteringInfo.assign(M + 1, 0.0);
-
- for (size_t i = 1; i <= cands.size(); i++)
- if (i == cands.size() || !cands[p].seq_equal(cands[i])) {
- size_t denominator = i - p;
- size_t q = p;
- for (size_t j = p + 1; j <= i; j++)
- if (j == i || cands[q].tid != cands[j].tid) {
- size_t numerator = j - q;
- //double prob = numerator * 1.0 / denominator;
- //clusteringInfo[cands[q].tid] += (double)numerator * prob * (1.0 - prob);
- if (numerator < denominator) clusteringInfo[cands[q].tid] += numerator;
- q = j;
- }
- p = i;
- }
-
- for (INTEGER i = 1; i <= M; i++)
- if (effL[i] == 0) clusteringInfo[i] = -1.0;
- else clusteringInfo[i] /= effL[i];
-
- printf("Clustering information is calculated.\n");
-
-
- ofstream fout(argv[3]);
- for (INTEGER i = 1; i <= M; i++) fout<<names[i]<<"\t"<<setprecision(6)<<clusteringInfo[i]<<endl;
- fout.close();
-
- return 0;
-}
CC = g++
CFLAGS = -Wall -c -I.
COFLAGS = -Wall -O3 -ffast-math -c -I.
-PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth rsem-sam-validator rsem-scan-for-paired-end-reads rsem-for-ebseq-calculate-clustering-info
-
+PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth rsem-sam-validator rsem-scan-for-paired-end-reads
all : $(PROGRAMS)
rsem-scan-for-paired-end-reads : sam/bam.h sam/sam.h my_assert.h scanForPairedEndReads.cpp sam/libbam.a
$(CC) -O3 -Wall scanForPairedEndReads.cpp sam/libbam.a -lz -o $@
-rsem-for-ebseq-calculate-clustering-info : calcClusteringInfo.cpp
- $(CC) -O3 -Wall calcClusteringInfo.cpp -o $@
-
-clean:
+clean :
rm -f *.o *~ $(PROGRAMS)
cd sam ; ${MAKE} clean
-
+ cd EBSeq ; ${MAKE} clean
my $read_type = 1; # default, single end with qual
+my @transcript_title = ("transcript_id", "gene_id", "length", "effective_length", "expected_count", "TPM", "FPKM", "IsoPct", "pme_expected_count", "pme_TPM", "pme_FPKM", "IsoPct_from_pme_TPM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
+
+my @gene_title = ("gene_id", "transcript_id(s)", "length", "effective_length", "expected_count", "TPM", "FPKM", "pme_expected_count", "pme_TPM", "pme_FPKM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
+
my $bowtie_path = "";
my $C = 2;
my $E = 99999999;
&runCommand($command);
-&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
-&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+&collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+&collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
if ($genBamF) {
$command = $dir."sam/samtools sort $sampleName.transcript.bam $sampleName.transcript.sorted";
if ($calcCI) {
system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
system("mv $sampleName.genes.results $imdName.genes.results.bak1");
- &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
- &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+ &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+ &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
$command = $dir."rsem-calculate-credibility-intervals $refName $imdName $statName $CONFIDENCE $NCV $NSPC $NMB";
$command .= " -p $nThreads";
system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2");
system("mv $sampleName.genes.results $imdName.genes.results.bak2");
- &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
- &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+ &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+ &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
}
if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; }
}
print "\n";
}
-
+
# inpF, outF
sub collectResults {
my $local_status;
my ($inpF, $outF);
- my (@results, @ids) = ();
+ my @results = ();
my $line;
- my $cnt;
- $inpF = $_[0];
- $outF = $_[1];
+ $inpF = $_[1];
+ $outF = $_[2];
$local_status = open(INPUT, $inpF);
if ($local_status == 0) { print "Fail to open file $inpF!\n"; exit(-1); }
- $cnt = 0;
@results = ();
while ($line = <INPUT>) {
- ++$cnt;
chomp($line);
my @local_arr = split(/\t/, $line);
- if ($cnt == 4) { @ids = @local_arr; }
- else { push(@results, \@local_arr); }
+ push(@results, \@local_arr);
}
-
- push(@results, \@ids);
+
close(INPUT);
$local_status = open(OUTPUT, ">$outF");
my $n = scalar(@results);
my $m = scalar(@{$results[0]});
+
+ $" = "\t";
+
+ my @out_arr = ();
+ for (my $i = 0; $i < $n; $i++) {
+ if ($_[0] eq "isoform") { push(@out_arr, $transcript_title[$i]); }
+ elsif ($_[0] eq "gene") { push(@out_arr, $gene_title[$i]); }
+ else { print "A bug on 'collectResults' is detected!\n"; exit(-1); }
+ }
+ print OUTPUT "@out_arr\n";
+
for (my $i = 0; $i < $m; $i++) {
- my @out_arr = ();
+ @out_arr = ();
for (my $j = 0; $j < $n; $j++) { push(@out_arr, $results[$j][$i]); }
- $" = "\t";
print OUTPUT "@out_arr\n";
}
+
close(OUTPUT);
}
-
__END__
=head1 NAME
=over
-=item B<sample_name.genes.results>
+=item B<sample_name.isoforms.results>
-File containing gene level expression estimates. The format of each
-line in this file is:
+File containing isoform level expression estimates. The first line
+contains column names separated by the tab character. The format of
+each line in the rest of this file is:
-gene_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] transcript_id_list
+transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct [pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound]
Fields are separated by the tab character. Fields within "[]" are only
-presented if '--calc-ci' is set. pme stands for posterior mean
-estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
-means the lower bound of the credibility intervals, ci_upper_bound(u)
-means the upper bound of the credibility intervals. So the credibility
-interval is [l, u]. 'transcript_id_list' is a space-separated list of
-transcript_ids belonging to the gene. If no gene information is
-provided, this file has the same content as
-'sample_name.isoforms.results'.
+presented if '--calc-ci' is set.
-=item B<sample_name.isoforms.results>
+'transcript_id' is the transcript name of this transcript. 'gene_id'
+is the gene name of the gene which this transcript belongs to (denote
+this gene as its parent gene). If no gene information is provided,
+'gene_id' and 'transcript_id' are the same.
+
+'length' is this transcript's sequence length (poly(A) tail is not
+counted). 'effective_length' counts only the positions that can
+generate a valid fragment. If no poly(A) tail is added,
+'effective_length' is equal to transcript length - mean fragment
+length + 1. If one transcript's effective length is less than 1, both
+its effective length and abundance estimates are set to 0.
-File containing isoform level expression values. The format of each
-line in this file is:
+'expected_count' is the sum of the posterior probability that each
+read comes from this transcript, taken over all reads. Because 1) each
+read aligning to this transcript has a probability of being generated
+from background noise; 2) RSEM may filter some alignable low quality
+reads, the sum of expected counts for all transcripts is generally
+less than the total number of reads aligned.
+
+'TPM' stands for Transcripts Per Million. It is a relative measure of
+transcript abundance. The sum of all transcripts' TPM is 1
+million. 'FPKM' stands for Fragments Per Kilobase of transcript per
+Million mapped reads. It is another relative measure of transcript
+abundance. If we define l_bar to be the mean transcript length in a
+sample, which can be calculated as
+
+l_bar = \sum_i TPM_i / 10^6 * effective_length_i (i goes through every transcript),
+
+the following equation holds:
+
+FPKM_i = 10^3 / l_bar * TPM_i.
+
+We can see that the sum of FPKM is not a constant across samples.
+
+'IsoPct' stands for isoform percentage. It is the percentage of this
+transcript's abundance over its parent gene's abundance. If its parent
+gene has only one isoform or the gene information is not provided,
+this field will be set to 100.
+
+'pme_expected_count', 'pme_TPM', 'pme_FPKM' are posterior mean
+estimates calculated by RSEM's Gibbs sampler. 'IsoPct_from_pme_TPM' is
+the isoform percentage calculated from 'pme_TPM' values.
+
+'TPM_ci_lower_bound', 'TPM_ci_upper_bound', 'FPKM_ci_lower_bound' and
+'FPKM_ci_upper_bound' are lower(l) and upper(u) bounds of 95%
+credibility intervals for TPM and FPKM values. The bounds are
+inclusive (i.e. [l, u]).
+
+=item B<sample_name.genes.results>
+
+File containing gene level expression estimates. The first line
+contains column names separated by the tab character. The format of
+each line in the rest of this file is:
+
+gene_id transcript_id(s) length effective_length expected_count TPM FPKM [pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound]
+
+Fields are separated by the tab character. Fields within "[]" are only
+presented if '--calc-ci' is set.
-transcript_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] gene_id
+'transcript_id(s)' is a comma-separated list of transcript_ids
+belonging to this gene. If no gene information is provided, 'gene_id'
+and 'transcript_id(s)' are identical (the 'transcript_id').
-Fields are separated by the tab character. 'gene_id' is the gene_id of
-the gene which this transcript belongs to. If no gene information is
-provided, 'gene_id' and 'transcript_id' are the same.
+A gene's 'length' and 'effective_length' are
+defined as the weighted average of its transcripts' lengths and
+effective lengths (weighted by 'IsoPct'). A gene's abundance estimates
+are just the sum of its transcripts' abundance estimates.
=item B<sample_name.transcript.bam, sample_name.transcript.sorted.bam and sample_name.transcript.sorted.bam.bai>
+++ /dev/null
-#!/usr/bin/env Rscript
-
-argv <- commandArgs(TRUE)
-if (length(argv) != 2) {
- cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
- q(status = 1)
-}
-
-data <- read.table(file = argv[1], stringsAsFactors = F)
-idx <- data[,2] >= 0
-kmr <- kmeans(data[idx, 2], 3)
-order <- order(kmr$centers)
-
-ngvec <- rep(0, length(idx))
-ngvec[idx] <- order[kmr$cluster]
-ngvec[!idx] <- 3
-
-write.table(ngvec, file = argv[2], row.names = F, col.names = F)
exit(-1);
}
+my $offsite = 4; # for new file formats
+
my $line;
my $n = scalar(@ARGV);
my $M = -1;
while ($line = <INPUT>) {
chomp($line);
my @fields = split(/\t/, $line);
- push(@sample, $fields[1]);
+ push(@sample, $fields[$offsite]);
}
close(INPUT);
if (scalar(@sample) == 0) {
my ($fn, $dir, $suf) = fileparse($0);
my $command = "";
-$command = $dir."rsem-for-ebseq-calculate-clustering-info $k $ARGV[0] $ARGV[1].ump";
+$command = $dir."EBSeq/rsem-for-ebseq-calculate-clustering-info $k $ARGV[0] $ARGV[1].ump";
&runCommand($command);
-$command = $dir."rsem-for-ebseq-generate-ngvector-from-clustering-info $ARGV[1].ump $ARGV[1].ngvec";
+$command = $dir."EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info $ARGV[1].ump $ARGV[1].ngvec";
&runCommand($command);
# command, {err_msg}
#include<vector>
#include "utils.h"
-
+#include "my_assert.h"
#include "Read.h"
#include "SingleRead.h"
#include "SingleReadQ.h"
using namespace std;
+const int OFFSITE = 5;
+
READ_INT_TYPE N;
int model_type, M, m;
GroupInfo gi;
Transcripts transcripts;
-double *theta, *counts;
vector<double> eel;
+vector<double> theta, counts;
int n_os;
ostream *os[2];
template<class ModelType>
void calcExpectedEffectiveLengths(ModelType& model) {
- int lb, ub, span;
- double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
+ int lb, ub, span;
+ double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
- model.getGLD().copyTo(pdf, cdf, lb, ub, span);
- clen = new double[span + 1];
- clen[0] = 0.0;
- for (int i = 1; i <= span; i++) {
- clen[i] = clen[i - 1] + pdf[i] * (lb + i);
- }
-
- eel.clear();
- eel.resize(M + 1, 0.0);
- for (int i = 1; i <= M; i++) {
- int totLen = refs.getRef(i).getTotLen();
- int fullLen = refs.getRef(i).getFullLen();
- int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
- int pos2 = max(min(totLen, ub) - lb, 0);
-
- if (pos2 == 0) { eel[i] = 0.0; continue; }
+ model.getGLD().copyTo(pdf, cdf, lb, ub, span);
+ clen = new double[span + 1];
+ clen[0] = 0.0;
+ for (int i = 1; i <= span; i++) {
+ clen[i] = clen[i - 1] + pdf[i] * (lb + i);
+ }
+
+ eel.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++) {
+ int totLen = refs.getRef(i).getTotLen();
+ int fullLen = refs.getRef(i).getFullLen();
+ int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
+ int pos2 = max(min(totLen, ub) - lb, 0);
+
+ if (pos2 == 0) { eel[i] = 0.0; continue; }
- eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
- assert(eel[i] >= 0);
- if (eel[i] < MINEEL) { eel[i] = 0.0; }
- }
+ eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
+ assert(eel[i] >= 0);
+ if (eel[i] < MINEEL) { eel[i] = 0.0; }
+ }
- delete[] pdf;
- delete[] cdf;
- delete[] clen;
+ delete[] pdf;
+ delete[] cdf;
+ delete[] clen;
}
template<class ReadType, class ModelType>
//generate theta vector
ifstream fin(resultsF);
string line;
- double tau;
+ double tpm;
double denom = 0.0;
+ getline(fin, line); // read the first line, which is just column names
for (int i = 1; i <= M; i++) {
getline(fin, line);
size_t pos = 0;
- for (int j = 0; j < 2; j++) pos = line.find_first_of('\t', pos) + 1;
+ for (int j = 0; j < OFFSITE; j++) pos = line.find_first_of('\t', pos) + 1;
size_t pos2 = line.find_first_of('\t', pos);
if (pos2 == string::npos) pos2 = line.length();
- tau = atof(line.substr(pos, pos2 - pos).c_str());
- theta[i] = tau * eel[i];
+ tpm = atof(line.substr(pos, pos2 - pos).c_str());
+ theta[i] = tpm * eel[i];
denom += theta[i];
}
assert(denom > EPSILON);
cout<< "Total number of resimulation is "<< resimulation_count<< endl;
}
-void writeResFiles(char* outFN) {
- FILE *fo;
+void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
double denom;
+ vector<double> frac;
- //calculate tau values
- double *tau = new double[M + 1];
- memset(tau, 0, sizeof(double) * (M + 1));
+ //calculate fraction of count over all mappabile reads
denom = 0.0;
- for (int i = 1; i <= M; i++)
- if (eel[i] > EPSILON) {
- tau[i] = counts[i] / eel[i];
- denom += tau[i];
- }
- else {
- if (counts[i] > EPSILON) { printf("Warning: An isoform which EEL is less than %.6g gets sampled!\n", MINEEL); }
+ frac.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) {
+ frac[i] = theta[i];
+ denom += frac[i];
+ }
+ general_assert(denom > 0, "No alignable reads?!");
+ for (int i = 1; i <= M; i++) frac[i] /= denom;
+
+ //calculate FPKM
+ fpkm.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+ //calculate TPM
+ tpm.assign(M + 1, 0.0);
+ denom = 0.0;
+ for (int i = 1; i <= M; i++) denom += fpkm[i];
+ for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;
+}
+
+void writeResFiles(char* outFN) {
+ FILE *fo;
+ vector<int> tlens;
+ vector<double> fpkm, tpm, isopct;
+ vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
+
+ for (int i = 1; i <= M; i++)
+ general_assert(eel[i] > EPSILON || counts[i] <= EPSILON, "An isoform whose effecitve length < " + ftos(MINEEL, 6) + " got sampled!");
+
+ calcExpressionValues(counts, eel, tpm, fpkm);
+
+ //calculate IsoPct, etc.
+ isopct.assign(M + 1, 0.0);
+ tlens.assign(M + 1, 0);
+
+ glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
+ gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
+
+ for (int i = 0; i < m; i++) {
+ int b = gi.spAt(i), e = gi.spAt(i + 1);
+ for (int j = b; j < e; j++) {
+ const Transcript& transcript = transcripts.getTranscriptAt(j);
+ tlens[j] = transcript.getLength();
+
+ glens[i] += tlens[j] * tpm[j];
+ gene_eels[i] += eel[j] * tpm[j];
+ gene_counts[i] += counts[j];
+ gene_tpm[i] += tpm[j];
+ gene_fpkm[i] += fpkm[j];
}
- assert(denom > 0.0);
- for (int i = 1; i <= M; i++) tau[i] /= denom;
+
+ if (gene_tpm[i] < EPSILON) continue;
+
+ for (int j = b; j < e; j++)
+ isopct[j] = tpm[j] / gene_tpm[i];
+ glens[i] /= gene_tpm[i];
+ gene_eels[i] /= gene_tpm[i];
+ }
//isoform level
sprintf(isoResF, "%s.sim.isoforms.results", outFN);
fo = fopen(isoResF, "w");
+ fprintf(fo, "transcript_id\tgene_id\tlength\teffective_length\tcount\tTPM\tFPKM\tIsoPct\n");
for (int i = 1; i <= M; i++) {
const Transcript& transcript = transcripts.getTranscriptAt(i);
- fprintf(fo, "%s\t%.2f\t%.15g", transcript.getTranscriptID().c_str(), counts[i], tau[i]);
-
- if (transcript.getLeft() != "") { fprintf(fo, "\t%s", transcript.getLeft().c_str()); }
- fprintf(fo, "\n");
+ fprintf(fo, "%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", transcript.getTranscriptID().c_str(), transcript.getGeneID().c_str(), tlens[i],
+ eel[i], counts[i], tpm[i], fpkm[i], isopct[i] * 1e2);
}
fclose(fo);
//gene level
sprintf(geneResF, "%s.sim.genes.results", outFN);
fo = fopen(geneResF, "w");
+ fprintf(fo, "gene_id\ttranscript_id(s)\tlength\teffective_length\tcount\tTPM\tFPKM\n");
for (int i = 0; i < m; i++) {
- double sum_c = 0.0, sum_tau = 0.0;
int b = gi.spAt(i), e = gi.spAt(i + 1);
- for (int j = b; j < e; j++) {
- sum_c += counts[j];
- sum_tau += tau[j];
- }
const string& gene_id = transcripts.getTranscriptAt(b).getGeneID();
- fprintf(fo, "%s\t%.2f\t%.15g\t", gene_id.c_str(), sum_c, sum_tau);
+ fprintf(fo, "%s\t", gene_id.c_str());
for (int j = b; j < e; j++) {
- fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : '\n'));
+ fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : '\t'));
}
+ fprintf(fo, "%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", glens[i], gene_eels[i], gene_counts[i], gene_tpm[i], gene_fpkm[i]);
}
fclose(fo);
-
- delete[] tau;
}
void releaseOutReadStreams() {
assert(fscanf(fi, "%d", &model_type) == 1);
fclose(fi);
- theta = new double[M + 1];
+ theta.assign(M + 1, 0.0);
theta[0] = atof(argv[4]);
N = atoi(argv[5]);
genOutReadStreams(model_type, argv[6]);
- counts = new double[M + 1];
- memset(counts, 0, sizeof(double) * (M + 1));
+ counts.assign(M + 1, 0.0);
switch(model_type) {
case 0: simulate<SingleRead, SingleModel>(argv[2], argv[3]); break;
writeResFiles(argv[6]);
releaseOutReadStreams();
- delete[] theta;
- delete[] counts;
-
return 0;
}