git.donarmstrong.com Git - rsem.git/commitdiff
changed output format to contain FPKM etc.; fixed a bug for paired-end reads
author Bo Li <bli@cs.wisc.edu>
Mon, 10 Sep 2012 21:22:52 +0000 (16:22 -0500)
committer Bo Li <bli@cs.wisc.edu>
Mon, 10 Sep 2012 21:22:52 +0000 (16:22 -0500)
115 files changed:
BamWriter.h
Buffer.h
EBSeq/DESCRIPTION [deleted file]
EBSeq/EBSeq_1.1.3.tar.gz [new file with mode: 0644]
EBSeq/NAMESPACE [deleted file]
EBSeq/R/CheckNg.R [deleted file]
EBSeq/R/DenNHist.R [deleted file]
EBSeq/R/DenNHistTable.R [deleted file]
EBSeq/R/EBMultiTest.R [deleted file]
EBSeq/R/EBTest.R [deleted file]
EBSeq/R/GeneMultiSimu.R [deleted file]
EBSeq/R/GeneSimu.R [deleted file]
EBSeq/R/GeneSimuAt.R [deleted file]
EBSeq/R/GetData.R [deleted file]
EBSeq/R/GetMultiPP.R [deleted file]
EBSeq/R/GetNg.R [deleted file]
EBSeq/R/GetPP.R [deleted file]
EBSeq/R/GetPatterns.R [deleted file]
EBSeq/R/IsoSimu.R [deleted file]
EBSeq/R/IsoSimuAt.R [deleted file]
EBSeq/R/Likefun.R [deleted file]
EBSeq/R/LikefunMulti.R [deleted file]
EBSeq/R/LikefunMultiDVDP.R [deleted file]
EBSeq/R/LikefunMultiEMP.R [deleted file]
EBSeq/R/LogN.R [deleted file]
EBSeq/R/LogNMulti.R [deleted file]
EBSeq/R/LogNMultiDVDP.R [deleted file]
EBSeq/R/LogNMultiEMP.R [deleted file]
EBSeq/R/MedianNorm.R [deleted file]
EBSeq/R/MergeGene.R [deleted file]
EBSeq/R/MergeIso.R [deleted file]
EBSeq/R/PlotFDTP.R [deleted file]
EBSeq/R/PlotFPTP.R [deleted file]
EBSeq/R/PlotPattern.R [deleted file]
EBSeq/R/PlotTopCts.R [deleted file]
EBSeq/R/PolyFitPlot.R [deleted file]
EBSeq/R/PoolMatrix.R [deleted file]
EBSeq/R/PostFC.R [deleted file]
EBSeq/R/QQP.R [deleted file]
EBSeq/R/QuantileNorm.R [deleted file]
EBSeq/R/RankNorm.R [deleted file]
EBSeq/R/TPFDRplot.R [deleted file]
EBSeq/R/TopCts.R [deleted file]
EBSeq/R/beta.mom.R [deleted file]
EBSeq/R/crit_fun.R [deleted file]
EBSeq/R/f0.R [deleted file]
EBSeq/R/f1.R [deleted file]
EBSeq/blockmodeling_0.1.8.tar.gz [new file with mode: 0644]
EBSeq/calcClusteringInfo.cpp [new file with mode: 0644]
EBSeq/data/GeneEBresultGouldBart2.rda [deleted file]
EBSeq/data/GeneMat.rda [deleted file]
EBSeq/data/IsoEBresultGouldBart2.rda [deleted file]
EBSeq/data/IsoList.rda [deleted file]
EBSeq/data/MultiGeneMat.rda [deleted file]
EBSeq/data/datalist [deleted file]
EBSeq/demo/EBSeq.R [deleted file]
EBSeq/inst/doc/EBSeq_Vignette.pdf [deleted file]
EBSeq/makefile [new file with mode: 0644]
EBSeq/man/CheckNg.Rd [deleted file]
EBSeq/man/DenNHist.Rd [deleted file]
EBSeq/man/DenNHistTable.Rd [deleted file]
EBSeq/man/EBMultiTest.Rd [deleted file]
EBSeq/man/EBSeq_NingLeng-package.Rd [deleted file]
EBSeq/man/EBTest.Rd [deleted file]
EBSeq/man/GeneEBresultGouldBart2.Rd [deleted file]
EBSeq/man/GeneMultiSimu.Rd [deleted file]
EBSeq/man/GeneSimu.Rd [deleted file]
EBSeq/man/GeneSimuAt.Rd [deleted file]
EBSeq/man/GetData.Rd [deleted file]
EBSeq/man/GetMultiPP.Rd [deleted file]
EBSeq/man/GetNg.Rd [deleted file]
EBSeq/man/GetPP.Rd [deleted file]
EBSeq/man/GetPatterns.Rd [deleted file]
EBSeq/man/IsoEBresultGouldBart2.Rd [deleted file]
EBSeq/man/IsoSimu.Rd [deleted file]
EBSeq/man/IsoSimuAt.Rd [deleted file]
EBSeq/man/Likefun.Rd [deleted file]
EBSeq/man/LikefunMulti.Rd [deleted file]
EBSeq/man/LogN.Rd [deleted file]
EBSeq/man/LogNMulti.Rd [deleted file]
EBSeq/man/MedianNorm.Rd [deleted file]
EBSeq/man/MergeGene.Rd [deleted file]
EBSeq/man/MergeIso.Rd [deleted file]
EBSeq/man/PlotFDTP.Rd [deleted file]
EBSeq/man/PlotFPTP.Rd [deleted file]
EBSeq/man/PlotPattern.Rd [deleted file]
EBSeq/man/PlotTopCts.Rd [deleted file]
EBSeq/man/PolyFitPlot.Rd [deleted file]
EBSeq/man/PoolMatrix.Rd [deleted file]
EBSeq/man/PostFC.Rd [deleted file]
EBSeq/man/QQP.Rd [deleted file]
EBSeq/man/QuantileNorm.Rd [deleted file]
EBSeq/man/RankNorm.Rd [deleted file]
EBSeq/man/TPFDRplot.Rd [deleted file]
EBSeq/man/TopCts.Rd [deleted file]
EBSeq/man/beta.mom.Rd [deleted file]
EBSeq/man/crit_fun.Rd [deleted file]
EBSeq/man/f0.Rd [deleted file]
EBSeq/man/f1.Rd [deleted file]
EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info [new file with mode: 0755]
EM.cpp
Gibbs.cpp
PairedEndModel.h
PairedEndQModel.h
SamParser.h
SingleModel.h
SingleQModel.h
calcCI.cpp
calcClusteringInfo.cpp [deleted file]
makefile
rsem-calculate-expression
rsem-for-ebseq-generate-ngvector-from-clustering-info [deleted file]
rsem-form-counts-matrix
rsem-generate-ngvector
simulation.cpp

diff --git a/BamWriter.h b/BamWriter.h
index f664710245ff265e184f922c642c0d3efc74a29a..bbdd2983490fc147ab64e1a26106922ba170d077 100644 (file)
--- a/BamWriter.h
+++ b/BamWriter.h
@@ -125,24 +125,24 @@ void BamWriter::work(HitWrapper<PairedEndHit> wrapper) {
                bool notgood = (b->core.flag & 0x0004) || (b2->core.flag & 0x0004);
 
                if (!notgood) {
-                 //swap if b is mate 2
-                 if (b->core.flag & 0x0080) {
-                   assert(b2->core.flag & 0x0040);
-                   bam1_t *tmp = b;
-                   b = b2; b2 = tmp;
-                 }
+                       //swap if b is mate 2
+                       if (b->core.flag & 0x0080) {
+                               assert(b2->core.flag & 0x0040);
+                               bam1_t *tmp = b;
+                               b = b2; b2 = tmp;
+                       }
 
-                 hit = wrapper.getNextHit();
-                 assert(hit != NULL);
+                       hit = wrapper.getNextHit();
+                       assert(hit != NULL);
 
-                 assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
-                 assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
+                       assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
+                       assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
 
-                 convert(b, hit->getConPrb());
-                 convert(b2, hit->getConPrb());
+                       convert(b, hit->getConPrb());
+                       convert(b2, hit->getConPrb());
 
-                 b->core.mpos = b2->core.pos;
-                 b2->core.mpos = b->core.pos;
+                       b->core.mpos = b2->core.pos;
+                       b2->core.mpos = b->core.pos;
                }
 
                /*
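
[Editor's note] The BamWriter.h hunk above is a re-indentation of the paired-end mate-handling block; its logic is unchanged. The flag bits it tests are standard SAM flags: 0x0004 (segment unmapped), 0x0040 (first mate), 0x0080 (second mate). Below is a minimal C++ sketch of the same idea, condensed into a hypothetical helper; `normalize_pair` is illustrative and not part of RSEM, and the include path for `bam1_t` is an assumption that may differ in this tree.

```cpp
// Sketch only (not RSEM code): mate normalization and mate-position
// cross-assignment, as performed in the hunk above.
#include <cassert>
#include <utility>
#include "sam/bam.h"   // bam1_t; header path is an assumption

static void normalize_pair(bam1_t *&b, bam1_t *&b2) {
	// Skip pairs where either end is unmapped (flag 0x0004), as the hunk does.
	if ((b->core.flag & 0x0004) || (b2->core.flag & 0x0004)) return;

	// If b carries the "second mate" bit (0x0080), swap so b is always mate 1.
	if (b->core.flag & 0x0080) {
		assert(b2->core.flag & 0x0040);   // then b2 must carry the "first mate" bit
		std::swap(b, b2);
	}

	// Each record's mate-position field must point at its partner's position,
	// which is what the two mpos assignments in the hunk restore.
	b->core.mpos = b2->core.pos;
	b2->core.mpos = b->core.pos;
}
```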
diff --git a/Buffer.h b/Buffer.h
index 50177960a94a2508b0df9a5c2c590502b23954f2..3e450942509b8cfe52b544bbcc6533bd06c37e59 100644 (file)
--- a/Buffer.h
+++ b/Buffer.h
@@ -12,12 +12,13 @@ const int FLOATSIZE = sizeof(float);
 
 class Buffer {
 public:
-       Buffer(int nMB, int nSamples, int cvlen, const char* tmpF) {
+       // in_mem_arr must be allocated memory before the Buffer is constructed
+       Buffer(int nMB, int nSamples, int vlen, float* in_mem_arr, const char* tmpF) {
                cpos = 0;
-               size = bufsize_type(nMB) * 1024 * 1024 / FLOATSIZE / cvlen;
+               size = bufsize_type(nMB) * 1024 * 1024 / FLOATSIZE / vlen;
                if (size > (bufsize_type)nSamples) size = nSamples;
                general_assert(size > 0, "Memory allocated for credibility intervals is not enough!");
-               size *= cvlen;
+               size *= vlen;
 
                buffer = new float[size];
                ftmpOut.open(tmpF, std::ios::binary);
@@ -25,7 +26,8 @@ public:
 
                fr = to = 0;
                this->nSamples = nSamples;
-               this->cvlen = cvlen;
+               this->vlen = vlen;
+               this->in_mem_arr = in_mem_arr;
        }
 
        ~Buffer() {
@@ -36,14 +38,13 @@ public:
                ftmpOut.close();
        }
 
-       void write(int n, float **vecs) {
+       void write(float value, float *vec) {
                pthread_assert(pthread_mutex_lock(&lock), "pthread_mutex_lock", "Error occurred while acquiring the lock!");
-               for (int i = 0; i < n; i++) {
-                       if (size - cpos < bufsize_type(cvlen)) flushToTempFile();
-                       memcpy(buffer + cpos, vecs[i], FLOATSIZE * cvlen);
-                       cpos += cvlen;
-                       ++to;
-               }
+               if (size - cpos < bufsize_type(vlen)) flushToTempFile();
+               in_mem_arr[to] = value;
+               memcpy(buffer + cpos, vec, FLOATSIZE * vlen);
+               cpos += vlen;
+               ++to;
                pthread_assert(pthread_mutex_unlock(&lock), "pthread_mutex_unlock", "Error occurred while releasing the lock!");
        }
 
@@ -51,11 +52,12 @@ private:
        bufsize_type size, cpos; // cpos : current position
 
        float *buffer;
+       float *in_mem_arr;
        std::ofstream ftmpOut;
        pthread_mutex_t lock;
 
        int fr, to; // each flush, sample fr .. to - 1
-       int nSamples, cvlen;
+       int nSamples, vlen; // vlen : vector length
 
        void flushToTempFile() {
                std::streampos gap1 = std::streampos(fr) * FLOATSIZE;
@@ -63,12 +65,12 @@ private:
                float *p = NULL;
 
                ftmpOut.seekp(0, std::ios::beg);
-               for (int i = 0; i < cvlen; i++) {
+               for (int i = 0; i < vlen; i++) {
                        p = buffer + i;
                        ftmpOut.seekp(gap1, std::ios::cur);
                        for (int j = fr; j < to; j++) {
                                ftmpOut.write((char*)p, FLOATSIZE);
-                               p += cvlen;
+                               p += vlen;
                        }
                        ftmpOut.seekp(gap2, std::ios::cur);
                }
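
[Editor's note] The Buffer.h change above replaces the batched `write(int n, float **vecs)` with a per-sample `write(float value, float *vec)` that also records one scalar per sample in the caller-owned `in_mem_arr`. Below is a minimal usage sketch under stated assumptions: the function name `sample_and_buffer`, the temp-file name, and the way `value`/`vec` are filled are illustrative and not taken from calcCI.cpp.

```cpp
// Sketch only (not RSEM code): driving the revised Buffer interface shown above.
#include <vector>
#include "Buffer.h"

void sample_and_buffer(int nMB, int nSamples, int vlen) {
	// The caller allocates one float per sample up front; Buffer::write()
	// fills in_mem_arr[i] while streaming the length-vlen vector to the
	// disk-backed buffer.
	std::vector<float> in_mem_arr(nSamples);
	Buffer buffer(nMB, nSamples, vlen, in_mem_arr.data(), "sample_values.tmp");

	std::vector<float> vec(vlen);
	for (int i = 0; i < nSamples; ++i) {
		// ... fill vec with the i-th sampled vector and compute its scalar
		// summary `value` (illustrative; the real computation lives in calcCI.cpp) ...
		float value = 0.0f;
		buffer.write(value, vec.data());
	}
	// ~Buffer() closes the temp file; any samples still held in memory are
	// presumably flushed there first.
}
```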
diff --git a/EBSeq/DESCRIPTION b/EBSeq/DESCRIPTION
deleted file mode 100644 (file)
index 5f61713..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-Package: EBSeq
-Type: Package
-Title: A R package for Gene and Isoform Differential Expression Analysis On RNA-Seq Data
-Version: 1.1
-Date: 2012-4-18
-Author: Ning Leng
-Maintainer: Ning Leng <nleng@wisc.edu>
-Depends:blockmodeling 
-Description: RNA-Seq Differential Expression Analysis on both gene and isoform level
-License: 
-LazyLoad: yes
-Packaged: 2012-04-25 05:25:10 UTC; ningleng
diff --git a/EBSeq/EBSeq_1.1.3.tar.gz b/EBSeq/EBSeq_1.1.3.tar.gz
new file mode 100644 (file)
index 0000000..f0fd121
Binary files /dev/null and b/EBSeq/EBSeq_1.1.3.tar.gz differ
diff --git a/EBSeq/NAMESPACE b/EBSeq/NAMESPACE
deleted file mode 100644 (file)
index 27e0f29..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-export(beta.mom)
-export(CheckNg)
-export(crit_fun)
-export(DenNHist)
-export(DenNHistTable)
-export(EBTest)
-export(f0)
-export(f1)
-export(GeneSimuAt)
-export(GeneSimu)
-export(GetData)
-export(GetNg)
-export(GetPP)
-export(IsoSimuAt)
-export(IsoSimu)
-export(Likefun)
-export(LogN)
-export(MedianNorm)
-export(MergeGene)
-export(MergeIso)
-export(PlotFDTP)
-export(PlotFPTP)
-export(PlotTopCts)
-export(PolyFitPlot)
-export(PoolMatrix)
-export(PostFC)
-export(QQP)
-export(QuantileNorm)
-export(RankNorm)
-export(TopCts)
-export(TPFDRplot)
-export(EBMultiTest)
-export(GeneMultiSimu)
-export(GetMultiPP)
-export(LikefunMulti)
-export(LogNMulti)
-export(GetPatterns)
-export(PlotPattern)
diff --git a/EBSeq/R/CheckNg.R b/EBSeq/R/CheckNg.R
deleted file mode 100644 (file)
index 092f907..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-CheckNg<-function(NewMean, NewVar,nterm, xlim, ylim){
-       Ng=1=PolyFit_ENAR(NewMean[[1]],NewVar[[1]],nterm,"Mean","Variance","Ng=1",xlim, ylim)
-       sortNg1=order(NewMean[[1]])
-       Ng=2=PolyFit_ENAR(unlist(NewMean[c(2,4,6,8)]),unlist(NewVar[c(2,4,6,8)]),nterm,"Mean","Variance","Ng=2",xlim, ylim)
-       sortNg2=order(unlist(NewMean[c(2,4,6,8)]))
-       Ng=3=PolyFit_ENAR(unlist(NewMean[c(3,5,7,9)]),unlist(NewVar[c(3,5,7,9)]),nterm,"Mean","Variance","Ng=3",xlim, ylim)
-       sortNg3=order(unlist(NewMean[c(3,5,7,9)]))
-
-       ALL=PolyFit_ENAR(unlist(NewMean),unlist(NewVar),nterm,"Mean","Variance","",xlim, ylim)
-       lines(log10(unlist(NewMean[c(2,4,6,8)]))[sortNg2],Ng=2$fit[sortNg2],col="green",lwd=2)
-       lines(log10(unlist(NewMean[c(3,5,7,9)]))[sortNg3],Ng=3$fit[sortNg3],col="orange",lwd=2)
-       lines(log10(unlist(NewMean[1]))[sortNg1],Ng=1$fit[sortNg1],col="pink",lwd=2)
-       legend("topleft",col=c("red","pink","green","orange"),c("all","Ng=1","Ng=2","Ng=3"),lwd=2)
-}
-
-
-
-
-
-
-
diff --git a/EBSeq/R/DenNHist.R b/EBSeq/R/DenNHist.R
deleted file mode 100644 (file)
index 76e5d60..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-DenNHist <-
-function(QList,Alpha,Beta,name,AList="F",GroupName)
-{
-    if(!is.list(QList)) QList=list(QList)      
-       for (i in 1:length(QList)){
-               if (AList=="F") alpha.use=Alpha
-                       if(AList=="T")  alpha.use=Alpha[i]
-       hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=paste(GroupName[i],name,sep=" "),xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
-       tmpSize=length(QList[[i]][QList[[i]]<.98])
-        tmpseq=seq(0.001,1,length=1000)
-        #tmpdensity=dbeta(tmpseq,AlphaResult,BetaResult[i])
-        #points(tmpseq,tmpdensity, type="l",col="green")
-       #ll=dbeta(tmpseq,Alpha,Beta[i])
-       ll=tmpseq
-                lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
-       legend("topright",c("Data","Fitted density"),col=c("blue","green"),lwd=2)
-}
-       
-       }
-
diff --git a/EBSeq/R/DenNHistTable.R b/EBSeq/R/DenNHistTable.R
deleted file mode 100644 (file)
index e3a6855..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-DenNHistTable <-
-function(QList,Alpha,Beta,AList="F")
-{      
-       par(mfrow=c(3,4))
-       plot(1, type="n", axes=F, xlab="", ylab="", main="No 3' end  No 5' end",cex.main=1)
-       plot(1, type="n", axes=F, xlab="", ylab="",main="With 3' end No 5' end",cex.main=1)
-       plot(1, type="n", axes=F, xlab="", ylab="",main="With 5' end No 3' end",cex.main=1)
-       for (i in c(1,2,4,6,8)){
-                alpha.use=Alpha
-       hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=ifelse(i==1,"With 5' end With 3' end",""),cex.main=1, xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
-       if(i==1)mtext("Ng=1",side=4, cex=1)
-       if(i==8)mtext("Ng=2", side=4,cex=1)
-       tmpSize=length(QList[[i]][QList[[i]]<.98])
-
-        tmpseq=seq(0.001,1,length=1000)
-       ll=tmpseq
-                lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
-       legend("topright",c("Data","Fitted density"),col=c("blue","green"),lwd=2,cex=.5)
-}
-       
-       for (i in c(3,5,7,9)){
-                alpha.use=Alpha
-       hist(QList[[i]][QList[[i]]<.98&QList[[i]]>0],prob=T,col="blue",breaks=100,main=ifelse(i==1,"With 5' end With 3' end exons",""),xlim=c(0,1),xlab=paste("Q alpha=",round(alpha.use,2)," beta=",round(Beta[i],2),sep=""))
-       if(i==9)mtext("Ng=3", side=4,cex=1)
-
-       tmpSize=length(QList[[i]][QList[[i]]<.98])
-
-        tmpseq=seq(0.001,1,length=1000)
-       ll=tmpseq
-                lines(ll,dbeta(ll,alpha.use,Beta[i]),col="green",lwd=2)
-       legend("topright",c("Data","Fitted density"),col=c("blue","green"),cex=.5, lwd=2)
-}
-
-
-
-
-       }
-
diff --git a/EBSeq/R/EBMultiTest.R b/EBSeq/R/EBMultiTest.R
deleted file mode 100644 (file)
index ab23f87..0000000
+++ /dev/null
@@ -1,336 +0,0 @@
-EBMultiTest <-
-function(Data,NgVector=NULL,Conditions,AllParti=NULL, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
-{
-
-       if(is.null(NgVector))NgVector=rep(1,nrow(Data))
-       if(!is.factor(Conditions))Conditions=as.factor(Conditions)
-
-
-       #ReNameThem
-       IsoNamesIn=rownames(Data)
-       Names=paste("I",c(1:dim(Data)[1]),sep="")
-       names(IsoNamesIn)=Names
-       rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
-       names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
-       
-       # If PossibleCond==NULL, use all combinations
-       NumCond=nlevels(Conditions)
-       CondLevels=levels(Conditions)
-       #library(blockmodeling)
-       if(is.null(AllParti)){
-               AllPartiList=sapply(1:NumCond,function(i)nkpartitions(NumCond,i))
-               AllParti=do.call(rbind,AllPartiList)
-               colnames(AllParti)=CondLevels
-           rownames(AllParti)=paste("Pattern",1:nrow(AllParti),sep="")
-       }
-       if(!length(sizeFactors)==ncol(Data)){
-               rownames(sizeFactors)=rownames(Data)
-               colnames(sizeFactors)=Conditions
-       }
-
-       
-       NoneZeroLength=nlevels(as.factor(NgVector))
-       NameList=sapply(1:NoneZeroLength,function(i)names(NgVector)[NgVector==i],simplify=F)
-       DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
-       names(DataList)=names(NameList)
-    
-       NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList)[i])
-       # Unlist 
-       DataList.unlist=do.call(rbind, DataList)
-
-       # Divide by SampleSize factor
-       
-       if(length(sizeFactors)==ncol(Data))
-       DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
-       
-       if(length(sizeFactors)!=ncol(Data))
-       DataList.unlist.dvd=DataList.unlist/sizeFactors
-       
-       # Pool or Not
-       if(Pool==T){
-       DataforPoolSP.dvd=MeanforPoolSP.dvd=vector("list",NumCond)
-       for(lv in 1:NumCond){
-               DataforPoolSP.dvd[[lv]]=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])   
-               MeanforPoolSP.dvd[[lv]]=rowMeans(DataforPoolSP.dvd[[lv]])
-       }
-       MeanforPool.dvd=rowMeans(DataList.unlist.dvd)
-       NumInBin=floor(dim(DataList.unlist)[1]/NumBin)
-       StartSeq=c(0:(NumBin-1))*NumInBin+1
-       EndSeq=c(StartSeq[-1]-1,dim(DataList.unlist)[1])
-       MeanforPool.dvd.Sort=sort(MeanforPool.dvd,decreasing=T) 
-       MeanforPool.dvd.Order=order(MeanforPool.dvd,decreasing=T)
-       PoolGroups=sapply(1:NumBin,function(i)(names(MeanforPool.dvd.Sort)[StartSeq[i]:EndSeq[i]]),simplify=F)
-       #FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
-       # Use GeoMean of every two-group partition
-       Parti2=nkpartitions(NumCond,2)
-       FCForPoolList=sapply(1:nrow(Parti2),function(i)rowMeans(do.call(cbind,
-                                                       MeanforPoolSP.dvd[Parti2[i,]==1]))/
-                                                       rowMeans(do.call(cbind,MeanforPoolSP.dvd[Parti2[i,]==2])),
-                                                       simplify=F)
-       FCForPoolMat=do.call(cbind,FCForPoolList)
-       FCforPool=apply(FCForPoolMat,1,function(i)exp(mean(log(i))))
-       names(FCforPool)=names(MeanforPool.dvd)
-       FC_Use=names(FCforPool)[which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],PoolLower) & FCforPool<=quantile(FCforPool[!is.na(FCforPool)],PoolUpper))]
-       PoolGroupVar=sapply(1:NumBin,function(i)(mean(apply(matrix(DataList.unlist[PoolGroups[[i]][PoolGroups[[i]]%in%FC_Use],],ncol=ncol(DataList.unlist)),1,var))))   
-       PoolGroupVarInList=sapply(1:NumBin,function(i)(rep(PoolGroupVar[i],length(PoolGroups[[i]]))),simplify=F)
-       PoolGroupVarVector=unlist(PoolGroupVarInList)
-       VarPool=PoolGroupVarVector[MeanforPool.dvd.Order]
-       names(VarPool)=names(MeanforPool.dvd)
-               }
-
-       DataListSP=vector("list",nlevels(Conditions))
-       DataListSP.dvd=vector("list",nlevels(Conditions))
-       SizeFSP=DataListSP
-       MeanSP=DataListSP
-       VarSP=DataListSP
-       GetPSP=DataListSP
-       RSP=DataListSP
-       CISP=DataListSP
-       tauSP=DataListSP
-       
-       NumEachCondLevel=summary(Conditions)
-       if(Pool==F & is.null(CI)) CondLevelsUse=CondLevels[NumEachCondLevel>1]
-       if(Pool==T | !is.null(CI)) CondLevelsUse=CondLevels
-       NumCondUse=length(CondLevelsUse)        
-
-       for (lv in 1:nlevels(Conditions)){
-       DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
-       rownames(DataListSP[[lv]])=rownames(DataList.unlist)
-       DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
-       if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI)){
-       CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
-       tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
-       }
-       # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
-       # may input one for each 
-       if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
-       if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
-               
-       MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
-       
-       if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
-       if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
-
-       if(ncol(DataListSP[[lv]])==1 & Pool==F & !is.null(CI))
-               VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
-       if( Pool==T){
-               VarSP[[lv]]=VarPool     
-               }
-       if(ncol(DataListSP[[lv]])!=1){
-               VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
-               names(VarSP[[lv]])=rownames(DataList.unlist)
-               GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
-           RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
-       }
-       names(MeanSP[[lv]])=rownames(DataList.unlist)
-       }
-
-       # Get Empirical R
-       # POOL R???
-       MeanList=rowMeans(DataList.unlist.dvd)
-       VarList=apply(DataList.unlist.dvd, 1, var)
-       Varcbind=do.call(cbind,VarSP[CondLevels%in%CondLevelsUse])
-       PoolVarSpeedUp_MDFPoi_NoNormVarList=rowMeans(Varcbind)
-       VarrowMin=apply(Varcbind,1,min)
-       GetP=MeanList/PoolVarSpeedUp_MDFPoi_NoNormVarList
-       
-    EmpiricalRList=MeanList*GetP/(1-GetP) 
-       # sep
-       #Rcb=cbind(RSP[[1]],RSP[[2]])
-       #Rbest=apply(Rcb,1,function(i)max(i[!is.na(i) & i!=Inf]))
-       EmpiricalRList[EmpiricalRList==Inf]     =max(EmpiricalRList[EmpiricalRList!=Inf])
-       # fine
-       # 
-       GoodData=names(MeanList)[EmpiricalRList>0 &  VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
-       NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf |  is.na(VarrowMin) | is.na(EmpiricalRList)]
-       #NotIn.BestR=Rbest[NotIn.raw]
-       #NotIn.fix=NotIn.BestR[which(NotIn.BestR>0)]
-       #EmpiricalRList[names(NotIn.fix)]=NotIn.fix
-       #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
-       #GoodData=c(GoodData.raw,names(NotIn.fix))
-       #NotIn=NotIn.raw[!NotIn.raw%in%names(NotIn.fix)]
-       EmpiricalRList.NotIn=EmpiricalRList[NotIn]
-       EmpiricalRList.Good=EmpiricalRList[GoodData]
-       EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
-       if(length(sizeFactors)==ncol(Data))
-       EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)        
-       if(!length(sizeFactors)==ncol(Data))
-       EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
-
-
-       # Only Use Data has Good q's
-       DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
-       DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
-       DataListIn.unlist=do.call(rbind, DataList.In)
-       DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
-       
-       DataListSPIn=vector("list",nlevels(Conditions))
-       DataListSPNotIn=vector("list",nlevels(Conditions))
-       EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
-       for (lv in 1:nlevels(Conditions)){
-               DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
-               if(length(NotIn)>0)     DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
-               rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
-               if(length(NotIn)>0)rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
-               EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
-       }       
-
-       NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
-       NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
-
-       #Initialize SigIn & ...
-       AlphaIn=0.5
-       BetaIn=rep(0.5,NoneZeroLength)
-       PIn=rep(1/nrow(AllParti),nrow(AllParti))
-
-       ####use while to make an infinity round?
-       UpdateAlpha=NULL
-       UpdateBeta=NULL
-       UpdateP=NULL
-       UpdatePFromZ=NULL
-    Timeperround=NULL 
-       for (times in 1:maxround){
-       temptime1=proc.time()
-               UpdateOutput=suppressWarnings(LogNMulti(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,  
-                                                          NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength, AllParti,Conditions))
-       print(paste("iteration", times, "done",sep=" "))
-               AlphaIn=UpdateOutput$AlphaNew
-       BetaIn=UpdateOutput$BetaNew
-       PIn=UpdateOutput$PNew
-               PFromZ=UpdateOutput$PFromZ
-       FOut=UpdateOutput$FGood
-               UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
-               UpdateBeta=rbind(UpdateBeta,BetaIn)
-       UpdateP=rbind(UpdateP,PIn)
-               UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
-               temptime2=proc.time()
-               Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
-               print(paste("time" ,Timeperround[times],sep=" "))
-               Z.output=UpdateOutput$ZEachGood
-               Z.NA.Names=UpdateOutput$zNaNName
-               }
-               #Remove this } after testing!!
-                
-#      if (times!=1){  
-#              if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){ 
-#                      Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
-#                      break
-#        }
-#    }
-#}
-
-##########Change Names############
-## Only z are for Good Ones
-## Others are for ALL Data
-GoodData=GoodData[!GoodData%in%Z.NA.Names]
-IsoNamesIn.Good=as.vector(IsoNamesIn[GoodData])
-RealName.Z.output=Z.output
-RealName.F=FOut
-rownames(RealName.Z.output)=IsoNamesIn.Good
-rownames(RealName.F)=IsoNamesIn.Good
-
-RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
-RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
-RealName.SPMeanList=sapply(1:NoneZeroLength,function(i)sapply(1:length(MeanSP), function(j)MeanSP[[j]][names(MeanSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
-RealName.SPVarList=sapply(1:NoneZeroLength,function(i)sapply(1:length(VarSP), function(j)VarSP[[j]][names(VarSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
-RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
-
-RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
-RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVarSpeedUp_MDFPoi_NoNormVarList[names(PoolVarSpeedUp_MDFPoi_NoNormVarList)%in%NameList[[i]]], simplify=F)
-RealName.QList=sapply(1:NoneZeroLength,function(i)sapply(1:length(GetPSP), function(j)GetPSP[[j]][names(GetPSP[[j]])%in%NameList[[i]]],simplify=F), simplify=F)
-
-
-for (i in 1:NoneZeroLength){
-tmp=NameList[[i]]
-names=IsoNamesIn[tmp]
-RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
-RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
-       for(j in 1:NumCond){
-               RealName.SPMeanList[[i]][[j]]=RealName.SPMeanList[[i]][[j]][NameList[[i]]]
-               if(!is.null(RealName.QList[[i]][[j]])){
-                       RealName.QList[[i]][[j]]=RealName.QList[[i]][[j]][NameList[[i]]]
-                       RealName.SPVarList[[i]][[j]]=RealName.SPVarList[[i]][[j]][NameList[[i]]]
-                       names(RealName.QList[[i]][[j]])=names
-                       names(RealName.SPVarList[[i]][[j]])=names
-               }
-               names(RealName.SPMeanList[[i]][[j]])=names
-       }
-RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
-RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
-RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
-
-names(RealName.MeanList[[i]])=names
-names(RealName.VarList[[i]])=names
-
-names(RealName.EmpiricalRList[[i]])=names
-names(RealName.PoolVarList[[i]])=names
-rownames(RealName.DataList[[i]])=names
-
-}
-
-
-#########posterior part for other data set here later############
-AllNA=unique(c(Z.NA.Names,NotIn))
-AllZ=NULL
-AllF=NULL
-if(length(AllNA)==0){
-       AllZ=RealName.Z.output[IsoNamesIn,]
-       AllF=RealName.F[IsoNamesIn,]
-}
-ZEachNA=NULL
-if (length(AllNA)>0){
-       Ng.NA=NgVector[AllNA]
-       AllNA.Ngorder=AllNA[order(Ng.NA)]
-       NumOfEachGroupNA=rep(0,NoneZeroLength)
-       NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
-       names(NumOfEachGroupNA)=c(1:NoneZeroLength)
-       NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
-       PNotIn=rep(1-Approx,length(AllNA.Ngorder))
-       MeanList.NotIn=MeanList[AllNA.Ngorder]
-       R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn) 
-       if(length(sizeFactors)==ncol(Data))
-       R.NotIn=matrix(outer(R.NotIn.raw,sizeFactors),nrow=length(AllNA.Ngorder))
-       if(!length(sizeFactors)==ncol(Data))
-       R.NotIn=matrix(R.NotIn.raw*sizeFactors[NotIn,],nrow=length(AllNA.Ngorder))
-    
-       DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
-       DataListSPNotInWithZ=vector("list",nlevels(Conditions))
-       RListSPNotInWithZ=vector("list",nlevels(Conditions))
-       for (lv in 1:nlevels(Conditions)) {
-               DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
-               RListSPNotInWithZ[[lv]]=matrix(R.NotIn[,Conditions==levels(Conditions)[lv]],nrow=length(AllNA.Ngorder))
-       }
-       FListNA=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
-                       function(j)f0(do.call(cbind, DataListSPNotInWithZ[AllParti[i,]==j]),AlphaIn, BetaIn,
-                do.call(cbind,RListSPNotInWithZ[AllParti[i,]==j]), NumOfEachGroupNA, log=T)),
-                                                      simplify=F)
-       FPartiLogNA=sapply(FListNA,rowSums)
-       FMatNA=exp(FPartiLogNA)
-       
-       rownames(FMatNA)=rownames(DataListNotIn.unlistWithZ)
-       PMatNA=matrix(rep(1,nrow(DataListNotIn.unlistWithZ)),ncol=1)%*%matrix(PIn,nrow=1)
-       FmultiPNA=FMatNA*PMatNA
-    DenomNA=rowSums(FmultiPNA)
-       ZEachNA=apply(FmultiPNA,2,function(i)i/DenomNA)
-
-       rownames(ZEachNA)=IsoNamesIn[AllNA.Ngorder]
-
-       AllZ=rbind(RealName.Z.output,ZEachNA)
-       AllZ=AllZ[IsoNamesIn,]
-       
-       F.NotIn=FMatNA
-       rownames(F.NotIn)=IsoNamesIn[rownames(FMatNA)]
-       AllF=rbind(RealName.F,F.NotIn)
-       AllF=AllF[IsoNamesIn,]
-
-}
-colnames(AllZ)=rownames(AllParti)
-colnames(AllF)=rownames(AllParti)
-
-#############Result############################
-Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ, 
-                       Z=RealName.Z.output,PoissonZ=ZEachNA, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, 
-                       VarList=RealName.VarList, QList=RealName.QList, SPMean=RealName.SPMeanList, SPEstVar=RealName.SPVarList, 
-                       PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f=AllF, AllParti=AllParti)
-}
-
diff --git a/EBSeq/R/EBTest.R b/EBSeq/R/EBTest.R
deleted file mode 100644 (file)
index 17619ae..0000000
+++ /dev/null
@@ -1,339 +0,0 @@
-EBTest <-
-function(Data,NgVector=NULL,Vect5End=NULL,Vect3End=NULL,Conditions, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000,ApproxVal=10^-10)
-{
-       Dataraw=Data
-       AllZeroNames=which(rowMeans(Data)==0)
-       NotAllZeroNames=which(rowMeans(Data)>0)
-       if(length(AllZeroNames)>0) print("Remove transcripts with all zero")
-       Data=Data[NotAllZeroNames,]
-       if(!is.null(NgVector))NgVector=NgVector[NotAllZeroNames]
-       if(!length(sizeFactors)==ncol(Data))sizeFactors=sizeFactors[NotAllZeroNames,]
-
-       if(is.null(NgVector))NgVector=rep(1,nrow(Data))
-
-       #Rename Them
-       IsoNamesIn=rownames(Data)
-       Names=paste("I",c(1:dim(Data)[1]),sep="")
-       names(IsoNamesIn)=Names
-       rownames(Data)=paste("I",c(1:dim(Data)[1]),sep="")
-       names(NgVector)=paste("I",c(1:dim(Data)[1]),sep="")
-       
-
-       if(!length(sizeFactors)==ncol(Data)){
-               rownames(sizeFactors)=rownames(Data)
-               colnames(sizeFactors)=Conditions
-       }
-       
-       NumOfNg=nlevels(as.factor(NgVector))
-       NameList=sapply(1:NumOfNg,function(i)Names[NgVector==i],simplify=F)
-       names(NameList)=paste("Ng",c(1:NumOfNg),sep="")
-       NotNone=NULL
-       for (i in 1:NumOfNg) {
-               if (length(NameList[[i]])!=0) 
-                       NotNone=c(NotNone,names(NameList)[i])
-               }
-       NameList=NameList[NotNone]
-               
-       NoneZeroLength=length(NameList)
-       DataList=vector("list",NoneZeroLength)
-       DataList=sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=F)
-       names(DataList)=names(NameList)
-    
-       NumEachGroup=sapply(1:NoneZeroLength , function(i)dim(DataList)[i])
-       # Unlist 
-       DataList.unlist=do.call(rbind, DataList)
-
-       # Divide by SampleSize factor
-       
-       if(length(sizeFactors)==ncol(Data))
-       DataList.unlist.dvd=t(t( DataList.unlist)/sizeFactors)
-       
-       if(length(sizeFactors)!=ncol(Data))
-       DataList.unlist.dvd=DataList.unlist/sizeFactors
-       
-       # Get FC and VarPool for pooling - Only works on 2 conditions
-       if(ncol(Data)==2){
-       DataforPoolSP.dvd1=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[1]],nrow=dim(DataList.unlist)[1]) 
-       DataforPoolSP.dvd2=matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[2]],nrow=dim(DataList.unlist)[1])
-       MeanforPoolSP.dvd1=rowMeans(DataforPoolSP.dvd1)
-       MeanforPoolSP.dvd2=rowMeans(DataforPoolSP.dvd2)
-       FCforPool=MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
-       names(FCforPool)=rownames(Data)
-       FC_Use=which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],.25) & 
-                                                                 FCforPool<=quantile(FCforPool[!is.na(FCforPool)],.75))
-       
-       Var_FC_Use=apply( DataList.unlist.dvd[FC_Use,],1,var )
-       Mean_FC_Use=(MeanforPoolSP.dvd1[FC_Use]+MeanforPoolSP.dvd2[FC_Use])/2
-       MeanforPool=(MeanforPoolSP.dvd1+MeanforPoolSP.dvd2)/2
-       FC_Use2=which(Var_FC_Use>=Mean_FC_Use)
-       Var_FC_Use2=Var_FC_Use[FC_Use2]
-       Mean_FC_Use2=Mean_FC_Use[FC_Use2]
-       Phi=mean((Var_FC_Use2-Mean_FC_Use2)/Mean_FC_Use2^2)
-       VarEst= MeanforPool*(1+MeanforPool*Phi)
-       print(Phi)
-       }
-
-       #DataListSP Here also unlist.. Only two lists
-       DataListSP=vector("list",nlevels(Conditions))
-       DataListSP.dvd=vector("list",nlevels(Conditions))
-       SizeFSP=DataListSP
-       MeanSP=DataListSP
-       VarSP=DataListSP
-       GetPSP=DataListSP
-       RSP=DataListSP
-       CISP=DataListSP
-       tauSP=DataListSP
-       NumSampleEachCon=rep(NULL,nlevels(Conditions))
-
-       for (lv in 1:nlevels(Conditions)){
-               DataListSP[[lv]]= matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
-               rownames(DataListSP[[lv]])=rownames(DataList.unlist)
-               DataListSP.dvd[[lv]]= matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
-               NumSampleEachCon[lv]=ncol(DataListSP[[lv]])
-
-       if(ncol(DataListSP[[lv]])==1 & !is.null(CI)){
-               CISP[[lv]]=matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
-               tauSP[[lv]]=matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
-       }
-       # no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
-       # may input one for each 
-       if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]]=sizeFactors[Conditions==levels(Conditions)[lv]]
-       if(length(sizeFactors)!=ncol(Data))SizeFSP[[lv]]=sizeFactors[,Conditions==levels(Conditions)[lv]]
-       
-       
-       MeanSP[[lv]]=rowMeans(DataListSP.dvd[[lv]])
-       
-       if(length(sizeFactors)==ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
-       if(length(sizeFactors)!=ncol(Data))PrePareVar=sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])
-
-       if(ncol(DataListSP[[lv]])==1 & !is.null(CI))
-               VarSP[[lv]]=as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
-       if(ncol(DataListSP[[lv]])!=1){
-               VarSP[[lv]]=rowSums(PrePareVar)/ncol( DataListSP[[lv]])
-               names(MeanSP[[lv]])=rownames(DataList.unlist)
-               names(VarSP[[lv]])=rownames(DataList.unlist)
-               GetPSP[[lv]]=MeanSP[[lv]]/VarSP[[lv]]
-               RSP[[lv]]=MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
-       }
-}
-       
-       
-       MeanList=rowMeans(DataList.unlist.dvd)
-       VarList=apply(DataList.unlist.dvd, 1, var)
-       if(ncol(Data)==2)PoolVar=VarEst
-       if(!ncol(Data)==2){
-               CondWithRep=which(NumSampleEachCon>1)
-               VarCondWithRep=do.call(cbind,VarSP[CondWithRep])
-               PoolVar=rowMeans(VarCondWithRep)
-       }
-       GetP=MeanList/PoolVar
-       
-    EmpiricalRList=MeanList*GetP/(1-GetP) 
-       EmpiricalRList[EmpiricalRList==Inf]     =max(EmpiricalRList[EmpiricalRList!=Inf])
-       
-       if(ncol(Data)!=2){
-       Varcbind=do.call(cbind,VarSP)
-       VarrowMin=apply(Varcbind,1,min)
-       }
-
-       if(ncol(Data)==2){
-               Varcbind=VarEst
-               VarrowMin=VarEst
-       }
-       # 
-       # 
-       GoodData=names(MeanList)[EmpiricalRList>0 &  VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
-       NotIn=names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf |  is.na(VarrowMin) | is.na(EmpiricalRList)]
-       #print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
-       EmpiricalRList.NotIn=EmpiricalRList[NotIn]
-       EmpiricalRList.Good=EmpiricalRList[GoodData]
-       EmpiricalRList.Good[EmpiricalRList.Good<1]=1+EmpiricalRList.Good[EmpiricalRList.Good<1]
-       if(length(sizeFactors)==ncol(Data))
-       EmpiricalRList.Good.mat= outer(EmpiricalRList.Good, sizeFactors)        
-       if(!length(sizeFactors)==ncol(Data))
-       EmpiricalRList.Good.mat=EmpiricalRList.Good* sizeFactors[GoodData,]
-
-
-       # Only Use Data has Good q's
-       DataList.In=sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=F)
-       DataList.NotIn=sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=F)
-       DataListIn.unlist=do.call(rbind, DataList.In)
-       DataListNotIn.unlist=do.call(rbind, DataList.NotIn)
-       
-       DataListSPIn=vector("list",nlevels(Conditions))
-       DataListSPNotIn=vector("list",nlevels(Conditions))
-       EmpiricalRList.Good.mat.SP=vector("list",nlevels(Conditions))
-       for (lv in 1:nlevels(Conditions)){
-               DataListSPIn[[lv]]= matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
-       if(length(NotIn)>0){    DataListSPNotIn[[lv]]= matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
-       rownames(DataListSPNotIn[[lv]])=rownames(DataListNotIn.unlist)
-       }
-       rownames(DataListSPIn[[lv]])=rownames(DataListIn.unlist)
-       EmpiricalRList.Good.mat.SP[[lv]]=matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
-}      
-
-       NumOfEachGroupIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
-       NumOfEachGroupNotIn=sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
-
-       #Initialize SigIn & ...
-       AlphaIn=0.5
-       BetaIn=rep(0.5,NoneZeroLength)
-       PIn=0.5
-
-       ####use while to make an infinity round?
-       UpdateAlpha=NULL
-       UpdateBeta=NULL
-       UpdateP=NULL
-       UpdatePFromZ=NULL
-    Timeperround=NULL 
-       for (times in 1:maxround){
-       temptime1=proc.time()
-               UpdateOutput=suppressWarnings(LogN(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,  NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength))
-       print(paste("iteration", times, "done",sep=" "))
-               AlphaIn=UpdateOutput$AlphaNew
-       BetaIn=UpdateOutput$BetaNew
-       PIn=UpdateOutput$PNew
-               PFromZ=UpdateOutput$PFromZ
-       F0Out=UpdateOutput$F0Out
-               F1Out=UpdateOutput$F1Out
-               UpdateAlpha=rbind(UpdateAlpha,AlphaIn)
-               UpdateBeta=rbind(UpdateBeta,BetaIn)
-       UpdateP=rbind(UpdateP,PIn)
-               UpdatePFromZ=rbind(UpdatePFromZ,PFromZ)
-               temptime2=proc.time()
-               Timeperround=c(Timeperround,temptime2[3]-temptime1[3])
-               print(paste("time" ,Timeperround[times],sep=" "))
-               Z.output=UpdateOutput$ZNew.list[!is.na(UpdateOutput$ZNew.list)]
-               Z.NA.Names=UpdateOutput$zNaNName
-               }
-               #Remove this } after testing!!
-                
-#      if (times!=1){  
-#              if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){ 
-#                      Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
-#                      break
-#        }
-#    }
-#}
-
-##########Change Names############
-## Only z are for Good Ones
-## Others are for ALL Data
-GoodData=GoodData[!GoodData%in%Z.NA.Names]
-IsoNamesIn.Good=IsoNamesIn[GoodData]
-RealName.Z.output=Z.output
-RealName.F0=F0Out
-RealName.F1=F1Out
-names(RealName.Z.output)=IsoNamesIn.Good
-names(RealName.F0)=IsoNamesIn.Good
-names(RealName.F1)=IsoNamesIn.Good
-
-
-RealName.EmpiricalRList=sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=F)
-RealName.MeanList=sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=F)
-RealName.C1MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[1]][names(MeanSP[[1]])%in%NameList[[i]]], simplify=F)
-RealName.C2MeanList=sapply(1:NoneZeroLength,function(i)MeanSP[[2]][names(MeanSP[[2]])%in%NameList[[i]]], simplify=F)
-RealName.C1VarList=sapply(1:NoneZeroLength,function(i)VarSP[[1]][names(VarSP[[1]])%in%NameList[[i]]], simplify=F)
-RealName.C2VarList=sapply(1:NoneZeroLength,function(i)VarSP[[2]][names(VarSP[[2]])%in%NameList[[i]]], simplify=F)
-RealName.DataList=sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=F)
-
-
-
-RealName.VarList=sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=F)
-RealName.PoolVarList=sapply(1:NoneZeroLength,function(i)PoolVar[names(PoolVar)%in%NameList[[i]]], simplify=F)
-
-
-RealName.QList1=sapply(1:NoneZeroLength,function(i)GetPSP[[1]][names(GetPSP[[1]])%in%NameList[[i]]], simplify=F)
-RealName.QList2=sapply(1:NoneZeroLength,function(i)GetPSP[[2]][names(GetPSP[[2]])%in%NameList[[i]]], simplify=F)
-
-
-for (i in 1:NoneZeroLength){
-tmp=NameList[[i]]
-names=IsoNamesIn[tmp]
-
-RealName.MeanList[[i]]=RealName.MeanList[[i]][NameList[[i]]]
-RealName.VarList[[i]]=RealName.VarList[[i]][NameList[[i]]]
-RealName.QList1[[i]]=RealName.QList1[[i]][NameList[[i]]]
-RealName.QList2[[i]]=RealName.QList2[[i]][NameList[[i]]]
-RealName.EmpiricalRList[[i]]=RealName.EmpiricalRList[[i]][NameList[[i]]]
-RealName.C1MeanList[[i]]=RealName.C1MeanList[[i]][NameList[[i]]]
-RealName.C2MeanList[[i]]=RealName.C2MeanList[[i]][NameList[[i]]]
-RealName.PoolVarList[[i]]=RealName.PoolVarList[[i]][NameList[[i]]]
-RealName.C1VarList[[i]]=RealName.C1VarList[[i]][NameList[[i]]]
-RealName.C2VarList[[i]]=RealName.C2VarList[[i]][NameList[[i]]]
-RealName.DataList[[i]]=RealName.DataList[[i]][NameList[[i]],]
-
-names(RealName.MeanList[[i]])=names
-names(RealName.VarList[[i]])=names
-if(ncol(DataListSP[[1]])!=1){
-       names(RealName.QList1[[i]])=names
-       names(RealName.C1VarList[[i]])=names
-}
-if(ncol(DataListSP[[2]])!=1){
-       names(RealName.QList2[[i]])=names
-       names(RealName.C2VarList[[i]])=names
-}
-
-names(RealName.EmpiricalRList[[i]])=names
-names(RealName.C1MeanList[[i]])=names
-names(RealName.C2MeanList[[i]])=names
-names(RealName.PoolVarList[[i]])=names
-rownames(RealName.DataList[[i]])=names
-
-
-}
-
-
-#########posterior part for other data set here later############
-AllNA=unique(c(Z.NA.Names,NotIn))
-z.list.NotIn=NULL
-AllF0=c(RealName.F0)
-AllF1=c(RealName.F1)
-AllZ=RealName.Z.output
-
-if (length(AllNA)>0){
-       Ng.NA=NgVector[AllNA]
-       AllNA.Ngorder=AllNA[order(Ng.NA)]
-       NumOfEachGroupNA=rep(0,NoneZeroLength)
-       NumOfEachGroupNA.tmp=tapply(Ng.NA,Ng.NA,length)
-       names(NumOfEachGroupNA)=c(1:NoneZeroLength)
-       NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)]=NumOfEachGroupNA.tmp
-       PNotIn=rep(1-ApproxVal,length(AllNA.Ngorder))
-       MeanList.NotIn=MeanList[AllNA.Ngorder]
-       R.NotIn.raw=MeanList.NotIn*PNotIn/(1-PNotIn) 
-       if(length(sizeFactors)==ncol(Data))
-       R.NotIn=outer(R.NotIn.raw,sizeFactors)
-       if(!length(sizeFactors)==ncol(Data))
-       R.NotIn=R.NotIn.raw*sizeFactors[NotIn,]
-       R.NotIn1=matrix(R.NotIn[,Conditions==levels(Conditions)[1]],nrow=nrow(R.NotIn))
-       R.NotIn2=matrix(R.NotIn[,Conditions==levels(Conditions)[2]],nrow=nrow(R.NotIn))
-    
-       DataListNotIn.unlistWithZ=DataList.unlist[AllNA.Ngorder,]
-       DataListSPNotInWithZ=vector("list",nlevels(Conditions))
-       for (lv in 1:nlevels(Conditions)) 
-               DataListSPNotInWithZ[[lv]] = matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
-               F0=f0(DataListNotIn.unlistWithZ,  AlphaIn, BetaIn, R.NotIn, NumOfEachGroupNA, log=F)
-       F1=f1(DataListSPNotInWithZ[[1]], DataListSPNotInWithZ[[2]], AlphaIn, BetaIn, R.NotIn1,R.NotIn2, NumOfEachGroupNA, log=F)
-       z.list.NotIn=PIn*F1/(PIn*F1+(1-PIn)*F0)
-#      names(z.list.NotIn)=IsoNamesIn.Good=IsoNamesIn[which(Names%in%NotIn)]
-       names(z.list.NotIn)=IsoNamesIn[AllNA.Ngorder]
-
-       AllZ=c(RealName.Z.output,z.list.NotIn)
-       AllZ=AllZ[IsoNamesIn]
-       AllZ[is.na(AllZ)]=0
-       F0.NotIn=F0
-       F1.NotIn=F1
-       names(F0.NotIn)=IsoNamesIn[names(F0)]
-    names(F1.NotIn)=IsoNamesIn[names(F1)]
-       AllF0=c(RealName.F0,F0.NotIn)
-       AllF1=c(RealName.F1,F1.NotIn)
-       AllF0=AllF0[IsoNamesIn]
-       AllF1=AllF1[IsoNamesIn]
-       AllF0[is.na(AllF0)]=0
-       AllF1[is.na(AllF1)]=0
-}
-#############Result############################
-Result=list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,PFromZ=UpdatePFromZ, Z=RealName.Z.output,PoissonZ=z.list.NotIn, RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, VarList=RealName.VarList, QList1=RealName.QList1, QList2=RealName.QList2, C1Mean=RealName.C1MeanList, C2Mean=RealName.C2MeanList,C1EstVar=RealName.C1VarList, C2EstVar=RealName.C2VarList, PoolVar=RealName.PoolVarList , DataList=RealName.DataList,PPDE=AllZ,f0=AllF0, f1=AllF1,
-                       AllZeroIndex=AllZeroNames)
-}
-
diff --git a/EBSeq/R/GeneMultiSimu.R b/EBSeq/R/GeneMultiSimu.R
deleted file mode 100644 (file)
index e71babd..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-GeneMultiSimu<-
-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions,AllParti, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1 paired simulation
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-data(GeneEBresultGouldBart2)
-MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
-MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
-
-MeanDVD=MeansC1/MeansC2
-
-if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
-if(!is.null(DVDconstant))DVDLibrary=DVDconstant
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
-
-if(length(NumofGene)!=0)
-NumofGene.raw=NumofGene*2
-
-if(length(NumofGene)==0)
-NumofGene.raw=length(MeanInputraw)
-
-
-PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
-if (length(Phiconstant)==0){
-       PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
-       PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
-       PhiInput=PhiInput.raw[PhiInputNames]
-}
-
-if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
-if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
-if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
-
-# length(DEGeneNumbers) should be num of patterns -1. the others EE
-PatternGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
-names(PatternGeneNumbers)=rownames(AllParti)
-EEWhich=which(rowSums(AllParti)==ncol(AllParti))
-DEGeneNumbers=PatternGeneNumbers[-EEWhich]
-
-
-OutGeneNumbers=round(NumofGene*DEGeneProp/2)*2
-names(OutGeneNumbers)=rownames(AllParti)
-OutDEGeneNumbers=OutGeneNumbers[-EEWhich]
-OutEEGeneNumbers=OutGeneNumbers[EEWhich]
-OutGenePatterns=c(unlist(sapply(1:length(OutDEGeneNumbers),
-                                                         function(i)rep(names(OutDEGeneNumbers)[i],OutDEGeneNumbers[i]),simplify=F)),
-                                 rep(names(OutEEGeneNumbers),OutEEGeneNumbers))
-
-GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
-names(PhiInput)=GeneNames
-names(MeanInput)=GeneNames
-#########
-# data
-#########
-EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
-
-generateDataraw=t(EEList)
-DVDSample=sample(DVDLibrary,sum(DEGeneNumbers),replace=T)
-
-DErawNames=vector("list",length(DEGeneNumbers))
-st=1
-for(i in 1:length(DEGeneNumbers)){
-       for(j in st:(st+DEGeneNumbers[i]-1)){
-               NumGroup=max(AllParti[names(DEGeneNumbers)[i],])
-               SampleGroup=sample(NumGroup,NumGroup)
-               DVDSampleEach=c(1,DVDSample[j]^c(1:(NumGroup-1)))
-               for(k in 1:NumGroup){
-               CondWhich=which(AllParti[names(DEGeneNumbers)[i],]==SampleGroup[k])
-               SampleChoose=which(Conditions%in%colnames(AllParti)[CondWhich])
-               generateDataraw[j,SampleChoose]=sapply(1:length(SampleChoose), function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSampleEach[k]*MeanInput[j]*NormFactor[i])),simplify=T)
-               }}
-               DErawNames[[i]]=GeneNames[st:(st+DEGeneNumbers[i]-1)]
-               st=st+DEGeneNumbers[i]
-}
-
-rownames(generateDataraw)=GeneNames
-MeanVector=rowMeans(generateDataraw)
-VarVector=apply(generateDataraw,1,var)
-MOV.post=MeanVector/VarVector
-EErawNames=GeneNames[!GeneNames%in%unlist(DErawNames)]
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
-InName=rownames(generateData)
-#print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
-## DE
-##################################
-FinalDEInName=sapply(1:length(DEGeneNumbers),function(i)InName[InName%in%DErawNames[[i]]][1:OutDEGeneNumbers[i]],simplify=F)
-FinalEEInName=InName[InName%in%EErawNames][1:OutEEGeneNumbers]
-FinalNames=c(unlist(FinalDEInName),FinalEEInName)
-
-generateData=generateData[FinalNames,]
-########################################
-
-UseName=rownames(generateData)
-phiuse=PhiInput[rownames(generateData)]
-meanuse=MeanInput[rownames(generateData)]
-
-OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
-names(OutName)=rownames(generateData)
-OutData=generateData
-rownames(OutData)=as.vector(OutName)
-names(OutGenePatterns)=as.vector(OutName)
-output=list(data=OutData, Patterns=OutGenePatterns)
-}
diff --git a/EBSeq/R/GeneSimu.R b/EBSeq/R/GeneSimu.R
deleted file mode 100644 (file)
index 1125e32..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-GeneSimu<-
-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1 paired simulation
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-data(GeneEBresultGouldBart2)
-MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
-MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
-
-MeanDVD=MeansC1/MeansC2
-
-if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
-
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
-
-if(length(NumofGene)!=0)
-NumofGene.raw=NumofGene*2
-
-if(length(NumofGene)==0)
-NumofGene.raw=length(MeanInputraw)
-
-
-PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
-if (length(Phiconstant)==0){
-       PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
-       PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
-       PhiInput=PhiInput.raw[PhiInputNames]
-}
-
-if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
-if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
-if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
-
-DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
-GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
-names(PhiInput)=GeneNames
-names(MeanInput)=GeneNames
-#########
-# data
-#########
-EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
-
-
-
-
-    generateDataraw=t(EEList)
-       if(length(DVDconstant)==0){
-               DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
-               for(j in 1:NumofGene.raw){
-                if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
-               if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=MeanInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i])),simplify=T)
-}
-        }
-       if(length(DVDconstant)!=0){
-        for(j in 1:NumofGene.raw){
-             if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)suppressWarnings(rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
-             if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)suppressWarnings(rnbinom(1, size=MeanInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i])),simplify=T)
-               }
-       }
-rownames(generateDataraw)=GeneNames
-MeanVector=rowMeans(generateDataraw)
-VarVector=apply(generateDataraw,1,var)
-MOV.post=MeanVector/VarVector
-
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
-#print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
-## DE
-NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
-
-if(length(NumofGene)!=0)
-    generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
-
-
-UseName=rownames(generateData)
-phiuse=PhiInput[rownames(generateData)]
-meanuse=MeanInput[rownames(generateData)]
-
-
-TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
-
-if(OnlyData==T){
-       OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
-       names(OutName)=rownames(generateData)
-       OutData=generateData
-       rownames(OutData)=as.vector(OutName)
-       OutTrueDE=as.vector(OutName[TrueDE])
-       output=list(data=OutData, TrueDE=OutTrueDE)
-       return(output)
-       }
-## DESeq
-
-cds=newCountDataSet(round(generateData),Conditions)
-cds=estimateSizeFactors(cds)
-Sizes=sizeFactors(cds)
-if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
-else  cds=estimateVarianceFunctions(cds, method="blind")
-
-res=nbinomTest(cds, "1", "2")
-ResAdj=res$padj
-names(ResAdj)=res$id
-SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
-print(paste("DESeq found",length(SmallPValueName)))
-print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
-
-print("DESeq Size factors")
-print(Sizes)
-
-NewData=generateData
-
-
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
-
-EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-#library(EBarrays)
-
-#EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-
-
-zlist.unlist=EBresult[[5]]
-fdr=max(.5,crit_fun(1-zlist.unlist,.05))
-EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
-EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
-zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
-print(paste("Soft EB Poi",length(EBDE.Poi)))
-EBDE=c(EBDE, EBDE.Poi)
-print(paste("Soft EB found",length(EBDE)))
-print(paste("In True DE",sum(EBDE%in%TrueDE)))
-
-EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
-EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
-print(paste("Hard Poi found",length(EBDE95.Poi)))
-EBDE95=c(EBDE95, EBDE95.Poi)
-print(paste("Hard EB found" ,length(EBDE95)))
-print(paste("In True DE",sum(EBDE95%in%TrueDE)))
-
-### edgeR
-library(edgeR,lib.loc="~/RCODE")
-edgeRList.b2=DGEList(NewData,group=Conditions)
-if(length(Phiconstant)==1){
-       edgeRList.b2=estimateCommonDisp(edgeRList.b2)
-       edgeRRes.b2=exactTest(edgeRList.b2)
-}
-if(length(Phiconstant)==0){
-       edgeRList.b2=estimateCommonDisp(edgeRList.b2)   
-       edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
-       edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
-}
-edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
-edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
-names(edgeRPvalue.b2)=rownames(NewData)
-edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
-print(paste("edgeR found",length(edgeRSmallpvalue)))
-print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
-
-### Bayseq
-library(baySeq, lib.loc="~/RCODE")
-library(snow, lib.loc="~/RCODE")
-cl <- makeCluster(4, "SOCK")
-groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
-CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
-CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
-CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
-bayseqPost=CDPost.NBML@posteriors
-rownames(bayseqPost)=rownames(NewData)
-bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
-print(paste("bayseq found",length(bayseqDE)))
-print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
-
-
-### BBSeq
-library("BBSeq",lib.loc="~/RCODE")
-CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
-output=free.estimate(NewData,CondM)
-beta.free = output$betahat.free
-p.free = output$p.free
-psi.free = output$psi.free
-names(p.free)=rownames(NewData)
-# Top p free?
-#out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
-#p.constrained = out.model$p.model
-p.free.adj=p.adjust(p.free, method="BH")
-
-BBDE=names(p.free.adj)[which(p.free.adj<.05)]
-print(paste("BBSeq found",length(BBDE)))
-print(paste("In True DE",sum(BBDE%in%TrueDE)))
-
-
-#########################
-# Generate table
-Table=matrix(rep(0,12),ncol=2)
-colnames(Table)=c("Power","FDR")
-rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-
-       Length=length(TrueDE)
-       Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
-       Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
-       Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
-       Table[4,1]=sum(BBDE%in%TrueDE)/Length
-       Table[5,1]=sum(EBDE%in%TrueDE)/Length
-       Table[6,1]=sum(EBDE95%in%TrueDE)/Length
-       Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
-       Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
-       Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
-       Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
-       Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
-       Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
-       Table=round(Table,2)
-
-ValueTable=matrix(rep(0,12),ncol=2)
-colnames(ValueTable)=c("Power","FDR")
-rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-       ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
-       ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
-       ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
-       ValueTable[4,1]=sum(BBDE%in%TrueDE)
-       ValueTable[5,1]=sum(EBDE%in%TrueDE)
-       ValueTable[6,1]=sum(EBDE95%in%TrueDE)
-       ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
-       ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
-       ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
-       ValueTable[4,2]=sum(!BBDE%in%TrueDE)
-       ValueTable[5,2]=sum(!EBDE%in%TrueDE)
-       ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
-       
-if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
-if(length(DVDconstant)!=0) DVD=DVDconstant
-if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
-if(length(Phiconstant)!=0) Phi=Phiconstant
-OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult
-,DESeqDE=SmallPValueName, edgeRDE=edgeRSmallpvalue, bayDE=bayseqDE, BBDE=BBDE, EBDE95=EBDE95)
-}
-
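The Power and FDR columns filled in above are plain set operations between each method's call list and TrueDE. A minimal sketch with hypothetical identifier vectors (FoundDE and TrueDE below are made up, not objects from this file):

TrueDE  = paste("G", 1:50, sep="_")            # assumed truth set (50 DE genes)
FoundDE = paste("G", c(1:40, 91:100), sep="_") # assumed calls: 40 true, 10 false
Power = sum(FoundDE %in% TrueDE) / length(TrueDE)    # 40/50 = 0.8
FDR   = sum(!FoundDE %in% TrueDE) / length(FoundDE)  # 10/50 = 0.2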
diff --git a/EBSeq/R/GeneSimuAt.R b/EBSeq/R/GeneSimuAt.R
deleted file mode 100644 (file)
index 8b5f7a2..0000000
+++ /dev/null
@@ -1,291 +0,0 @@
-GeneSimuAt<-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1 
-# paired level simulation
-
-data(GeneEBresultGouldBart2)
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-
-#MeansC1=rowMeans(GeneV.norm1.NZ.b2[,1:4])
-#MeansC2=rowMeans(GeneV.norm1.NZ.b2[,5:8])
-MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
-MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
-
-MeanDVD=MeansC1/MeansC2
-
-if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
-
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
-#MeanInputraw=rowMeans(GeneV.norm1.NZ.b2)
-#Var1=apply(GeneV.norm1.NZ.b2[,1:4],1,var)
-#Var2=apply(GeneV.norm1.NZ.b2[,5:8],1,var)
-#VarInput=(Var1 + Var2)/2
-#If NumofGene.raw=NULL, empirical # of Gene
-#If !=NULL , Input a 9-vector
-NumofGene.raw=length(MeanInputraw)
-
-# Here phi denotes r, i.e. 1/phi', where sigma^2 = mu*(1 + mu*phi').
-# In R's negative binomial, the size argument corresponds to r = 1/phi',
-# so a larger size gives a variance closer to the Poisson case:
-# qq=rnbinom(100,size=100,mu=10)
-# var(qq)
-#[1] 10.93687
-# qq=rnbinom(100,size=10,mu=10)
-# var(qq)
-#[1] 24.01404
-
-#PhiInput.raw=(MeanInputraw^2) / (VarInput - MeanInputraw)
-PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
-if (length(Phiconstant)==0){
-       PhiLibrary=PhiInput.raw[1/(PhiInput.raw)<quantile(1/(PhiInput.raw),Phi.qt2) & 1/(PhiInput.raw)>quantile(1/(PhiInput.raw),Phi.qt1)]
-    PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
-       PhiInput=PhiInput.raw[PhiInputNames]
-
-
-}
-
-if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
-if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
-if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
-
-# Make the number of DE genes a multiple of 2
-DEGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
-GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
-names(PhiInput)=GeneNames
-names(MeanInput)=GeneNames
-
-#########
-# data
-#########
-EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j])))
-
-
-
-
-    generateDataraw=t(EEList)
-       if(length(DVDconstant)==0){
-               DVDSample=sample(DVDLibrary,DEGeneNumbers,replace=T)
-               for(j in 1:NumofGene.raw){
-                if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply(((NumofSample/2) +1):NumofSample, function(i)rnbinom(1, size=PhiInput[j], mu=DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
-               if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=PhiInput[j], mu= DVDSample[j]*MeanInput[j]*NormFactor[i]),simplify=T)
-}
-        }
-       if(length(DVDconstant)!=0){
-        for(j in 1:NumofGene.raw){
-             if (j<=(DEGeneNumbers/2)) generateDataraw[j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(i)rnbinom(1, size=PhiInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
-             if (j>=((DEGeneNumbers/2)+1) & j <=DEGeneNumbers) generateDataraw[j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(i)rnbinom(1, size=PhiInput[j],mu=DVDconstant*MeanInput[j]*NormFactor[i]),simplify=T)
-               }
-       }
-rownames(generateDataraw)=GeneNames
-MeanVector=rowMeans(generateDataraw)
-VarVector=apply(generateDataraw,1,var)
-MOV.post=MeanVector/VarVector
-
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
-print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
-## DE
-NumDENow=sum(rownames(generateData)%in%rownames(generateDataraw)[1:DEGeneNumbers])
-
-if(length(NumofGene)!=0)
-    generateData=generateData[c(sample(1:NumDENow,round(NumofGene*DEGeneProp),replace=F),round( (dim(generateData)[1]+1-NumofGene*(1-DEGeneProp)):dim(generateData)[1])),]
-
-
-UseName=rownames(generateData)
-
-TrueDE=UseName[UseName%in%rownames(generateDataraw)[1:DEGeneNumbers]]
-phiuse=PhiInput[rownames(generateData)]
-meanuse=MeanInput[rownames(generateData)]
-
-#ArtiNames=rownames(generateData)[(DEGeneNumbers+1):(2*DEGeneNumbers)]
-#Noise=sample(c(1,ncol(generateData)),DEGeneNumbers,replace=T)
-TrueDELength=length(TrueDE)
-AtLoc=sample(c(1:length(Conditions)), TrueDELength, replace=T)
-AtFold=sample(c(4,6,8,10),TrueDELength, replace=T)
-
-AtNames_Level=vector("list",4)
-names(AtNames_Level)=c(4,6,8,10)
-for(i in 1:TrueDELength){
-generateData[(TrueDELength+i),AtLoc[i]]=generateData[(TrueDELength+i),AtLoc[i]]*AtFold[i]
-AtNames_Level[[as.character(AtFold[i])]]=c(AtNames_Level[[as.character(AtFold[i])]],rownames(generateData)[TrueDELength+i])
-}
-
-
-if(OnlyData==T){
-       OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
-       names(OutName)=rownames(generateData)
-    OutData=generateData
-    rownames(OutData)=as.vector(OutName)
-       OutAt=as.vector(OutName[unlist(AtNames_Level)])
-       OutTrueDE=as.vector(OutName[TrueDE])
-    output=list(data=OutData, TrueDE=OutTrueDE,Outliers=OutAt)
-       return(output)
-       }
-## DESeq
-
-cds=newCountDataSet(round(generateData),Conditions)
-cds=estimateSizeFactors(cds)
-Sizes=sizeFactors(cds)
-if(dim(generateData)[2]>4)cds=estimateVarianceFunctions(cds)
-else  cds=estimateVarianceFunctions(cds, method="blind")
-
-res=nbinomTest(cds, "1", "2")
-ResAdj=res$padj
-names(ResAdj)=res$id
-SmallPValueName=names(ResAdj)[which(ResAdj<=.05)]
-print(paste("DESeq found",length(SmallPValueName)))
-print(paste("In True DE",sum(SmallPValueName%in%TrueDE)))
-
-print("DESeq Size factors")
-print(Sizes)
-
-## DESeq each group
-## Ours
-NewData=generateData
-
-
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormVar.R")
-#source("/z/Comp/kendziorskigroup/ningleng/RNASEQ/CODE/FinalV/NBBetaBiasUniqueP_PoolVar_SpeedUp_MDFPoi_NoNormPoolR.R")
-
-EBresult=EBTest(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-
-#EBres2=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormPoolR(NewData,rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]), rep(1,dim(NewData)[1]),Conditions,sizeFactors=Sizes,5)
-
-
-zlist.unlist=EBresult[[5]]
-fdr=max(.5,crit_fun(1-zlist.unlist,.05))
-EBDE=names(zlist.unlist)[which(zlist.unlist>fdr)]
-EBDE.Poi=names(EBresult[[6]])[which(EBresult[[6]]>fdr)]
-zlist.unlist.whole=c(EBresult[[5]],EBresult[[6]])
-print(paste("Soft EB Poi",length(EBDE.Poi)))
-EBDE=c(EBDE, EBDE.Poi)
-print(paste("Soft EB found",length(EBDE)))
-print(paste("In True DE",sum(EBDE%in%TrueDE)))
-
-EBDE95=names(zlist.unlist)[which(zlist.unlist>.95)]
-EBDE95.Poi=names(EBresult[[6]])[which(EBresult[[6]]>.95)]
-print(paste("Hard Poi found",length(EBDE95.Poi)))
-EBDE95=c(EBDE95, EBDE95.Poi)
-print(paste("Hard EB found" ,length(EBDE95)))
-print(paste("In True DE",sum(EBDE95%in%TrueDE)))
-
-### edgeR
-library(edgeR,lib.loc="~/RCODE")
-edgeRList.b2=DGEList(NewData,group=Conditions)
-if(length(Phiconstant)==1){
-       edgeRList.b2=estimateCommonDisp(edgeRList.b2)
-       edgeRRes.b2=exactTest(edgeRList.b2)
-}
-if(length(Phiconstant)==0){
-       edgeRList.b2=estimateCommonDisp(edgeRList.b2)   
-       edgeRList.b2=estimateTagwiseDisp(edgeRList.b2)
-       edgeRRes.b2=exactTest(edgeRList.b2, common.disp = FALSE)
-}
-edgeRPvalue.b2.raw=edgeRRes.b2[[1]][[3]]
-edgeRPvalue.b2=p.adjust(edgeRPvalue.b2.raw, method="BH")
-names(edgeRPvalue.b2)=rownames(NewData)
-edgeRSmallpvalue=names(which(edgeRPvalue.b2<.05))
-print(paste("edgeR found",length(edgeRSmallpvalue)))
-print(paste("In True DE",sum(edgeRSmallpvalue%in%TrueDE)))
-
-### Bayseq
-library(baySeq, lib.loc="~/RCODE")
-library(snow, lib.loc="~/RCODE")
-cl <- makeCluster(4, "SOCK")
-groups <- list(NDE = rep(1,NumofSample), DE = rep(c(1,2),each=NumofSample/2))
-CD <- new("countData", data = NewData, replicates = Conditions, libsizes = as.integer(colSums(NewData)), groups = groups)
-CDP.NBML <- getPriors.NB(CD, samplesize = dim(NewData)[1], estimation = "QL", cl = cl)
-CDPost.NBML <- getLikelihoods.NB(CDP.NBML, pET = "BIC", cl = cl)
-bayseqPost=CDPost.NBML@posteriors
-rownames(bayseqPost)=rownames(NewData)
-bayseqDE=rownames(NewData)[bayseqPost[,2]>log(.95)]
-print(paste("bayseq found",length(bayseqDE)))
-print(paste("In True DE",sum(bayseqDE%in%TrueDE)))
-
-
-### BBSeq
-library("BBSeq",lib.loc="~/RCODE")
-CondM=cbind(rep(1,NumofSample),rep(c(0,1),each=NumofSample/2))
-output=free.estimate(NewData,CondM)
-beta.free = output$betahat.free
-p.free = output$p.free
-psi.free = output$psi.free
-names(p.free)=rownames(NewData)
-p.free.adj=p.adjust(p.free,method="BH")
-# Top p free?
-#out.model=constrained.estimate(NewData,CondM, gn=3, beta.free ,psi.free)
-#p.constrained = out.model$p.model
-BBDE=names(p.free.adj)[which(p.free.adj<.05)]
-print(paste("BBSeq found",length(BBDE)))
-print(paste("In True DE",sum(BBDE%in%TrueDE)))
-
-
-#########################
-# Generate table
-Table=matrix(rep(0,12),ncol=2)
-colnames(Table)=c("Power","FDR")
-rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-
-       Length=length(TrueDE)
-       Table[1,1]=sum(SmallPValueName%in%TrueDE)/Length
-       Table[2,1]=sum(edgeRSmallpvalue%in%TrueDE)/Length
-       Table[3,1]=sum(bayseqDE%in%TrueDE)/Length
-       Table[4,1]=sum(BBDE%in%TrueDE)/Length
-       Table[5,1]=sum(EBDE%in%TrueDE)/Length
-       Table[6,1]=sum(EBDE95%in%TrueDE)/Length
-       Table[1,2]=sum(!SmallPValueName%in%TrueDE)/length(SmallPValueName)
-       Table[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)/length(edgeRSmallpvalue)
-       Table[3,2]=sum(!bayseqDE%in%TrueDE)/length(bayseqDE)
-       Table[4,2]=sum(!BBDE%in%TrueDE)/length(BBDE)
-       Table[5,2]=sum(!EBDE%in%TrueDE)/length(EBDE)
-       Table[6,2]=sum(!EBDE95%in%TrueDE)/length(EBDE95)
-       Table=round(Table,2)
-
-ValueTable=matrix(rep(0,12),ncol=2)
-colnames(ValueTable)=c("Power","FDR")
-rownames(ValueTable)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-       ValueTable[1,1]=sum(SmallPValueName%in%TrueDE)
-       ValueTable[2,1]=sum(edgeRSmallpvalue%in%TrueDE)
-       ValueTable[3,1]=sum(bayseqDE%in%TrueDE)
-       ValueTable[4,1]=sum(BBDE%in%TrueDE)
-       ValueTable[5,1]=sum(EBDE%in%TrueDE)
-       ValueTable[6,1]=sum(EBDE95%in%TrueDE)
-       ValueTable[1,2]=sum(!SmallPValueName%in%TrueDE)
-       ValueTable[2,2]=sum(!edgeRSmallpvalue%in%TrueDE)
-       ValueTable[3,2]=sum(!bayseqDE%in%TrueDE)
-       ValueTable[4,2]=sum(!BBDE%in%TrueDE)
-       ValueTable[5,2]=sum(!EBDE%in%TrueDE)
-       ValueTable[6,2]=sum(!EBDE95%in%TrueDE)
-
-
-AtFoundTable=matrix(rep(0,24),ncol=4)
-colnames(AtFoundTable)=paste("Level",c(1:4),sep="_")
-rownames(Table)=c("DESeq","edgeR","BaySeq","BBSeq","EBSeq_ModifiedSoft","EBSeq_Hard")
-for(i in 1:4){
-       AtFoundTable[1,i]=sum(SmallPValueName%in%AtNames_Level[[i]])
-       AtFoundTable[2,i]=sum(edgeRSmallpvalue%in%AtNames_Level[[i]])
-       AtFoundTable[3,i]=sum(bayseqDE%in%AtNames_Level[[i]])
-       AtFoundTable[4,i]=sum(BBDE%in%AtNames_Level[[i]])
-       AtFoundTable[5,i]=sum(EBDE%in%AtNames_Level[[i]])
-       AtFoundTable[6,i]=sum(EBDE95%in%AtNames_Level[[i]])     
-       }
-
-       
-if(length(DVDconstant)==0)DVD=c(quantile(MeanDVD[MeanDVD!=Inf],DVDqt1), quantile(MeanDVD[MeanDVD!=Inf],DVDqt2))
-if(length(DVDconstant)!=0) DVD=DVDconstant
-if(length(Phiconstant)==0)Phi=c(quantile(PhiInput.raw,Phi.qt1), quantile(PhiInput.raw,Phi.qt2))
-if(length(Phiconstant)!=0) Phi=Phiconstant
-OUT=list(Table=Table, ValueTable=ValueTable, DVD=DVD, Phi=Phi, generateData=NewData, TrueDE=TrueDE,phi.vector=phiuse,mean.vector=meanuse,NormFactor=NormFactor, DESeqP=ResAdj, edgeRP=edgeRPvalue.b2, EBSeqPP=zlist.unlist.whole, BaySeqPP=bayseqPost,BBSeqP=p.free.adj,EBoutput=EBresult,  AtFoundTable= AtFoundTable,Outliers=AtNames_Level)
-
-
-
-}
-
-
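The size/phi comment block in GeneSimuAt can be sanity-checked directly: with rnbinom(n, size, mu), Var = mu + mu^2/size, so size plays the role of r = 1/phi' and a large size approaches the Poisson variance. A quick check (exact values vary with the seed):

set.seed(1)
mu = 10
x1 = rnbinom(1e5, size=100, mu=mu)  # large r: Var ~ 10 + 100/100 = 11
x2 = rnbinom(1e5, size=10,  mu=mu)  # small r: Var ~ 10 + 100/10  = 20
c(var(x1), var(x2))                 # roughly 11 and 20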
diff --git a/EBSeq/R/GetData.R b/EBSeq/R/GetData.R
deleted file mode 100644 (file)
index ddccf38..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-GetData <-
-function(path,Name1,Name2,type)
-{
-Data=vector("list",8)
-Filenames=NULL
-Tablenames=NULL
-for (name in 1:4)
-       {
-               if (type=="I")
-                       Filenames=c(Filenames,paste(path,Name1,name,"_isoform_nus.tab",sep=""))  
-               if (type=="G")  
-                       Filenames=c(Filenames,paste(path,Name1,name,"_gene_nus.tab",sep=""))  
-               Tablenames=c(Tablenames,paste(Name1,name,sep=""))
-       }
-for (name in 1:4)
-       {
-               if (type=="I")
-                       Filenames=c(Filenames,paste(path,Name2,name,"_isoform_nus.tab",sep=""))
-               if (type=="G")
-                       Filenames=c(Filenames,paste(path,Name2,name,"_gene_nus.tab",sep=""))
-               Tablenames=c(Tablenames,paste(Name2,name,sep=""))
-       }
-
-
-names(Data)=Tablenames
-for (file in 1:8)
-       {
-               temp=read.table(Filenames[file],header=T)
-               temp2=as.matrix(temp[-1])
-               rownames(temp2)=as.vector(as.matrix(temp[1]))
-               Data[[file]]=temp2
-       }
-       Data
-}
-
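GetData assembles eight RSEM output file names from two sample-name prefixes and reads each *_gene_nus.tab (or *_isoform_nus.tab) into a matrix keyed by its first column. A hypothetical call (path and prefixes are made up):

# reads /path/to/rsem/cond1_1_gene_nus.tab ... /path/to/rsem/cond2_4_gene_nus.tab
Data = GetData(path="/path/to/rsem/", Name1="cond1_", Name2="cond2_", type="G")
names(Data)   # "cond1_1" ... "cond2_4"; each element is a numeric matrix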
diff --git a/EBSeq/R/GetMultiPP.R b/EBSeq/R/GetMultiPP.R
deleted file mode 100644 (file)
index 4ae2882..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-GetMultiPP <- function(EBout){
-       PP=EBout$PPDE   
-       MAP=colnames(EBout$f)[apply(EBout$f,1,which.max)]
-       AllParti=EBout$AllParti
-       out=list(PP=PP, MAP=MAP,Patterns=AllParti)
-}
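GetMultiPP only repackages a multi-pattern EBTest-style result list: the PPDE vector, the maximum a posteriori pattern per gene (the column of f with the largest posterior), and the pattern matrix. A toy call against a hand-built list with the same fields (everything below is hypothetical):

EBout = list(PPDE = c(g1=0.10, g2=0.90),
             f = matrix(c(0.7, 0.2,   # Pattern1 column
                          0.2, 0.1,   # Pattern2 column
                          0.1, 0.7),  # Pattern3 column
                        nrow=2, dimnames=list(c("g1","g2"), paste("Pattern",1:3,sep=""))),
             AllParti = rbind(Pattern1=c(1,1), Pattern2=c(1,2), Pattern3=c(2,1)))
GetMultiPP(EBout)$MAP   # "Pattern1" for g1, "Pattern3" for g2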
diff --git a/EBSeq/R/GetNg.R b/EBSeq/R/GetNg.R
deleted file mode 100644 (file)
index 9312f9a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-GetNg<- function(IsoformName, GeneName){
-       GeneNg = tapply(IsoformName, GeneName, length)
-       IsoformNg = GeneNg[GeneName]
-       names(IsoformNg) = IsoformName
-       GeneNgTrun=GeneNg
-       GeneNgTrun[GeneNgTrun>3]=3
-       IsoformNgTrun=IsoformNg
-       IsoformNgTrun[IsoformNgTrun>3]=3
-       out=list( GeneNg=GeneNg, GeneNgTrun=GeneNgTrun, IsoformNg=IsoformNg, IsoformNgTrun=IsoformNgTrun)
-       }
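GetNg counts isoforms per gene with tapply, maps the count back onto each isoform, and truncates at 3 to form the Ng groups used by the isoform-level routines above. A toy call with hypothetical identifiers:

IsoformName = paste("iso", 1:6, sep="_")
GeneName    = c("gA","gA","gB","gC","gC","gC")  # gA: 2 isoforms, gB: 1, gC: 3
res = GetNg(IsoformName, GeneName)
res$GeneNg         # gA=2, gB=1, gC=3
res$IsoformNgTrun  # per-isoform group, capped at 3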
diff --git a/EBSeq/R/GetPP.R b/EBSeq/R/GetPP.R
deleted file mode 100644 (file)
index 0c1eeb9..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-GetPP <- function(EBout){
-       #PP=c(EBout[[5]], EBout[[6]])
-       PP=EBout$PPDE   
-}
diff --git a/EBSeq/R/GetPatterns.R b/EBSeq/R/GetPatterns.R
deleted file mode 100644 (file)
index 436df74..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-GetPatterns<-function(Conditions){
-    if(!is.factor(Conditions))Conditions=as.factor(Conditions)
-       NumCond=nlevels(Conditions)
-       CondLevels=levels(Conditions)
-    #library(blockmodeling)
-    AllPartiList=sapply(1:NumCond,function(i)nkpartitions(NumCond,i))
-    AllParti=do.call(rbind,AllPartiList)
-       colnames(AllParti)=CondLevels
-       rownames(AllParti)=paste("Pattern",1:nrow(AllParti),sep="")
-       AllParti
-
-}
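GetPatterns enumerates every way of grouping the conditions into equal-mean blocks via nkpartitions (from the blockmodeling package, per the commented library() call). For three conditions there are five such patterns, from all-equal to all-different; a sketch of the expected result, assuming nkpartitions is available (row order may differ):

GetPatterns(c("C1","C2","C3"))
#          C1 C2 C3
# Pattern1  1  1  1   # one shared mean
# Pattern2  1  1  2   # C3 differs
# Pattern3  1  2  1   # C2 differs
# Pattern4  1  2  2   # C1 differs
# Pattern5  1  2  3   # all three differ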
diff --git a/EBSeq/R/IsoSimu.R b/EBSeq/R/IsoSimu.R
deleted file mode 100644 (file)
index 4c58ed1..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-IsoSimu=function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofIso=NULL, DEIsoProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL,NormFactor=NULL, OnlyData=T)
-{
-# 2012 feb 1 
-# paired simulation
-data(IsoEBresultGouldBart2)
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-
-MeansC1=IsoEBresultGouldBart2$C1Mean
-MeansC2=IsoEBresultGouldBart2$C2Mean
-MeanDVD=sapply(1:9,function(i) MeansC1[[i]]/MeansC2[[i]])
-# DVD library with each group here
-if (length(DVDconstant)==0) DVDLibrary= unlist(MeanDVD)[unlist(MeanDVD)<quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt2) & unlist(MeanDVD)>quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt1)]
-
-
-
-# If DVD constant, use constant when generate
-# If not, use DVDLibrary
-
-VarInput=IsoEBresultGouldBart2$VarList
-VarInputNg=list(VarInput[[1]],unlist(VarInput[c(2,4,6,8)]),unlist(VarInput[c(3,5,7,9)]))
-#If NumofIso=NULL, empirical # of Iso
-#If !=NULL , Input a 9-vector
-if(length(NumofIso)==0) NumofIso.raw=sapply(1:3,function(i)length(VarInputNg[[i]]))
-if(length(NumofIso)!=0) NumofIso.raw=NumofIso*2
-
-PhiInput.raw=IsoEBresultGouldBart2$RList
-PhiInput.raw.Ng=list(PhiInput.raw[[1]],unlist(PhiInput.raw[c(2,4,6,8)]),unlist(PhiInput.raw[c(3,5,7,9)]))
-
-
-if (length(Phiconstant)==0){
-       PhiLibrary=sapply(1:3,function(i)PhiInput.raw.Ng[[i]][1/PhiInput.raw.Ng[[i]]<quantile(1/PhiInput.raw.Ng[[i]],Phi.qt2) & 1/PhiInput.raw.Ng[[i]]>quantile(1/PhiInput.raw.Ng[[i]],Phi.qt1)],simplify=F)
-       PhiIndex=sapply(1:3, function(i)sample(names(PhiLibrary[[i]]),NumofIso.raw[[i]],replace=T),simplify=F)
-       PhiInputNg=sapply(1:3, function(i)PhiLibrary[[i]][PhiIndex[[i]]])
-}
-if (length(Phiconstant)!=0)PhiInputNg=sapply(1:3,function(i)rep(Phiconstant,NumofIso.raw[[i]]),simplify=F)
-
-# Make the number of DE isoforms a multiple of 2
-DEIsoNumbers=round(NumofIso.raw*DEIsoProp/2)*2
-IsoNames=sapply(1:3,function(i)paste("I",i,c(1:NumofIso.raw[i]),sep="_"),simplify=F)
-MeanNg=list(IsoEBresultGouldBart2$MeanList[[1]],unlist(IsoEBresultGouldBart2$MeanList[c(2,4,6,8)]),
-unlist(IsoEBresultGouldBart2$MeanList[c(3,5,7,9)]))
-MeanInputNg=sapply(1:3, function(i)MeanNg[[i]][PhiIndex[[i]]])
-
-for(i in 1:3){
-       names(MeanInputNg[[i]])=IsoNames[[i]]
-       names(PhiInputNg[[i]])=IsoNames[[i]]
-       }
-
-##############################
-# Get Ng version to every one
-##############################
-
-
-#########
-# data
-#########
-EEList=sapply(1:3,function(i) sapply(1:NumofIso.raw[[i]], function(j)sapply(1:NumofSample,function(h) rnbinom(1,mu=MeanInputNg[[i]][j]*NormFactor[h], size=PhiInputNg[[i]][j]))),simplify=F)
-
-
-generateDataraw=vector("list",3)
-MeanVector=vector("list",3)
-VarVector=vector("list",3)
-MOV.post=vector("list",3)
-
-
-for(g in 1:3){
-    generateDataraw[[g]]=t(EEList[[g]][,1:NumofIso.raw[g]])
-       if(length(DVDconstant)==0){
-               for(j in 1:NumofIso.raw[g]){
-                if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu=sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h])), simplify=T)
-               if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu= sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
-}
-        }
-       if(length(DVDconstant)!=0){
-        for(j in 1:NumofIso.raw[g]){
-             if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
-             if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) suppressWarnings(rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h])),simplify=T)
-               }
-       }
-rownames(generateDataraw[[g]])=IsoNames[[g]][1:NumofIso.raw[g]]
-MeanVector[[g]]=rowMeans(generateDataraw[[g]])
-VarVector[[g]]=apply(generateDataraw[[g]],1,var)
-MOV.post[[g]]=MeanVector[[g]]/VarVector[[g]]
-}
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-for (i in 1:3) generateData[[i]]=generateData[[i]][!is.na(MOV.post[[i]]),] 
-#print(paste("NA MOV's",sum(is.na(unlist(MOV.post)))))
-NumDENow=sapply(1:3, function(i)sum(rownames(generateData[[i]])%in%rownames(generateDataraw[[i]])[1:DEIsoNumbers[i]]))
-
-if(length(NumofIso)!=0){
-           for(i in 1:3)
-               generateData[[i]]=generateData[[i]][c(sample(1:NumDENow[i],round(NumofIso[i]*DEIsoProp),replace=F),round( (dim(generateData[[i]])[1]+1-NumofIso[i]*(1-DEIsoProp)):dim(generateData[[i]])[1])),]
-}
-generateDataNg=generateData
-
-## DE
-UseName=sapply(1:3, function(i)rownames(generateData[[i]]),simplify=F)
-TrueDE=sapply(1:3, function(i)UseName[[i]][UseName[[i]] %in% rownames(generateDataraw[[i]])[1:DEIsoNumbers[i]]],simplify=F)
-TrueDE.unlist=do.call(c,TrueDE)
-
-phiuse=sapply(1:3,function(i)PhiInputNg[[i]][UseName[[i]]])
-meanuse=sapply(1:3,function(i)MeanInputNg[[i]][UseName[[i]]])
-
-#if(OnlyData==T){
-    
-OutName=sapply(1:3,function(i)paste("Iso",i,c(1:nrow(generateDataNg[[i]])),sep="_"))
-for(i in 1:3)names(OutName[[i]])=rownames(generateDataNg[[i]])
-OutData=generateDataNg
-for(i in 1:3)rownames(OutData[[i]])=as.vector(OutName[[i]])
-OutTrueDE=as.vector(unlist(OutName)[TrueDE.unlist])
-output=list(data=OutData, TrueDE=OutTrueDE)
-
-
-#output=list(data=generateDataNg, TrueDE=TrueDE.unlist)
-return(output)
-#    }
-# Now only OnlyData=T version
-}
-
diff --git a/EBSeq/R/IsoSimuAt.R b/EBSeq/R/IsoSimuAt.R
deleted file mode 100644 (file)
index 479518d..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-IsoSimuAt<-function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions, NumofSample, NumofIso=NULL, DEIsoProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL,NormFactor=NULL, OnlyData=T)
-{
-#Ng paired 2012 feb 1
-if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
-data(IsoEBresultGouldBart2)
-
-MeansC1=IsoEBresultGouldBart2$C1Mean
-MeansC2=IsoEBresultGouldBart2$C2Mean
-MeanDVD=sapply(1:9,function(i) MeansC1[[i]]/MeansC2[[i]])
-if (length(DVDconstant)==0) DVDLibrary= unlist(MeanDVD)[unlist(MeanDVD)<quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt2) & unlist(MeanDVD)>quantile(unlist(MeanDVD)[unlist(MeanDVD)!=Inf],DVDqt1)]
-
-
-
-
-VarInput=IsoEBresultGouldBart2$VarList
-VarInputNg=list(VarInput[[1]],unlist(VarInput[c(2,4,6,8)]),unlist(VarInput[c(3,5,7,9)]))
-
-if(length(NumofIso)==0) NumofIso=sapply(1:3,function(i)length(VarInputNg[[i]]))
-PhiInput.raw=IsoEBresultGouldBart2$RList
-PhiInput.raw.Ng=list(PhiInput.raw[[1]],unlist(PhiInput.raw[c(2,4,6,8)]),unlist(PhiInput.raw[c(3,5,7,9)]))
-
-
-if (length(Phiconstant)==0){
-       PhiLibrary=sapply(1:3,function(i)PhiInput.raw.Ng[[i]][1/PhiInput.raw.Ng[[i]]<quantile(1/PhiInput.raw.Ng[[i]],Phi.qt2) & 1/PhiInput.raw.Ng[[i]]>quantile(1/PhiInput.raw.Ng[[i]],Phi.qt1)],simplify=F)
-       PhiIndex=sapply(1:3, function(i)sample(names(PhiLibrary[[i]]),NumofIso[[i]],replace=T),simplify=F)
-       PhiInputNg=sapply(1:3, function(i)PhiLibrary[[i]][PhiIndex[[i]]])
-}
-if (length(Phiconstant)!=0)PhiInputNg=sapply(1:3,function(i)rep(Phiconstant,NumofIso[[i]]),simplify=F)
-
-# Make the number of DE isoforms a multiple of 2
-DEIsoNumbers=round(NumofIso*DEIsoProp/2)*2
-IsoNames=sapply(1:3,function(i)paste("I",i,c(1:NumofIso[i]),sep="_"),simplify=F)
-MeanNg=list(IsoEBresultGouldBart2$MeanList[[1]],unlist(IsoEBresultGouldBart2$MeanList[c(2,4,6,8)]),
-unlist(IsoEBresultGouldBart2$MeanList[c(3,5,7,9)]))
-MeanInputNg=sapply(1:3, function(i)MeanNg[[i]][PhiIndex[[i]]])
-
-for(i in 1:3){
-       names(MeanInputNg[[i]])=IsoNames[[i]]
-       names(PhiInputNg[[i]])=IsoNames[[i]]
-       }
-
-#########
-# data
-#########
-EEList=sapply(1:3,function(i) sapply(1:NumofIso[[i]], function(j)sapply(1:NumofSample,function(h) rnbinom(1,mu=MeanInputNg[[i]][j]*NormFactor[h], size=PhiInputNg[[i]][j]))),simplify=F)
-
-
-generateDataraw=vector("list",3)
-MeanVector=vector("list",3)
-VarVector=vector("list",3)
-MOV.post=vector("list",3)
-
-
-for(g in 1:3){
-    generateDataraw[[g]]=t(EEList[[g]][,1:NumofIso[g]])
-       if(length(DVDconstant)==0){
-               for(j in 1:NumofIso[g]){
-                if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)rnbinom(1, size=PhiInputNg[[g]][j], mu=sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h]), simplify=T)
-               if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) rnbinom(1, size=PhiInputNg[[g]][j], mu= sample(DVDLibrary,1)*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
-}
-        }
-       if(length(DVDconstant)!=0){
-        for(j in 1:NumofIso[g]){
-             if (j<=(DEIsoNumbers[g]/2)) generateDataraw[[g]][j,((NumofSample/2)+1):NumofSample]=sapply((NumofSample/2+1):NumofSample, function(h)rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
-             if (j>=((DEIsoNumbers[g]/2)+1) & j <=DEIsoNumbers[g]) generateDataraw[[g]][j,1:(NumofSample/2)]=sapply(1:(NumofSample/2),function(h) rnbinom(1, size=PhiInputNg[[g]][j], mu=DVDconstant*MeanInputNg[[g]][j]*NormFactor[h]),simplify=T)
-               }
-       }
-rownames(generateDataraw[[g]])=IsoNames[[g]][1:NumofIso[g]]
-MeanVector[[g]]=rowMeans(generateDataraw[[g]])
-VarVector[[g]]=apply(generateDataraw[[g]],1,var)
-MOV.post[[g]]=MeanVector[[g]]/VarVector[[g]]
-}
-
-
-### Remove MOV=NA
-generateData=generateDataraw
-for (i in 1:3) generateData[[i]]=generateData[[i]][!is.na(MOV.post[[i]]),] 
-print(paste("NA MOV's",sum(is.na(unlist(MOV.post)))))
-#tmpmean=sapply(1:9,function(i)rowMeans(generateData[[i]]))
-#tmpvar=sapply(1:9,function(i)apply(generateData[[i]],1,var))
-#source("plot_functions.R")
-#CheckSimuNg(tmpmean,tmpvar,c(-1,5),c(-1,7))
-
-
-
-
-## DE
-UseName=sapply(1:3, function(i)rownames(generateData[[i]]),simplify=F)
-TrueDE=sapply(1:3, function(i)UseName[[i]][UseName[[i]] %in% rownames(generateData[[i]])[1:DEIsoNumbers[i]]],simplify=F)
-TrueDE.unlist=do.call(c,TrueDE)
-
-TrueDELength=sapply(TrueDE,length)
-
-AtNames_Level=vector("list",4)
-AtLoc=vector("list",3)
-AtFold=vector("list",3)
-names(AtNames_Level)=c(4,6,8,10)
-
-
-for(j in 1:3){
-AtLoc[[j]]=sample(c(1:length(Conditions)), TrueDELength[j], replace=T)
-AtFold[[j]]=sample(c(4,6,8,10),TrueDELength[j], replace=T)
-
-for(i in 1:TrueDELength[j]){
-
-generateData[[j]][(TrueDELength[j]+i),AtLoc[[j]][i]]=generateData[[j]][(TrueDELength[j]+i),AtLoc[[j]][i]]*AtFold[[j]][i]
-AtNames_Level[[as.character(AtFold[[j]][i])]]=c(AtNames_Level[[as.character(AtFold[[j]][i])]],rownames(generateData[[j]])[TrueDELength[j]+i])
-}
-}
-phiuse=sapply(1:3,function(i)PhiInputNg[[i]][UseName[[i]]])
-meanuse=sapply(1:3,function(i)MeanInputNg[[i]][UseName[[i]]])
-
-#generateDataNg=list(generateData[[1]], do.call(rbind,generateData[c(2,4,6,8)]), do.call(rbind,generateData[c(3,5,7,9)]))
-generateDataNg=generateData
-
-#if(OnlyData==T){
-
-OutName=sapply(1:3,function(i)paste("Iso",i,c(1:nrow(generateDataNg[[i]])),sep="_"))
-for(i in 1:3)names(OutName[[i]])=rownames(generateDataNg[[i]])
-OutData=generateDataNg
-for(i in 1:3)rownames(OutData[[i]])=as.vector(OutName[[i]])
-OutTrueDE=as.vector(unlist(OutName)[TrueDE.unlist])
-OutAt=as.vector(unlist(OutName)[unlist(AtNames_Level)])
-
-output=list(data=OutData, TrueDE=OutTrueDE, Outliers=OutAt)
-#      return(output)
-#    }
-       }
diff --git a/EBSeq/R/Likefun.R b/EBSeq/R/Likefun.R
deleted file mode 100644 (file)
index ebe05ab..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-Likefun <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[5]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PIn=ParamPool[2+NoneZeroLength]
-ZIn=InputPool[[4]]
-Input=InputPool[[3]]
-Input1=matrix(InputPool[[1]],nrow=nrow(Input))
-Input2=matrix(InputPool[[2]],nrow=nrow(Input))
-RIn=InputPool[[6]]
-RInSP1=matrix(InputPool[[7]],nrow=nrow(Input))
-RInSP2=matrix(InputPool[[8]],nrow=nrow(Input))
-NumIn=InputPool[[9]]
-##Function here
-#LikelihoodFunction<- function(NoneZeroLength){
-       F0=f0(Input, AlphaIn, BetaIn, RIn, NumIn, log=T)
-       F1=f1(Input1, Input2, AlphaIn, BetaIn, RInSP1,RInSP2, NumIn, log=T)
-               F0[F0==Inf]=min(!is.na(F0[F0!=Inf]))
-               F1[F1==Inf]=min(!is.na(F1[F1!=Inf]))
-
-       -sum((1-ZIn)*F0+ (1-ZIn)* log(1-PIn) + ZIn*F1 + ZIn*log(PIn))
-}
-
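Likefun returns the negative expected complete-data log-likelihood of the two-component (EE/DE) mixture at fixed responsibilities ZIn, which optim then minimizes over alpha, beta and p. Stripped of the f0/f1 bookkeeping, the objective reduces to the following (logf0 and logf1 here are hypothetical vectors of log densities, not the package's f0/f1 calls):

neg_loglik = function(p, logf0, logf1, Z) {
  # expected complete-data log-likelihood, negated for minimization
  -sum((1 - Z) * (logf0 + log(1 - p)) + Z * (logf1 + log(p)))
}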
diff --git a/EBSeq/R/LikefunMulti.R b/EBSeq/R/LikefunMulti.R
deleted file mode 100644 (file)
index 510cb1e..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-LikefunMulti <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[4]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PIn=ParamPool[(2+NoneZeroLength):length(ParamPool)]
-PInAll=c(1-sum(PIn),PIn)
-ZIn=InputPool[[3]]
-Input=InputPool[[2]]
-InputSP=InputPool[[1]]
-RIn=InputPool[[5]]
-RInSP=InputPool[[6]]
-NumIn=InputPool[[7]]
-AllParti=InputPool[[8]]
-PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
-##Function here
-FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
-                                               function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
-                                       do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
-                                               simplify=F) 
-FPartiLog=sapply(FList,rowSums)
-#FMat=exp(FPartiLog)
-FMat=FPartiLog
--sum(ZIn*(FMat+log(PInMat)))
-}
-
diff --git a/EBSeq/R/LikefunMultiDVDP.R b/EBSeq/R/LikefunMultiDVDP.R
deleted file mode 100644 (file)
index 37dcc2d..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-LikefunMulti <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[4]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PInMat=InputPool[[9]]
-#PInAll=c(1-sum(PIn),PIn)
-ZIn=InputPool[[3]]
-Input=InputPool[[2]]
-InputSP=InputPool[[1]]
-RIn=InputPool[[5]]
-RInSP=InputPool[[6]]
-NumIn=InputPool[[7]]
-AllParti=InputPool[[8]]
-#PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
-##Function here
-FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
-                                               function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
-                                       do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
-                                               simplify=F) 
-FPartiLog=sapply(FList,rowSums)
-#FMat=exp(FPartiLog)
-FMat=FPartiLog
--sum(ZIn*(FMat+log(PInMat)))
-}
-
diff --git a/EBSeq/R/LikefunMultiEMP.R b/EBSeq/R/LikefunMultiEMP.R
deleted file mode 100644 (file)
index 510cb1e..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-LikefunMulti <-
-function(ParamPool, InputPool)
-{
-
-NoneZeroLength=InputPool[[4]]
-AlphaIn=ParamPool[1]
-BetaIn=ParamPool[2:(1+NoneZeroLength)]
-PIn=ParamPool[(2+NoneZeroLength):length(ParamPool)]
-PInAll=c(1-sum(PIn),PIn)
-ZIn=InputPool[[3]]
-Input=InputPool[[2]]
-InputSP=InputPool[[1]]
-RIn=InputPool[[5]]
-RInSP=InputPool[[6]]
-NumIn=InputPool[[7]]
-AllParti=InputPool[[8]]
-PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PInAll,nrow=1)
-##Function here
-FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
-                                               function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
-                                       do.call(cbind,RInSP[AllParti[i,]==j]), NumIn, log=T)),
-                                               simplify=F) 
-FPartiLog=sapply(FList,rowSums)
-#FMat=exp(FPartiLog)
-FMat=FPartiLog
--sum(ZIn*(FMat+log(PInMat)))
-}
-
diff --git a/EBSeq/R/LogN.R b/EBSeq/R/LogN.R
deleted file mode 100644 (file)
index 7767b93..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-LogN <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength)
-{
-    #2 condition case (skip the loop then maybe run faster? Code multi condition cases later)
-
-        #For each gene (m rows of Input---m genes)
-        #Save each gene's F0, F1 for further likelihood calculation. 
-  
-        #Get F0 for EE
-        F0=f0(Input,  AlphaIn, BetaIn, EmpiricalR, NumOfEachGroup, log=F)
-        #Get F1 for DE
-        F1=f1(InputSP[[1]], InputSP[[2]], AlphaIn, BetaIn, EmpiricalRSP[[1]],EmpiricalRSP[[2]], NumOfEachGroup, log=F)
-
-        #Get z
-               #Use data.list in logfunction
-        
-               z.list=PIn*F1/(PIn*F1+(1-PIn)*F0)
-               zNaNName=names(z.list)[is.na(z.list)]
-               zGood=which(!is.na(z.list))
-               ###Update P
-        #PFromZ=sapply(1:NoneZeroLength,function(i) sum(z.list[[i]])/length(z.list[[i]]))
-        PFromZ=sum(z.list[zGood])/length(z.list[zGood])
-        F0Good=F0[zGood]
-               F1Good=F1[zGood]
-               ### MLE Part ####
-        # Since we dont wanna update p and Z in this step
-        # Each Ng for one row
-               
-               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-               
-               NumGroupVector.zGood=NumGroupVector[zGood]
-               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
-        StartValue=c(AlphaIn, BetaIn,PIn)
-                    
-               Result<-optim(StartValue,Likefun,InputPool=list(InputSP[[1]][zGood,],InputSP[[2]][zGood,],Input[zGood,],z.list[zGood], NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSP[[1]][zGood,], EmpiricalRSP[[2]][zGood,], NumOfEachGroup.zGood))
-        #LikeOutput=Likelihood( StartValue, Input , InputSP , PNEW.list, z.list)
-               AlphaNew= Result$par[1]
-               BetaNew=Result$par[2:(1+NoneZeroLength)]
-        PNew=Result$par[2+NoneZeroLength]
-               ##
-        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZNew.list=z.list,PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,F0Out=F0Good, F1Out=F1Good)
-        Output
-    }
-
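The heart of LogN is the E-step z.list = PIn*F1 / (PIn*F1 + (1-PIn)*F0): the posterior probability that a gene is DE under the current mixture. A tiny numeric illustration with made-up densities:

PIn = 0.2            # current prior probability of DE
F0  = c(1e-3, 1e-6)  # EE densities for two genes
F1  = c(1e-4, 1e-3)  # DE densities for the same genes
PIn * F1 / (PIn * F1 + (1 - PIn) * F0)   # ~0.024 (likely EE) and ~0.996 (likely DE)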
diff --git a/EBSeq/R/LogNMulti.R b/EBSeq/R/LogNMulti.R
deleted file mode 100644 (file)
index f852540..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-LogNMulti <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength, AllParti, Conditions)
-{
-
-        #For each gene (m rows of Input---m genes)
-        #Save each gene's F0, F1 for further likelihood calculation. 
-               FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
-                                  function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
-                                                                        do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
-                                         simplify=F) 
-               FPartiLog=sapply(FList,rowSums)
-               FMat=exp(FPartiLog)
-               rownames(FMat)=rownames(Input)
-        #Get z
-               #Use data.list in logfunction
-        PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
-               FmultiP=FMat*PInMat
-               Denom=rowSums(FmultiP)
-               ZEach=apply(FmultiP,2,function(i)i/Denom)
-               zNaNName1=names(Denom)[is.na(Denom)]
-               # other NAs in LikeFun
-               LF=ZEach*(log(FmultiP))
-               zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
-               zNaNName=unique(c(zNaNName1,zNaNMore))
-               zGood=which(!rownames(LF)%in%zNaNName)
-               ZEachGood=ZEach[zGood,]
-               ###Update P
-        PFromZ=colSums(ZEach[zGood,])/length(zGood)
-        FGood=FMat[zGood,]
-               ### MLE Part ####
-        # Since we dont wanna update p and Z in this step
-        # Each Ng for one row
-               
-               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-               
-               NumGroupVector.zGood=NumGroupVector[zGood]
-               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
-        StartValue=c(AlphaIn, BetaIn,PIn[-1])
-               InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
-        EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
-
-               Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,], 
-                                        NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti))
-               AlphaNew= Result$par[1]
-               BetaNew=Result$par[2:(1+NoneZeroLength)]
-        PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
-               PNew=c(1-sum(PNewNo1),PNewNo1)
-               ##
-        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood, 
-                                       PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
-        Output
-    }
-
diff --git a/EBSeq/R/LogNMultiDVDP.R b/EBSeq/R/LogNMultiDVDP.R
deleted file mode 100644 (file)
index 6e85a5d..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-LogNMulti <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength, AllParti, Conditions)
-{
-
-        #For each gene (m rows of Input---m genes)
-        #Save each gene's F0, F1 for further likelihood calculation. 
-               FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
-                                  function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
-                                                                        do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
-                                         simplify=F) 
-               FPartiLog=sapply(FList,rowSums)
-               FMat=exp(FPartiLog)
-               rownames(FMat)=rownames(Input)
-        #Get z
-               #Use data.list in logfunction
-        PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
-               FmultiP=FMat*PInMat
-               Denom=rowSums(FmultiP)
-               ZEach=apply(FmultiP,2,function(i)i/Denom)
-               zNaNName1=names(Denom)[is.na(Denom)]
-               # other NAs in LikeFun
-               LF=ZEach*(log(FmultiP))
-               zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
-               zNaNName=unique(c(zNaNName1,zNaNMore))
-               zGood=which(!rownames(LF)%in%zNaNName)
-               ZEachGood=ZEach[zGood,]
-               ###Update P
-        PFromZ=colSums(ZEach[zGood,])/length(zGood)
-        NewPInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PFromZ,nrow=1)
-               FGood=FMat[zGood,]
-               ### MLE Part ####
-        # Since we dont wanna update p and Z in this step
-        # Each Ng for one row
-               
-               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-               
-               NumGroupVector.zGood=NumGroupVector[zGood]
-               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
-        StartValue=c(AlphaIn, BetaIn)
-               InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
-        EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
-
-               Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,], 
-                                        NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti,
-                                        NewPInMat))
-               AlphaNew= Result$par[1]
-               BetaNew=Result$par[2:(1+NoneZeroLength)]
-        #PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
-               #PNew=c(1-sum(PNewNo1),PNewNo1)
-               PNew= PFromZ
-               ##
-        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood, 
-                                       PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
-        Output
-    }
-
diff --git a/EBSeq/R/LogNMultiEMP.R b/EBSeq/R/LogNMultiEMP.R
deleted file mode 100644 (file)
index f852540..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-LogNMulti <-
-function(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn,  PIn, NoneZeroLength, AllParti, Conditions)
-{
-
-        #For each gene (m rows of Input---m genes)
-        #Save each gene's F0, F1 for further likelihood calculation. 
-               FList=sapply(1:nrow(AllParti),function(i)sapply(1:nlevels(as.factor(AllParti[i,])),
-                                  function(j)f0(do.call(cbind,InputSP[AllParti[i,]==j]),AlphaIn, BetaIn, 
-                                                                        do.call(cbind,EmpiricalRSP[AllParti[i,]==j]), NumOfEachGroup, log=T)),
-                                         simplify=F) 
-               FPartiLog=sapply(FList,rowSums)
-               FMat=exp(FPartiLog)
-               rownames(FMat)=rownames(Input)
-        #Get z
-               #Use data.list in logfunction
-        PInMat=matrix(rep(1,nrow(Input)),ncol=1)%*%matrix(PIn,nrow=1)
-               FmultiP=FMat*PInMat
-               Denom=rowSums(FmultiP)
-               ZEach=apply(FmultiP,2,function(i)i/Denom)
-               zNaNName1=names(Denom)[is.na(Denom)]
-               # other NAs in LikeFun
-               LF=ZEach*(log(FmultiP))
-               zNaNMore=rownames(LF)[which(is.na(rowSums(LF)))]
-               zNaNName=unique(c(zNaNName1,zNaNMore))
-               zGood=which(!rownames(LF)%in%zNaNName)
-               ZEachGood=ZEach[zGood,]
-               ###Update P
-        PFromZ=colSums(ZEach[zGood,])/length(zGood)
-        FGood=FMat[zGood,]
-               ### MLE Part ####
-        # Since we dont wanna update p and Z in this step
-        # Each Ng for one row
-               
-               NumGroupVector=rep(c(1:NoneZeroLength),NumOfEachGroup)
-               
-               NumGroupVector.zGood=NumGroupVector[zGood]
-               NumOfEachGroup.zGood=tapply(NumGroupVector.zGood,NumGroupVector.zGood,length)
-
-        StartValue=c(AlphaIn, BetaIn,PIn[-1])
-               InputSPGood=sapply(1:length(InputSP),function(i)InputSP[[i]][zGood,],simplify=F)
-        EmpiricalRSPGood=sapply(1:length(EmpiricalRSP),function(i)EmpiricalRSP[[i]][zGood,],simplify=F)
-
-               Result<-optim(StartValue,LikefunMulti,InputPool=list(InputSPGood,Input[zGood,],ZEach[zGood,], 
-                                        NoneZeroLength,EmpiricalR[zGood, ],EmpiricalRSPGood, NumOfEachGroup.zGood, AllParti))
-               AlphaNew= Result$par[1]
-               BetaNew=Result$par[2:(1+NoneZeroLength)]
-        PNewNo1=Result$par[(2+NoneZeroLength):length(Result$par)]
-               PNew=c(1-sum(PNewNo1),PNewNo1)
-               ##
-        Output=list(AlphaNew=AlphaNew,BetaNew=BetaNew,PNew=PNew,ZEachNew=ZEach, ZEachGood=ZEachGood, 
-                                       PFromZ=PFromZ, zGood=zGood, zNaNName=zNaNName,FGood=FGood)
-        Output
-    }
-
diff --git a/EBSeq/R/MedianNorm.R b/EBSeq/R/MedianNorm.R
deleted file mode 100644 (file)
index 44c0a4a..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-MedianNorm=function(Data){
-
-    geomeans <- exp(rowMeans(log(Data)))
-       apply(Data, 2, function(cnts) median((cnts/geomeans)[geomeans >  0]))
-}
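MedianNorm computes median-of-ratios size factors: each column's counts are divided by the per-gene geometric means across columns, and the median of those ratios becomes that library's factor (the same idea as DESeq's size factors). For example:

Data = matrix(c(10,  20,
                100, 200,
                5,   10), ncol=2, byrow=TRUE)
MedianNorm(Data)   # ~c(0.71, 1.41): library 2 is sequenced twice as deeply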
diff --git a/EBSeq/R/MergeGene.R b/EBSeq/R/MergeGene.R
deleted file mode 100644 (file)
index 4bfdaf4..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-MergeGene <-
-function(GeneSIMout, Num, Path="./"){
-NumSample=ncol(GeneSIMout[[1]]$generateData)
-
-NumGene=rep(0,Num)
-for (i in 1:Num)NumGene[i]=nrow(GeneSIMout[[i]]$generateData)
-
-MinNumGene=min(NumGene)
-AproxNumDE=length(GeneSIMout[[1]]$TrueDE)
-       
-GeneMergeTable=matrix(rep(0,12),nrow=6)
-       for(i in 1:Num)GeneMergeTable=GeneMergeTable+GeneSIMout[[i]][[1]]
-       GeneMergeTable=GeneMergeTable/Num
-       GeneMergeTable=round(GeneMergeTable,2)
-                 
-       GeneMergeDVD=rep(0,2)
-         for(i in 1:Num)GeneMergeDVD=GeneMergeDVD+GeneSIMout[[i]][[3]]
-                 GeneMergeDVD=round(GeneMergeDVD/Num,2) 
-                                         
-         GeneMergePhi=matrix(rep(0,2),nrow=2)
-                 for(i in 1:Num)GeneMergePhi=GeneMergePhi+GeneSIMout[[i]][[4]]
-                         GeneMergePhi=round(GeneMergePhi/Num,2)
-## Write
-TXTname=paste(paste(Path,paste("Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".txt",sep="")
-write.table(GeneMergeTable, file=TXTname)
-
-
-####### Note: the # of DE genes and the # of total genes may differ between runs (because of the NA filtering).
-  GeneMergeFD=matrix(rep(0,5*MinNumGene),ncol=5)
-  GeneMergeFD.p=matrix(rep(0,5*MinNumGene),ncol=5)
-  GeneMergeTP.p=matrix(rep(0,5*MinNumGene),ncol=5)
-  GeneMergeFN.p=matrix(rep(0,5*MinNumGene),ncol=5)
-  GeneMergeTN.p=matrix(rep(0,5*MinNumGene),ncol=5)
-
-  GeneMergeFDR=matrix(rep(0,5*MinNumGene),ncol=5)
-  GeneMergeTPR=matrix(rep(0,5*MinNumGene),ncol=5)
-  GeneMergeFPR=matrix(rep(0,5*MinNumGene),ncol=5)
-
-
-  for(i in 1:Num){
-       # Make sure names in the same order
-       # Get FD number for each number of genes found
-    TotalNum=nrow(GeneSIMout[[i]]$generateData)
-       NumDE=length(GeneSIMout[[i]]$TrueDE)
-       EBSeqNames=names(GeneSIMout[[i]]$EBSeqPP)
-    tmpMatrix=cbind(GeneSIMout[[i]]$DESeqP[EBSeqNames],GeneSIMout[[i]]$edgeRP[EBSeqNames], exp(GeneSIMout[[i]]$BaySeqPP[EBSeqNames,2]),GeneSIMout[[i]]$BBSeqP[EBSeqNames],GeneSIMout[[i]]$EBSeqPP)
-       # Bayseq and EBseq are PP. Others are p value 
-    tmpFD=TopCts(tmpMatrix, c(0,0,1,0,1), GeneSIMout[[i]]$TrueDE[GeneSIMout[[i]]$TrueDE%in%EBSeqNames], MinNumGene)
-    # Get percentage for FP, TP, TN, FN!
-       tmpFD.p=tmpFD/TotalNum
-       # TP = Find - FD
-       tmpTP.p=(c(1:MinNumGene)-tmpFD)/TotalNum
-       # FN = TrueDE - TP
-       tmpFN.p=NumDE/TotalNum - tmpTP.p
-       # TN = TrueEE - FD
-       tmpTN.p=(TotalNum-NumDE)/TotalNum - tmpFD.p
-       
-       
-       tmpFDR=tmpFD.p/(tmpFD.p+tmpTP.p)
-       tmpFPR=tmpFD.p/(tmpFD.p+tmpTN.p)
-       tmpTPR=tmpTP.p/(tmpFN.p+tmpTP.p)
-       GeneMergeFDR=GeneMergeFDR+tmpFDR
-       GeneMergeTPR=GeneMergeTPR+tmpTPR
-       GeneMergeFPR=GeneMergeFPR+tmpFPR
-
-    GeneMergeFD.p=GeneMergeFD.p+tmpFD.p
-       GeneMergeTP.p=GeneMergeTP.p+tmpTP.p
-       GeneMergeFN.p=GeneMergeFN.p+tmpFN.p
-       GeneMergeTN.p=GeneMergeTN.p+tmpTN.p
-
-       GeneMergeFD=GeneMergeFD+tmpFD
- }   
-  GeneMergeFD=GeneMergeFD/Num
-  GeneMergeFD.p=GeneMergeFD.p/Num
-  GeneMergeTP.p=GeneMergeTP.p/Num
-  GeneMergeFN.p=GeneMergeFN.p/Num
-  GeneMergeTN.p=GeneMergeTN.p/Num
-
-  GeneMergeFDR=GeneMergeFDR/Num
-  GeneMergeTPR=GeneMergeTPR/Num
-  GeneMergeFPR=GeneMergeFPR/Num
-
-
-PlotTopName=paste(paste(Path,paste("Top","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample, sep="_"),sep=""),".pdf",sep="")
-
-TrueDELength=length(GeneSIMout[[i]]$TrueDE[GeneSIMout[[i]]$TrueDE%in%EBSeqNames])
-pdf(PlotTopName)
-  PlotTopCts(TrueDELength,GeneMergeFD[1:TrueDELength,],c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-
-PlotFDName=paste(paste(Path,paste("FDTP","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFDName)
-  PlotFDTP(MinNumGene,GeneMergeFDR, GeneMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-PlotFPName=paste(paste(Path,paste("FPRTP","Gene","DVD",GeneMergeDVD[1], GeneMergeDVD[2],"Phi",GeneMergePhi[1], GeneMergePhi[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFPName)
-  PlotFPTP(MinNumGene,GeneMergeFPR, GeneMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-  dev.off()
-
-
-out=list(GeneMergeTable=GeneMergeTable, GeneMergeDVD=GeneMergeDVD, GeneMergePhi=GeneMergePhi, GeneMergeFD=GeneMergeFD)
-
-
-}
-
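The per-cutoff accumulation in MergeGene follows the confusion-matrix identities written in its comments: among the top k calls, TP = k - FD, FN = (# true DE) - TP and TN = (# true EE) - FD, from which FDR, TPR and FPR follow. A worked example with made-up counts:

TotalNum = 1000; NumDE = 100   # assumed totals
k  = 50                        # number of top-ranked calls examined
FD = 5                         # false discoveries among those calls
TP = k - FD                    # 45
FN = NumDE - TP                # 55
TN = (TotalNum - NumDE) - FD   # 895
c(FDR = FD/(FD+TP), TPR = TP/(TP+FN), FPR = FD/(FD+TN))  # 0.10, 0.45, ~0.0056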
diff --git a/EBSeq/R/MergeIso.R b/EBSeq/R/MergeIso.R
deleted file mode 100644 (file)
index 50a0784..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-MergeIso <-
-function(IsoSIMout, Num, Path="./"){
-NumSample=ncol(do.call(rbind, IsoSIMout[[1]]$generateData))
-
-NumIso=rep(0,Num)
-for (i in 1:Num)NumIso[i]=nrow(do.call(rbind, IsoSIMout[[i]]$generateData))
-
-MinNumIso=min(NumIso)
-AproxNumDE=length(unlist(IsoSIMout[[1]]$TrueDE))
-       
-IsoMergeTable=matrix(rep(0,60),nrow=10)
-       for(i in 1:Num)IsoMergeTable=IsoMergeTable+IsoSIMout[[i]][[1]]
-       IsoMergeTable=IsoMergeTable/Num
-       IsoMergeTable=round(IsoMergeTable,2)
-                 
-       IsoMergeDVD=rep(0,2)
-         for(i in 1:Num)IsoMergeDVD=IsoMergeDVD+IsoSIMout[[i]][[3]]
-                 IsoMergeDVD=round(IsoMergeDVD/Num,2) 
-                                         
-         IsoMergePhi=matrix(rep(0,18),nrow=2)
-                 for(i in 1:Num)IsoMergePhi=IsoMergePhi+IsoSIMout[[i]][[4]]
-                         IsoMergePhi=round(IsoMergePhi/Num,2)
-## Write
-TXTname=paste(paste("../IsoOutput/",paste("Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".txt",sep="")
-write.table(IsoMergeTable, file=TXTname)
-
-
-####### Note: the # of DE isoforms and the # of total isoforms may differ between runs (because of the NA filtering).
-  IsoMergeFD=matrix(rep(0,5*MinNumIso),ncol=5)
-  IsoMergeFD.p=matrix(rep(0,5*MinNumIso),ncol=5)
-  IsoMergeTP.p=matrix(rep(0,5*MinNumIso),ncol=5)
-  IsoMergeFN.p=matrix(rep(0,5*MinNumIso),ncol=5)
-  IsoMergeTN.p=matrix(rep(0,5*MinNumIso),ncol=5)
-  IsoMergeFDR=matrix(rep(0,5*MinNumIso),ncol=5)
-  IsoMergeTPR=matrix(rep(0,5*MinNumIso),ncol=5)
-  IsoMergeFPR=matrix(rep(0,5*MinNumIso),ncol=5)
-
-  for(i in 1:Num){
-       # Make sure names in the same order
-       # Get FD number for each number of genes found
-       # columns are samples 
-    TotalNum=nrow(do.call(rbind, IsoSIMout[[i]]$generateData))
-       NumDE=length(unlist(IsoSIMout[[i]]$TrueDE))
-       EBSeqNames=names(IsoSIMout[[i]]$EBSeqPP)
-    tmpMatrix=cbind(IsoSIMout[[i]]$DESeqP[EBSeqNames],IsoSIMout[[i]]$edgeRP[EBSeqNames], exp(IsoSIMout[[i]]$BaySeqPP[EBSeqNames,2]),IsoSIMout[[i]]$BBSeqP[EBSeqNames],IsoSIMout[[i]]$EBSeqPP)
-       # Bayseq and EBseq are PP. Others are p value 
-    tmpFD=TopCts(tmpMatrix, c(0,0,1,0,1), unlist(IsoSIMout[[i]]$TrueDE)[unlist(IsoSIMout[[i]]$TrueDE)%in%EBSeqNames], MinNumIso)
-    # Get percentage for FP, TP, TN, FN!
-       tmpFD.p=tmpFD/TotalNum
-       # TP = Find - FD
-       tmpTP.p=(outer(c(1:MinNumIso),rep(1,5))-tmpFD)/TotalNum
-       # FN = TrueDE - TP
-       tmpFN.p=NumDE/TotalNum - tmpTP.p
-       # TN = TrueEE - FD
-       tmpTN.p=(TotalNum-NumDE)/TotalNum - tmpFD.p
-       
-       tmpFDR=tmpFD.p/(tmpFD.p+tmpTP.p)
-       tmpFPR=tmpFD.p/(tmpFD.p+tmpTN.p)
-       tmpTPR=tmpTP.p/(tmpFN.p+tmpTP.p)
-       IsoMergeFDR=IsoMergeFDR+tmpFDR
-       IsoMergeTPR=IsoMergeTPR+tmpTPR
-       IsoMergeFPR=IsoMergeFPR+tmpFPR
-
-    IsoMergeFD.p=IsoMergeFD.p+tmpFD.p
-       IsoMergeTP.p=IsoMergeTP.p+tmpTP.p
-       IsoMergeFN.p=IsoMergeFN.p+tmpFN.p
-       IsoMergeTN.p=IsoMergeTN.p+tmpTN.p
-
-       IsoMergeFD=IsoMergeFD+tmpFD
- }   
-  IsoMergeFD=IsoMergeFD/Num
-  IsoMergeFD.p=IsoMergeFD.p/Num
-  IsoMergeTP.p=IsoMergeTP.p/Num
-  IsoMergeFN.p=IsoMergeFN.p/Num
-  IsoMergeTN.p=IsoMergeTN.p/Num
-  IsoMergeFDR=IsoMergeFDR/Num
-  IsoMergeTPR=IsoMergeTPR/Num
-  IsoMergeFPR=IsoMergeFPR/Num
-
-PlotTopName=paste(paste(Path,paste("Top","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample, sep="_"),sep=""),".pdf",sep="")
-
-TrueDELength=length(unlist(IsoSIMout[[i]]$TrueDE)[unlist(IsoSIMout[[i]]$TrueDE)%in%EBSeqNames])
-pdf(PlotTopName)
-  PlotTopCts(TrueDELength,IsoMergeFD[1:TrueDELength,],c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-
-PlotFDName=paste(paste(Path,paste("FDTP","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFDName)
-  PlotFDTP(MinNumIso,IsoMergeFDR, IsoMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-dev.off()
-
-PlotFPName=paste(paste(Path,paste("FPRTP","Iso","DVD",IsoMergeDVD[1], IsoMergeDVD[2],"Sample",NumSample,sep="_"),sep=""),".pdf",sep="")
-pdf(PlotFPName)
-  PlotFPTP(MinNumIso,IsoMergeFPR, IsoMergeTPR, c("DESeq","edgeR","BaySeq","BBSeq","EBSeq"))
-  dev.off()
-
-
-out=list(IsoMergeTable=IsoMergeTable, IsoMergeDVD=IsoMergeDVD, IsoMergePhi=IsoMergePhi, IsoMergeFD=IsoMergeFD)
-
-
-}
-
diff --git a/EBSeq/R/PlotFDTP.R b/EBSeq/R/PlotFDTP.R
deleted file mode 100644 (file)
index ce029a3..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-PlotFDTP <-
-function(TopNum, FDR, TPR,names)
-{
-  
-  matplot(FDR, TPR, xlim=c(0,.5), ylim=c(0,1) ,type="l",lwd=2,xlab="FDR", ylab="TPR")
-    legend("bottomright",col=1:TopNum, lwd=2, lty=1:TopNum, names)
-
-
-}
-
diff --git a/EBSeq/R/PlotFPTP.R b/EBSeq/R/PlotFPTP.R
deleted file mode 100644 (file)
index ed8fdb3..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-PlotFPTP <-
-function(TopNum, FPR, TPR,names)
-{
-        
-         matplot(FPR, TPR,xlim=c(0,.1), ylim=c(0,1) ,type="l",lwd=2, xlab="FPR", ylab="TPR")
-             legend("bottomright",col=1:length(names),lwd=2, lty=1:length(names), names) # one colour/linetype per method
-
-
-}
-
diff --git a/EBSeq/R/PlotPattern.R b/EBSeq/R/PlotPattern.R
deleted file mode 100644 (file)
index 61f93af..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-PlotPattern<-function(Patterns){
-       par(oma=c(3,3,3,3))
-       PatternCol=rainbow(ncol(Patterns))
-       heatmap(Patterns,col=PatternCol,Colv=NA,Rowv=NA,scale="none")
-
-}
-
diff --git a/EBSeq/R/PlotTopCts.R b/EBSeq/R/PlotTopCts.R
deleted file mode 100644 (file)
index cb502d0..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-PlotTopCts <-
-function(TopNum, FD, names)
-{
-    matplot(c(1:TopNum) , FD,type="l",xlab="Top DE selected", lwd=2, log="y", ylab="FD")
-    legend("topleft",col=1:TopNum, lwd=2, lty=1:TopNum, names)
-
-}
-
diff --git a/EBSeq/R/PolyFitPlot.R b/EBSeq/R/PolyFitPlot.R
deleted file mode 100644 (file)
index 59fd29c..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-PolyFitPlot <-
-function(X , Y , nterms , xname="Estimated Mean", yname="Estimated Var", pdfname="", xlim=c(-1,5), ylim=c(-1,7), ChangeXY=F,col="red"){
-       
-       b=rep(NA,nterms)
-       logX=matrix(rep(X, nterms),ncol=nterms, byrow=T)
-       for (i in 1:nterms)
-               logX[,i]=(log10(X))^i
-       colnames(logX)=paste("logmu^",c(1:nterms))
-       rownames(logX)=names(X)
-       NotUse=c(names(X)[X==0],names(Y)[Y==0],names(X)[rowMeans(logX)==-Inf],names(X)[rowMeans(logX)==Inf])
-       Use=names(X[!names(X)%in%NotUse])
-       Lm=lm(log10(Y[Use])~logX[Use,1:nterms])
-       b=summary(Lm)$coefficients[2:(nterms+1),1]
-       d=summary(Lm)$coefficients[1,1]
-       bvec=matrix(rep(b,length(X)),ncol=nterms,byrow=T)
-       fit=rowSums(logX*bvec)+d
-       main2=NULL
-       if (ChangeXY==T){
-               X.plot=log10(Y)
-               Y.plot=log10(X)
-               fit.X.plot=fit
-               fit.Y.plot=log10(X)
-       }
-       else{
-        X.plot=log10(X)
-        Y.plot=log10(Y)
-           fit.X.plot=log10(X)
-               fit.Y.plot=fit
-                                  }
-
-       for (i in 1:nterms)
-               main2=paste(main2,round(b[i],2),"*log(",xname,")^",i,"+")
-       main=pdfname
-       
-       smoothScatter(X.plot, Y.plot ,main=main,xlim=xlim,ylim=ylim,xlab=xname,ylab=yname,axes=F)
-       axis(1,at=seq(xlim[1],xlim[2],by=1), 10^seq(xlim[1],xlim[2],by=1))
-       axis(2,at=seq(ylim[1],ylim[2],by=2), 10^seq(ylim[1],ylim[2],by=2))
-       Sortit=order(fit.X.plot)
-       lines(fit.X.plot[Sortit],fit.Y.plot[Sortit],col=col,lwd=3)
-       output=list(b=b,d=d,lm=Lm,fit=fit,sort=Sortit)
-       names(output$b)=paste(xname,"^",c(1:length(output$b)))
-       output
-}
-
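PolyFitPlot above fits log10(variance) as a polynomial in log10(mean), building the design matrix by hand. A minimal sketch of the same kind of fit written with poly(); the mean/variance pairs below are simulated for illustration only and are not package data:

    set.seed(7)
    Mean <- rexp(2000, rate = 1/100)
    Var  <- Mean * (1 + Mean * 0.05) * exp(rnorm(2000, sd = .3))   # toy mean-variance pairs
    fit  <- lm(log10(Var) ~ poly(log10(Mean), 5, raw = TRUE))      # 5th-degree fit, as with nterms=5
    smoothScatter(log10(Mean), log10(Var))
    o <- order(Mean)
    lines(log10(Mean)[o], fitted(fit)[o], col = "red", lwd = 3)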
diff --git a/EBSeq/R/PoolMatrix.R b/EBSeq/R/PoolMatrix.R
deleted file mode 100644 (file)
index 4c80785..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-PoolMatrix <-
-function(Data,reads,type)
-{
-poolnames=names(Data)
-poolM=NULL
-for (po in 1:length(Data))
-       poolM=cbind(poolM,Data[[po]][,1])
-rownames(poolM)=rownames(Data[[1]])
-colnames(poolM)=poolnames
-
-#poolValue=poolM*reads
-poolValue=poolM
-for (col in 1:ncol(poolM))
-       poolValue[,col]=poolM[,col]*reads[col]
-poolValue=round(poolValue)
-if (type=="G")
-       {
-               poolM=cbind(Data[[1]][,2],poolM)
-               poolValue=cbind(Data[[1]][,2],poolValue)
-               colnames(poolM)=c("Groups",poolnames)
-               colnames(poolValue)=c("Groups",poolnames)
-       }
-poolOutput=list(poolM=poolM,poolValue=poolValue)
-}
-
diff --git a/EBSeq/R/PostFC.R b/EBSeq/R/PostFC.R
deleted file mode 100644 (file)
index 1ef2669..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-PostFC=function(EBoutput) {
-       GeneRealMeanC1=unlist(EBoutput$C1Mean)
-       GeneRealMeanC2=unlist(EBoutput$C2Mean)
-       GeneRealMean=(GeneRealMeanC1+GeneRealMeanC2)/2
-
-       GeneRealFC=GeneRealMeanC1/GeneRealMeanC2
-
-       GeneR=unlist(EBoutput$RList)
-       GeneR[GeneR<=0 | is.na(GeneR)]=GeneRealMean[GeneR<=0 | is.na(GeneR)]*.99/.01
-
-       GeneAlpha=EBoutput[[1]][nrow(EBoutput[[1]]),]
-       GeneBeta=unlist(sapply(1:length(EBoutput$C1Mean),function(i)rep(EBoutput[[2]][nrow(EBoutput[[1]]),i],length(EBoutput$C1Mean[[i]]))))
-       GeneBeta=as.vector(GeneBeta)
-       # Post alpha = alpha + r_C1 * 3
-       # Post beta = beta + Mean_C1 * 3
-       # Post Mean of q in C1 P_q_C1= P_a/ (P_a + P_b)
-       # Post FC = (1-p_q_c1)/p_q_c1 /( (1-p_q_c2)/p_q_c2)
-
-       GenePostAlpha=GeneAlpha+3*GeneR
-       GenePostBetaC1=GeneBeta+3*GeneRealMeanC1
-       GenePostBetaC2=GeneBeta+3*GeneRealMeanC2
-       GenePostQC1=GenePostAlpha/(GenePostAlpha+GenePostBetaC1)
-       GenePostQC2=GenePostAlpha/(GenePostAlpha+GenePostBetaC2)
-
-       GenePostFC=((1-GenePostQC1)/(1-GenePostQC2))*(GenePostQC2/GenePostQC1)
-       Out=list(GenePostFC=GenePostFC, GeneRealFC=GeneRealFC)
-
-}
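The comments inside PostFC above give the posterior fold-change recipe without a worked case. A minimal sketch for one hypothetical gene, under the same assumptions (prior alpha/beta and the fitted r taken from an EBTest-style output; the hard-coded factor 3 is presumably the number of samples per condition):

    alpha <- 0.7; beta <- 1.5        # fitted prior parameters (made-up values)
    r <- 20                          # fitted NB size parameter for this gene
    meanC1 <- 300; meanC2 <- 100     # within-condition means
    n <- 3                           # matches the hard-coded 3 in PostFC
    post.alpha  <- alpha + n * r
    post.betaC1 <- beta + n * meanC1
    post.betaC2 <- beta + n * meanC2
    qC1 <- post.alpha / (post.alpha + post.betaC1)
    qC2 <- post.alpha / (post.alpha + post.betaC2)
    postFC <- ((1 - qC1) / qC1) / ((1 - qC2) / qC2)   # posterior fold change C1 vs C2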
diff --git a/EBSeq/R/QQP.R b/EBSeq/R/QQP.R
deleted file mode 100644 (file)
index 686aa92..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-QQP <-
-function(QList,AlphaResult,BetaResult,name,AList="F",GroupName){
-       
-                   for (i in 1:length(BetaResult)){
-                               tmpSize=length(QList[[i]][QList[[i]]<1 & !is.na(QList[[i]])])
-                       if (AList=="F") rdpts=rbeta(tmpSize,AlphaResult,BetaResult[i])
-                               else rdpts=rbeta(tmpSize,AlphaResult[i],BetaResult[i])
-       qqplot(QList[[i]][QList[[i]]<1], rdpts,xlab="estimated q's", ylab="simulated q's from fitted beta",main=paste(name,GroupName[i],sep=" "),xlim=c(0,1),ylim=c(0,1))
-       fit=lm(sort(rdpts)~sort(QList[[i]][QList[[i]]<1  & !is.na(QList[[i]])]))
-       abline(fit,col="red")
-       
-                       }
-}
-
diff --git a/EBSeq/R/QuantileNorm.R b/EBSeq/R/QuantileNorm.R
deleted file mode 100644 (file)
index a4e49bd..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-
-QuantileNorm=function(Data, Quantile){
-       #SortData=apply(Data, 2, sort)
-       QtilePt=apply(Data, 2, function(i)quantile(i, Quantile))
-       Size= QtilePt * prod(QtilePt) ^ (-1/ncol(Data))
-       Size
-       }
-
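QuantileNorm returns one size factor per sample: the chosen quantile of each column, rescaled by the geometric mean of those quantiles so that the factors multiply to one. A small hypothetical usage sketch (upper-quartile normalization on a toy matrix):

    set.seed(1)
    Data  <- matrix(rpois(5000, lambda = 50), ncol = 5)
    Sizes <- QuantileNorm(Data, .75)   # upper-quartile size factors
    round(Sizes, 3)                    # prod(Sizes) == 1 by construction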
diff --git a/EBSeq/R/RankNorm.R b/EBSeq/R/RankNorm.R
deleted file mode 100644 (file)
index e3b0177..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-
-RankNorm=function(Data){
-       RankData=apply(Data, 2, rank)
-       SortData=apply(Data, 2, sort)
-       SortMean=rowMeans(SortData)
-       SortMean[SortMean==0]=1
-       NormMatrix=sapply(1:ncol(Data), function(i)Data[,i]/(SortMean[RankData[,i]]))
-       NormMatrix[NormMatrix==0]=1
-       NormMatrix
-       }
-
diff --git a/EBSeq/R/TPFDRplot.R b/EBSeq/R/TPFDRplot.R
deleted file mode 100644 (file)
index 9304330..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-TPFDRplot <-
-function(DESeqP, EBZ, TrueDE, main, FDR=NULL){
-       Seq=seq(0.001,0.5,by=0.001)
-       DETPR=rep(0,length(Seq))
-       EBTPR=rep(0,length(Seq))
-       DEFDR=rep(0,length(Seq))
-       EBFDR=rep(0,length(Seq))
-       DETPNum=rep(0,length(Seq))
-    EBTPNum=rep(0,length(Seq))
-    DEFDNum=rep(0,length(Seq))
-    EBFDNum=rep(0,length(Seq))
-       for (i in 1:length(Seq)){
-               DESeqOnes=names(DESeqP)[DESeqP<=Seq[i]]
-               if (length(FDR)==0) EBOnes=names(EBZ)[EBZ>=crit.fun(1-EBZ, Seq[i])]
-               else if (FDR=="H") EBOnes=names(EBZ)[EBZ>=(1-Seq[i])]
-                       else EBOnes=names(EBZ)[EBZ>=FDR[i]]
-
-               DETPNum[i]=sum(DESeqOnes%in%TrueDE)
-               EBTPNum[i]=sum(EBOnes%in%TrueDE)
-               DEFDNum[i]=sum(!DESeqOnes%in%TrueDE)
-               EBFDNum[i]=sum(!EBOnes%in%TrueDE)
-               
-               DETPR[i]=DETPNum[i]/length(TrueDE)
-               EBTPR[i]=EBTPNum[i]/length(TrueDE)
-               DEFDR[i]=DEFDNum[i]/length(TrueDE)
-               EBFDR[i]=EBFDNum[i]/length(TrueDE)
-       }
-       plot(Seq,DETPR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "TPR"),xlab="controlled FDR level", ylab="TPR",lwd=2)
-       lines(Seq,EBTPR,col="blue",lwd=2)
-       legend("bottomright",lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
-
-       plot(Seq,DEFDR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "FDR"),xlab="controlled FDR level", ylab="FDR",lwd=2)
-       lines(Seq,EBFDR,col="blue",lwd=2)
-       legend("topleft", lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
-
-
-       output=cbind( DETPR,EBTPR, DEFDR,EBFDR,DETPNum,EBTPNum,DEFDNum,EBFDNum)
-}
-
diff --git a/EBSeq/R/TopCts.R b/EBSeq/R/TopCts.R
deleted file mode 100644 (file)
index 137977c..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-TopCts <-
-function(pvalue, PP=NULL, TrueNames, TopNum){
-       NumOfMethods=ncol(pvalue)
-       puse=pvalue
-       if(1%in%PP)puse[,PP==1]=1-pvalue[,PP==1]
-       #puse.list=data.frame(puse)
-       FD=matrix(rep(0,NumOfMethods*TopNum),ncol=NumOfMethods)
-#      Rank=apply(puse,2,rank)
-#      for(i in 1:TopNum)
-#              FD[i,]=sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]%in%TrueNames))        
-#      FD=sapply(1:TopNum, function(i)sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]%in%TrueNames)))
-       for (s in 1:NumOfMethods){
-               tmp=puse[,s]
-               names(tmp)=rownames(puse)
-               sorttmp=sort(tmp)
-               FD[1, s]=as.numeric(!names(sorttmp)[1]%in%TrueNames) # count the top-ranked gene as well
-               for( c in 2:TopNum)
-                       FD[c, s]=FD[(c-1),s]+as.numeric(!names(sorttmp)[c]%in%TrueNames)
-       }
-       FD
-       #matplot(TopNum,FD,type="l",ylim=c(0,1),xlab="Top DE selected", ylab="FDR")
-       #legend("rightbottom",col=1:TopNum, lty=1:TopNum, names)
-       }
-
diff --git a/EBSeq/R/beta.mom.R b/EBSeq/R/beta.mom.R
deleted file mode 100644 (file)
index 269996d..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-beta.mom <-
-function(qs.in){
-       xbar<-mean(qs.in)
-       s2<-var(qs.in)
-       term<-(xbar*(1-xbar))/s2
-       alpha.hat<-xbar*(term-1)
-       beta.hat<-(1-xbar)*(term-1)
-       return(c(alpha.hat,beta.hat))
-}
-
diff --git a/EBSeq/R/crit_fun.R b/EBSeq/R/crit_fun.R
deleted file mode 100644 (file)
index e68ed4b..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-crit_fun<-function (PPEE, thre) 
-{
-    y <- cumsum(sort(PPEE))/(1:length(PPEE))
-    mm <- y < thre
-    index <- sum(mm)
-    if (index > 0) {
-        out <- 1 - sort(PPEE)[index]
-           }           
-    if (index == 0) {
-                       out <- 1
-                                   }
-    names(out) <- NULL
-    return(out)
-}
-
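crit_fun takes the posterior probabilities of being EE (PPEE) and a target FDR, and returns the PP(DE) cutoff at which the average PPEE of the called set stays below that target (soft thresholding). A small hypothetical usage sketch:

    PPDE    <- c(g1 = .99, g2 = .97, g3 = .90, g4 = .60, g5 = .20)   # made-up posteriors of DE
    cutoff  <- crit_fun(1 - PPDE, 0.05)    # PP(DE) threshold targeting 5% FDR; here 0.9
    DEfound <- names(PPDE)[PPDE >= cutoff]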
diff --git a/EBSeq/R/f0.R b/EBSeq/R/f0.R
deleted file mode 100644 (file)
index a3ec550..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-f0 <-
-function(Input, AlphaIn, BetaIn, EmpiricalR, NumOfGroups, log)
-{      
-                
-               BetaVect=do.call(c,sapply(1:length(BetaIn),function(i)rep(BetaIn[i],NumOfGroups[i]),simplify=F))
-               SampleNum=dim(Input)[2]
-               #Product part
-               ChooseParam1=round(Input+EmpiricalR-1)
-               roundInput=round(Input)
-               EachChoose=sapply(1:SampleNum, function(i)lchoose(ChooseParam1[,i], roundInput[,i]))
-               
-               SumEachIso=rowSums(Input)
-               param1=AlphaIn + rowSums(EmpiricalR)
-               param2=BetaVect + SumEachIso
-               LogConst=rowSums(EachChoose)+lbeta(param1, param2)-lbeta(AlphaIn, BetaVect)
-
-
-               if (log==F) FinalResult=exp(LogConst)
-               if (log==T) FinalResult=LogConst
-    FinalResult
-}
-
diff --git a/EBSeq/R/f1.R b/EBSeq/R/f1.R
deleted file mode 100644 (file)
index 1f160a0..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-f1 <-
-function(Input1, Input2, AlphaIn, BetaIn, EmpiricalRSP1,EmpiricalRSP2,NumOfGroup, log){
-       F0.1=f0(Input1, AlphaIn, BetaIn, EmpiricalRSP1, NumOfGroup, log=log)
-       F0.2=f0(Input2, AlphaIn, BetaIn, EmpiricalRSP2, NumOfGroup, log=log)
-       
-       if (log==F) Result=F0.1*F0.2
-       if (log==T) Result=F0.1+F0.2
-       Result
-}
-
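f0 above is the predictive density of a transcript's counts under equivalent expression (a single q shared by both conditions), while f1 treats the two conditions independently. In the EM iterations these are combined with the current mixture proportion to give the posterior probability of DE. A minimal sketch of that step, assuming log densities and a scalar prior p (the function name is illustrative, not part of the package):

    PostPDE <- function(logf0, logf1, p) {
      # posterior probability of DE from log predictive densities and DE prior p
      num <- p * exp(logf1)
      num / (num + (1 - p) * exp(logf0))
    }
    PostPDE(logf0 = -10.2, logf1 = -8.1, p = 0.1)   # toy values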
diff --git a/EBSeq/blockmodeling_0.1.8.tar.gz b/EBSeq/blockmodeling_0.1.8.tar.gz
new file mode 100644 (file)
index 0000000..c88521e
Binary files /dev/null and b/EBSeq/blockmodeling_0.1.8.tar.gz differ
diff --git a/EBSeq/calcClusteringInfo.cpp b/EBSeq/calcClusteringInfo.cpp
new file mode 100644 (file)
index 0000000..2103f61
--- /dev/null
@@ -0,0 +1,149 @@
+#include<cstdio>
+#include<cctype>
+#include<cstring>
+#include<cstdlib>
+#include<cassert>
+#include<fstream>
+#include<iomanip>
+#include<string>
+#include<vector>
+#include<algorithm>
+using namespace std;
+
+typedef unsigned int INTEGER;
+
+const int STRLEN = 1005;
+
+INTEGER M;
+int k; // k-mer size
+vector<string> names;
+vector<string> seqs;
+vector<INTEGER> effL;
+
+// tid starts from 1
+struct ReadType {
+  INTEGER tid, pos;
+
+  ReadType(INTEGER tid, INTEGER pos) {
+    this->tid = tid;
+    this->pos = pos;
+  }
+
+  bool operator< (const ReadType& o) const {
+    string& a = seqs[tid];
+    string& b = seqs[o.tid];
+    for (int i = 0; i < k; i++) {
+      if (a[pos + i] != b[o.pos + i]) {
+       return a[pos + i] < b[o.pos + i];
+      }
+    }
+    return tid < o.tid;
+  }
+
+  bool seq_equal(const ReadType& o) const {
+    string& a = seqs[tid];
+    string& b = seqs[o.tid];
+    for (int i = 0; i < k; i++) 
+      if (a[pos + i] != b[o.pos + i]) return false;
+    return true;
+  }
+};
+
+vector<ReadType> cands;
+vector<double> clusteringInfo; 
+
+string convert(const string& rawseq) {
+  int size = (int)rawseq.size();
+  string seq = rawseq;
+  for (int i = 0; i < size; i++) {
+    seq[i] = toupper(rawseq[i]);
+    if (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G' && seq[i] != 'T') seq[i] = 'N';
+  }
+  return seq;
+}
+
+void loadRef(char* inpF) {
+  ifstream fin(inpF);
+  string tag, line, rawseq;
+  void *pt;
+
+  assert(fin.is_open());
+
+  names.clear(); names.push_back("");
+  seqs.clear(); seqs.push_back("");
+  
+  pt = getline(fin, line);
+  while (pt != 0 && line[0] == '>') {
+    tag = line.substr(1);
+    rawseq = "";
+    while((pt = getline(fin, line)) && line[0] != '>') {
+      rawseq += line;
+    }
+    if (rawseq.size() <= 0) {
+      printf("Warning: Fasta entry %s has an empty sequence! It is omitted!\n", tag.c_str());
+      continue;
+    }
+    names.push_back(tag);
+    seqs.push_back(convert(rawseq));
+  }
+
+  fin.close();
+
+  M = names.size() - 1;
+
+  printf("The reference is loaded.\n");
+}
+
+int main(int argc, char* argv[]) {
+  if (argc != 4) {
+    printf("Usage: rsem-for-ebseq-calculate-clustering-info k input_reference_fasta_file output_file\n");
+    exit(-1);
+  }
+
+  k = atoi(argv[1]);
+  loadRef(argv[2]);
+
+  cands.clear();
+  effL.assign(M + 1, 0);
+  for (INTEGER i = 1; i <= M; i++) {
+    effL[i] = seqs[i].length() - k + 1;
+    if (effL[i] <= 0) effL[i] = 0; // effL should be non-negative
+    for (INTEGER j = 0; j < effL[i]; j++) 
+      cands.push_back(ReadType(i, j));
+  }
+  printf("All possbile %d mers are generated.\n", k);
+
+  sort(cands.begin(), cands.end());
+  printf("All %d mers are sorted.\n", k);
+  size_t p = 0;
+  clusteringInfo.assign(M + 1, 0.0);
+
+  for (size_t i = 1; i <= cands.size(); i++)
+    if (i == cands.size() || !cands[p].seq_equal(cands[i])) {
+      size_t denominator = i - p;
+      size_t q = p; 
+      for (size_t j = p + 1; j <= i; j++)
+       if (j == i || cands[q].tid != cands[j].tid) {
+         size_t numerator = j - q;
+         //double prob = numerator * 1.0 / denominator;
+         //clusteringInfo[cands[q].tid] += (double)numerator * prob * (1.0 - prob);
+         if (numerator < denominator) clusteringInfo[cands[q].tid] += numerator;
+         q = j;
+       }
+      p = i;
+    }
+
+  for (INTEGER i = 1; i <= M; i++) 
+    if (effL[i] == 0) clusteringInfo[i] = -1.0;
+    else clusteringInfo[i] /= effL[i];
+
+  printf("Clustering information is calculated.\n");
+
+
+  ofstream fout(argv[3]);
+  for (INTEGER i = 1; i <= M; i++) fout<<names[i]<<"\t"<<setprecision(6)<<clusteringInfo[i]<<endl;
+  fout.close();
+
+  return 0;
+}
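The new calcClusteringInfo.cpp reports, for every reference sequence, the fraction of its effL = length - k + 1 k-mer positions whose k-mer also occurs in at least one other sequence (with -1 for sequences shorter than k). A toy R re-implementation of the same statistic, assumed here purely for illustration (the shipped tool is the compiled rsem-for-ebseq-calculate-clustering-info binary):

    clustering_info <- function(seqs, k) {
      kmers <- lapply(seqs, function(s) {
        n <- nchar(s) - k + 1
        if (n <= 0) return(character(0))
        substring(s, 1:n, k:(n + k - 1))          # all k-mers of s
      })
      eff_len <- sapply(kmers, length)
      shared  <- sapply(seq_along(seqs), function(i)
        sum(kmers[[i]] %in% unlist(kmers[-i])))   # k-mers also seen in another sequence
      ifelse(eff_len == 0, -1, shared / eff_len)
    }
    clustering_info(c(t1 = "ACGTACGT", t2 = "CGTACG", t3 = "TTTT"), k = 4)   # 0.6, 1, 0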
diff --git a/EBSeq/data/GeneEBresultGouldBart2.rda b/EBSeq/data/GeneEBresultGouldBart2.rda
deleted file mode 100644 (file)
index ab7963c..0000000
Binary files a/EBSeq/data/GeneEBresultGouldBart2.rda and /dev/null differ
diff --git a/EBSeq/data/GeneMat.rda b/EBSeq/data/GeneMat.rda
deleted file mode 100644 (file)
index 1a974cd..0000000
Binary files a/EBSeq/data/GeneMat.rda and /dev/null differ
diff --git a/EBSeq/data/IsoEBresultGouldBart2.rda b/EBSeq/data/IsoEBresultGouldBart2.rda
deleted file mode 100644 (file)
index eb136d9..0000000
Binary files a/EBSeq/data/IsoEBresultGouldBart2.rda and /dev/null differ
diff --git a/EBSeq/data/IsoList.rda b/EBSeq/data/IsoList.rda
deleted file mode 100644 (file)
index 29fbedb..0000000
Binary files a/EBSeq/data/IsoList.rda and /dev/null differ
diff --git a/EBSeq/data/MultiGeneMat.rda b/EBSeq/data/MultiGeneMat.rda
deleted file mode 100644 (file)
index b715267..0000000
Binary files a/EBSeq/data/MultiGeneMat.rda and /dev/null differ
diff --git a/EBSeq/data/datalist b/EBSeq/data/datalist
deleted file mode 100644 (file)
index 70188ff..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-GeneEBresultGouldBart2
-GeneMat
-IsoEBresultGouldBart2
-IsoList
-MultiGeneMat
diff --git a/EBSeq/demo/EBSeq.R b/EBSeq/demo/EBSeq.R
deleted file mode 100644 (file)
index 4ffc4a4..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-library(EBSeq)
-set.seed(13)
-
-# Section 3.1
-
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL,
-  Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000,
-  DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.1, Phi.qt2=.9,
-  Meanconstant=NULL, OnlyData=T)
-GeneData=GeneGenerate$data
-GeneTrueDENames=GeneGenerate$TrueDE
-str(GeneData)
-str(GeneTrueDENames)
-
-Sizes=MedianNorm(GeneData)
-
-EBres=EBTest(Data=GeneData, 
-  Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=Sizes, maxround=5)
-
-PP=GetPP(EBres)
-str(PP)
-DEfound=names(PP)[which(PP>=.95)]
-str(DEfound)
-sum(DEfound%in%GeneTrueDENames)
-
-QQP(QList=EBres$QList1, AlphaResult=EBres[[1]][5,1], 
-  BetaResult=EBres[[2]][5,1], name="Gene Simulation", AList="F", GroupName=NULL)
-DenNHist(QList=EBres$QList1, Alpha=EBres[[1]][5,1], Beta=EBres[[2]][5,1], 
-  name="Gene Simulation", AList="F", GroupName=NULL)
-
-# Section 3.2
-
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, 
-  Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, 
-  NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, 
-  Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-str(IsoGenerate)
-
-IsoMat=do.call(rbind,IsoGenerate$data)
-str(IsoMat)
-
-IsoSizes=MedianNorm(IsoMat)
-
-IsoNames=rownames(IsoMat)
-str(IsoNames)
-GeneNames=paste("Gene",c(1:3000),sep="_")
-IsosGeneNames=c(GeneNames[1:1000],rep(GeneNames[1001:2000],each=2),
-  rep(GeneNames[2001:3000],each=3))
-NgList=GetNg(IsoNames, IsosGeneNames)
-IsoNgTrun=NgList$IsoformNgTrun
-IsoNgTrun[c(1:3,1001:1003,3001:3003)]
-
-IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, 
-  Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=IsoSizes, maxround=5)
-IsoPP=GetPP(IsoEBres)
-str(IsoPP)
-IsoDE=IsoPP[which(IsoPP>=.95)]
-str(IsoDE)
-sum(names(IsoDE)%in%IsoGenerate$TrueDE)
-
-par(mfrow=c(2,2))
-PolyFitValue=vector("list",3)
-for(i in 1:3)
-  PolyFitValue[[i]]=PolyFitPlot(IsoEBres$C1Mean[[i]], 
-    IsoEBres$C1EstVar[[i]],5)
-
-PolyAll=PolyFitPlot(unlist(IsoEBres$C1Mean), unlist(IsoEBres$C1EstVar),5)
-lines(log10(IsoEBres$C1Mean[[1]][PolyFitValue[[1]]$sort]), 
-  PolyFitValue[[1]]$fit[PolyFitValue[[1]]$sort],col="yellow")
-lines(log10(IsoEBres$C1Mean[[2]][PolyFitValue[[2]]$sort]), 
-  PolyFitValue[[2]]$fit[PolyFitValue[[2]]$sort],col="pink")
-lines(log10(IsoEBres$C1Mean[[3]][PolyFitValue[[3]]$sort]), 
-  PolyFitValue[[3]]$fit[PolyFitValue[[3]]$sort],col="green")
-legend("topleft",c("All Isoforms","Ng = 1","Ng = 2","Ng = 3"),
-  col=c("red","yellow","pink","green"),lty=1,lwd=3,box.lwd=2)
-
-par(mfrow=c(2,2))
-QQP(QList=IsoEBres$QList1, AlphaResult=IsoEBres[[1]][5,],
- BetaResult=IsoEBres[[2]][5,], 
- name="Isoforms", AList="F", GroupName=paste("Ng = ",c(1:3),sep=""))
-
-DenNHist(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], 
-  Beta=IsoEBres[[2]][5,], 
-  name="Isoforms", AList="F", GroupName=paste("Ng = ",c(1:3),sep=""))
-
-# Section 3.3
-
-Conditions=c("C1","C1","C2","C2","C3","C3")
-PosParti=GetPatterns(Conditions)
-PosParti
-
-Parti=PosParti[-3,]
-Parti
-
-MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=Parti,
-          NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
-          DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
-str(MultiData)
-
-MultiSize=MedianNorm(MultiData$data)
-MultiRes=EBMultiTest(MultiData$data,NgVector=NULL,Conditions=Conditions,
-           AllParti=Parti, sizeFactors=MultiSize, maxround=5)
-MultiPP=GetMultiPP(MultiRes)
-names(MultiPP)
-MultiPP$PP[1:10,]
-MultiPP$MAP[1:10]
-MultiPP$Patterns
-sum(MultiPP$MAP==MultiData$Patterns)
-
-# EOF
\ No newline at end of file
diff --git a/EBSeq/inst/doc/EBSeq_Vignette.pdf b/EBSeq/inst/doc/EBSeq_Vignette.pdf
deleted file mode 100644 (file)
index 30646df..0000000
Binary files a/EBSeq/inst/doc/EBSeq_Vignette.pdf and /dev/null differ
diff --git a/EBSeq/makefile b/EBSeq/makefile
new file mode 100644 (file)
index 0000000..aae42c1
--- /dev/null
@@ -0,0 +1,16 @@
+CC = g++
+PROGRAMS = blockmodeling EBSeq rsem-for-ebseq-calculate-clustering-info
+
+all : $(PROGRAMS)
+
+blockmodeling : blockmodeling_0.1.8.tar.gz
+       R CMD INSTALL -l "." blockmodeling_0.1.8.tar.gz
+
+EBSeq : blockmodeling EBSeq_1.1.3.tar.gz
+       R CMD INSTALL -l "." EBSeq_1.1.3.tar.gz
+
+rsem-for-ebseq-calculate-clustering-info : calcClusteringInfo.cpp
+       $(CC) -O3 -Wall calcClusteringInfo.cpp -o $@
+
+clean : 
+       rm -rf $(PROGRAMS)
diff --git a/EBSeq/man/CheckNg.Rd b/EBSeq/man/CheckNg.Rd
deleted file mode 100644 (file)
index 9571d03..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-\name{CheckNg}
-\alias{CheckNg}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-CheckNgStructure
-}
-\description{
-Provide the best polynomial fit of log variance and log mean in each Ng group. 
-}
-\usage{
-CheckNg(NewMean, NewVar,nterm, xlim, ylim)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{NewMean}{
-A list containing the estimated means within each Ng group (for example the C1Mean component of the EBSeq output).
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-4 plots will be returned. The first 3 show the data and the best fit line for the
-Ng=1, Ng=2 and Ng=3 groups.
-The 4th plot is the scatter plot of all the data.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-### Simulate Isoform Level Data
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.4, Phi.qt2=.6, OnlyData="Y" )
-IsoList=IsoGenerate$data
-
-# Get Vectors and Run EBSeq
-ngv=c(1,2,3,2,3,2,3,2,3)
-b3v=c(1,0,0,1,1,0,0,1,1)
-b5v=c(1,0,0,0,0,1,1,1,1)
-NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
-Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
-Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
-
-IsoData=do.call(rbind,IsoList)
-IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-
-# Plot
-CheckNg(IsoEBres$C1Mean, IsoEBres$C1EstVar,5, c(-1,5),c(-1,7))
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Ng }
diff --git a/EBSeq/man/DenNHist.Rd b/EBSeq/man/DenNHist.Rd
deleted file mode 100644 (file)
index f096153..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-\name{DenNHist}
-\alias{DenNHist}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-%%  ~~function to do ... ~~
-Density plot to compare the empirical q's and the simulated q's from the fitted beta distribution.
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-DenNHist(QList, Alpha, Beta, name, AList = "F", GroupName)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{QList}{
-The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-. Input could be a vector or a list of different groups of transcripts. The number of lists here should be the same as the length of Beta.
-
-}
-  \item{Alpha}{
-The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
-Input should be a number if AList is not defined.
-}
-  \item{Beta}{
-The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar. 
-Input could be one single number or a vector of several numbers. The length of the input should be the same as the number of lists of QList.
-
-}
-  \item{name}{
-The name of the plots
-}
-  \item{AList}{
-Whether a list of alpha's are used
-
-}
-  \item{GroupName}{
-The names of each sub plot. The length of the input should be the same as the number of lists of QList.
-
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-Plots will be generated. Each plot represents a sub-list of the QList.
-The empirical estimation of q's will be represented as blue histogram and the density of
-the fitted beta distribution will be represented as the green line.
-The main title of the plot will be "GroupName name".
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-beta.mom, DenNHistTable, QQP, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\examples{
-### Simulate Gene Level Data
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-# Run EBSeq
-EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=rep(1,10),maxround=5)
-# Plot
-DenNHist(QList=EBres$QList1, Alpha=EBres[[1]][5,1], Beta=EBres[[2]][5,1], name="Gene", AList="F", GroupName="")
-
-### Simulate Isoform Level Data
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="Y" )
-IsoList=IsoGenerate$data
-
-# Get Vectors and Run EBSeq
-ngv=c(1,2,3,2,3,2,3,2,3)
-b3v=c(1,0,0,1,1,0,0,1,1)
-b5v=c(1,0,0,0,0,1,1,1,1)
-NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
-Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
-Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
-
-IsoData=do.call(rbind,IsoList)
-IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-# Plot
-par(mfrow=c(3,3))
-DenNHist(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], Beta=IsoEBres[[2]][5,], name="Isoform", AList="F", GroupName=paste("group",c(1:9),sep=""))
-
-
-
-
-
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ beta }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/DenNHistTable.Rd b/EBSeq/man/DenNHistTable.Rd
deleted file mode 100644 (file)
index b480f98..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-\name{DenNHistTable}
-\alias{DenNHistTable}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-9 Density plots to compare the empirical q's and the simulated q's from the fitted beta distribution.
-}
-\description{
-Check the beta fit of 9 different groups on isoform level data. 
-}
-\usage{
-DenNHistTable(QList, Alpha, Beta,  AList = "F")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{QList}{
-The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-. Input should be a list of different groups of transcripts. The number of lists here should be 9.
-
-}
-
-  \item{Alpha}{
-The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar.
-Input should be a number if AList is not defined.
-}
-  \item{Beta}{
-The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar. 
-Input could be one single number or a vector of several numbers. The length of the input should be 9.
-
-}
-
-  \item{AList}{
-Whether a list of alpha's are used
-
-}
-
-  }
-\details{
-
-}
-\value{
-A plot containing 9 sub-plots will be generated.
-The empirical estimation of q's will be represented as blue histogram and the density of 
-the fitted beta distribution will be represented as the green line.
-The main title of the plot will be "GroupName name"  
-}
-\references{
-DenNHist, beta.mom, QQP, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-# Generate Data
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="Y" )
-IsoList=IsoGenerate$data
-
-# Get Ng Vector, 5End Vector and 3End Vector
-ngv=c(1,2,3,2,3,2,3,2,3)
-b3v=c(1,0,0,1,1,0,0,1,1)
-b5v=c(1,0,0,0,0,1,1,1,1)
-NgV=unlist(sapply(1:9,function(i)rep(ngv[i],dim(IsoList[[i]])[1])))
-Bias3V=unlist(sapply(1:9,function(i)rep(b3v[i],dim(IsoList[[i]])[1])))
-Bias5V=unlist(sapply(1:9,function(i)rep(b5v[i],dim(IsoList[[i]])[1])))
-
-#Run EBSeq
-IsoData=do.call(rbind,IsoList)
-IsoEBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar(Data=IsoData, NgVector=NgV, Vect5End=Bias5V, Vect3End=Bias3V, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-DenNHistTable(QList=IsoEBres$QList1, Alpha=IsoEBres[[1]][5,], Beta=IsoEBres[[2]][5,], AList="F")
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/EBMultiTest.Rd b/EBSeq/man/EBMultiTest.Rd
deleted file mode 100644 (file)
index a23b0f7..0000000
+++ /dev/null
@@ -1,130 +0,0 @@
-\name{EBMultiTest}
-\alias{EBMultiTest}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Using the EM algorithm to calculate the posterior probabilities of the patterns of interest in a multiple-condition study
-}
-\description{
-Based on the assumption of the NB-Beta Empirical Bayes model, the EM algorithm is used to get the posterior probability of each pattern of interest.
-}
-\usage{
-EBMultiTest(Data,NgVector=NULL,Conditions,AllParti=NULL, sizeFactors, maxround, tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-
-  \item{Data}{
-A data matrix containing expression values for each transcript (gene level or isoform level), in which rows are transcripts and columns are samples.
-}
-  \item{NgVector}{
-A vector contains the Ng value of each isoform. If the isoform is in a gene with 2 isoforms, Ng should be 2. Ng could be only 1, 2 or 3. If it's gene level data, Ngvector should all be 1. The vector length should be the same as the number of rows in Data.
-}
-  \item{Conditions}{
-A vector indicates the condition each sample belongs to. 
-}
-
-\item{AllParti}{
-       A matrix indicating the patterns of interest. Columns should be conditions and rows should be patterns.
-       The matrix could be obtained by the GetPatterns function. If AllParti=NULL, all possible patterns will be used.
-}
-
-  \item{sizeFactors}{
-The normalization factors. 
-They could be a vector of lane-specific numbers,
-or a matrix of lane- and transcript-specific numbers.
-}
-  \item{maxround}{
-Number of iterations. The suggested value is 5.
-}
-
-\item{tau}{
-The tau value from RSEM output. If the data has no replicates within condition, 
-EBSeq will use the CI of tau to capture the variation from mapping
-uncertainty and estimate the variance.
-       }
-\item{CI}{
-The CI of each tau from RSEM output    
-       }
-\item{CIthre}{
-The threshold of CI RSEM used.
-       }
-\item{Pool, NumBin}{
-When working without replicates, Pool=T should be set in the
-EBTest function to enable pooling.
-By defining NumBin = 1000, EBSeq will group the genes with similar means
-together into 1,000 bins.
-Under the assumption that no more than 50\% of the genes are DE in the data set,
-genes whose FCs lie in the 25\% - 75\% quantiles of all FCs are taken as the
-candidate genes.
-For each bin, the bin-wise variance estimate is the median of the
-cross-condition variance estimates of the candidate genes within that bin.
-The cross-condition variance estimates are used for the candidate genes,
-and the bin-wise variance estimate of the host bin for the non-candidate genes.
-}
-
-}
-
-\details{
-For each transcript gi within condition, the model assumes:
-X_gis|mu_gi ~ NB (r_gi0 * l_s, q_gi)
-q_gi|alpha, beta^N_g,b_gi ~ Beta (alpha, beta^N_g,b_gi)
-In which l_s is the sizeFactor of sample s.
-
-The function will test:
-H0: q_giC1 = q_giC2
-H1: q_giC1 != q_giC2
-
-
-}
-\value{
-\item{Alpha }{Fitted parameter alpha of the prior beta distribution. Rows are the values for each iteration.}
-\item{Beta }{Fitted parameter beta of the prior beta distribution. Rows are the values for each iteration.}
-\item{P, PFromZ }{ The Bayes estimator of being DE. Rows are the values for each iteration.}
-\item{Z, PoissonZ}{ The Posterior Probability of being DE for each transcript. (Maybe not in the same order of input)}
-\item{RList}{ The fitted values of r for each transcript.}
-\item{MeanList}{The mean of each transcript. (Cross conditions)}
-\item{VarList}{The variance of each transcript. (Cross conditions, using the expression values divided by its sizeFactors)}
-\item{QListi1}{The fitted q values of each transcript within condition 1.}
-\item{QListi2}{The fitted q values of each transcript within condition 2.}
-\item{C1Mean}{The mean of each transcript within Condition 1}
-\item{C2Mean}{The mean of each transcript within Condition 2}
-\item{C1EstVar}{The estimated variance of each transcript within Condition 1}
-\item{C2EstVar}{The estimated variance of each transcript within Condition 2}
-\item{PoolVar}{The variance of each transcript. (The pooled value of within condition EstVar)}
-\item{DataList}{A List of data that grouped with Ng and bias.}
-\item{PPDE}{The Posterior Probability of being each pattern for each transcript. (The same order of input)}
-\item{f}{The likelihood of predictive distribution of being each pattern for each transcript. }
-\item{AllParti}{The matrix describe the patterns}
-}
-\references{
-}
-\author{
-Ning Leng
-}
-\note{
-}
-
-
-\seealso{
-}
-\examples{
-Conditions=c("C1","C1","C2","C2","C3","C3")
-PosParti=GetPatterns(Conditions)
-AllParti=PosParti[-3,]
-
-MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=AllParti,
-  NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
-  DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
-
-MultiRes=EBMultiTest(MultiData[[1]],NgVector=NULL,Conditions=Conditions,
-  AllParti=AllParti, sizeFactors=rep(1,6), maxround=5, tau=NULL,CI=NULL,
-  CIthre=NULL, Pool=F, NumBin=1000, Approx=10^-10,PoolLower=.25, PoolUpper=.75)
-MultiPP=GetMultiPP(MultiRes)
-
-sum(MultiPP$MAP==MultiData[[2]])
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/EBSeq_NingLeng-package.Rd b/EBSeq/man/EBSeq_NingLeng-package.Rd
deleted file mode 100644 (file)
index ba10fde..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-\name{EBSeq_NingLeng-package}
-\alias{EBSeq_NingLeng-package}
-\alias{EBSeq_NingLeng}
-\docType{package}
-\title{
-EBSeq: RNA-Seq Differential Expression Analysis on both gene and isoform level 
-}
-\description{
-A Negative Binomial - Beta model was built to analyze the RNA-Seq data.
-We used the empirical Bayes method and the EM algorithm.
-}
-\details{
-\tabular{ll}{
-Package: \tab EBSeq_NingLeng\cr
-Type: \tab Package\cr
-Version: \tab 1.0\cr
-Date: \tab 2011-06-13\cr
-License: \tab What license is it under?\cr
-LazyLoad: \tab yes\cr
-}
-}
-\author{
-Ning Leng
-
-Maintainer: Ning Leng  <nleng@wisc.edu>
-}
-\references{
-}
-\keyword{ package }
-\seealso{
-
-}
-\examples{
-}
diff --git a/EBSeq/man/EBTest.Rd b/EBSeq/man/EBTest.Rd
deleted file mode 100644 (file)
index 5d17998..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-\name{EBTest}
-\alias{EBTest}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Using EM algorithm to calculate the posterior probabilities of being DE
-}
-\description{
-Based on the assumption of the NB-Beta Empirical Bayes model, the EM algorithm is used to get the posterior probability of being DE.
-}
-\usage{
-EBTest(Data, NgVector=NULL, Vect5End=NULL, Vect3End=NULL, Conditions, sizeFactors, maxround,tau=NULL,CI=NULL,CIthre=NULL, Pool=F, NumBin=1000)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-
-  \item{Data}{
-A data matrix containing expression values for each transcript (gene level or isoform level), in which rows are transcripts and columns are samples.
-}
-  \item{NgVector}{
-A vector contains the Ng value of each isoform. If the isoform is in a gene with 2 isoforms, Ng should be 2. Ng could be only 1, 2 or 3. If it's gene level data, Ngvector should all be 1. The vector length should be the same as the number of rows in Data.
-}
-  \item{Vect5End}{
-A vector contains the 5' end information of each isoform. It should be 1 if the isoform contains 5' end and otherwise should be 0. If it's gene level data, Vect5End should all be 1. The vector length should be the same as the number of rows in Data.
-(Not recommended)
-}
-  \item{Vect3End}{
-A vector contains the 3' end information of each isoform. It should be 1 if the
-isoform contains 3' end and otherwise should be 0. If it's gene level data, Vect3End should all be 1. The vector length should be the same as the number of rows in Data.
-(Not recommended)
-}
-  \item{Conditions}{
-A vector indicates the condition each sample belongs to. 
-}
-
-
-  \item{sizeFactors}{
-The normalization factors. 
-They could be a vector of lane-specific numbers,
-or a matrix of lane- and transcript-specific numbers.
-}
-  \item{maxround}{
-Number of iterations. The suggested value is 5.
-}
-
-\item{tau}{
-The tau value from RSEM output. If the data has no replicates within condition, 
-EBSeq will use the CI of tau to capture the variation from mapping
-uncertainty and estimate the variance.
-       }
-\item{CI}{
-The CI of each tau from RSEM output    
-       }
-\item{CIthre}{
-The threshold of CI RSEM used.
-       }
-\item{Pool, NumBin}{
-When working without replicates, Pool=T should be set in the
-EBTest function to enable pooling.
-By defining NumBin = 1000, EBSeq will group the genes with similar means
-together into 1,000 bins.
-Under the assumption that no more than 50\% of the genes are DE in the data set,
-genes whose FCs lie in the 25\% - 75\% quantiles of all FCs are taken as the
-candidate genes.
-For each bin, the bin-wise variance estimate is the median of the
-cross-condition variance estimates of the candidate genes within that bin.
-The cross-condition variance estimates are used for the candidate genes,
-and the bin-wise variance estimate of the host bin for the non-candidate genes.
-}
-
-}
-
-\details{
-For each transcript gi within condition, the model assumes:
-X_gis|mu_gi ~ NB (r_gi0 * l_s, q_gi)
-q_gi|alpha, beta^N_g,b_gi ~ Beta (alpha, beta^N_g,b_gi)
-In which l_s is the sizeFactor of sample s.
-
-The function will test:
-H0: q_giC1 = q_giC2
-H1: q_giC1 != q_giC2
-
-
-}
-\value{
-\item{Alpha }{Fitted parameter alpha of the prior beta distribution. Rows are the values for each iteration.}
-\item{Beta }{Fitted parameter beta of the prior beta distribution. Rows are the values for each iteration.}
-\item{P, PFromZ }{ The Bayes estimator of being DE. Rows are the values for each iteration.}
-\item{Z, PoissonZ}{ The Posterior Probability of being DE for each transcript. (Maybe not in the same order of input)}
-\item{RList}{ The fitted values of r for each transcript.}
-\item{MeanList}{The mean of each transcript. (Cross conditions)}
-\item{VarList}{The variance of each transcript. (Cross conditions, using the expression values divided by its sizeFactors)}
-\item{QListi1}{The fitted q values of each transcript within condition 1.}
-\item{QListi2}{The fitted q values of each transcript within condition 2.}
-\item{C1Mean}{The mean of each transcript within Condition 1}
-\item{C2Mean}{The mean of each transcript within Condition 2}
-\item{C1EstVar}{The estimated variance of each transcript within Condition 1}
-\item{C2EstVar}{The estimated variance of each transcript within Condition 2}
-\item{PoolVar}{The variance of each transcript. (The pooled value of within condition EstVar)}
-\item{DataList}{A List of data that grouped with Ng and bias.}
-\item{PPDE}{The Posterior Probability of being DE for each transcript. (The same order of input)}
-
-
-}
-\references{
-}
-\author{
-Ning Leng
-}
-\note{
-}
-
-
-\seealso{
-}
-\examples{
-#Simulate Gene level data
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-# Run EBSeq
-# sizeFactors could be obtained by MedianNorm, QuantileNorm or RankNorm
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=rep(1,10),maxround=5)
-
-# Isoform Level
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-IsoMat=do.call(rbind,IsoGenerate$data)
-IsoNames=rownames(IsoMat)
-
-Ngvector=GetNg(IsoNames, IsosGeneNames)
-IsoNgTrun=Ngvector$IsoformNgTrun
-
-IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, Conditions=as.factor(rep(c(1,2),each=5)),sizeFactors=rep(1,10), maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
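The Pool/NumBin paragraph in EBTest.Rd above describes the no-replicate variance pooling in prose. A rough R sketch of the idea, assumed for illustration only (the actual implementation lives inside EBTest and may differ in detail):

    pool_var <- function(x1, x2, NumBin = 1000, lower = .25, upper = .75) {
      # x1, x2: normalized expression vectors of the two single samples
      m    <- (x1 + x2) / 2
      v    <- (x1 - m)^2 + (x2 - m)^2                  # cross-condition variance estimate
      fc   <- log((x1 + 1) / (x2 + 1))
      bin  <- cut(rank(m), NumBin)                     # genes with similar means share a bin
      cand <- fc >= quantile(fc, lower) & fc <= quantile(fc, upper)
      binvar <- tapply(v[cand], bin[cand], median)     # bin-wise median over candidate genes
      ifelse(cand, v, binvar[as.integer(bin)])
    }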
diff --git a/EBSeq/man/GeneEBresultGouldBart2.Rd b/EBSeq/man/GeneEBresultGouldBart2.Rd
deleted file mode 100644 (file)
index 6da5305..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-\name{GeneEBresultGouldBart2}
-\alias{GeneEBresultGouldBart2}
-\docType{data}
-\title{
-The EBSeq result of the empirical gene data ( Gould Lab data, bart2 )
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of the dataset. ~~
-}
-\usage{data(GeneEBresultGouldBart2)}
-\format{
-  The format is:
-List of 17
- $ Alpha   : num [1:5, 1] 0.728 0.724 0.719 0.717 0.717
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "AlphaIn" "AlphaIn" "AlphaIn" "AlphaIn" ...
-  .. ..$ : NULL
- $ Beta    : num [1:5, 1] 1.44 1.49 1.49 1.49 1.48
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "BetaIn" "BetaIn" "BetaIn" "BetaIn" ...
-  .. ..$ : NULL
- $ P       : num [1:5, 1] 0.1584 0.0767 0.0534 0.046 0.0432
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "PIn" "PIn" "PIn" "PIn" ...
-  .. ..$ : NULL
- $ PFromZ  : num [1:5, 1] 0.1585 0.0765 0.0535 0.0459 0.0432
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "PFromZ" "PFromZ" "PFromZ" "PFromZ" ...
-  .. ..$ : NULL
- $ Z       : Named num [1:15312] 0.0036 0.00246 0.00122 0.61556 0.00394 ...
-  ..- attr(*, "names")= chr [1:15312] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027158" "ENSRNOG00000027157" ...
- $ PoissonZ: Named num [1:4955] 6.59e-04 5.71e-04 3.80e-04 2.75e-04 2.07e-05 ...
-  ..- attr(*, "names")= chr [1:4955] "ENSRNOG00000027159" "ENSRNOG00000039120" "ENSRNOG00000039118" "ENSRNOG00000003198" ...
- $ RList   :List of 1
-  ..$ : Named num [1:20267] 19.12 62.3 -3.09 348.78 200.03 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ MeanList:List of 1
-  ..$ : Named num [1:20267] 289.663 302.486 0.398 97.791 106.036 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ VarList :List of 1
-  ..$ : Named num [1:20267] 5792.7 1954 0.6 146.8 513.4 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ QList1  :List of 1
-  ..$ : Named num [1:20267] 0.188 0.152 NaN 0.487 1.118 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ QList2  :List of 1
-  ..$ : Named num [1:20267] 0.0389 0.1951 1.1478 1.7647 0.4149 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C1Mean  :List of 1
-  ..$ : Named num [1:20267] 271.9 300.7 0 93.8 123.1 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C2Mean  :List of 1
-  ..$ : Named num [1:20267] 307.414 304.298 0.796 101.798 88.953 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C1EstVar:List of 1
-  ..$ : Named num [1:20267] 1449 1983 0 193 110 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ C2EstVar:List of 1
-  ..$ : Named num [1:20267] 7905.417 1559.46 0.694 57.687 214.39 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ PoolVar :List of 1
-  ..$ : Named num [1:20267] 4677.246 1771.219 0.347 125.211 162.247 ...
-  .. ..- attr(*, "names")= chr [1:20267] "ENSRNOG00000015181" "ENSRNOG00000015180" "ENSRNOG00000027159" "ENSRNOG00000027158" ...
- $ DataList:List of 1
-  ..$ Ng1: num [1:20267, 1:8] 287 251 0 87 121 181 5 195 70 5 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:20267] "I1" "I2" "I3" "I4" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-}
-\details{
-%%  ~~ If necessary, more details than the __description__ above ~~
-}
-\source{
-%%  ~~ reference to a publication or URL from which the data were obtained ~~
-}
-\seealso{
-IsoEBresultGouldBart2, NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\examples{
-data(GeneEBresultGouldBart2)
-## maybe str(GeneEBresultGouldBart2) ; plot(GeneEBresultGouldBart2) ...
-}
-\keyword{datasets}
diff --git a/EBSeq/man/GeneMultiSimu.Rd b/EBSeq/man/GeneMultiSimu.Rd
deleted file mode 100644 (file)
index a00b4a7..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-\name{GeneMultiSimu}
-\alias{GeneMultiSimu}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Gene Level Simulation for multiple conditions
-}
-\description{
-Simulate Gene level expression data from a Negative Binomial assumption. (Without outliers)
-}
-\usage{
-GeneMultiSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions,AllParti, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL,  NormFactor=NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{DVDconstant}{
-Whether to use a constant fold change value for all the DE genes. 
-If DVDconstant=4 is set, all the DE genes will have a fold change of 4 across the two conditions. 
-If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored. 
-}
-  \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user could use a range of empirical DVD's
-from Gould's data. The lower and upper bound (quantile) could be specified.
-The suggested value is c(.96, .97). The DVD for each gene will be randomly chosen within the range.
-
-}
-  \item{Conditions}{
-A vector of characters to show each sample's condition. 
-(Only the two-condition case is supported now)
-}
-\item{AllParti}{
-           A matrix indicating the patterns of interest. Columns should be conditions and rows should be patterns.
-                   The matrix could be obtained by the GetPatterns function. If AllParti=NULL, all possible patterns will be used.
-}
-
-  \item{NumofSample}{
-Number of samples to generate.
-}
-  \item{NumofGene}{
-Number of genes to generate.
-}
-  \item{DEGeneProp}{
-The proportion of genes to be generated as DE. The value should be in [0, 1].
-Besides, the same proportion of genes will be generated as EE genes with outliers. 
-The genes will be generated as EE at first, then the count of one of the samples 
-(randomly selected) will be set to its original count multiplied by one of (4, 6, 8, 10).
-}
-  \item{Phiconstant}{
-Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-}
-  \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user could use a range of empirical phi's from Gould's data. The lower and upper bound (quantile) could be specified.
-The suggested value is c(.25, .75). The phi for each gene will be randomly chosen
-within the range.
-
-}
-  \item{Meanconstant}{
-Whether to set the mean of each gene to a constant.
-}
-  \item{OnlyData}{
-Whether the user only wants the generated data matrix. If OnlyData=T, the function will return the simulated matrix
-and the names of the DE genes.
-Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method. 
-}
-
-\item{NormFactor}{
-If NormFactor is NULL, each lane will be set to be with the same library size. Otherwise NormFactor should be a 
-vector with length NumofSample. 
-}
-
-}
-\details{
-For each gene, we assumed that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g). 
-For DE genes, we assume that in one condition the genes have mean mu_g * DVD.
-mu, phi and DVD could be specified by the parameter settings.
-}
-\value{
-\item{data}{
-A matrix of expression values will be generated. The rows of the matrix refer to the genes and the columns of the matrix are the samples. The genes are named "G_1", "G_2", ... The first part of the genes will be the DE ones. (The number depends on the DEGeneProp parameter.)
-}
-\item{Patterns}{The pattern each gene belongs to}
-
-}
-
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-IsoSimu, IsoSimuAt, GeneSimuAt
-}
-\examples{
-Conditions=c("C1","C1","C2","C2","C3","C3")
-PosParti=GetPatterns(Conditions)
-AllParti=PosParti[-3,]
-
-MultiData=GeneMultiSimu(Conditions=Conditions,AllParti=AllParti,
-  NumofSample=6,NumofGene=1000,DEGeneProp=c(.7,.1,.1,.1),
-  DVDqt1=.98,DVDqt2=.99,Phi.qt1=.25,Phi.qt2=.75)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ simulation }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GeneSimu.Rd b/EBSeq/man/GeneSimu.Rd
deleted file mode 100644 (file)
index a008fcc..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-\name{GeneSimu}
-\alias{GeneSimu}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Gene Level Simulation 
-}
-\description{
-Simulate Gene level expression data from a Negative Binomial assumption. (Without outliers)
-}
-\usage{
-GeneSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL,  NormFactor=NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{DVDconstant}{
-Whether to use a constant fold change value for all the DE genes. 
-If DVDconstant=4 is set, all the DE genes will have a fold change of 4 across the two conditions. 
-If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored. 
-}
-  \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user could use a range of empirical DVD's
-from Gould's data. The lower and upper bound (quantile) could be specified.
-The suggested value is c(.96, .97). The DVD for each gene will be randomly chosen within the range.
-
-}
-  \item{Conditions}{
-A vector of characters indicating each sample's condition.
-(Only the two-condition case is supported now)
-}
-  \item{NumofSample}{
-Number of samples to generate.
-}
-  \item{NumofGene}{
-Number of genes to generate.
-}
-  \item{DEGeneProp}{
-The proportion of genes to be generated as DE. The value should be in [0, 1].
-In addition, the same proportion of genes will be generated as EE genes with outliers.
-These genes are generated as EE at first; then the count of one of the samples
-(randomly selected) is set to its original count multiplied by one of (4, 6, 8, 10).
-}
-  \item{Phiconstant}{
-Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-}
-  \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user can use a range of empirical phi's from the Gould Lab data. The lower and upper bounds (quantiles) can be specified.
-The suggested values are c(.25, .75). The phi for each gene will be randomly chosen within the range.
-
-}
-  \item{Meanconstant}{
-Whether to set the mean of each gene to a constant.
-}
-  \item{OnlyData}{
-Whether the user only wants the generated data matrix. If OnlyData=T, the function will return the simulated matrix
-and the names of the DE genes.
-Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-}
-
-\item{NormFactor}{
-If NormFactor is NULL, each lane will be set to have the same library size. Otherwise NormFactor should be a
-vector of length NumofSample.
-}
-
-}
-\details{
-For each gene, we assume that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
-For DE genes, we assume that in one condition the genes have mean mu_g * DVD.
-mu, phi and DVD can be specified via the parameter settings.
-}
-\value{
-\item{data}{
-A matrix of expression values. The rows of the matrix are the genes and the columns are the samples. The genes are named "G_1", "G_2", ... The first genes in the matrix are the DE ones. (Their number depends on the DEGeneProp parameter.)
-}
-\item{TrueDE}{The names of the genes that are defined to be DE.}
-}
-
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-IsoSimu, IsoSimuAt, GeneSimuAt
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, NormFactor=rep(c(.9,1.2),5),OnlyData=T)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ simulation }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GeneSimuAt.Rd b/EBSeq/man/GeneSimuAt.Rd
deleted file mode 100644 (file)
index 1069ab1..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-\name{GeneSimuAt}
-\alias{GeneSimuAt}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Gene Level Simulation with outliers
-}
-\description{
-Simulate Gene level expression data from a Negative Binomial assumption. (With outliers)
-}
-\usage{
-GeneSimuAt(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofGene = NULL, DEGeneProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, Meanconstant = NULL,  NormFactor=NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{DVDconstant}{
-Whether to use a constant fold change value for all the DE genes.
-If DVDconstant=4 is set, all the DE genes will have a fold change of 4 across the two conditions.
-If this parameter is specified, the settings of DVDqt1 and DVDqt2 will be ignored. 
-}
-  \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user can use a range of empirical DVDs from the Gould Lab data. The lower and upper bounds (quantiles) can be specified.
-The suggested values are c(.96, .97). The DVD for each gene will be randomly chosen within the range.
-
-}
-  \item{Conditions}{
-A vector of characters indicating each sample's condition.
-(Only the two-condition case is supported now)
-}
-  \item{NumofSample}{
-Number of samples to generate.
-}
-  \item{NumofGene}{
-Number of genes to generate.
-}
-  \item{DEGeneProp}{
-The proportion of genes to be generated as DE. The value should be in [0, 1].
-In addition, the same proportion of genes will be generated as EE genes with outliers.
-These genes are generated as EE at first; then the count of one of the samples
-(randomly selected) is set to its original count multiplied by one of (4, 6, 8, 10).
-}
-  \item{Phiconstant}{
-Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-}
-  \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user can use a range of empirical phi's from the Gould Lab data. The lower and upper bounds (quantiles) can be specified.
-The suggested values are c(.25, .75). The phi for each gene will be randomly chosen within the range.
-
-}
-  \item{Meanconstant}{
-Whether to set the mean of each gene to a constant.
-}
-  \item{OnlyData}{
-Whether the user only wants the generated data matrix. If OnlyData=T, the function will return the simulated matrix
-and the names of the DE genes.
-Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-}
-
-\item{NormFactor}{
-If NormFactor is NULL, each lane will be set to have the same library size. Otherwise NormFactor should be a
-vector of length NumofSample.
-}
-
-}
-\details{
-For each gene, we assume that the expression follows a Negative Binomial distribution with mean mu_g and variance mu_g * (1 + mu_g * phi_g).
-For DE genes, we assume that in one condition the genes have mean mu_g * DVD.
-mu, phi and DVD can be specified via the parameter settings.
-}
-\value{
-\item{data}{
-A matrix of expression values. The rows of the matrix are the genes and the columns are the samples. The genes are named "G_1", "G_2", ... The first genes in the matrix are the DE ones. (Their number depends on the DEGeneProp parameter.)
-}
-\item{TrueDE}{The names of the genes that are defined to be DE.}
-\item{Outliers}{The names of the genes that are defined to be outliers at each level of (4, 6, 8, 10).}
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-IsoSimu, IsoSimuAt
-}
-\examples{
-GeneGenerate=GeneSimuAt(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, NormFactor=rep(c(.9,1.2),5),OnlyData=T)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ simulation }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GetData.Rd b/EBSeq/man/GetData.Rd
deleted file mode 100644 (file)
index fbd977f..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-\name{GetData}
-\alias{GetData}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Read in RSEM output of Gould data
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-GetData(path, Name1, Name2, type)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{path}{
-The path of RSEM outputs
-}
-  \item{Name1}{
-The output names of the files from Condition 1
-}
-  \item{Name2}{
-The output names of the files from Condition 2
-}
-  \item{type}{
-If type="G", read in the gene level output
-If type="I", read in the isoform level output
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-The output is the "nu values" from RSEM.
-To generate a expression matrix, the user need to run the PoolMatrix function.
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-function(path, Name1, Name2, type)
-{
-  Data = vector("list", 8)
-  Filenames = NULL
-  Tablenames = NULL
-  for (name in 1:4) {
-    if (type == "I")
-      Filenames = c(Filenames, paste(path, Name1, name, "_isoform_nus.tab", sep = ""))
-    if (type == "G")
-      Filenames = c(Filenames, paste(path, Name1, name, "_gene_nus.tab", sep = ""))
-    Tablenames = c(Tablenames, paste(Name1, name, sep = ""))
-  }
-  for (name in 1:4) {
-    if (type == "I")
-      Filenames = c(Filenames, paste(path, Name2, name, "_isoform_nus.tab", sep = ""))
-    if (type == "G")
-      Filenames = c(Filenames, paste(path, Name2, name, "_gene_nus.tab", sep = ""))
-    Tablenames = c(Tablenames, paste(Name2, name, sep = ""))
-  }
-  names(Data) = Tablenames
-  for (file in 1:8) {
-    temp = read.table(Filenames[file], header = T)
-    temp2 = as.matrix(temp[-1])
-    rownames(temp2) = as.vector(as.matrix(temp[1]))
-    Data[[file]] = temp2
-  }
-  Data
-}
-}
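A hypothetical call, for illustration only (the path and file-name prefixes below are made up; GetData expects files named <path><Name1|Name2><1..4>_gene_nus.tab or ..._isoform_nus.tab):

GeneNus <- GetData(path = "./rsem_out/", Name1 = "cond1_", Name2 = "cond2_", type = "G")
# the list returned here is then pooled into one expression matrix with PoolMatrix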
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/GetMultiPP.Rd b/EBSeq/man/GetMultiPP.Rd
deleted file mode 100644 (file)
index 843b362..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-\name{GetMultiPP}
-\alias{GetMultiPP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the Posterior Probability of each transcript.
-}
-\description{
-Generate the posterior probability of being in each pattern, for each transcript, based on the EBMultiTest output.
-}
-\usage{
-GetMultiPP(EBout)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{EBout}{
-The output of EBMultiTest function.
-}
-
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-\item{PP}{The posterior probabilities of being each pattern.}
-\item{MAP}{The most likely pattern each gene belongs to}
-\item{Patterns}{The Patterns}
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-}
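Since the example above is an empty template, a minimal hypothetical usage (MultiOut stands for an EBMultiTest result):

MultiPP <- GetMultiPP(MultiOut)
head(MultiPP$PP)     # posterior probability of each pattern, per transcript
head(MultiPP$MAP)    # most likely pattern for each transcript
MultiPP$Patterns     # the patterns themselves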
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Posterior Probability }
diff --git a/EBSeq/man/GetNg.Rd b/EBSeq/man/GetNg.Rd
deleted file mode 100644 (file)
index 65207a4..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-\name{GetNg}
-\alias{GetNg}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the Ng vector
-}
-\description{
-Generate the Ng vector for the isoforms
-}
-\usage{
-GetNg(IsoformName, GeneName)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{IsoformName}{
-A vector containing the isoform names
-}
-  \item{GeneName}{
-The gene names of the isoforms in IsoformName. (Should be in the same order.)
-  }
-
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-  \item{GeneNg}{
-  The number of isoforms each gene contains
-  }
-  \item{GeneNgTrun}{
-  The truncated Ng of each gene. (Genes containing more than 3 isoforms are assigned Ng 3.)
-  }
-   \item{IsoformNg}{
-  The Ng of each isoform 
-  }
-    \item{IsoformNgTrun}{
-   The truncated Ng of each isoform. 
-  }
-
-
-}
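For reference, a small sketch (with made-up names) of how such vectors can be derived from an isoform-to-gene mapping; genes with more than 3 isoforms are truncated to Ng 3:

IsoformName <- paste0("Iso_", 1:5)                   # made-up isoform names
GeneName    <- c("G_1", "G_1", "G_1", "G_1", "G_2")  # gene of each isoform
GeneNg      <- c(table(GeneName))                    # isoforms per gene, as a named vector
GeneNgTrun  <- pmin(GeneNg, 3)                       # genes with >3 isoforms get Ng 3
IsoformNgTrun <- GeneNgTrun[GeneName]                # Ng group of each isoform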
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=c(1000,2000,3000), DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-IsoMat=do.call(rbind,IsoGenerate$data)
-IsoNames=rownames(IsoMat)
-
-Ngvector=GetNg(IsoNames, IsosGeneNames)  # IsosGeneNames: gene name of each isoform in IsoNames
-IsoNgTrun=Ngvector$IsoformNgTrun
-IsoEBres=EBTest(Data=IsoMat, NgVector=IsoNgTrun, Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=rep(1,10), maxround=5)
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Ng }
diff --git a/EBSeq/man/GetPP.Rd b/EBSeq/man/GetPP.Rd
deleted file mode 100644 (file)
index 602080c..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-\name{GetPP}
-\alias{GetPP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the Posterior Probability of each transcript.
-}
-\description{
-Generate the posterior probability of being DE for each transcript, based on the EBTest output.
-}
-\usage{
-GetPP(EBout)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{EBout}{
-The output of EBTest function.
-}
-
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-The posterior probabilities of being DE.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ Posterior Probability }
diff --git a/EBSeq/man/GetPatterns.Rd b/EBSeq/man/GetPatterns.Rd
deleted file mode 100644 (file)
index 8b08737..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-\name{GetPatterns}
-\alias{GetPatterns}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate all possible patterns in a multiple-condition study
-}
-\description{
-Generate all possible patterns in a multiple-condition study
-}
-\usage{
-GetPatterns(Conditions)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{Conditions}{
-The names of the Conditions in the study
-}
-
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-A matrix describing all possible patterns.
-
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-}
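The example above is an empty template; a minimal usage, mirroring the GeneMultiSimu example earlier in this commit:

Conditions <- c("C1", "C1", "C2", "C2", "C3", "C3")
PosParti   <- GetPatterns(Conditions)   # one row per possible pattern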
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ }
diff --git a/EBSeq/man/IsoEBresultGouldBart2.Rd b/EBSeq/man/IsoEBresultGouldBart2.Rd
deleted file mode 100644 (file)
index 1cdb1a8..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-\name{IsoEBresultGouldBart2}
-\alias{IsoEBresultGouldBart2}
-\docType{data}
-\title{
-The EBSeq result of the empirical isoform data (Gould Lab data, bart2)
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of the dataset. ~~
-}
-\usage{data(IsoEBresultGouldBart2)}
-\format{
-  The format is:
-List of 17
- $ Alpha   : num [1:5, 1] 0.49 0.674 0.735 0.739 0.739
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "AlphaIn" "AlphaIn" "AlphaIn" "AlphaIn" ...
-  .. ..$ : NULL
- $ Beta    : num [1:5, 1:9] 1.03 1.3 1.4 1.41 1.41 ...
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "BetaIn" "BetaIn" "BetaIn" "BetaIn" ...
-  .. ..$ : NULL
- $ P       : num [1:5, 1] 0.1751 0.0955 0.073 0.066 0.0642
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "PIn" "PIn" "PIn" "PIn" ...
-  .. ..$ : NULL
- $ PFromZ  : num [1:5, 1] 0.1878 0.0937 0.0736 0.0662 0.0634
-  ..- attr(*, "dimnames")=List of 2
-  .. ..$ : chr [1:5] "PFromZ" "PFromZ" "PFromZ" "PFromZ" ...
-  .. ..$ : NULL
- $ Z       : Named num [1:19249] 0.00494 0.00349 0.00219 0.72998 0.00593 ...
-  ..- attr(*, "names")= chr [1:19249] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000056429" "ENSRNOT00000037482" ...
- $ PoissonZ: Named num [1:6019] 0.001395 0.00111 0.00078 0.000551 0.00111 ...
-  ..- attr(*, "names")= chr [1:6019] "ENSRNOT00000029207" "ENSRNOT00000059839" "ENSRNOT00000056154" "ENSRNOT00000059835" ...
- $ RList   :List of 9
-  ..$ : Named num [1:15315] 19.03 62.06 -3.08 313.15 207.39 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 3.369 46.691 0.194 6.79 0.541 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 94.298 -733.445 -0.391 1.102 -3.223 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 3 7.46 6.32 -2.5 119.32 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 11.168 0.167 0.296 0.882 20.272 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 2.456 7.899 25.052 0.177 -0.579 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 5.64 45.35 -16.06 -31.73 1.76 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 260.79 1.632 0.719 2.843 0.553 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 7.43 1.85 2.14 60.4 20.51 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ MeanList:List of 9
-  ..$ : Named num [1:15315] 288.018 300.77 0.396 97.251 105.428 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 1.616 3442.78 5.275 30.388 0.253 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 988.128 128.831 0.105 0.759 0.502 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 9.19 706.27 205.21 1.52 3715.53 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 67.12 3.05 1.13 3.09 14.03 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 36.175 52.99 2224.885 0.732 0.253 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 6.71 187.77 2.14 3.97 63.38 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 217.38 3.72 31.38 93.58 7.63 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 28.6 32.2 39.2 1275.1 750.5 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ VarList :List of 9
-  ..$ : Named num [1:15315] 5729.745 1929.857 0.593 148.349 505.122 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 4.69 2.81e+05 2.23e+02 2.21e+02 5.13e-01 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 1.16e+04 1.22e+02 8.75e-02 1.98 5.80e-01 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 4.99e+01 7.80e+04 1.80e+04 7.05e-01 1.91e+05 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 580.6 74.5 7.8 18.5 26.7 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 6.10e+02 6.30e+02 2.69e+05 4.29 2.20e-01 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 15.48 1816.69 3.33 5.48 2683.89 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 805.4 27.7 2024.4 4101.5 139 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 761 1398 854 34973 31500 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ QList1  :List of 9
-  ..$ : Named num [1:15315] 0.191 0.153 NaN 0.477 1.171 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 0.648 0.0134 NaN 0.6744 NaN ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 0.0443 0.9225 1.3649 0.592 1.2961 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 0.13755 0.00968 0.0695 1.6441 0.05331 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 0.0837 0.0518 1.2634 0.1805 0.5577 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 0.0655 0.1031 0.0109 0.195 1.776 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 0.3176 0.1285 1.6778 2.0836 0.0221 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 19.8858 0.3047 0.563 0.1257 0.0614 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 0.0552 0.0491 0.0388 0.0374 0.0282 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ QList2  :List of 9
-  ..$ : Named num [1:15315] 0.0388 0.1935 1.1475 1.7041 0.4143 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 1.3629 0.0134 0.0354 0.1129 0.6811 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 3.02 1.78 NaN NaN 1.15 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 0.4159 0.0116 0.0245 8.6195 0.0227 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 1.264 NaN 0.187 0.246 0.632 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 0.0613 0.1542 0.0115 NaN NaN ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 0.7444 0.3209 0.6206 0.9042 0.0366 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 0.2969 NaN 0.0178 0.0187 0.1135 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 0.2911 0.3678 0.0834 0.0558 0.0252 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C1Mean  :List of 9
-  ..$ : Named num [1:15315] 270.3 299 0 93.3 122.4 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 2.98 3490.34 0 27.89 0 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 991.231 129.543 0.209 1.518 0.244 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 6.25 775.11 114.76 1.7 3505.16 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 74.812 6.103 0.262 1.834 14.543 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 40.039 40.329 2394.87 1.464 0.506 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 6.29 162.86 3.14 2.93 68.98 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 201.06 7.45 13.4 80.08 12.23 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 5.5 57.4 41.7 1213.3 749.1 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C2Mean  :List of 9
-  ..$ : Named num [1:15315] 305.699 302.587 0.792 101.229 88.447 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 0.253 3395.219 10.551 32.889 0.507 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 985.025 128.12 0 0 0.759 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 12.13 637.43 295.65 1.34 3925.9 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 59.42 0 2 4.35 13.51 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 32.3 65.6 2054.9 0 0 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 7.13 212.67 1.14 5.01 57.77 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 233.7 0 49.37 107.08 3.04 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 51.68 7.12 36.79 1336.85 751.85 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C1EstVar:List of 9
-  ..$ : Named num [1:15315] 1413 1953 0 195 105 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 4.6 261211.7 0 41.4 0 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 2.24e+04 1.40e+02 1.53e-01 2.56 1.88e-01 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 45.4 80103.39 1651.21 1.04 65751.73 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 893.893 117.714 0.207 10.162 26.076 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 6.11e+02 3.91e+02 2.20e+05 7.51 2.85e-01 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 19.81 1267.78 1.87 1.41 3123.1 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 10.1 24.5 23.8 637 199 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 99.7 1167.7 1072.9 32440 26599 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ C2EstVar:List of 9
-  ..$ : Named num [1:15315] 7882.46 1563.99 0.69 59.4 213.5 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 1.86e-01 2.53e+05 2.98e+02 2.91e+02 7.44e-01 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 325.872 71.975 0 0 0.659 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 2.92e+01 5.51e+04 1.21e+04 1.55e-01 1.73e+05 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 47 0 10.7 17.7 21.4 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 527 426 179461 0 0 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 9.58 662.66 1.84 5.54 1579.26 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 787 0 2780 5712.2 26.8 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 177.6 19.4 441.2 23947.8 29826.6 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ PoolVar :List of 9
-  ..$ : Named num [1:15315] 4647.958 1758.495 0.345 127.452 159.023 ...
-  .. ..- attr(*, "names")= chr [1:15315] "ENSRNOT00000020358" "ENSRNOT00000020388" "ENSRNOT00000029207" "ENSRNOT00000056429" ...
-  ..$ : Named num [1:1103] 2.39 2.57e+05 1.49e+02 1.66e+02 3.72e-01 ...
-  .. ..- attr(*, "names")= chr [1:1103] "ENSRNOT00000059696" "ENSRNOT00000046399" "ENSRNOT00000056154" "ENSRNOT00000020379" ...
-  ..$ : Named num [1:404] 1.13e+04 1.06e+02 7.66e-02 1.28 4.23e-01 ...
-  .. ..- attr(*, "names")= chr [1:404] "ENSRNOT00000044782" "ENSRNOT00000038046" "ENSRNOT00000052405" "ENSRNOT00000048994" ...
-  ..$ : Named num [1:999] 3.73e+01 6.76e+04 6.87e+03 5.96e-01 1.19e+05 ...
-  .. ..- attr(*, "names")= chr [1:999] "ENSRNOT00000004311" "ENSRNOT00000064540" "ENSRNOT00000059572" "ENSRNOT00000020652" ...
-  ..$ : Named num [1:592] 470.45 58.86 5.44 13.94 23.73 ...
-  .. ..- attr(*, "names")= chr [1:592] "ENSRNOT00000020305" "ENSRNOT00000031749" "ENSRNOT00000037178" "ENSRNOT00000042240" ...
-  ..$ : Named num [1:863] 5.69e+02 4.08e+02 2.00e+05 3.75 1.42e-01 ...
-  .. ..- attr(*, "names")= chr [1:863] "ENSRNOT00000020323" "ENSRNOT00000004326" "ENSRNOT00000004290" "ENSRNOT00000067476" ...
-  ..$ : Named num [1:490] 14.69 965.22 1.86 3.48 2351.18 ...
-  .. ..- attr(*, "names")= chr [1:490] "ENSRNOT00000048345" "ENSRNOT00000066009" "ENSRNOT00000063985" "ENSRNOT00000068068" ...
-  ..$ : Named num [1:3943] 398.6 12.2 1401.9 3174.6 112.9 ...
-  .. ..- attr(*, "names")= chr [1:3943] "ENSRNOT00000020405" "ENSRNOT00000038103" "ENSRNOT00000067285" "ENSRNOT00000034889" ...
-  ..$ : Named num [1:1559] 139 594 757 28194 28213 ...
-  .. ..- attr(*, "names")= chr [1:1559] "ENSRNOT00000004486" "ENSRNOT00000060705" "ENSRNOT00000043951" "ENSRNOT00000020770" ...
- $ DataList:List of 9
-  ..$ Ng1      : num [1:15315, 1:8] 287 251 0 87 121 181 5 195 70 5 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:15315] "I1" "I2" "I3" "I4" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng2No5No3: num [1:1103, 1:8] 3 3226 0 27 0 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:1103] "I14" "I15" "I16" "I66" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng3No5No3: num [1:404, 1:8] 827 153 0 3 1 0 0 0 2 19 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:404] "I138" "I190" "I191" "I214" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng2With3 : num [1:999, 1:8] 0 945 77 2 3763 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:999] "I35" "I52" "I79" "I91" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng3With3 : num [1:592, 1:8] 25 25 0 0 17 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:592] "I131" "I132" "I222" "I266" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng2With5 : num [1:863, 1:8] 36 48 1912 0 1 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:863] "I111" "I118" "I135" "I193" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng3With5 : num [1:490, 1:8] 3 212 5 2 90 5 256 66 21 23 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:490] "I43" "I213" "I336" "I556" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng2Both  : num [1:3943, 1:8] 209 11 17 101 0 432 631 0 228 878 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:3943] "I13" "I34" "I46" "I47" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-  ..$ Ng3Both  : num [1:1559, 1:8] 0 88 25 1455 506 ...
-  .. ..- attr(*, "dimnames")=List of 2
-  .. .. ..$ : chr [1:1559] "I28" "I29" "I30" "I41" ...
-  .. .. ..$ : chr [1:8] "W5-D1" "W5-D2" "W5-D3" "W5-D4" ...
-}
-\details{
-%%  ~~ If necessary, more details than the __description__ above ~~
-}
-\source{
-%%  ~~ reference to a publication or URL from which the data were obtained ~~
-}
-\references{
-%%  ~~ possibly secondary sources and usages ~~
-}
-\examples{
-data(IsoEBresultGouldBart2)
-## maybe str(IsoEBresultGouldBart2) ; plot(IsoEBresultGouldBart2) ...
-}
-\keyword{datasets}
diff --git a/EBSeq/man/IsoSimu.Rd b/EBSeq/man/IsoSimu.Rd
deleted file mode 100644 (file)
index c8f193d..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-\name{IsoSimu}
-\alias{IsoSimu}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Isoform level simulation
-}
-\description{
-Simulate isoform level expression data from a Negative Binomial assumption. (Without outliers)
-}
-\usage{
-IsoSimu(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofIso = NULL, DEIsoProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, NormFactor = NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{DVDconstant}{
-Whether to use a constant fold change value for all the DE genes.
-}
-  \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user can use a range of empirical DVDs from the Gould Lab data. The lower and upper bounds (quantiles) can be specified.
-The suggested values are c(.96, .97). The DVD for each gene will be randomly chosen within the range.
-}
-  \item{Conditions}{
-A vector of characters indicating each sample's condition.
-(Only the two-condition case is supported now)
-}
-  \item{NumofSample}{
-Number of samples the user wants to generate.
-}
-  \item{NumofIso}{
-Input should be a vector with length 3. All values should be non-negative.
-The ith value represents how many isoforms the user wants to generate for isoform group i.
-}
-  \item{DEIsoProp}{
-The proportion of isoforms to be generated as DE. The value should be in [0, 1].
-}
-  \item{Phiconstant}{
-Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-Input should be a vector of length 3. The ith value indicates the overdispersion parameter of isoform group i.
-}
-  \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user can use a range of empirical phi's from each group of the Gould Lab data. The lower and upper bounds (quantiles) can be specified.
-The suggested values are c(.25, .75). The phi for each isoform will be randomly chosen within the range.
-
-}
-  \item{NormFactor}{
-If NormFactor is NULL, each lane will be set to have the same library size. Otherwise NormFactor should be a vector of length NumofSample.
-}
-  \item{OnlyData}{
-Whether the user only wants the generated data matrix. If OnlyData = T, the function will return the simulated matrix
-and the names of the DE isoforms.
-Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-Currently only OnlyData=T is supported.
-}
-}
-\details{
-For each isoform, we assume that the expression follows a Negative Binomial distribution with mean mu_gi and variance mu_gi * (1 + mu_gi * phi_gi).
-For DE isoforms, we assume that in one condition the isoforms have mean mu_gi * DVD.
-mu, phi and DVD can be specified via the parameter settings.
-
-}
-\value{
-\item{data}{
-A list of expression matrices.
-Each element of the list represents a group of isoforms:
-Group 1: Ng1
-Group 2: Ng2
-Group 3: Ng3
-The rows are the isoforms and the columns are the samples.
-The isoforms are named "I_GroupNumber_IsoformNumber". The first isoforms of each group are the DE ones. (Their number depends on the DEIsoProp parameter.)
-}
-\item{TrueDE}{The names of the isoforms that are defined to be DE.}
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-GeneSimu
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-}
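Because the data slot is a list of per-group matrices (Ng1, Ng2, Ng3), it is typically stacked into a single matrix before further analysis, as in the GetNg example earlier in this commit:

IsoMat   <- do.call(rbind, IsoGenerate$data)   # stack the three Ng groups
IsoNames <- rownames(IsoMat)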
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/IsoSimuAt.Rd b/EBSeq/man/IsoSimuAt.Rd
deleted file mode 100644 (file)
index 60fb12e..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-\name{IsoSimuAt}
-\alias{IsoSimuAt}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Isoform level simulation with outliers
-}
-\description{
-Simulate isoform level expression data from a Negative Binomial assumption. (With outliers)
-}
-\usage{
-IsoSimuAt(DVDconstant = NULL, DVDqt1 = NULL, DVDqt2 = NULL, Conditions, NumofSample, NumofIso = NULL, DEIsoProp, Phiconstant = NULL, Phi.qt1 = NULL, Phi.qt2 = NULL, NormFactor = NULL, OnlyData = T)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{DVDconstant}{
-Whether to use a constant fold change value for all the DE genes.
-}
-  \item{DVDqt1, DVDqt2}{
-If DVDconstant is not specified, the user can use a range of empirical DVDs from the Gould Lab data. The lower and upper bounds (quantiles) can be specified.
-The suggested values are c(.96, .97). The DVD for each gene will be randomly chosen within the range.
-}
-  \item{Conditions}{
-A vector of characters indicating each sample's condition.
-(Only the two-condition case is supported now)
-}
-  \item{NumofSample}{
-Number of samples the user wants to generate.
-}
-  \item{NumofIso}{
-Input should be a vector with length 3. All values should be non-negative.
-The ith value represents how many isoforms the user wants to generate for isoform group i.
-}
-  \item{DEIsoProp}{
-The proportion of isoforms to be generated as DE. The value should be in [0, 1].
-In addition, the same proportion of isoforms will be generated as EE isoforms with outliers.
-These isoforms are generated as EE at first; then the count of one of the samples
-(randomly selected) is set to its original count multiplied by one of (4, 6, 8, 10).
-
-}
-  \item{Phiconstant}{
-Whether to set the dispersion parameter phi to a constant. If this parameter is specified, the settings of Phi.qt1 and Phi.qt2 will be ignored.
-Input should be a vector of length 3. The ith value indicates the overdispersion parameter of isoform group i.
-}
-  \item{Phi.qt1, Phi.qt2}{
-If Phiconstant is not specified, the user can use a range of empirical phi's from each group of the Gould Lab data. The lower and upper bounds (quantiles) can be specified.
-The suggested values are c(.25, .75). The phi for each isoform will be randomly chosen within the range.
-
-}
-  \item{NormFactor}{
-If NormFactor is NULL, each lane will be set to have the same library size. Otherwise NormFactor should be a vector of length NumofSample.
-}
-  \item{OnlyData}{
-Whether the user only wants the generated data matrix. If OnlyData = T, the function will return the simulated matrix
-and the names of the DE isoforms.
-Otherwise the function will run DESeq, EBSeq, edgeR, baySeq and BBSeq and provide the results of each method.
-Currently only OnlyData=T is supported.
-}
-}
-\details{
-For each isoform, we assume that the expression follows a Negative Binomial distribution with mean mu_gi and variance mu_gi * (1 + mu_gi * phi_gi).
-For DE isoforms, we assume that in one condition the isoforms have mean mu_gi * DVD.
-mu, phi and DVD can be specified via the parameter settings.
-
-}
-\value{
-\item{data}{
-A list of expression matrices.
-Each element of the list represents a group of isoforms:
-Group 1: Ng1
-Group 2: Ng2
-Group 3: Ng3
-The rows are the isoforms and the columns are the samples.
-The isoforms are named "I_GroupNumber_IsoformNumber". The first isoforms of each group are the DE ones. (Their number depends on the DEIsoProp parameter.)
-}
-\item{TrueDE}{The names of the isoforms that are defined to be DE.}
-
-\item{Outliers}{The names of the isoforms that are defined to be outliers at each level of (4, 6, 8, 10).}
-}
-
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-GeneSimu, IsoSimu
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-IsoGenerate=IsoSimuAt(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData=T )
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/Likefun.Rd b/EBSeq/man/Likefun.Rd
deleted file mode 100644 (file)
index dea5882..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-\name{Likefun}
-\alias{Likefun}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Likelihood Function of the NB-Beta Model
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-Likefun(ParamPool, InputPool)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{ParamPool}{
-%%     ~~Describe \code{ParamPool} here~~
-}
-  \item{InputPool}{
-%%     ~~Describe \code{InputPool} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
diff --git a/EBSeq/man/LikefunMulti.Rd b/EBSeq/man/LikefunMulti.Rd
deleted file mode 100644 (file)
index 713311f..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-\name{LikefunMulti}
-\alias{LikefunMulti}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Likelihood Function of the NB-Beta Model
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-LikefunMulti(ParamPool, InputPool)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{ParamPool}{
-%%     ~~Describe \code{ParamPool} here~~
-}
-  \item{InputPool}{
-%%     ~~Describe \code{InputPool} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
diff --git a/EBSeq/man/LogN.Rd b/EBSeq/man/LogN.Rd
deleted file mode 100644 (file)
index 3b26607..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-\name{LogN}
-\alias{LogN}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The function to run EM (one round) using optim.
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-LogN(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{Input}{
-%%     ~~Describe \code{Input} here~~
-}
-  \item{InputSP}{
-%%     ~~Describe \code{InputSP} here~~
-}
-  \item{EmpiricalR}{
-%%     ~~Describe \code{EmpiricalR} here~~
-}
-  \item{EmpiricalRSP}{
-%%     ~~Describe \code{EmpiricalRSP} here~~
-}
-  \item{NumOfEachGroup}{
-%%     ~~Describe \code{NumOfEachGroup} here~~
-}
-  \item{AlphaIn}{
-%%     ~~Describe \code{AlphaIn} here~~
-}
-  \item{BetaIn}{
-%%     ~~Describe \code{BetaIn} here~~
-}
-  \item{PIn}{
-%%     ~~Describe \code{PIn} here~~
-}
-  \item{NoneZeroLength}{
-%%     ~~Describe \code{NoneZeroLength} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/LogNMulti.Rd b/EBSeq/man/LogNMulti.Rd
deleted file mode 100644 (file)
index 627348c..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-\name{LogNMulti}
-\alias{LogNMulti}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The function to run EM (one round) using optim.
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-LogNMulti(Input, InputSP, EmpiricalR, EmpiricalRSP, NumOfEachGroup, AlphaIn, BetaIn, PIn, NoneZeroLength,AllParti, Conditions)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{Input}{
-%%     ~~Describe \code{Input} here~~
-}
-  \item{InputSP}{
-%%     ~~Describe \code{InputSP} here~~
-}
-  \item{EmpiricalR}{
-%%     ~~Describe \code{EmpiricalR} here~~
-}
-  \item{EmpiricalRSP}{
-%%     ~~Describe \code{EmpiricalRSP} here~~
-}
-  \item{NumOfEachGroup}{
-%%     ~~Describe \code{NumOfEachGroup} here~~
-}
-  \item{AlphaIn}{
-%%     ~~Describe \code{AlphaIn} here~~
-}
-  \item{BetaIn}{
-%%     ~~Describe \code{BetaIn} here~~
-}
-  \item{PIn}{
-%%     ~~Describe \code{PIn} here~~
-}
-  \item{NoneZeroLength}{
-%%     ~~Describe \code{NoneZeroLength} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/MedianNorm.Rd b/EBSeq/man/MedianNorm.Rd
deleted file mode 100644 (file)
index ef8d718..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-\name{MedianNorm}
-\alias{MedianNorm}
-\title{
-Median Normalization
-}
-\description{
-The median normalization from Anders and Huber (2010).
-}
-\usage{
-MedianNorm(Data)
-}
-\arguments{
-
-  \item{Data}{
-The data matrix with transcripts in rows and lanes in columns.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-The function returns a vector containing the normalization factor for each lane.
-% ...
-}
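For reference, a minimal sketch of the median-of-ratios factors described by Anders and Huber (2010), assuming Data is a count matrix with transcripts in rows and lanes in columns (an illustration, not necessarily the package's exact implementation):

geoMeans <- exp(rowMeans(log(Data)))   # per-transcript geometric mean across lanes
keep     <- geoMeans > 0               # transcripts with no zero count
Sizes    <- apply(Data[keep, ], 2, function(lane) median(lane / geoMeans[keep]))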
-\references{
-Simon Anders and Wolfgang Huber: Differential expression analysis for sequence count data
-Genome Biology (2010) 11:R106 (open access)
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-Sizes=MedianNorm(GeneData)
-# Run EBSeq
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/MergeGene.Rd b/EBSeq/man/MergeGene.Rd
deleted file mode 100644 (file)
index 77044c3..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-\name{MergeGene}
-\alias{MergeGene}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plots of gene simulation result
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-MergeGene(GeneSIMout, Num, Path = "./")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{GeneSIMout}{
-The output of GeneSimu with OnlyData="F".
-}
-  \item{Num}{
-How many times the simulation ran.
-}
-  \item{Path}{
-The path to store the plots
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-3 plots will be generated:
-1. FPR vs TPR of each method
-2. FDR vs TPR of each method
-3. Top counts vs FDR of each method
-
-A table will also be generated which contains the FDR and TPR of each method
-(using p-value = .05 or posterior probability = .95).
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-MergeIso
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="F")
-
-GeneTable=MergeGene(GeneGenerate,1,"./")
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/MergeIso.Rd b/EBSeq/man/MergeIso.Rd
deleted file mode 100644 (file)
index 4ace949..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-\name{MergeIso}
-\alias{MergeIso}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plots of isoform simulation result
-}
-\description{
-}
-\usage{
-MergeIso(IsoSIMout, Num, Path = "./")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{IsoSIMout}{
-The output of IsoSimu with OnlyData="F".
-}
-  \item{Num}{
-How many times the simulation ran.
-
-}
-  \item{Path}{
-         The path to store the plots.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-3 plots will be generated:
-1. FPR vs TPR of each method
-2. FDR vs TPR of each method
-3. Top counts vs FDR of each method
-
-A table will also be generated which contains the FDR and TPR of each method.
-Each method will be run on all the data and within each group
-(using p-value = .05 or posterior probability = .95).
-
-
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-MergeGene
-}
-\examples{
-IsoGenerate=IsoSimu(DVDconstant=NULL, DVDqt1=.97, DVDqt2=.98, Conditions=as.factor(rep(c(1,2),each=5)), NumofSample=10, NumofIso=NULL, DEIsoProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, OnlyData="F" )
-
-IsoTable=MergeIso(IsoGenerate,1,"./")
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PlotFDTP.Rd b/EBSeq/man/PlotFDTP.Rd
deleted file mode 100644 (file)
index e3fff39..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-\name{PlotFDTP}
-\alias{PlotFDTP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot the FDR vs TPR for each method in simulation data
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PlotFDTP(TopNum, FDR, TPR, names)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{TopNum}{
-%%     ~~Describe \code{TopNum} here~~
-}
-  \item{FDR}{
-%%     ~~Describe \code{FDR} here~~
-}
-  \item{TPR}{
-%%     ~~Describe \code{TPR} here~~
-}
-  \item{names}{
-%%     ~~Describe \code{names} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-function(TopNum, FDR, TPR,names)
-{
-  
-  matplot(FDR, TPR, xlim=c(0,.5), ylim=c(0,1) ,type="l",lwd=2,xlab="FDR", ylab="TPR")
-    legend("bottomright",col=1:TopNum, lwd=2, lty=1:TopNum, names)
-
-
-  }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PlotFPTP.Rd b/EBSeq/man/PlotFPTP.Rd
deleted file mode 100644 (file)
index 8e10aba..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-\name{PlotFPTP}
-\alias{PlotFPTP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot the FPR vs TPR for each method in simulation data
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PlotFPTP(TopNum, FPR, TPR, names)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{TopNum}{
-%%     ~~Describe \code{TopNum} here~~
-}
-  \item{FPR}{
-%%     ~~Describe \code{FPR} here~~
-}
-  \item{TPR}{
-%%     ~~Describe \code{TPR} here~~
-}
-  \item{names}{
-%%     ~~Describe \code{names} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-function(TopNum, FPR, TPR,names)
-{
-        
-         matplot(FPR, TPR,xlim=c(0,.1), ylim=c(0,1) ,type="l",lwd=2, xlab="FPR", ylab="TPR")
-             legend("bottomright",col=1:TopNum,lwd=2, lty=1:TopNum, names)
-
-
-  }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PlotPattern.Rd b/EBSeq/man/PlotPattern.Rd
deleted file mode 100644 (file)
index 9b13845..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-\name{PlotPattern}
-\alias{PlotPattern}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Visualize the patterns
-}
-\description{
-visualize the patterns
-}
-\usage{
-PlotPattern(PosParti)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{PosParti}{
-The output of GetPatterns function.
-}
-
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-A heatmap to visualize the patterns of interest.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-Conditions=c("C1","C1","C2","C2","C3","C3")
-PosParti=GetPatterns(Conditions)
-PlotPattern(PosParti)
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ }
diff --git a/EBSeq/man/PlotTopCts.Rd b/EBSeq/man/PlotTopCts.Rd
deleted file mode 100644 (file)
index 4c3d187..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-\name{PlotTopCts}
-\alias{PlotTopCts}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot the number of false discoveries among the top-ranked transcripts in simulation data
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PlotTopCts(TopNum, FD, names)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{TopNum}{
-%%     ~~Describe \code{TopNum} here~~
-}
-  \item{FD}{
-%%     ~~Describe \code{FD} here~~
-}
-  \item{names}{
-%%     ~~Describe \code{names} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-function(TopNum, FD, names)
-{
-    matplot(c(1:TopNum) , FD,type="l",xlab="Top DE selected", lwd=2, log="y", ylab="FD")
-    legend("topleft",col=1:TopNum, lwd=2, lty=1:TopNum, names)
-
-  }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PolyFitPlot.Rd b/EBSeq/man/PolyFitPlot.Rd
deleted file mode 100644 (file)
index 491f3ee..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-\name{PolyFitPlot}
-\alias{PolyFitPlot}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-%%  ~~function to do ... ~~
-Fit the mean-var relationship using polynomial regression
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PolyFitPlot(X, Y, nterms, xname = "Estimated Mean", yname = "Estimated Var", pdfname = "", xlim =  c(-1,5), ylim = c(-1,7), ChangeXY = F, col = "red")
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{X}{
-The first group of values to be fitted by the polynomial regression (e.g., the mean of the data).
-}
-  \item{Y}{
-The second group of values to be fitted by the polynomial regression (e.g., the variance of the data). The length of Y should be the same as the length of X.
-}
-  \item{nterms}{
-How many polynomial terms to use.
-}
-  \item{xname}{
-Name of the x axis.
-}
-  \item{yname}{
-Name of the y axis.
-}
-  \item{pdfname}{
-Name of the plot.
-}
-  \item{xlim}{
-The x limits of the plot. 
-}
-  \item{ylim}{
-The y limits of the plot.
-
-}
-  \item{ChangeXY}{
-If ChangeXY is set to TRUE, X will be treated as the dependent variable and Y as the independent one. Default is FALSE.
-}
-  \item{col}{
-Color of the fitted line.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-       The PolyFitPlot function produces a smoothed scatter plot of the two variables together with the best-fitting polynomial regression line.
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneData=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-
-EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi(Data=GeneData,NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), maxround=5)
-
-poly=PolyFitPlot(X=EBres$MeanList[[1]], Y=EBres$PoolVar[[1]], nterms=5, xname = "mean", yname = "var", pdfname=NULL, xlim = c(0,4.5),ylim = c(-2,8), ChangeXY = F, col = "red")
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PoolMatrix.Rd b/EBSeq/man/PoolMatrix.Rd
deleted file mode 100644 (file)
index b2c46d5..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-\name{PoolMatrix}
-\alias{PoolMatrix}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Generate the expression matrix from the output of GetData
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-PoolMatrix(Data, reads, type)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{Data}{
-The output from GetData function.
-}
-  \item{reads}{
-The total number of reads in each lane. Could be obtained from the RSEM outputs. 
-}
-  \item{type}{
-If type="S", the output will be a matrix with transcript names in rows and sample names in columns.
-If type="G", the first column will contain the group information.
-
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-\item{PoolM}{The matrix of nu values}
-\item{PoolValue}{The matrix of expression values}
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-GetData
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-function(Data,reads,type)
-{
-poolnames=names(Data)
-poolM=NULL
-for (po in 1:8)
-       poolM=cbind(poolM,Data[[po]][,1])
-rownames(poolM)=rownames(Data[[1]])
-colnames(poolM)=poolnames
-
-#poolValue=poolM*reads
-poolValue=poolM
-for (col in 1:8)
-       poolValue[,col]=poolM[,col]*reads[col]
-poolValue=round(poolValue)
-if (type=="G")
-       {
-               poolM=cbind(Data[[1]][,2],poolM)
-               poolValue=cbind(Data[[1]][,2],poolValue)
-               colnames(poolM)=c("Groups",poolnames)
-               colnames(poolValue)=c("Groups",poolnames)
-       }
-poolOutput=list(poolM=poolM,poolValue=poolValue)
-  }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/PostFC.Rd b/EBSeq/man/PostFC.Rd
deleted file mode 100644 (file)
index 19118aa..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-\name{PostFC}
-\alias{PostFC}
-\title{
-Calculate the posterior fold change for each transcript across conditions
-}
-\description{
-}
-\usage{
-PostFC(EBoutput)
-}
-\arguments{
-
-  \item{EBoutput}{
-The output from the function EBTest. (Currently only at the gene level.)
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/QQP.Rd b/EBSeq/man/QQP.Rd
deleted file mode 100644 (file)
index 6fdde4e..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-\name{QQP}
-\alias{QQP}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The QQ Plot of empirical q's and simulated q's from fitted beta distribution
-}
-\description{
-
-}
-\usage{
-QQP(QList, AlphaResult, BetaResult, name, AList="F", GroupName)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{QList}{
-The estimated q's from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input could be a vector or a list of different groups of transcripts. The number of lists here should be the same as the length of BetaResult.
-
-}
-  \item{AlphaResult}{
-The fitted parameter alpha from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input should be a number if AList is not defined.
-}
-  \item{BetaResult}{
-The fitted parameter beta from the output of NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi. Input could be one single number or a vector of several numbers. The length of the input should be the same as the number of lists of QList.
-}
-  \item{name}{
-The name of the plots
-}
-  \item{AList}{
-Whether a list of alpha values is used.
-}
-  \item{GroupName}{
-The names of each sub plot. The length of the input
-should be the same as the number of lists of QList.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
- NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi , DenNHist
-}
-\examples{
-GeneData=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-
-EBres=NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), maxround=5)
-
-QQP(QList=EBres$QList1, AlphaResult=EBres[[1]][5,1], BetaResult=EBres[[2]][5,1], name="Gene", AList="F", GroupName=NULL)
-
-## The function is currently defined as
-function(QList, AlphaResult, BetaResult, name, AList="F", GroupName){
-    for (i in 1:length(BetaResult)){
-        tmpSize=length(QList[[i]][QList[[i]]<1])
-        if (AList=="F") rdpts=rbeta(tmpSize, AlphaResult, BetaResult[i])
-        else rdpts=rbeta(tmpSize, AlphaResult[i], BetaResult[i])
-        qqplot(QList[[i]][QList[[i]]<1], rdpts, xlab="estimated q's", ylab="simulated q's from fitted beta distribution", main=paste(name, GroupName[i], sep=" "), xlim=c(0,1), ylim=c(0,1))
-    }
-  }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/QuantileNorm.Rd b/EBSeq/man/QuantileNorm.Rd
deleted file mode 100644 (file)
index 5d9ec77..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-\name{QuantileNorm}
-\alias{QuantileNorm}
-\title{
-Quantile Normalization
-}
-\description{
-The Quantile normalization
-}
-\usage{
-QuantileNorm(Data, Quantile)
-}
-\arguments{
-
-  \item{Data}{
-The data matrix with transcripts in rows and lanes in columns.
-}
-\item{Quantile}{
-The quantile the user wishes to use. Should be a number between 0 and 1.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-Use a quantile point to normalize the data.
-}
-\value{
-The function will return a vector containing the normalization factor for each lane.
-% ...
-}
-\references{}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-Sizes=QuantileNorm(GeneData, .75) # e.g. use the upper quartile
-# Run EBSeq
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/RankNorm.Rd b/EBSeq/man/RankNorm.Rd
deleted file mode 100644 (file)
index 5fd223e..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-\name{RankNorm}
-\alias{RankNorm}
-\title{
-Rank Normalization
-}
-\description{
-}
-\usage{
-RankNorm(Data)
-}
-\arguments{
-
-  \item{Data}{
-The data matrix with transcripts in rows and lanes in columns.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-The function will return a matrix containing the normalization factor for each lane and each transcript.
-% ...
-}
-\references{
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-GeneGenerate=GeneSimu(DVDconstant=4, DVDqt1=NULL, DVDqt2=NULL, Conditions=rep(c(1,2),each=5), NumofSample=10, NumofGene=10000, DEGeneProp=.1, Phiconstant=NULL, Phi.qt1=.25, Phi.qt2=.75, Meanconstant=NULL, OnlyData="Y")
-GeneData=GeneGenerate$data
-
-Sizes=RankNorm(GeneData)
-# Run EBSeq
-EBres=EBTest(Data=GeneData, NgVector=rep(1,10^4), Vect5End=rep(1,10^4), Vect3End=rep(1,10^4), Conditions=as.factor(rep(c(1,2),each=5)), sizeFactors=Sizes,maxround=5)
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/TPFDRplot.Rd b/EBSeq/man/TPFDRplot.Rd
deleted file mode 100644 (file)
index 15e1caa..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-\name{TPFDRplot}
-\alias{TPFDRplot}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Plot the number of top counts vs FDR for each method in simulation data
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-TPFDRplot(DESeqP, EBZ, TrueDE, main, FDR = NULL)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{DESeqP}{
-%%     ~~Describe \code{DESeqP} here~~
-}
-  \item{EBZ}{
-%%     ~~Describe \code{EBZ} here~~
-}
-  \item{TrueDE}{
-%%     ~~Describe \code{TrueDE} here~~
-}
-  \item{main}{
-%%     ~~Describe \code{main} here~~
-}
-  \item{FDR}{
-%%     ~~Describe \code{FDR} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-function(DESeqP, EBZ, TrueDE, main, FDR=NULL){
-       Seq=seq(0.001,0.5,by=0.001)
-       DETPR=rep(0,length(Seq))
-       EBTPR=rep(0,length(Seq))
-       DEFDR=rep(0,length(Seq))
-       EBFDR=rep(0,length(Seq))
-       DETPNum=rep(0,length(Seq))
-    EBTPNum=rep(0,length(Seq))
-    DEFDNum=rep(0,length(Seq))
-    EBFDNum=rep(0,length(Seq))
-       for (i in 1:length(Seq)){
-               DESeqOnes=names(DESeqP)[DESeqP<=Seq[i]]
-               if (length(FDR)==0) EBOnes=names(EBZ)[EBZ>=crit.fun(1-EBZ, Seq[i])]
-               else if (FDR=="H") EBOnes=names(EBZ)[EBZ>=(1-Seq[i])]
-                       else EBOnes=names(EBZ)[EBZ>=FDR[i]]
-
-               DETPNum[i]=sum(DESeqOnes\%in\%TrueDE)
-               EBTPNum[i]=sum(EBOnes\%in\%TrueDE)
-               DEFDNum[i]=sum(!DESeqOnes\%in\%TrueDE)
-               EBFDNum[i]=sum(!EBOnes\%in\%TrueDE)
-               
-               DETPR[i]=DETPNum[i]/length(TrueDE)
-               EBTPR[i]=EBTPNum[i]/length(TrueDE)
-               DEFDR[i]=DEFDNum[i]/length(TrueDE)
-               EBFDR[i]=EBFDNum[i]/length(TrueDE)
-       }
-       plot(Seq,DETPR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "TPR"),xlab="controlled FDR level", ylab="TPR",lwd=2)
-       lines(Seq,EBTPR,col="blue",lwd=2)
-       legend("bottomright",lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
-
-       plot(Seq,DEFDR,ylim=c(0,1),xlim=c(0,.5),type="l",col="red", main=paste(main, "FDR"),xlab="controlled FDR level", ylab="FDR",lwd=2)
-       lines(Seq,EBFDR,col="blue",lwd=2)
-       legend("topleft", lwd=2, col=c("red","blue"), c("DESeq","EBSeq"))
-
-
-       output=cbind( DETPR,EBTPR, DEFDR,EBFDR,DETPNum,EBTPNum,DEFDNum,EBFDNum)
-  }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/TopCts.Rd b/EBSeq/man/TopCts.Rd
deleted file mode 100644 (file)
index bf57d8d..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-\name{TopCts}
-\alias{TopCts}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Get FDR of Top Counts 
-}
-\description{
-
-}
-\usage{
-TopCts(pvalue, PP = NULL, TrueNames, TopNum)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{pvalue}{
-A matrix containing the p-values (or posterior probabilities) for each transcript and each method.
-Rows are for different transcripts and columns are for different methods.
-}
-  \item{PP}{
-The length of the PP vector should be the same as the number of columns of the pvalue matrix.
-Each value in PP is either 0 or 1.
-If the ith value of PP is 0, the ith method (the ith column of pvalue) provided p-values.
-If the ith value of PP is 1, the ith method (the ith column of pvalue) provided posterior probabilities.
-}
-  \item{TrueNames}{
-The names of the transcripts that are defined to be DE.
-}
-  \item{TopNum}{
-The number of top counts we are interested in.
-For example, if TopNum=1000, we'll calculate the FDR's of each method if we pick the top 1, 2, ... 1000 genes.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-A matrix containing the FDRs.
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-%%  ~~who you are~~
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-function(pvalue, PP=NULL, TrueNames, TopNum){
-       NumOfMethods=ncol(pvalue)
-       puse=pvalue
-       if(1\%in\%PP)puse[,PP==1]=1-pvalue[,PP==1]
-       #puse.list=data.frame(puse)
-       FD=matrix(rep(0,NumOfMethods*TopNum),ncol=NumOfMethods)
-#      Rank=apply(puse,2,rank)
-#      for(i in 1:TopNum)
-#              FD[i,]=sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]\%in\%TrueNames))      
-#      FD=sapply(1:TopNum, function(i)sapply(1:NumOfMethods, function(j)sum(!rownames(Rank)[Rank[,j]<=i]\%in\%TrueNames)))
-       for (s in 1:NumOfMethods){
-               tmp=puse[,s]
-               names(tmp)=rownames(puse)
-               sorttmp=sort(tmp)
-               for( c in 2:TopNum)
-                       FD[c, s]=FD[(c-1),s]+as.numeric(!names(sorttmp)[c]\%in\%TrueNames)
-       }
-       FD
-       #matplot(TopNum,FD,type="l",ylim=c(0,1),xlab="Top DE selected", ylab="FDR")
-       #legend("rightbottom",col=1:TopNum, lty=1:TopNum, names)
-       }
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/beta.mom.Rd b/EBSeq/man/beta.mom.Rd
deleted file mode 100644 (file)
index 45c7aa4..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-\name{beta.mom}
-\alias{beta.mom}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Fit the beta distribution by method of moments
-}
-\description{
-Fit the beta distribution by method of moments
-}
-\usage{
-beta.mom(qs.in)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{qs.in}{
-A vector containing the numbers that are assumed to follow a beta distribution.
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-  \item{alpha.hat }{The estimate of alpha}
-  \item{beta.hat}{The estimate of beta}
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-DenNHist, DenNHistTable
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-tmp=rbeta(5,5,100)
-param=beta.mom(tmp)
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ beta }
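For reference, the moment fit that beta.mom documents can be sketched in a few lines of R. The helper below is illustrative only (the deleted page does not show the function body); it simply solves the standard Beta moment equations, mean = alpha/(alpha+beta) and var = alpha*beta/((alpha+beta)^2*(alpha+beta+1)), for alpha and beta.

    # Illustrative method-of-moments fit for a Beta distribution (not the package's code).
    beta_mom_sketch <- function(qs.in) {
      m <- mean(qs.in)                 # sample mean of the q values
      v <- var(qs.in)                  # sample variance of the q values
      s <- m * (1 - m) / v - 1         # equals alpha + beta under the moment equations
      list(alpha.hat = m * s, beta.hat = (1 - m) * s)
    }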
diff --git a/EBSeq/man/crit_fun.Rd b/EBSeq/man/crit_fun.Rd
deleted file mode 100644 (file)
index 99590c9..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-\name{crit_fun}
-\alias{crit_fun}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-Calculate the adjusted FDR threshold 
-}
-\description{
-Calculate the adjusted FDR threshold using the posterior probabilities at a target FDR
-}
-\usage{
-crit_fun(PPEE, thre)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{PPEE}{
-The posterior probabilities of being EE.
-}
-  \item{thre}{
-The target FDR.
-  }
-
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-The adjusted threshold on the posterior probabilities that controls the FDR at the target level.
-}
-\references{
-}
-\author{
-Ning Leng
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-## The function is currently defined as
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ FDR }
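The body of crit_fun is not shown in this diff, but the usual soft-thresholding logic behind an "adjusted FDR threshold" can be sketched as follows: sort the PPEE values, find the largest set whose average PPEE (the estimated FDR) stays below the target, and return the corresponding cutoff on PP = 1 - PPEE. The sketch below is an assumption about that logic, not the package's implementation.

    # Hypothetical sketch of a posterior-probability FDR cutoff (names are illustrative).
    crit_fun_sketch <- function(PPEE, thre) {
      sorted <- sort(PPEE)                           # most likely DE first (smallest PPEE)
      est_fdr <- cumsum(sorted) / seq_along(sorted)  # estimated FDR if the first k are called DE
      k <- sum(est_fdr <= thre)                      # largest list size meeting the target FDR
      if (k == 0) return(1)                          # nothing qualifies: require PP = 1
      1 - sorted[k]                                  # threshold on PP(DE) = 1 - PPEE
    }

Transcripts with PP(DE) at or above the returned value would then be declared DE at the target FDR, matching how crit.fun is used in the TPFDRplot code above.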
diff --git a/EBSeq/man/f0.Rd b/EBSeq/man/f0.Rd
deleted file mode 100644 (file)
index fb0c231..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-\name{f0}
-\alias{f0}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The Predictive Distribution of being EE
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of what the function does. ~~
-}
-\usage{
-f0(Input, AlphaIn, BetaIn, EmpiricalR, NumOfGroups, log)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{Input}{
-%%     ~~Describe \code{Input} here~~
-}
-  \item{AlphaIn}{
-%%     ~~Describe \code{AlphaIn} here~~
-}
-  \item{BetaIn}{
-%%     ~~Describe \code{BetaIn} here~~
-}
-  \item{EmpiricalR}{
-%%     ~~Describe \code{EmpiricalR} here~~
-}
-  \item{NumOfGroups}{
-%%     ~~Describe \code{NumOfGroups} here~~
-}
-  \item{log}{
-%%     ~~Describe \code{log} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-%% ~put references to the literature/web site here ~
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-f1
-}
-\examples{
-##---- Should be DIRECTLY executable !! ----
-##-- ==>  Define data, use random,
-##--   or do  help(data=index)  for the standard data sets.
-
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/man/f1.Rd b/EBSeq/man/f1.Rd
deleted file mode 100644 (file)
index 1bf374c..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-\name{f1}
-\alias{f1}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{
-The Predictive Distribution of being DE
-}
-\description{
-f1(X_gi)=f0(X_giC1)f0(X_giC2)
-}
-\usage{
-f1(Input1, Input2, AlphaIn, BetaIn, EmpiricalRSP1, EmpiricalRSP2, NumOfGroup, log)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{Input1}{
-%%     ~~Describe \code{Input1} here~~
-}
-  \item{Input2}{
-%%     ~~Describe \code{Input2} here~~
-}
-  \item{AlphaIn}{
-%%     ~~Describe \code{AlphaIn} here~~
-}
-  \item{BetaIn}{
-%%     ~~Describe \code{BetaIn} here~~
-}
-  \item{EmpiricalRSP1}{
-%%     ~~Describe \code{EmpiricalRSP1} here~~
-}
-  \item{EmpiricalRSP2}{
-%%     ~~Describe \code{EmpiricalRSP2} here~~
-}
-  \item{NumOfGroup}{
-%%     ~~Describe \code{NumOfGroup} here~~
-}
-  \item{log}{
-%%     ~~Describe \code{log} here~~
-}
-}
-\details{
-%%  ~~ If necessary, more details than the description above ~~
-}
-\value{
-%%  ~Describe the value returned
-%%  If it is a LIST, use
-%%  \item{comp1 }{Description of 'comp1'}
-%%  \item{comp2 }{Description of 'comp2'}
-%% ...
-}
-\references{
-NBBetaEB.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar, f0.bias.uniqueP_PoolVarSpeedUp_MDFPoi_NoNormVar
-}
-\author{
-Ning Leng
-}
-\note{
-%%  ~~further notes~~
-}
-
-%% ~Make other sections like Warning with \section{Warning }{....} ~
-
-\seealso{
-f0
-}
-\examples{
-
-}
-% Add one or more standard keywords, see file 'KEYWORDS' in the
-% R documentation directory.
-\keyword{ ~kwd1 }
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info b/EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info
new file mode 100755 (executable)
index 0000000..312dbfa
--- /dev/null
@@ -0,0 +1,18 @@
+#!/usr/bin/env Rscript
+
+argv <- commandArgs(TRUE)
+if (length(argv) != 2) {
+  cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
+  q(status = 1)
+}
+
+data <- read.table(file = argv[1], stringsAsFactors = F)
+idx <- data[,2] >= 0
+kmr <- kmeans(data[idx, 2], 3)
+order <- order(kmr$centers)
+
+ngvec <- rep(0, length(idx))
+ngvec[idx] <- order[kmr$cluster]
+ngvec[!idx] <- 3
+
+write.table(ngvec, file = argv[2], row.names = F, col.names = F)
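The new helper above runs k-means with three centers on the clustering statistic in column 2 of the input file and writes one group label per transcript (transcripts with a negative statistic are assigned to group 3). A minimal way to run it and inspect the result, with hypothetical file names, would be:

    # Hypothetical file names, for illustration only.
    system(paste("rsem-for-ebseq-generate-ngvector-from-clustering-info",
                 "sample.cluster_info", "sample.ngvec"))
    ngvec <- scan("sample.ngvec")   # one integer label (1, 2 or 3) per transcript
    table(ngvec)                    # how many transcripts fall in each group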
diff --git a/EM.cpp b/EM.cpp
index 684d639271567df55dcfc785d196e0f7df075694..09b85be234bb432724524574519ef6affda7f2d7 100644 (file)
--- a/EM.cpp
+++ b/EM.cpp
@@ -273,60 +273,124 @@ void* calcConProbs(void* arg) {
 
 template<class ModelType>
 void calcExpectedEffectiveLengths(ModelType& model) {
-  int lb, ub, span;
-  double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
+       int lb, ub, span;
+       double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
   
-  model.getGLD().copyTo(pdf, cdf, lb, ub, span);
-  clen = new double[span + 1];
-  clen[0] = 0.0;
-  for (int i = 1; i <= span; i++) {
-    clen[i] = clen[i - 1] + pdf[i] * (lb + i);
-  }
-
-  eel.clear();
-  eel.resize(M + 1, 0.0);
-  for (int i = 1; i <= M; i++) {
-    int totLen = refs.getRef(i).getTotLen();
-    int fullLen = refs.getRef(i).getFullLen();
-    int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
-    int pos2 = max(min(totLen, ub) - lb, 0);
-
-    if (pos2 == 0) { eel[i] = 0.0; continue; }
+       model.getGLD().copyTo(pdf, cdf, lb, ub, span);
+       clen = new double[span + 1];
+       clen[0] = 0.0;
+       for (int i = 1; i <= span; i++) {
+               clen[i] = clen[i - 1] + pdf[i] * (lb + i);
+       }
+
+       eel.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++) {
+               int totLen = refs.getRef(i).getTotLen();
+               int fullLen = refs.getRef(i).getFullLen();
+               int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
+               int pos2 = max(min(totLen, ub) - lb, 0);
+
+               if (pos2 == 0) { eel[i] = 0.0; continue; }
     
-    eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
-    assert(eel[i] >= 0);
-    if (eel[i] < MINEEL) { eel[i] = 0.0; }
-  }
+               eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
+               assert(eel[i] >= 0);
+               if (eel[i] < MINEEL) { eel[i] = 0.0; }
+       }
   
-  delete[] pdf;
-  delete[] cdf;
-  delete[] clen;
+       delete[] pdf;
+       delete[] cdf;
+       delete[] clen;
+}
+
+void polishTheta(vector<double>& theta, const vector<double>& eel, const double* mw) {
+       double sum = 0.0;
+
+       /* The reason that, for the noise gene, the mw value is 1 is:
+        * currently, all masked positions are poly(A) sites, which in theory should be filtered out,
+        * so theta0 does not contain reads from any masked position.
+        */
+
+       for (int i = 0; i <= M; i++) {
+               // i == 0, mw[i] == 1
+               if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
+                       theta[i] = 0.0;
+                       continue;
+               }
+               theta[i] = theta[i] / mw[i];
+               sum += theta[i];
+       }
+       // currently this is OK, since no transcript should be totally masked; only the poly(A) tail related part will be masked
+       general_assert(sum >= EPSILON, "No effective length is no less than " + ftos(MINEEL, 6) + " !");
+       for (int i = 0; i <= M; i++) theta[i] /= sum;
+}
+
+void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
+       double denom;
+       vector<double> frac;
+
+       //calculate fraction of counts over all mappable reads
+       denom = 0.0;
+       frac.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++) 
+         if (eel[i] >= EPSILON) {
+           frac[i] = theta[i];
+           denom += frac[i];
+         }
+       general_assert(denom > 0, "No alignable reads?!");
+       for (int i = 1; i <= M; i++) frac[i] /= denom;
+  
+       //calculate FPKM
+       fpkm.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++)
+               if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+       //calculate TPM
+       tpm.assign(M + 1, 0.0);
+       denom = 0.0;
+       for (int i = 1; i <= M; i++) denom += fpkm[i];
+       for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;  
 }
 
 template<class ModelType>
 void writeResults(ModelType& model, double* counts) {
-       double denom;
        char outF[STRLEN];
        FILE *fo;
 
        sprintf(modelF, "%s.model", statName);
        model.write(modelF);
 
-       //calculate tau values
-       double *tau = new double[M + 1];
-       memset(tau, 0, sizeof(double) * (M + 1));
+       vector<int> tlens;
+       vector<double> fpkm, tpm, isopct;
+       vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
 
-       denom = 0.0;
-       for (int i = 1; i <= M; i++) 
-         if (eel[i] >= EPSILON) {
-           tau[i] = theta[i] / eel[i];
-           denom += tau[i];
-         }   
+       calcExpressionValues(theta, eel, tpm, fpkm);
 
-       general_assert(denom > 0, "No alignable reads?!");
+       //calculate IsoPct, etc.
+       isopct.assign(M + 1, 0.0);
+       tlens.assign(M + 1, 0);
 
-       for (int i = 1; i <= M; i++) {
-               tau[i] /= denom;
+       glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
+       gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
+
+       for (int i = 0; i < m; i++) {
+               int b = gi.spAt(i), e = gi.spAt(i + 1);
+               for (int j = b; j < e; j++) {
+                       const Transcript& transcript = transcripts.getTranscriptAt(j);
+                       tlens[j] = transcript.getLength();
+
+                       glens[i] += tlens[j] * tpm[j];
+                       gene_eels[i] += eel[j] * tpm[j];
+                       gene_counts[i] += counts[j];
+                       gene_tpm[i] += tpm[j];
+                       gene_fpkm[i] += fpkm[j];
+               }
+
+               if (gene_tpm[i] < EPSILON) continue;
+
+               for (int j = b; j < e; j++)
+                       isopct[j] = tpm[j] / gene_tpm[i];
+               glens[i] /= gene_tpm[i];
+               gene_eels[i] /= gene_tpm[i];
        }
 
        //isoform level results
@@ -336,34 +400,30 @@ void writeResults(ModelType& model, double* counts) {
                const Transcript& transcript = transcripts.getTranscriptAt(i);
                fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < M ? '\t' : '\n'));
        }
-       for (int i = 1; i <= M; i++)
-               fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
-       for (int i = 1; i <= M; i++)
-               fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
        for (int i = 1; i <= M; i++) {
                const Transcript& transcript = transcripts.getTranscriptAt(i);
                fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
        }
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%d%c", tlens[i], (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.2f%c", eel[i], (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.2f%c", tpm[i], (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.2f%c", fpkm[i], (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
        fclose(fo);
 
        //gene level results
        sprintf(outF, "%s.gene_res", imdName);
        fo = fopen(outF, "w");
        for (int i = 0; i < m; i++) {
-               const string& gene_id = transcripts.getTranscriptAt(gi.spAt(i)).getGeneID();
-               fprintf(fo, "%s%c", gene_id.c_str(), (i < m - 1 ? '\t' : '\n'));
-       }
-       for (int i = 0; i < m; i++) {
-               double sumC = 0.0; // sum of counts
-               int b = gi.spAt(i), e = gi.spAt(i + 1);
-               for (int j = b; j < e; j++) sumC += counts[j];
-               fprintf(fo, "%.2f%c", sumC, (i < m - 1 ? '\t' : '\n'));
-       }
-       for (int i = 0; i < m; i++) {
-               double sumT = 0.0; // sum of tau values
-               int b = gi.spAt(i), e = gi.spAt(i + 1);
-               for (int j = b; j < e; j++) sumT += tau[j];
-               fprintf(fo, "%.15g%c", sumT, (i < m - 1 ? '\t' : '\n'));
+               const Transcript& transcript = transcripts.getTranscriptAt(gi.spAt(i));
+               fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < m - 1 ? '\t' : '\n'));
        }
        for (int i = 0; i < m; i++) {
                int b = gi.spAt(i), e = gi.spAt(i + 1);
@@ -371,10 +431,18 @@ void writeResults(ModelType& model, double* counts) {
                        fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : (i < m - 1 ? '\t' :'\n')));
                }
        }
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", glens[i], (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", gene_eels[i], (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
        fclose(fo);
 
-       delete[] tau;
-
        if (verbose) { printf("Expression Results are written!\n"); }
 }
 
@@ -551,28 +619,8 @@ void EM() {
                fout.close();
        }
 
-       sprintf(thetaF, "%s.theta", statName);
-       fo = fopen(thetaF, "w");
-       fprintf(fo, "%d\n", M + 1);
-
-       // output theta'
-       for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
-       fprintf(fo, "%.15g\n", theta[M]);
-       
-       //calculate expected effective lengths for each isoform
-       calcExpectedEffectiveLengths<ModelType>(model);
-
-       //correct theta vector
-       sum = theta[0];
-       for (int i = 1; i <= M; i++) 
-         if (eel[i] < EPSILON) { theta[i] = 0.0; }
-         else sum += theta[i];
-
-       general_assert(sum >= EPSILON, "No Expected Effective Length is no less than" + ftos(MINEEL, 6) + "?!");
-
-       for (int i = 0; i <= M; i++) theta[i] /= sum;
-
        //calculate expected weights and counts using learned parameters
+       //just use the raw theta learned from the data, do not correct for eel or mw
        updateModel = false; calcExpectedWeights = true;
        for (int i = 0; i <= M; i++) probv[i] = theta[i];
        for (int i = 0; i < nThreads; i++) {
@@ -594,15 +642,18 @@ void EM() {
        /* destroy attribute */
        pthread_attr_destroy(&attr);
 
-       //convert theta' to theta
-       double *mw = model.getMW();
-       sum = 0.0;
-       for (int i = 0; i <= M; i++) {
-         theta[i] = (mw[i] < EPSILON ? 0.0 : theta[i] / mw[i]);
-         sum += theta[i]; 
-       }
-       assert(sum >= EPSILON);
-       for (int i = 0; i <= M; i++) theta[i] /= sum;
+
+       sprintf(thetaF, "%s.theta", statName);
+       fo = fopen(thetaF, "w");
+       fprintf(fo, "%d\n", M + 1);
+
+       // output theta'
+       for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
+       fprintf(fo, "%.15g\n", theta[M]);
+       
+       //calculate expected effective lengths for each isoform
+       calcExpectedEffectiveLengths<ModelType>(model);
+       polishTheta(theta, eel, model.getMW());
 
        // output theta
        for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
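The calcExpressionValues routine added above is what replaces the old tau output with TPM and FPKM; IsoPct for a transcript is then its TPM divided by its gene's total TPM, times 100. A small R sketch that mirrors the C++ (eps stands in for the EPSILON constant used there; this is not RSEM source code):

    # Sketch of the TPM/FPKM computation shown in the diff above.
    calc_expression_values <- function(theta, eel, eps = 1e-8) {
      frac <- ifelse(eel >= eps, theta, 0)
      frac <- frac / sum(frac)                         # fraction of mapped reads per transcript
      fpkm <- ifelse(eel >= eps, frac * 1e9 / eel, 0)  # fragments per kilobase per million reads
      tpm  <- fpkm / sum(fpkm) * 1e6                   # transcripts per million, sums to 1e6
      list(tpm = tpm, fpkm = fpkm)
    }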
diff --git a/Gibbs.cpp b/Gibbs.cpp
index 6bbfeb198c1cd72e81b0da02327dcefd6158c545..bf8563e16ea673b467e22b7d51343b7bd63b27e3 100644 (file)
--- a/Gibbs.cpp
+++ b/Gibbs.cpp
@@ -27,7 +27,7 @@ struct Params {
        FILE *fo;
        engine_type *engine;
        double *pme_c, *pve_c; //posterior mean and variance vectors on counts
-       double *pme_theta;
+       double *pme_tpm, *pme_fpkm;
 };
 
 
@@ -59,10 +59,11 @@ GroupInfo gi;
 vector<HIT_INT_TYPE> s;
 vector<Item> hits;
 
-vector<double> theta;
+vector<double> eel;
+double *mw;
 
 vector<double> pme_c, pve_c; //global posterior mean and variance vectors on counts
-vector<double> pme_theta, eel;
+vector<double> pme_tpm, pme_fpkm;
 
 bool var_opt;
 bool quiet;
@@ -87,16 +88,6 @@ void load_data(char* reference_name, char* statName, char* imdName) {
        gi.load(groupF);
        m = gi.getm();
 
-       //load thetaF
-       sprintf(thetaF, "%s.theta",statName);
-       fin.open(thetaF);
-       general_assert(fin.is_open(), "Cannot open " + cstrtos(thetaF) + "!");
-       fin>>tmpVal;
-       general_assert(tmpVal == M + 1, "Number of transcripts is not consistent in " + cstrtos(refF) + " and " + cstrtos(thetaF) + "!");
-       theta.assign(M + 1, 0);
-       for (int i = 0; i <= M; i++) fin>>theta[i];
-       fin.close();
-
        //load ofgF;
        sprintf(ofgF, "%s.ofg", imdName);
        fin.open(ofgF);
@@ -127,6 +118,46 @@ void load_data(char* reference_name, char* statName, char* imdName) {
        if (verbose) { printf("Loading Data is finished!\n"); }
 }
 
+template<class ModelType>
+void calcExpectedEffectiveLengths(ModelType& model) {
+       int lb, ub, span;
+       double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
+  
+       model.getGLD().copyTo(pdf, cdf, lb, ub, span);
+       clen = new double[span + 1];
+       clen[0] = 0.0;
+       for (int i = 1; i <= span; i++) {
+               clen[i] = clen[i - 1] + pdf[i] * (lb + i);
+       }
+
+       eel.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++) {
+               int totLen = refs.getRef(i).getTotLen();
+               int fullLen = refs.getRef(i).getFullLen();
+               int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
+               int pos2 = max(min(totLen, ub) - lb, 0);
+
+               if (pos2 == 0) { eel[i] = 0.0; continue; }
+    
+               eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
+               assert(eel[i] >= 0);
+               if (eel[i] < MINEEL) { eel[i] = 0.0; }
+       }
+  
+       delete[] pdf;
+       delete[] cdf;
+       delete[] clen;
+}
+
+template<class ModelType>
+void init_model_related(char* modelF) {
+       ModelType model;
+       model.read(modelF);
+
+       calcExpectedEffectiveLengths<ModelType>(model);
+       memcpy(mw, model.getMW(), sizeof(double) * (M + 1)); // otherwise, after exiting this procedure, mw becomes undefined
+}
+
 // assign threads
 void init() {
        int quotient, left;
@@ -153,8 +184,10 @@ void init() {
                memset(paramsArray[i].pme_c, 0, sizeof(double) * (M + 1));
                paramsArray[i].pve_c = new double[M + 1];
                memset(paramsArray[i].pve_c, 0, sizeof(double) * (M + 1));
-               paramsArray[i].pme_theta = new double[M + 1];
-               memset(paramsArray[i].pme_theta, 0, sizeof(double) * (M + 1));
+               paramsArray[i].pme_tpm = new double[M + 1];
+               memset(paramsArray[i].pme_tpm, 0, sizeof(double) * (M + 1));
+               paramsArray[i].pme_fpkm = new double[M + 1];
+               memset(paramsArray[i].pme_fpkm, 0, sizeof(double) * (M + 1));
        }
 
        /* set thread attribute to be joinable */
@@ -187,12 +220,61 @@ void writeCountVector(FILE* fo, vector<int>& counts) {
        fprintf(fo, "%d\n", counts[M]);
 }
 
+void polishTheta(vector<double>& theta, const vector<double>& eel, const double* mw) {
+       double sum = 0.0;
+
+       /* The reason that, for the noise gene, the mw value is 1 is:
+        * currently, all masked positions are poly(A) sites, which in theory should be filtered out,
+        * so theta0 does not contain reads from any masked position.
+        */
+
+       for (int i = 0; i <= M; i++) {
+               // i == 0, mw[i] == 1
+               if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
+                       theta[i] = 0.0;
+                       continue;
+               }
+               theta[i] = theta[i] / mw[i];
+               sum += theta[i];
+       }
+       // currently this is OK, since no transcript should be totally masked; only the poly(A) tail related part will be masked
+       general_assert(sum >= EPSILON, "No effective length is no less than " + ftos(MINEEL, 6) + " !");
+       for (int i = 0; i <= M; i++) theta[i] /= sum;
+}
+
+void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
+       double denom;
+       vector<double> frac;
+
+       //calculate fraction of counts over all mappable reads
+       denom = 0.0;
+       frac.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++) 
+         if (eel[i] >= EPSILON) {
+           frac[i] = theta[i];
+           denom += frac[i];
+         }
+       general_assert(denom > 0, "No alignable reads?!");
+       for (int i = 1; i <= M; i++) frac[i] /= denom;
+  
+       //calculate FPKM
+       fpkm.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++)
+               if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+       //calculate TPM
+       tpm.assign(M + 1, 0.0);
+       denom = 0.0;
+       for (int i = 1; i <= M; i++) denom += fpkm[i];
+       for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;  
+}
+
 void* Gibbs(void* arg) {
        int CHAINLEN;
        HIT_INT_TYPE len, fr, to;
        Params *params = (Params*)arg;
 
-       vector<double> theta;
+       vector<double> theta, tpm, fpkm;
        vector<int> z, counts;
        vector<double> arr;
 
@@ -237,10 +319,14 @@ void* Gibbs(void* arg) {
                if (ROUND > BURNIN) {
                        if ((ROUND - BURNIN - 1) % GAP == 0) {
                                writeCountVector(params->fo, counts);
+                               for (int i = 0; i <= M; i++) theta[i] = counts[i] / totc;
+                               polishTheta(theta, eel, mw);
+                               calcExpressionValues(theta, eel, tpm, fpkm);
                                for (int i = 0; i <= M; i++) {
                                        params->pme_c[i] += counts[i] - 1;
                                        params->pve_c[i] += (counts[i] - 1) * (counts[i] - 1);
-                                       params->pme_theta[i] += counts[i] / totc;
+                                       params->pme_tpm[i] += tpm[i];
+                                       params->pme_fpkm[i] += fpkm[i];
                                }
                        }
                }
@@ -261,18 +347,21 @@ void release() {
 
        pme_c.assign(M + 1, 0);
        pve_c.assign(M + 1, 0);
-       pme_theta.assign(M + 1, 0);
+       pme_tpm.assign(M + 1, 0);
+       pme_fpkm.assign(M + 1, 0);
        for (int i = 0; i < nThreads; i++) {
                fclose(paramsArray[i].fo);
                delete paramsArray[i].engine;
                for (int j = 0; j <= M; j++) {
                        pme_c[j] += paramsArray[i].pme_c[j];
                        pve_c[j] += paramsArray[i].pve_c[j];
-                       pme_theta[j] += paramsArray[i].pme_theta[j];
+                       pme_tpm[j] += paramsArray[i].pme_tpm[j];
+                       pme_fpkm[j] += paramsArray[i].pme_fpkm[j];
                }
                delete[] paramsArray[i].pme_c;
                delete[] paramsArray[i].pve_c;
-               delete[] paramsArray[i].pme_theta;
+               delete[] paramsArray[i].pme_tpm;
+               delete[] paramsArray[i].pme_fpkm;
        }
        delete[] paramsArray;
 
@@ -280,85 +369,32 @@ void release() {
        for (int i = 0; i <= M; i++) {
                pme_c[i] /= NSAMPLES;
                pve_c[i] = (pve_c[i] - NSAMPLES * pme_c[i] * pme_c[i]) / (NSAMPLES - 1);
-               pme_theta[i] /= NSAMPLES;
-       }
-}
-
-template<class ModelType>
-void calcExpectedEffectiveLengths(ModelType& model) {
-       int lb, ub, span;
-       double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = \sigma_{j=1}^{i}pdf[i]*(lb+i)
-  
-       model.getGLD().copyTo(pdf, cdf, lb, ub, span);
-       clen = new double[span + 1];
-       clen[0] = 0.0;
-       for (int i = 1; i <= span; i++) {
-               clen[i] = clen[i - 1] + pdf[i] * (lb + i);
-       }
-
-       eel.assign(M + 1, 0.0);
-       for (int i = 1; i <= M; i++) {
-               int totLen = refs.getRef(i).getTotLen();
-               int fullLen = refs.getRef(i).getFullLen();
-               int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
-               int pos2 = max(min(totLen, ub) - lb, 0);
-
-               if (pos2 == 0) { eel[i] = 0.0; continue; }
-    
-               eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
-               assert(eel[i] >= 0);
-               if (eel[i] < MINEEL) { eel[i] = 0.0; }
+               pme_tpm[i] /= NSAMPLES;
+               pme_fpkm[i] /= NSAMPLES;
        }
-  
-       delete[] pdf;
-       delete[] cdf;
-       delete[] clen;
 }
 
-template<class ModelType>
-void writeEstimatedParameters(char* modelF, char* imdName) {
-       ModelType model;
-       double denom;
+void writeResults(char* imdName) {
        char outF[STRLEN];
        FILE *fo;
 
-       model.read(modelF);
-
-       calcExpectedEffectiveLengths<ModelType>(model);
-
-       denom = pme_theta[0];
-       for (int i = 1; i <= M; i++)
-         if (eel[i] < EPSILON) pme_theta[i] = 0.0;
-         else denom += pme_theta[i];
-
-       general_assert(denom >= EPSILON, "No Expected Effective Length is no less than " + ftos(MINEEL, 6) + "?!");
-
-       for (int i = 0; i <= M; i++) pme_theta[i] /= denom;
-
-       denom = 0.0;
-       double *mw = model.getMW();
-       for (int i = 0; i <= M; i++) {
-         pme_theta[i] = (mw[i] < EPSILON ? 0.0 : pme_theta[i] / mw[i]);
-         denom += pme_theta[i];
-       }
-       assert(denom >= EPSILON);
-       for (int i = 0; i <= M; i++) pme_theta[i] /= denom;
+       vector<double> isopct;
+       vector<double> gene_counts, gene_tpm, gene_fpkm;
 
-       //calculate tau values
-       double *tau = new double[M + 1];
-       memset(tau, 0, sizeof(double) * (M + 1));
+       //calculate IsoPct, etc.
+       isopct.assign(M + 1, 0.0);
+       gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
 
-       denom = 0.0;
-       for (int i = 1; i <= M; i++) 
-         if (eel[i] > EPSILON) {
-           tau[i] = pme_theta[i] / eel[i];
-           denom += tau[i];
-         }
-
-       general_assert(denom >= EPSILON, "No alignable reads?!");
-
-       for (int i = 1; i <= M; i++) {
-               tau[i] /= denom;
+       for (int i = 0; i < m; i++) {
+               int b = gi.spAt(i), e = gi.spAt(i + 1);
+               for (int j = b; j < e; j++) {
+                       gene_counts[i] += pme_c[j];
+                       gene_tpm[i] += pme_tpm[j];
+                       gene_fpkm[i] += pme_fpkm[j];
+               }
+               if (gene_tpm[i] < EPSILON) continue;
+               for (int j = b; j < e; j++)
+                       isopct[j] = pme_tpm[j] / gene_tpm[i];
        }
 
        //isoform level results
@@ -369,8 +405,11 @@ void writeEstimatedParameters(char* modelF, char* imdName) {
        for (int i = 1; i <= M; i++)
                fprintf(fo, "%.2f%c", pme_c[i], (i < M ? '\t' : '\n'));
        for (int i = 1; i <= M; i++)
-               fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
-
+               fprintf(fo, "%.2f%c", pme_tpm[i], (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.2f%c", pme_fpkm[i], (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
        fclose(fo);
 
        //gene level results
@@ -378,26 +417,14 @@ void writeEstimatedParameters(char* modelF, char* imdName) {
        fo = fopen(outF, "a");
        general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
 
-       for (int i = 0; i < m; i++) {
-               double sumC = 0.0; //  sum of pme counts
-               int b = gi.spAt(i), e = gi.spAt(i + 1);
-               for (int j = b; j < e; j++) {
-                       sumC += pme_c[j];
-               }
-               fprintf(fo, "%.15g%c", sumC, (i < m - 1 ? '\t' : '\n'));
-       }
-       for (int i = 0; i < m; i++) {
-               double sumT = 0.0; //  sum of tau values
-               int b = gi.spAt(i), e = gi.spAt(i + 1);
-               for (int j = b; j < e; j++) {
-                       sumT += tau[j];
-               }
-               fprintf(fo, "%.15g%c", sumT, (i < m - 1 ? '\t' : '\n'));
-       }
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
        fclose(fo);
 
-       delete[] tau;
-
        if (verbose) { printf("Gibbs based expression values are written!\n"); }
 }
 
@@ -414,8 +441,6 @@ int main(int argc, char* argv[]) {
        NSAMPLES = atoi(argv[5]);
        GAP = atoi(argv[6]);
 
-       load_data(argv[1], statName, imdName);
-
        nThreads = 1;
        var_opt = false;
        quiet = false;
@@ -434,6 +459,23 @@ int main(int argc, char* argv[]) {
                printf("Warning: Number of samples is less than number of threads! Change the number of threads to %d!\n", nThreads);
        }
 
+       load_data(argv[1], statName, imdName);
+
+       sprintf(modelF, "%s.model", statName);
+       FILE *fi = fopen(modelF, "r");
+       general_assert(fi != NULL, "Cannot open " + cstrtos(modelF) + "!");
+       assert(fscanf(fi, "%d", &model_type) == 1);
+       fclose(fi);
+
+       mw = new double[M + 1]; // make an extra copy
+
+       switch(model_type) {
+       case 0 : init_model_related<SingleModel>(modelF); break;
+       case 1 : init_model_related<SingleQModel>(modelF); break;
+       case 2 : init_model_related<PairedEndModel>(modelF); break;
+       case 3 : init_model_related<PairedEndQModel>(modelF); break;
+       }
+
        if (verbose) printf("Gibbs started!\n");
 
        init();
@@ -448,19 +490,8 @@ int main(int argc, char* argv[]) {
        release();
 
        if (verbose) printf("Gibbs finished!\n");
-
-       sprintf(modelF, "%s.model", statName);
-       FILE *fi = fopen(modelF, "r");
-       general_assert(fi != NULL, "Cannot open " + cstrtos(modelF) + "!");
-       assert(fscanf(fi, "%d", &model_type) == 1);
-       fclose(fi);
-
-       switch(model_type) {
-       case 0 : writeEstimatedParameters<SingleModel>(modelF, imdName); break;
-       case 1 : writeEstimatedParameters<SingleQModel>(modelF, imdName); break;
-       case 2 : writeEstimatedParameters<PairedEndModel>(modelF, imdName); break;
-       case 3 : writeEstimatedParameters<PairedEndQModel>(modelF, imdName); break;
-       }
+       
+       writeResults(imdName);
 
        if (var_opt) {
                char varF[STRLEN];
@@ -477,5 +508,7 @@ int main(int argc, char* argv[]) {
                fclose(fo);
        }
 
+       delete[] mw; // delete the copy (allocated with new[])
+
        return 0;
 }
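
The gene-level quantities written above are plain sums of the isoform-level values over each gene's isoform span, and IsoPct is each isoform's share of its gene's TPM. A minimal standalone sketch of that aggregation (hypothetical names; `spans` plays the role of GroupInfo::spAt), not the commit's code itself:

    #include <cstdio>
    #include <vector>
    using namespace std;

    // Aggregate isoform-level TPM into gene-level TPM and per-isoform IsoPct.
    // spans[i] .. spans[i+1]-1 are the isoform indices of gene i (isoforms are 1-based).
    void aggregate(const vector<int>& spans, const vector<double>& iso_tpm,
                   vector<double>& gene_tpm, vector<double>& isopct) {
        int m = (int)spans.size() - 1;
        gene_tpm.assign(m, 0.0);
        isopct.assign(iso_tpm.size(), 0.0);
        for (int i = 0; i < m; i++) {
            for (int j = spans[i]; j < spans[i + 1]; j++) gene_tpm[i] += iso_tpm[j];
            if (gene_tpm[i] <= 0.0) continue; // gene not expressed, IsoPct stays 0
            for (int j = spans[i]; j < spans[i + 1]; j++)
                isopct[j] = iso_tpm[j] / gene_tpm[i] * 100.0; // IsoPct, in percent
        }
    }

    int main() {
        vector<int> spans = {1, 3, 4};             // gene 0: isoforms 1-2, gene 1: isoform 3
        vector<double> iso_tpm = {0.0, 60.0, 20.0, 20.0};
        vector<double> gene_tpm, isopct;
        aggregate(spans, iso_tpm, gene_tpm, isopct);
        for (size_t j = 1; j < iso_tpm.size(); j++)
            printf("isoform %zu: IsoPct %.2f\n", j, isopct[j]);
        return 0;
    }
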
index f3f95fe29874881b73f3a76633d83efa1e8059e8..0d2c9b3f216db8df91fa47a85c57065123fe2331 100644 (file)
@@ -9,6 +9,7 @@
 #include<algorithm>
 #include<sstream>
 #include<iostream>
+#include<vector>
 
 #include "utils.h"
 #include "my_assert.h"
@@ -193,12 +194,12 @@ public:
 
        const LenDist& getGLD() { return *gld; }
 
-       void startSimulation(simul*, double*);
+       void startSimulation(simul*, const std::vector<double>&);
        bool simulate(READ_INT_TYPE, PairedEndRead&, int&);
        void finishSimulation();
 
        //Use it after function 'read' or 'estimateFromReads'
-       double* getMW() { 
+       const double* getMW() { 
          assert(mw != NULL);
          return mw;
        }
@@ -349,7 +350,7 @@ void PairedEndModel::write(const char* outF) {
        fclose(fo);
 }
 
-void PairedEndModel::startSimulation(simul* sampler, double* theta) {
+void PairedEndModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
        this->sampler = sampler;
 
        theta_cdf = new double[M + 1];
index a2f798120cc2524cae41b7f156aaa4bad524cced..7aebb494d1dc9c45785185e5ade49d7c2e8b2d1d 100644 (file)
@@ -9,6 +9,7 @@
 #include<algorithm>
 #include<sstream>
 #include<iostream>
+#include<vector>
 
 #include "utils.h"
 #include "my_assert.h"
@@ -197,12 +198,12 @@ public:
 
        const LenDist& getGLD() { return *gld; }
 
-       void startSimulation(simul*, double*);
+       void startSimulation(simul*, const std::vector<double>&);
        bool simulate(READ_INT_TYPE, PairedEndReadQ&, int&);
        void finishSimulation();
 
        //Use it after function 'read' or 'estimateFromReads'
-       double* getMW() { 
+       const double* getMW() { 
          assert(mw != NULL);
          return mw;
        }
@@ -361,7 +362,7 @@ void PairedEndQModel::write(const char* outF) {
        fclose(fo);
 }
 
-void PairedEndQModel::startSimulation(simul* sampler, double* theta) {
+void PairedEndQModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
        this->sampler = sampler;
 
        theta_cdf = new double[M + 1];
index 513536f2a2107e181a0393946a00e9fa77c35416..e91f9c379539fc7ada0f8682bcfc7c957489d67f 100644 (file)
@@ -341,10 +341,17 @@ inline int SamParser::getReadType(const bam1_t* b) {
        return (bam_aux2i(p) > 0 ? 2 : 0);
 }
 
-
 //For paired-end reads, do not print out type 2 reads
 inline int SamParser::getReadType(const bam1_t* b, const bam1_t* b2) {
-       if ((b->core.flag & 0x0002) && (b2->core.flag & 0x0002)) return 1;
+       if (!(b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) return 1;
+
+       if (!strcmp(rtTag, "")) return 0;
+
+       uint8_t *p = bam_aux_get(b, rtTag);
+       if (p != NULL && bam_aux2i(p) > 0) return 2;
+
+       p = bam_aux_get(b2, rtTag);
+       if (p != NULL && bam_aux2i(p) > 0) return 2;
 
        return 0;
 }
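
The paired-end fix above stops requiring the "properly paired" bit (0x0002) on both mates and instead treats a pair as aligned whenever neither mate carries the "segment unmapped" bit (0x0004); the read-type tag is then checked on both mates. A minimal sketch of that flag logic, assuming plain integer FLAG values rather than bam1_t records:

    #include <cstdio>

    // SAM FLAG bit (from the SAM specification): segment unmapped.
    const int FLAG_UNMAPPED = 0x0004;

    // Return 1 if both mates are mapped (an alignable pair), 0 otherwise.
    // The real getReadType additionally inspects an auxiliary tag (rtTag)
    // on both mates to detect type-2 reads; that part is omitted here.
    int pairIsAligned(int flag1, int flag2) {
        return (!(flag1 & FLAG_UNMAPPED) && !(flag2 & FLAG_UNMAPPED)) ? 1 : 0;
    }

    int main() {
        printf("%d\n", pairIsAligned(99, 147));   // both mates mapped -> 1
        printf("%d\n", pairIsAligned(77, 141));   // both mates unmapped -> 0
        return 0;
    }
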
index 7ad3464180def871d224023326c2a6992bbbaa74..b822f7dc6773c3b5d48e5a3898737bc5f59126fe 100644 (file)
@@ -9,6 +9,7 @@
 #include<algorithm>
 #include<sstream>
 #include<iostream>
+#include<vector>
 
 #include "utils.h"
 #include "my_assert.h"
@@ -230,11 +231,11 @@ public:
 
        const LenDist& getGLD() { return *gld; }
 
-       void startSimulation(simul*, double*);
+       void startSimulation(simul*, const std::vector<double>&);
        bool simulate(READ_INT_TYPE, SingleRead&, int&);
        void finishSimulation();
 
-       double* getMW() { 
+       const double* getMW() { 
          assert(mw != NULL);
          return mw;
        }
@@ -390,7 +391,7 @@ void SingleModel::write(const char* outF) {
        fclose(fo);
 }
 
-void SingleModel::startSimulation(simul* sampler, double* theta) {
+void SingleModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
        this->sampler = sampler;
 
        theta_cdf = new double[M + 1];
index 5da47545370bdaf94991d36c50b474270d876bd4..ba43d9b608c934dd290ff5a9679b7f7d96e9586a 100644 (file)
@@ -9,6 +9,7 @@
 #include<algorithm>
 #include<sstream>
 #include<iostream>
+#include<vector>
 
 #include "utils.h"
 #include "my_assert.h"
@@ -238,12 +239,12 @@ public:
 
        const LenDist& getGLD() { return *gld; }
 
-       void startSimulation(simul*, double*);
+       void startSimulation(simul*, const std::vector<double>&);
        bool simulate(READ_INT_TYPE, SingleReadQ&, int&);
        void finishSimulation();
 
        //Use it after function 'read' or 'estimateFromReads'
-       double* getMW() { 
+       const double* getMW() { 
          assert(mw != NULL);
          return mw;
        }
@@ -405,7 +406,7 @@ void SingleQModel::write(const char* outF) {
        fclose(fo);
 }
 
-void SingleQModel::startSimulation(simul* sampler, double* theta) {
+void SingleQModel::startSimulation(simul* sampler, const std::vector<double>& theta) {
        this->sampler = sampler;
 
        theta_cdf = new double[M + 1];
index c71d9c17c96075d4abfa8633e6fa89624c961548..dbfe53a3bdcd9fe7d84478715fd9d4625846bbc2 100644 (file)
 #include "GroupInfo.h"
 
 #include "Buffer.h"
+
 using namespace std;
 
 struct Params {
        int no;
        FILE *fi;
        engine_type *engine;
-       double *mw;
+       const double *mw;
 };
 
 struct CIType {
@@ -48,11 +49,12 @@ int nMB;
 double confidence;
 int nCV, nSpC, nSamples; // nCV: number of count vectors; nSpC: number of theta vectors sampled per count vector; nSamples: nCV * nSpC
 int nThreads;
-int cvlen;
+
+float *l_bars;
 
 char cvsF[STRLEN], tmpF[STRLEN], command[STRLEN];
 
-CIType *iso_tau, *gene_tau;
+CIType *iso_tpm, *gene_tpm, *iso_fpkm, *gene_fpkm;
 
 int M, m;
 Refs refs;
@@ -105,69 +107,58 @@ void calcExpectedEffectiveLengths(ModelType& model) {
 }
 
 void* sample_theta_from_c(void* arg) {
-
        int *cvec;
        double *theta;
+       float *tpm;
        gamma_dist **gammas;
        gamma_generator **rgs;
 
        Params *params = (Params*)arg;
        FILE *fi = params->fi;
-       double *mw = params->mw;
+       const double *mw = params->mw;
 
-       cvec = new int[cvlen];
-       theta = new double[cvlen];
-       gammas = new gamma_dist*[cvlen];
-       rgs = new gamma_generator*[cvlen];
-
-       float **vecs = new float*[nSpC];
-       for (int i = 0; i < nSpC; i++) vecs[i] = new float[cvlen];
+       cvec = new int[M + 1];
+       theta = new double[M + 1];
+       gammas = new gamma_dist*[M + 1];
+       rgs = new gamma_generator*[M + 1];
+       tpm = new float[M + 1];
+       float l_bar; // the mean transcript length over the sample
 
        int cnt = 0;
        while (fscanf(fi, "%d", &cvec[0]) == 1) {
-               for (int j = 1; j < cvlen; j++) assert(fscanf(fi, "%d", &cvec[j]) == 1);
+               for (int j = 1; j <= M; j++) assert(fscanf(fi, "%d", &cvec[j]) == 1);
 
                ++cnt;
 
-               for (int j = 0; j < cvlen; j++) {
+               for (int j = 0; j <= M; j++) {
                        gammas[j] = new gamma_dist(cvec[j]);
                        rgs[j] = new gamma_generator(*(params->engine), *gammas[j]);
                }
 
                for (int i = 0; i < nSpC; i++) {
                        double sum = 0.0;
-                       for (int j = 0; j < cvlen; j++) {
-                               theta[j] = ((j == 0 || eel[j] >= EPSILON) ? (*rgs[j])() : 0.0);
+                       for (int j = 0; j <= M; j++) {
+                               theta[j] = ((j == 0 || eel[j] >= EPSILON && mw[j] >= EPSILON) ? (*rgs[j])() / mw[j] : 0.0);
                                sum += theta[j];
                        }
                        assert(sum >= EPSILON);
-                       for (int j = 0; j < cvlen; j++) theta[j] /= sum;
+                       for (int j = 0; j <= M; j++) theta[j] /= sum;
 
                        sum = 0.0;
-                       for (int j = 0; j < cvlen; j++) {
-                               theta[j] = (mw[j] < EPSILON ? 0.0 : theta[j] / mw[j]);
-                               sum += theta[j];
-                       }
-                       assert(sum >= EPSILON);
-                       for (int j = 0; j < cvlen; j++) theta[j] /= sum;
-
-
-                       sum = 0.0;
-                       vecs[i][0] = theta[0];
-                       for (int j = 1; j < cvlen; j++)
+                       tpm[0] = 0.0;
+                       for (int j = 1; j <= M; j++)
                                if (eel[j] >= EPSILON) {
-                                       vecs[i][j] = theta[j] / eel[j];
-                                       sum += vecs[i][j];
+                                       tpm[j] = theta[j] / eel[j];
+                                       sum += tpm[j];
                                }
                                else assert(theta[j] < EPSILON);
-
                        assert(sum >= EPSILON);
-                       for (int j = 1; j < cvlen; j++) vecs[i][j] /= sum;
+                       l_bar = 0.0; // store mean effective length of the sample
+                       for (int j = 1; j <= M; j++) { tpm[j] /= sum; l_bar += tpm[j] * eel[j]; tpm[j] *= 1e6; }
+                       buffer->write(l_bar, tpm + 1); // omit the first element in tpm
                }
 
-               buffer->write(nSpC, vecs);
-
-               for (int j = 0; j < cvlen; j++) {
+               for (int j = 0; j <= M; j++) {
                        delete gammas[j];
                        delete rgs[j];
                }
@@ -179,9 +170,7 @@ void* sample_theta_from_c(void* arg) {
        delete[] theta;
        delete[] gammas;
        delete[] rgs;
-
-       for (int i = 0; i < nSpC; i++) delete[] vecs[i];
-       delete[] vecs;
+       delete[] tpm;
 
        return NULL;
 }
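
The rewritten loop above draws each theta vector by sampling Gamma(c_j, 1) variates and normalizing them, which is effectively a Dirichlet draw parameterized by the count vector, before the mw correction and the effective-length normalization that turn theta into TPM. A minimal sketch of that core step, assuming strictly positive counts and using the standard library's gamma_distribution instead of boost's gamma_generator:

    #include <cstdio>
    #include <random>
    #include <vector>
    using namespace std;

    // Draw theta ~ Dirichlet(counts) by normalizing independent Gamma(count_j, 1) variates.
    vector<double> sampleDirichlet(const vector<double>& counts, mt19937& rng) {
        vector<double> theta(counts.size());
        double sum = 0.0;
        for (size_t j = 0; j < counts.size(); j++) {
            gamma_distribution<double> gamma(counts[j], 1.0);
            theta[j] = gamma(rng);
            sum += theta[j];
        }
        for (size_t j = 0; j < counts.size(); j++) theta[j] /= sum;
        return theta;
    }

    int main() {
        mt19937 rng(0);
        vector<double> counts = {5.0, 100.0, 20.0}; // theta0 (noise) plus two transcripts
        vector<double> theta = sampleDirichlet(counts, rng);
        for (double t : theta) printf("%.4f ", t);
        printf("\n");
        return 0;
    }
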
@@ -192,10 +181,9 @@ void sample_theta_vectors_from_count_vectors() {
        model.read(modelF);
        calcExpectedEffectiveLengths<ModelType>(model);
 
-
        int num_threads = min(nThreads, nCV);
 
-       buffer = new Buffer(nMB, nSamples, cvlen, tmpF);
+       buffer = new Buffer(nMB, nSamples, M, l_bars, tmpF);
 
        paramsArray = new Params[num_threads];
        threads = new pthread_t[num_threads];
@@ -281,29 +269,44 @@ void calcCI(int nSamples, float *samples, float &lb, float &ub) {
 }
 
 void* calcCI_batch(void* arg) {
-       float *itsamples, *gtsamples;
+       float *itsamples, *gtsamples, *ifsamples, *gfsamples;
        ifstream fin;
        CIParams *ciParams = (CIParams*)arg;
 
        itsamples = new float[nSamples];
        gtsamples = new float[nSamples];
+       ifsamples = new float[nSamples];
+       gfsamples = new float[nSamples];
 
        fin.open(tmpF, ios::binary);
-       streampos pos = streampos(gi.spAt(ciParams->start_gene_id)) * nSamples * FLOATSIZE;
+       // minus 1 here because theta0 is not written!
+       streampos pos = streampos(gi.spAt(ciParams->start_gene_id) - 1) * nSamples * FLOATSIZE;
        fin.seekg(pos, ios::beg);
 
        int cnt = 0;
        for (int i = ciParams->start_gene_id; i < ciParams->end_gene_id; i++) {
                int b = gi.spAt(i), e = gi.spAt(i + 1);
                memset(gtsamples, 0, FLOATSIZE * nSamples);
+               memset(gfsamples, 0, FLOATSIZE * nSamples);
                for (int j = b; j < e; j++) {
                        for (int k = 0; k < nSamples; k++) {
                                fin.read((char*)(&itsamples[k]), FLOATSIZE);
                                gtsamples[k] += itsamples[k];
+                               ifsamples[k] = 1e3 / l_bars[k] * itsamples[k];
+                               gfsamples[k] += ifsamples[k];
                        }
-                       calcCI(nSamples, itsamples, iso_tau[j].lb, iso_tau[j].ub);
+                       calcCI(nSamples, itsamples, iso_tpm[j].lb, iso_tpm[j].ub);
+                       calcCI(nSamples, ifsamples, iso_fpkm[j].lb, iso_fpkm[j].ub);
+               }
+
+               if (e - b > 1) {
+                       calcCI(nSamples, gtsamples, gene_tpm[i].lb, gene_tpm[i].ub);
+                       calcCI(nSamples, gfsamples, gene_fpkm[i].lb, gene_fpkm[i].ub);
+               }
+               else {
+                       gene_tpm[i].lb = iso_tpm[b].lb; gene_tpm[i].ub = iso_tpm[b].ub;
+                       gene_fpkm[i].lb = iso_fpkm[b].lb; gene_fpkm[i].ub = iso_fpkm[b].ub;
                }
-               calcCI(nSamples, gtsamples, gene_tau[i].lb, gene_tau[i].ub);
 
                ++cnt;
                if (verbose && cnt % 1000 == 0) { printf("In thread %d, %d genes are processed for CI calculation!\n", ciParams->no, cnt); }
@@ -322,8 +325,10 @@ void calculate_credibility_intervals(char* imdName) {
        char outF[STRLEN];
        int num_threads = nThreads;
 
-       iso_tau = new CIType[M + 1];
-       gene_tau = new CIType[m];
+       iso_tpm = new CIType[M + 1];
+       gene_tpm = new CIType[m];
+       iso_fpkm = new CIType[M + 1];
+       gene_fpkm = new CIType[m];
 
        assert(M > 0);
        int quotient = M / num_threads;
@@ -374,22 +379,32 @@ void calculate_credibility_intervals(char* imdName) {
        sprintf(outF, "%s.iso_res", imdName);
        fo = fopen(outF, "a");
        for (int i = 1; i <= M; i++)
-               fprintf(fo, "%.6g%c", iso_tau[i].lb, (i < M ? '\t' : '\n'));
+               fprintf(fo, "%.6g%c", iso_tpm[i].lb, (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.6g%c", iso_tpm[i].ub, (i < M ? '\t' : '\n'));
+       for (int i = 1; i <= M; i++)
+               fprintf(fo, "%.6g%c", iso_fpkm[i].lb, (i < M ? '\t' : '\n'));
        for (int i = 1; i <= M; i++)
-               fprintf(fo, "%.6g%c", iso_tau[i].ub, (i < M ? '\t' : '\n'));
+               fprintf(fo, "%.6g%c", iso_fpkm[i].ub, (i < M ? '\t' : '\n'));
        fclose(fo);
 
        //gene level results
        sprintf(outF, "%s.gene_res", imdName);
        fo = fopen(outF, "a");
        for (int i = 0; i < m; i++)
-               fprintf(fo, "%.6g%c", gene_tau[i].lb, (i < m - 1 ? '\t' : '\n'));
+               fprintf(fo, "%.6g%c", gene_tpm[i].lb, (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.6g%c", gene_tpm[i].ub, (i < m - 1 ? '\t' : '\n'));
+       for (int i = 0; i < m; i++)
+               fprintf(fo, "%.6g%c", gene_fpkm[i].lb, (i < m - 1 ? '\t' : '\n'));
        for (int i = 0; i < m; i++)
-               fprintf(fo, "%.6g%c", gene_tau[i].ub, (i < m - 1 ? '\t' : '\n'));
+               fprintf(fo, "%.6g%c", gene_fpkm[i].ub, (i < m - 1 ? '\t' : '\n'));
        fclose(fo);
 
-       delete[] iso_tau;
-       delete[] gene_tau;
+       delete[] iso_tpm;
+       delete[] gene_tpm;
+       delete[] iso_fpkm;
+       delete[] gene_fpkm;
 
        if (verbose) { printf("All credibility intervals are calculated!\n"); }
 }
@@ -424,8 +439,8 @@ int main(int argc, char* argv[]) {
        m = gi.getm();
 
        nSamples = nCV * nSpC;
-       cvlen = M + 1;
-       assert(nSamples > 0 && cvlen > 1); // for Buffter.h: (bufsize_type)nSamples
+       assert(nSamples > 0 && M > 0); // for Buffer.h: (bufsize_type)nSamples
+       l_bars = new float[nSamples];
 
        sprintf(tmpF, "%s.tmp", imdName);
        sprintf(cvsF, "%s.countvectors", imdName);
@@ -447,6 +462,7 @@ int main(int argc, char* argv[]) {
        // Phase II
        calculate_credibility_intervals(imdName);
 
+       delete[] l_bars;
        /*
        sprintf(command, "rm -f %s", tmpF);
        int status = system(command);
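
In the credibility-interval pass above, each stored posterior sample is a TPM vector plus that sample's l_bar, and the FPKM samples are derived on the fly as FPKM = 10^3 / l_bar * TPM before per-isoform and per-gene intervals are computed. A small sketch of that conversion for one isoform across samples (the interval is then read off the sorted samples; calcCI's exact quantile handling is not shown here and the numbers are hypothetical):

    #include <algorithm>
    #include <cstdio>
    #include <vector>
    using namespace std;

    int main() {
        // Posterior TPM samples for one isoform and the per-sample mean
        // effective transcript lengths (l_bar), as stored by the sampler.
        vector<float> tpm_samples = {12.0f, 15.5f, 9.8f, 14.1f, 11.3f};
        vector<float> l_bars      = {2100.f, 2080.f, 2150.f, 2095.f, 2120.f};

        // Derive FPKM samples: FPKM = 1e3 / l_bar * TPM.
        vector<float> fpkm_samples(tpm_samples.size());
        for (size_t k = 0; k < tpm_samples.size(); k++)
            fpkm_samples[k] = 1e3f / l_bars[k] * tpm_samples[k];

        // A (1 - alpha) credibility interval can then be taken from the sorted samples.
        double alpha = 0.05;
        sort(fpkm_samples.begin(), fpkm_samples.end());
        size_t lo = (size_t)(alpha / 2 * fpkm_samples.size());
        size_t hi = fpkm_samples.size() - 1 - lo;
        printf("FPKM CI ~ [%.3f, %.3f]\n", fpkm_samples[lo], fpkm_samples[hi]);
        return 0;
    }
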
diff --git a/calcClusteringInfo.cpp b/calcClusteringInfo.cpp
deleted file mode 100644 (file)
index 2103f61..0000000
+++ /dev/null
@@ -1,149 +0,0 @@
-#include<cstdio>
-#include<cctype>
-#include<cstring>
-#include<cstdlib>
-#include<cassert>
-#include<fstream>
-#include<iomanip>
-#include<string>
-#include<vector>
-#include<algorithm>
-using namespace std;
-
-typedef unsigned int INTEGER;
-
-const int STRLEN = 1005;
-
-INTEGER M;
-int k; // k-mer size
-vector<string> names;
-vector<string> seqs;
-vector<INTEGER> effL;
-
-// tid starts from 1
-struct ReadType {
-  INTEGER tid, pos;
-
-  ReadType(INTEGER tid, INTEGER pos) {
-    this->tid = tid;
-    this->pos = pos;
-  }
-
-  bool operator< (const ReadType& o) const {
-    string& a = seqs[tid];
-    string& b = seqs[o.tid];
-    for (int i = 0; i < k; i++) {
-      if (a[pos + i] != b[o.pos + i]) {
-       return a[pos + i] < b[o.pos + i];
-      }
-    }
-    return tid < o.tid;
-  }
-
-  bool seq_equal(const ReadType& o) const {
-    string& a = seqs[tid];
-    string& b = seqs[o.tid];
-    for (int i = 0; i < k; i++) 
-      if (a[pos + i] != b[o.pos + i]) return false;
-    return true;
-  }
-};
-
-vector<ReadType> cands;
-vector<double> clusteringInfo; 
-
-string convert(const string& rawseq) {
-  int size = (int)rawseq.size();
-  string seq = rawseq;
-  for (int i = 0; i < size; i++) {
-    seq[i] = toupper(rawseq[i]);
-    if (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G' && seq[i] != 'T') seq[i] = 'N';
-  }
-  return seq;
-}
-
-void loadRef(char* inpF) {
-  ifstream fin(inpF);
-  string tag, line, rawseq;
-  void *pt;
-
-  assert(fin.is_open());
-
-  names.clear(); names.push_back("");
-  seqs.clear(); seqs.push_back("");
-  
-  pt = getline(fin, line);
-  while (pt != 0 && line[0] == '>') {
-    tag = line.substr(1);
-    rawseq = "";
-    while((pt = getline(fin, line)) && line[0] != '>') {
-      rawseq += line;
-    }
-    if (rawseq.size() <= 0) {
-      printf("Warning: Fasta entry %s has an empty sequence! It is omitted!\n", tag.c_str());
-      continue;
-    }
-    names.push_back(tag);
-    seqs.push_back(convert(rawseq));
-  }
-
-  fin.close();
-
-  M = names.size() - 1;
-
-  printf("The reference is loaded.\n");
-}
-
-int main(int argc, char* argv[]) {
-  if (argc != 4) {
-    printf("Usage: rsem-for-ebseq-calculate-clustering-info k input_reference_fasta_file output_file\n");
-    exit(-1);
-  }
-
-  k = atoi(argv[1]);
-  loadRef(argv[2]);
-
-  cands.clear();
-  effL.assign(M + 1, 0);
-  for (INTEGER i = 1; i <= M; i++) {
-    effL[i] = seqs[i].length() - k + 1;
-    if (effL[i] <= 0) effL[i] = 0; // effL should be non-negative
-    for (INTEGER j = 0; j < effL[i]; j++) 
-      cands.push_back(ReadType(i, j));
-  }
-  printf("All possbile %d mers are generated.\n", k);
-
-  sort(cands.begin(), cands.end());
-  printf("All %d mers are sorted.\n", k);
-  size_t p = 0;
-  clusteringInfo.assign(M + 1, 0.0);
-
-  for (size_t i = 1; i <= cands.size(); i++)
-    if (i == cands.size() || !cands[p].seq_equal(cands[i])) {
-      size_t denominator = i - p;
-      size_t q = p; 
-      for (size_t j = p + 1; j <= i; j++)
-       if (j == i || cands[q].tid != cands[j].tid) {
-         size_t numerator = j - q;
-         //double prob = numerator * 1.0 / denominator;
-         //clusteringInfo[cands[q].tid] += (double)numerator * prob * (1.0 - prob);
-         if (numerator < denominator) clusteringInfo[cands[q].tid] += numerator;
-         q = j;
-       }
-      p = i;
-    }
-
-  for (INTEGER i = 1; i <= M; i++) 
-    if (effL[i] == 0) clusteringInfo[i] = -1.0;
-    else clusteringInfo[i] /= effL[i];
-
-  printf("Clustering information is calculated.\n");
-
-
-  ofstream fout(argv[3]);
-  for (INTEGER i = 1; i <= M; i++) fout<<names[i]<<"\t"<<setprecision(6)<<clusteringInfo[i]<<endl;
-  fout.close();
-
-  return 0;
-}
index e1a47f25bd7f1c033550ac205393212ddb896921..8d969e781d0d6949852b499d76b481481581a0a5 100644 (file)
--- a/makefile
+++ b/makefile
@@ -1,8 +1,7 @@
 CC = g++
 CFLAGS = -Wall -c -I.
 COFLAGS = -Wall -O3 -ffast-math -c -I.
-PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth rsem-sam-validator rsem-scan-for-paired-end-reads rsem-for-ebseq-calculate-clustering-info
-
+PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth rsem-sam-validator rsem-scan-for-paired-end-reads
 
 all : $(PROGRAMS)
 
@@ -136,10 +135,7 @@ rsem-sam-validator : sam/bam.h sam/sam.h my_assert.h samValidator.cpp sam/libbam
 rsem-scan-for-paired-end-reads : sam/bam.h sam/sam.h my_assert.h scanForPairedEndReads.cpp sam/libbam.a
        $(CC) -O3 -Wall scanForPairedEndReads.cpp sam/libbam.a -lz -o $@
 
-rsem-for-ebseq-calculate-clustering-info : calcClusteringInfo.cpp
-       $(CC) -O3 -Wall calcClusteringInfo.cpp -o $@
-
-clean:
+clean :
        rm -f *.o *~ $(PROGRAMS)
        cd sam ; ${MAKE} clean
-
+       cd EBSeq ; ${MAKE} clean
index 684e33c2c3eb713536e3f1db6a82d44acda0bbe2..955a7f5c540ee71ff4b4e94f1238c62524397b5e 100755 (executable)
@@ -19,6 +19,10 @@ my $status = 0;
 
 my $read_type = 1; # default, single end with qual
 
+my @transcript_title = ("transcript_id", "gene_id", "length", "effective_length", "expected_count", "TPM", "FPKM", "IsoPct", "pme_expected_count", "pme_TPM", "pme_FPKM", "IsoPct_from_pme_TPM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
+
+my @gene_title = ("gene_id", "transcript_id(s)", "length", "effective_length", "expected_count", "TPM", "FPKM", "pme_expected_count", "pme_TPM", "pme_FPKM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
+
 my $bowtie_path = "";
 my $C = 2;
 my $E = 99999999;
@@ -273,8 +277,8 @@ if ($quiet) { $command .= " -q"; }
 
 &runCommand($command);
 
-&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
-&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+&collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+&collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
 
 if ($genBamF) {
     $command = $dir."sam/samtools sort $sampleName.transcript.bam $sampleName.transcript.sorted";
@@ -307,8 +311,8 @@ if ($calcCI || $var_opt) {
 if ($calcCI) {
     system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
     system("mv $sampleName.genes.results $imdName.genes.results.bak1");
-    &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
-    &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+    &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+    &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
 
     $command = $dir."rsem-calculate-credibility-intervals $refName $imdName $statName $CONFIDENCE $NCV $NSPC $NMB";
     $command .= " -p $nThreads";
@@ -317,8 +321,8 @@ if ($calcCI) {
 
     system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2");
     system("mv $sampleName.genes.results $imdName.genes.results.bak2");
-    &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
-    &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+    &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+    &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
 }
 
 if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; }
@@ -354,33 +358,28 @@ sub runCommand {
     }
     print "\n";
 }
-
 # inpF, outF
 sub collectResults {
     my $local_status;
     my ($inpF, $outF);
-    my (@results, @ids) = ();
+    my @results = ();
     my $line;
-    my $cnt;
 
-    $inpF = $_[0];
-    $outF = $_[1];
+    $inpF = $_[1];
+    $outF = $_[2];
 
     $local_status = open(INPUT, $inpF);
     if ($local_status == 0) { print "Fail to open file $inpF!\n"; exit(-1); }
     
-    $cnt = 0;
     @results = ();
     
     while ($line = <INPUT>) {
-       ++$cnt;
        chomp($line);
        my @local_arr = split(/\t/, $line);
-       if ($cnt == 4) { @ids = @local_arr; }
-       else { push(@results, \@local_arr); }
+       push(@results, \@local_arr); 
     }
-    
-    push(@results, \@ids);
+
     close(INPUT);
 
     $local_status = open(OUTPUT, ">$outF");
@@ -388,16 +387,26 @@ sub collectResults {
 
     my $n = scalar(@results);
     my $m = scalar(@{$results[0]});
+
+    $" = "\t";
+
+    my @out_arr = ();
+    for (my $i = 0; $i < $n; $i++) {
+       if ($_[0] eq "isoform") { push(@out_arr, $transcript_title[$i]); }
+       elsif ($_[0] eq "gene") { push(@out_arr, $gene_title[$i]); }
+       else { print "A bug in 'collectResults' was detected!\n"; exit(-1); }
+    }
+    print OUTPUT "@out_arr\n";
+
     for (my $i = 0; $i < $m; $i++) {
-       my @out_arr = ();
+       @out_arr = ();
        for (my $j = 0; $j < $n; $j++) { push(@out_arr, $results[$j][$i]); }
-       $" = "\t";
        print OUTPUT "@out_arr\n"; 
     }
+
     close(OUTPUT);
 }
 
-
 __END__
 
 =head1 NAME
@@ -608,33 +617,85 @@ With the '--calc-ci' option, 95% credibility intervals and posterior mean estima
 
 =over
 
-=item B<sample_name.genes.results> 
+=item B<sample_name.isoforms.results> 
 
-File containing gene level expression estimates. The format of each
-line in this file is:
+File containing isoform level expression estimates. The first line
+contains column names separated by the tab character. The format of
+each line in the rest of this file is:
 
-gene_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] transcript_id_list
+transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct [pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound]
 
 Fields are separated by the tab character. Fields within "[]" are only
-presented if '--calc-ci' is set. pme stands for posterior mean
-estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
-means the lower bound of the credibility intervals, ci_upper_bound(u)
-means the upper bound of the credibility intervals. So the credibility
-interval is [l, u]. 'transcript_id_list' is a space-separated list of
-transcript_ids belonging to the gene. If no gene information is
-provided, this file has the same content as
-'sample_name.isoforms.results'.
+presented if '--calc-ci' is set.
 
-=item B<sample_name.isoforms.results> 
+'transcript_id' is the name of this transcript. 'gene_id' is the name
+of the gene to which this transcript belongs (we refer to this gene as
+the transcript's parent gene). If no gene information is provided,
+'gene_id' and 'transcript_id' are the same.
+
+'length' is this transcript's sequence length (poly(A) tail is not
+counted). 'effective_length' counts only the positions that can
+generate a valid fragment. If no poly(A) tail is added,
+'effective_length' is equal to transcript length - mean fragment
+length + 1. If a transcript's effective length is less than 1, both
+its effective length and its abundance estimates are set to 0.
 
-File containing isoform level expression values. The format of each
-line in this file is:
+'expected_count' is the sum, over all reads, of the posterior
+probability that each read comes from this transcript. Because 1) each
+read aligning to this transcript has a probability of being generated
+from background noise and 2) RSEM may filter some alignable low-quality
+reads, the sum of expected counts over all transcripts is generally
+less than the total number of aligned reads.
+
+'TPM' stands for Transcripts Per Million. It is a relative measure of
+transcript abundance. The sum of all transcripts' TPM is 1
+million. 'FPKM' stands for Fragments Per Kilobase of transcript per
+Million mapped reads. It is another relative measure of transcript
+abundance. If we define l_bar to be the mean transcript length in a
+sample, which can be calculated as
+
+l_bar = \sum_i TPM_i / 10^6 * effective_length_i (i goes through every transcript),
+
+the following equation holds:
+
+FPKM_i = 10^3 / l_bar * TPM_i.
+
+We can see that the sum of FPKM values is not constant across samples.
+
+'IsoPct' stands for isoform percentage. It is the percentage of this
+transcript's abundance over its parent gene's abundance. If its parent
+gene has only one isoform or the gene information is not provided,
+this field will be set to 100.
+
+'pme_expected_count', 'pme_TPM', 'pme_FPKM' are posterior mean
+estimates calculated by RSEM's Gibbs sampler. 'IsoPct_from_pme_TPM' is
+the isoform percentage calculated from 'pme_TPM' values.
+
+'TPM_ci_lower_bound', 'TPM_ci_upper_bound', 'FPKM_ci_lower_bound' and
+'FPKM_ci_upper_bound' are the lower (l) and upper (u) bounds of the
+95% credibility intervals for TPM and FPKM values. The bounds are
+inclusive (i.e. [l, u]).
+
+=item B<sample_name.genes.results>
+
+File containing gene level expression estimates. The first line
+contains column names separated by the tab character. The format of
+each line in the rest of this file is:
+
+gene_id transcript_id(s) length effective_length expected_count TPM FPKM [pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound]
+
+Fields are separated by the tab character. Fields within "[]" are only
+presented if '--calc-ci' is set. 
 
-transcript_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] gene_id
+'transcript_id(s)' is a comma-separated list of transcript_ids
+belonging to this gene. If no gene information is provided, 'gene_id'
+and 'transcript_id(s)' are identical (the 'transcript_id').
 
-Fields are separated by the tab character. 'gene_id' is the gene_id of
-the gene which this transcript belongs to. If no gene information is
-provided, 'gene_id' and 'transcript_id' are the same.
+A gene's 'length' and 'effective_length' are
+defined as the weighted average of its transcripts' lengths and
+effective lengths (weighted by 'IsoPct'). A gene's abundance estimates
+are just the sum of its transcripts' abundance estimates.
 
 =item B<sample_name.transcript.bam, sample_name.transcript.sorted.bam and sample_name.transcript.sorted.bam.bai>
 
diff --git a/rsem-for-ebseq-generate-ngvector-from-clustering-info b/rsem-for-ebseq-generate-ngvector-from-clustering-info
deleted file mode 100755 (executable)
index 312dbfa..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env Rscript
-
-argv <- commandArgs(TRUE)
-if (length(argv) != 2) {
-  cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
-  q(status = 1)
-}
-
-data <- read.table(file = argv[1], stringsAsFactors = F)
-idx <- data[,2] >= 0
-kmr <- kmeans(data[idx, 2], 3)
-order <- order(kmr$centers)
-
-ngvec <- rep(0, length(idx))
-ngvec[idx] <- order[kmr$cluster]
-ngvec[!idx] <- 3
-
-write.table(ngvec, file = argv[2], row.names = F, col.names = F)
index b5324a525b3045c5898d22d3e1fc66291a7f8597..d275d6e3e07bf94bea346028a4654b98bb9ae0ea 100755 (executable)
@@ -8,6 +8,8 @@ if (scalar(@ARGV) == 0) {
     exit(-1);
 }
 
+my $offsite = 4; # for new file formats
+
 my $line;
 my $n = scalar(@ARGV);
 my $M = -1;
@@ -19,7 +21,7 @@ for (my $i = 0; $i < $n; $i++) {
     while ($line = <INPUT>) {
        chomp($line); 
        my @fields = split(/\t/, $line);
-       push(@sample, $fields[1]);
+       push(@sample, $fields[$offsite]);
     }
     close(INPUT);
     if (scalar(@sample) == 0) {
index cd184b69ecfe5e1ccc69735add06d2ffaa74c7f4..99498c087d6240702abfcd40686373fdb1c92810 100755 (executable)
@@ -17,10 +17,10 @@ pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2)
 my ($fn, $dir, $suf) = fileparse($0);
 my $command = "";
 
-$command = $dir."rsem-for-ebseq-calculate-clustering-info $k $ARGV[0] $ARGV[1].ump";
+$command = $dir."EBSeq/rsem-for-ebseq-calculate-clustering-info $k $ARGV[0] $ARGV[1].ump";
 &runCommand($command);
 
-$command = $dir."rsem-for-ebseq-generate-ngvector-from-clustering-info $ARGV[1].ump $ARGV[1].ngvec";
+$command = $dir."EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info $ARGV[1].ump $ARGV[1].ngvec";
 &runCommand($command);
 
 # command, {err_msg}
index 97cee6ee610fc1f6f37202b1e5c9715e95cec2e1..838a88fcccae5bf0887c504ac5fef770da8458a7 100644 (file)
@@ -9,7 +9,7 @@
 #include<vector>
 
 #include "utils.h"
-
+#include "my_assert.h"
 #include "Read.h"
 #include "SingleRead.h"
 #include "SingleReadQ.h"
@@ -31,6 +31,8 @@
 
 using namespace std;
 
+const int OFFSITE = 5;
+
 READ_INT_TYPE N;
 int model_type, M, m;
 
@@ -38,8 +40,8 @@ Refs refs;
 GroupInfo gi;
 Transcripts transcripts;
 
-double *theta, *counts;
 vector<double> eel;
+vector<double> theta, counts;
 
 int n_os;
 ostream *os[2];
@@ -78,34 +80,33 @@ void genOutReadStreams(int type, char *outFN) {
 
 template<class ModelType>
 void calcExpectedEffectiveLengths(ModelType& model) {
-  int lb, ub, span;
-  double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
+       int lb, ub, span;
+       double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
   
-  model.getGLD().copyTo(pdf, cdf, lb, ub, span);
-  clen = new double[span + 1];
-  clen[0] = 0.0;
-  for (int i = 1; i <= span; i++) {
-    clen[i] = clen[i - 1] + pdf[i] * (lb + i);
-  }
-
-  eel.clear();
-  eel.resize(M + 1, 0.0);
-  for (int i = 1; i <= M; i++) {
-    int totLen = refs.getRef(i).getTotLen();
-    int fullLen = refs.getRef(i).getFullLen();
-    int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
-    int pos2 = max(min(totLen, ub) - lb, 0);
-
-    if (pos2 == 0) { eel[i] = 0.0; continue; }
+       model.getGLD().copyTo(pdf, cdf, lb, ub, span);
+       clen = new double[span + 1];
+       clen[0] = 0.0;
+       for (int i = 1; i <= span; i++) {
+               clen[i] = clen[i - 1] + pdf[i] * (lb + i);
+       }
+
+       eel.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++) {
+               int totLen = refs.getRef(i).getTotLen();
+               int fullLen = refs.getRef(i).getFullLen();
+               int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
+               int pos2 = max(min(totLen, ub) - lb, 0);
+
+               if (pos2 == 0) { eel[i] = 0.0; continue; }
     
-    eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
-    assert(eel[i] >= 0);
-    if (eel[i] < MINEEL) { eel[i] = 0.0; }
-  }
+               eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
+               assert(eel[i] >= 0);
+               if (eel[i] < MINEEL) { eel[i] = 0.0; }
+       }
   
-  delete[] pdf;
-  delete[] cdf;
-  delete[] clen;
+       delete[] pdf;
+       delete[] cdf;
+       delete[] clen;
 }
 
 template<class ReadType, class ModelType>
@@ -122,16 +123,17 @@ void simulate(char* modelF, char* resultsF) {
        //generate theta vector
        ifstream fin(resultsF);
        string line;
-       double tau;
+       double tpm;
        double denom = 0.0;
+       getline(fin, line); // read the first line, which is just column names
        for (int i = 1; i <= M; i++) {
          getline(fin, line);
          size_t pos = 0;
-         for (int j = 0; j < 2; j++) pos = line.find_first_of('\t', pos) + 1;
+         for (int j = 0; j < OFFSITE; j++) pos = line.find_first_of('\t', pos) + 1;
          size_t pos2 = line.find_first_of('\t', pos);
          if (pos2 == string::npos) pos2 = line.length();
-         tau = atof(line.substr(pos, pos2 - pos).c_str());
-         theta[i] = tau * eel[i];
+         tpm = atof(line.substr(pos, pos2 - pos).c_str());
+         theta[i] = tpm * eel[i];
          denom += theta[i];
        }
        assert(denom > EPSILON);
@@ -153,56 +155,97 @@ void simulate(char* modelF, char* resultsF) {
        cout<< "Total number of resimulation is "<< resimulation_count<< endl;
 }
 
-void writeResFiles(char* outFN) {
-       FILE *fo;
+void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
        double denom;
+       vector<double> frac;
 
-       //calculate tau values
-       double *tau = new double[M + 1];
-       memset(tau, 0, sizeof(double) * (M + 1));
+       //calculate the fraction of counts over all mappable reads
        denom = 0.0;
-       for (int i = 1; i <= M; i++) 
-               if (eel[i] > EPSILON) {
-                       tau[i] = counts[i] / eel[i];
-                       denom += tau[i];
-               }
-               else {
-                   if (counts[i] > EPSILON) { printf("Warning: An isoform which EEL is less than %.6g gets sampled!\n", MINEEL); }
+       frac.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++)
+         if (eel[i] >= EPSILON) {
+           frac[i] = theta[i];
+           denom += frac[i];
+         }
+       general_assert(denom > 0, "No alignable reads?!");
+       for (int i = 1; i <= M; i++) frac[i] /= denom;
+
+       //calculate FPKM
+       fpkm.assign(M + 1, 0.0);
+       for (int i = 1; i <= M; i++)
+               if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+       //calculate TPM
+       tpm.assign(M + 1, 0.0);
+       denom = 0.0;
+       for (int i = 1; i <= M; i++) denom += fpkm[i];
+       for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;
+}
+
+void writeResFiles(char* outFN) {
+       FILE *fo;
+       vector<int> tlens;
+       vector<double> fpkm, tpm, isopct;
+       vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
+
+       for (int i = 1; i <= M; i++)
+               general_assert(eel[i] > EPSILON || counts[i] <= EPSILON, "An isoform whose effective length < " + ftos(MINEEL, 6) + " got sampled!");
+
+       calcExpressionValues(counts, eel, tpm, fpkm);
+
+       //calculate IsoPct, etc.
+       isopct.assign(M + 1, 0.0);
+       tlens.assign(M + 1, 0);
+
+       glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
+       gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
+
+       for (int i = 0; i < m; i++) {
+               int b = gi.spAt(i), e = gi.spAt(i + 1);
+               for (int j = b; j < e; j++) {
+                       const Transcript& transcript = transcripts.getTranscriptAt(j);
+                       tlens[j] = transcript.getLength();
+
+                       glens[i] += tlens[j] * tpm[j];
+                       gene_eels[i] += eel[j] * tpm[j];
+                       gene_counts[i] += counts[j];
+                       gene_tpm[i] += tpm[j];
+                       gene_fpkm[i] += fpkm[j];
                }
-       assert(denom > 0.0);
-       for (int i = 1; i <= M; i++) tau[i] /= denom;
+
+               if (gene_tpm[i] < EPSILON) continue;
+
+               for (int j = b; j < e; j++)
+                       isopct[j] = tpm[j] / gene_tpm[i];
+               glens[i] /= gene_tpm[i];
+               gene_eels[i] /= gene_tpm[i];
+       }
 
        //isoform level
        sprintf(isoResF, "%s.sim.isoforms.results", outFN);
        fo = fopen(isoResF, "w");
+       fprintf(fo, "transcript_id\tgene_id\tlength\teffective_length\tcount\tTPM\tFPKM\tIsoPct\n");
        for (int i = 1; i <= M; i++) {
                const Transcript& transcript = transcripts.getTranscriptAt(i);
-               fprintf(fo, "%s\t%.2f\t%.15g", transcript.getTranscriptID().c_str(), counts[i], tau[i]);
-               
-               if (transcript.getLeft() != "") { fprintf(fo, "\t%s", transcript.getLeft().c_str()); }
-               fprintf(fo, "\n");
+               fprintf(fo, "%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", transcript.getTranscriptID().c_str(), transcript.getGeneID().c_str(), tlens[i],
+                               eel[i], counts[i], tpm[i], fpkm[i], isopct[i] * 1e2);
        }
        fclose(fo);
 
        //gene level
        sprintf(geneResF, "%s.sim.genes.results", outFN);
        fo = fopen(geneResF, "w");
+       fprintf(fo, "gene_id\ttranscript_id(s)\tlength\teffective_length\tcount\tTPM\tFPKM\n");
        for (int i = 0; i < m; i++) {
-         double sum_c = 0.0, sum_tau = 0.0;
                int b = gi.spAt(i), e = gi.spAt(i + 1);
-               for (int j = b; j < e; j++) {
-                       sum_c += counts[j];
-                       sum_tau += tau[j];
-               }
                const string& gene_id = transcripts.getTranscriptAt(b).getGeneID();
-               fprintf(fo, "%s\t%.2f\t%.15g\t", gene_id.c_str(), sum_c, sum_tau);
+               fprintf(fo, "%s\t", gene_id.c_str());
                for (int j = b; j < e; j++) {
-                       fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : '\n'));
+                       fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : '\t'));
                }
+               fprintf(fo, "%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", glens[i], gene_eels[i], gene_counts[i], gene_tpm[i], gene_fpkm[i]);
        }
        fclose(fo);
-
-       delete[] tau;
 }
 
 void releaseOutReadStreams() {
@@ -240,14 +283,13 @@ int main(int argc, char* argv[]) {
        assert(fscanf(fi, "%d", &model_type) == 1);
        fclose(fi);
 
-       theta = new double[M + 1];
+       theta.assign(M + 1, 0.0);
        theta[0] = atof(argv[4]);
        N = atoi(argv[5]);
 
        genOutReadStreams(model_type, argv[6]);
 
-       counts = new double[M + 1];
-       memset(counts, 0, sizeof(double) * (M + 1));
+       counts.assign(M + 1, 0.0);
 
        switch(model_type) {
        case 0: simulate<SingleRead, SingleModel>(argv[2], argv[3]); break;
@@ -259,8 +301,5 @@ int main(int argc, char* argv[]) {
        writeResFiles(argv[6]);
        releaseOutReadStreams();
 
-       delete[] theta;
-       delete[] counts;
-
        return 0;
 }
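
The simulator's new calcExpressionValues follows the same convention as the estimation code: counts are first turned into fractions over transcripts with a usable effective length, FPKM is fraction * 10^9 / effective_length, and TPM is FPKM rescaled to sum to 10^6. A compact standalone sketch of that conversion, assuming all effective lengths and the total count are positive:

    #include <cstdio>
    #include <vector>
    using namespace std;

    // Convert per-transcript counts and effective lengths into FPKM and TPM.
    void countsToExpression(const vector<double>& counts, const vector<double>& eel,
                            vector<double>& tpm, vector<double>& fpkm) {
        size_t n = counts.size();
        double total = 0.0;
        for (size_t i = 0; i < n; i++) total += counts[i];

        fpkm.assign(n, 0.0);
        for (size_t i = 0; i < n; i++)
            fpkm[i] = counts[i] / total * 1e9 / eel[i]; // fragments per kb per million fragments

        double fpkm_sum = 0.0;
        for (size_t i = 0; i < n; i++) fpkm_sum += fpkm[i];

        tpm.assign(n, 0.0);
        for (size_t i = 0; i < n; i++) tpm[i] = fpkm[i] / fpkm_sum * 1e6; // rescale to sum to 1e6
    }

    int main() {
        vector<double> counts = {500.0, 200.0, 300.0};
        vector<double> eel = {1500.0, 2500.0, 800.0};
        vector<double> tpm, fpkm;
        countsToExpression(counts, eel, tpm, fpkm);
        for (size_t i = 0; i < counts.size(); i++)
            printf("transcript %zu: TPM %.2f, FPKM %.2f\n", i + 1, tpm[i], fpkm[i]);
        return 0;
    }
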