From: Bo Li <bli@cs.wisc.edu>
Date: Mon, 7 Jan 2013 13:11:20 +0000 (-0600)
Subject: Updated EBSeq to v1.1.5
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=e3d5553e8d1f798a6a36a348ddba475b0f4f1528;p=rsem.git

Updated EBSeq to v1.1.5
---

diff --git a/EBSeq/EBSeq_1.1.4.tar.gz b/EBSeq/EBSeq_1.1.4.tar.gz
deleted file mode 100644
index 4eb8059..0000000
Binary files a/EBSeq/EBSeq_1.1.4.tar.gz and /dev/null differ
diff --git a/EBSeq/makefile b/EBSeq/makefile
index ea5d272..020a32a 100644
--- a/EBSeq/makefile
+++ b/EBSeq/makefile
@@ -6,8 +6,8 @@ all : $(PROGRAMS)
 blockmodeling : blockmodeling_0.1.8.tar.gz
 	R CMD INSTALL -l "." blockmodeling_0.1.8.tar.gz
 
-EBSeq : blockmodeling EBSeq_1.1.4.tar.gz
-	R CMD INSTALL -l "." EBSeq_1.1.4.tar.gz
+EBSeq : blockmodeling EBSeq_1.1.5.tar.gz # installs the bundled EBSeq tarball into the library dir "." (-l) after blockmodeling
+	R CMD INSTALL -l "." EBSeq_1.1.5.tar.gz
 
 rsem-for-ebseq-calculate-clustering-info : calcClusteringInfo.cpp
 	$(CC) -O3 -Wall calcClusteringInfo.cpp -o $@
diff --git a/README.md b/README.md
index 4b0b3ec..e59fb35 100644
--- a/README.md
+++ b/README.md
@@ -379,18 +379,18 @@ Usage:
 
 This script calls EBSeq to find differentially expressed genes/transcripts in two conditions.
 
-data_matrix_file: m by n matrix containing expected counts, m is the number of transcripts/genes, n is the number of total samples.
-[--ngvector ngvector_file]: optional field. 'ngvector_file' is calculated by 'rsem-generate-ngvector'. Having this field is recommended for transcript data.
-number_sample_condition1: the number of samples in condition 1. A condition's samples must be adjacent. The left group of samples are defined as condition 1.
-FDR_rate: false discovery rate.
-output_file: the output file.
+data_matrix_file: m by n matrix containing expected counts, m is the number of transcripts/genes, n is the number of total samples.   
+[--ngvector ngvector_file]: optional field. 'ngvector_file' is calculated by 'rsem-generate-ngvector'. Having this field is recommended for transcript data.   
+number_sample_condition1: the number of samples in condition 1. A condition's samples must be adjacent. The left group of samples are defined as condition 1.   
+FDR_rate: false discovery rate.   
+output_file: the output file. Three files will be generated: 'output_file', 'output_file.hard_threshold' and 'output_file.all'. The first file reports all DE genes/transcripts using a soft threshold (calculated by crit_fun in EBSeq). The second file reports all DE genes/transcripts using a hard threshold (only report if PPEE <= fdr). The third file reports all genes/transcripts. The first file is recommended to be used as DE results because it generally contains more called genes/transcripts.   
 
 The results are written as a matrix with row and column names. The row names are the differentially expressed transcripts'/genes' ids. The column names are 'PPEE', 'PPDE', 'PostFC' and 'RealFC'.
 
-PPEE: posterior probability of being equally expressed.
-PPDE: posterior probability of being differentially expressed.
-PostFC: posterior fold change (condition 1 over condition2).
-RealFC: real fold change (condition 1 over condition2).
+PPEE: posterior probability of being equally expressed.   
+PPDE: posterior probability of being differentially expressed.   
+PostFC: posterior fold change (condition 1 over condition 2).   
+RealFC: real fold change (condition 1 over condition 2).   
 
 To get the above usage information, type 
 
diff --git a/WHAT_IS_NEW b/WHAT_IS_NEW
index 31025d8..4e1690d 100644
--- a/WHAT_IS_NEW
+++ b/WHAT_IS_NEW
@@ -1,3 +1,10 @@
+RSEM v1.2.2
+
+- Updated EBSeq to v1.1.5
+- Modified 'rsem-find-DE' to generate extra output files (type 'rsem-find-DE' to see more information)
+
+--------------------------------------------------------------------------------------------
+
 RSEM v1.2.1
 
 - Added poly(A) tails to 'reference_name.transcripts.fa' so that the RSEM generated transcript unsorted BAM file can be fed into RSEM as an input file. However, users need to rebuild their references if they want to visualize the transcript level wiggle files and BAM files using IGV
diff --git a/rsem-find-DE b/rsem-find-DE
index e9d25f7..e9d65cf 100755
--- a/rsem-find-DE
+++ b/rsem-find-DE
@@ -7,8 +7,8 @@ printUsage <- function() {
   cat("[--ngvector ngvector_file]: optional field. 'ngvector_file' is calculated by 'rsem-generate-ngvector'. Having this field is recommended for transcript data.\n")
   cat("number_sample_condition1: the number of samples in condition 1. A condition's samples must be adjacent. The left group of samples are defined as condition 1.\n")
   cat("FDR_rate: false discovery rate.\n")
-  cat("output_file: the output file.\n\n")
-  cat("The results are written as a matrix with row and column names. The row names are the differentially expressed transcripts'/genes' ids. The column names are 'PPEE', 'PPDE', 'PostFC' and 'RealFC'.\n\n")
+  cat("output_file: the output file. Three files will be generated: 'output_file', 'output_file.hard_threshold' and 'output_file.all'. The first file reports all DE genes/transcripts using a soft threshold (calculated by crit_fun in EBSeq). The second file reports all DE genes/transcripts using a hard threshold (only report if PPEE <= fdr). The third file reports all genes/transcripts. The first file is recommended to be used as DE results because it generally contains more called genes/transcripts.\n\n")  # the soft threshold comes from the crit_fun() call in this script
+  cat("The results are written as a matrix with row and column names. The row names are the genes'/transcripts' ids. The column names are 'PPEE', 'PPDE', 'PostFC' and 'RealFC'.\n\n")
   cat("PPEE: posterior probability of being equally expressed.\n")
   cat("PPDE: posterior probability of being differentially expressed.\n")
   cat("PostFC: posterior fold change (condition 1 over condition2).\n")
@@ -59,11 +59,25 @@ if (is.null(ngvector)) {
 stopifnot(!is.null(EBOut))
 
 PP <- GetPPMat(EBOut)
+fc_res <- PostFC(EBOut)
+
+# soft threshold (computed by crit_fun), default output written to 'output_file'
 thre <- crit_fun(PP[, "PPEE"], fdr)
 DEfound <- rownames(PP)[which(PP[, "PPDE"] >= thre)]
 
-fc_res <- PostFC(EBOut)
-
-results <- cbind(PP[DEfound, ], fc_res$GenePostFC[DEfound], fc_res$GeneRealFC[DEfound])
+results <- cbind(PP[DEfound, , drop = FALSE], fc_res$PostFC[DEfound], fc_res$RealFC[DEfound])  # drop = FALSE keeps a matrix when exactly one row is selected
 colnames(results) <- c("PPEE", "PPDE", "PostFC", "RealFC")
 write.table(results, file = output_file)
+
+# hard threshold: keep rows with PPDE >= 1 - fdr, written to 'output_file.hard_threshold'
+thre <- 1.0 - fdr
+DEfound <- rownames(PP)[which(PP[, "PPDE"] >= thre)]
+
+results <- cbind(PP[DEfound, , drop = FALSE], fc_res$PostFC[DEfound], fc_res$RealFC[DEfound])  # drop = FALSE: same single-row guard as the soft-threshold output
+colnames(results) <- c("PPEE", "PPDE", "PostFC", "RealFC")
+write.table(results, file = paste(output_file, ".hard_threshold", sep = ""))
+
+# all rows of PP, no thresholding, written to 'output_file.all'; fold changes are matched by row name like the outputs above
+results <- cbind(PP, fc_res$PostFC[rownames(PP)], fc_res$RealFC[rownames(PP)])
+colnames(results) <- c("PPEE", "PPDE", "PostFC", "RealFC")
+write.table(results, file = paste(output_file, ".all", sep = ""))