#!/usr/bin/env Rscript
printUsage <- function() {
- cat("Usage: rsem-find-DE data_matrix_file [--ngvector ngvector_file] number_sample_condition1 FDR_rate output_file\n\n")
+ cat("Usage: rsem-find-DE data_matrix_file [--ngvector ngvector_file] number_of_samples_in_condition_1 FDR_rate output_file\n\n")
cat("This script calls EBSeq to find differentially expressed genes/transcripts in two conditions.\n\n")
cat("data_matrix_file: m by n matrix containing expected counts, m is the number of transcripts/genes, n is the number of total samples.\n")
cat("[--ngvector ngvector_file]: optional field. 'ngvector_file' is calculated by 'rsem-generate-ngvector'. Having this field is recommended for transcript data.\n")
- cat("number_sample_condition1: the number of samples in condition 1. A condition's samples must be adjacent. The left group of samples are defined as condition 1.\n")
+ cat("number_of_samples_in_condition_1: the number of samples in condition 1. A condition's samples must be adjacent. The left group of samples are defined as condition 1.\n")
cat("FDR_rate: false discovery rate.\n")
cat("output_file: the output file. Three files will be generated: 'output_file', 'output_file.hard_threshold' and 'output_file.all'. The first file reports all DE genes/transcripts using a soft threshold (calculated by crit_func in EBSeq). The second file reports all DE genes/transcripts using a hard threshold (only report if PPEE <= fdr). The third file reports all genes/transcripts. The first file is recommended to be used as DE results because it generally contains more called genes/transcripts.\n\n")
cat("The results are written as a matrix with row and column names. The row names are the genes'/transcripts' ids. The column names are 'PPEE', 'PPDE', 'PostFC' and 'RealFC'.\n\n")