3 printUsage <- function() {
4 cat("Usage: rsem-find-DE data_matrix_file [--ngvector ngvector_file] number_sample_condition1 FDR_rate output_file\n\n")
5 cat("This script calls EBSeq to find differentially expressed genes/transcripts in two conditions.\n\n")
6 cat("data_matrix_file: m by n matrix containing expected counts, m is the number of transcripts/genes, n is the number of total samples.\n")
7 cat("[--ngvector ngvector_file]: optional field. 'ngvector_file' is calculated by 'rsem-generate-ngvector'. Having this field is recommended for transcript data.\n")
8 cat("number_sample_condition1: the number of samples in condition 1. A condition's samples must be adjacent. The left group of samples are defined as condition 1.\n")
9 cat("FDR_rate: false discovery rate.\n")
10 cat("output_file: the output file.\n\n")
11 cat("The results are written as a matrix with row and column names. The row names are the differentially expressed transcripts'/genes' ids. The column names are 'PPEE', 'PPDE', 'PostFC' and 'RealFC'.\n\n")
12 cat("PPEE: posterior probability of being equally expressed.\n")
13 cat("PPDE: posterior probability of being differentially expressed.\n")
14 cat("PostFC: posterior fold change (condition 1 over condition2).\n")
15 cat("RealFC: real fold change (condition 1 over condition2).\n")
19 argv <- commandArgs(FALSE)
20 path <- dirname(sub("--file=", "", argv[grep("--file", argv)]))
21 path <- paste(path, "EBSeq", sep = "/")
22 start_pos <- grep("--args", argv) + 1
23 end_pos <- length(argv)
24 if (start_pos <= end_pos) argv <- argv[start_pos : end_pos] else argv <- NULL
26 ng_pos <- grep("--ngvector", argv)
27 if (length(ng_pos) > 1 || length(ng_pos) == 0 && length(argv) != 4 || length(ng_pos) == 1 && length(argv) != 6) printUsage()
29 if (length(ng_pos) == 1) {
30 if (ng_pos == length(argv)) printUsage()
31 ngvector_file <- argv[ng_pos + 1]
32 argv <- argv[-c(ng_pos, ng_pos + 1)]
35 data_matrix_file <- argv[1]
36 num_con1 <- as.numeric(argv[2])
37 fdr <- as.numeric(argv[3])
38 output_file <- argv[4]
40 library(blockmodeling, lib.loc = path)
41 library(EBSeq, lib.loc = path)
43 DataMat <- data.matrix(read.table(data_matrix_file))
45 conditions <- as.factor(rep(c("C1", "C2"), times = c(num_con1, n - num_con1)))
46 Sizes <- MedianNorm(DataMat)
48 if (!is.null(ngvector_file)) {
49 ngvector <- as.vector(data.matrix(read.table(ngvector_file)))
54 if (is.null(ngvector)) {
55 EBOut <- EBTest(Data = DataMat, Conditions = conditions, sizeFactors = Sizes, maxround = 5)
57 EBOut <- EBTest(Data = DataMat, NgVector = ngvector, Conditions = conditions, sizeFactors = Sizes, maxround = 5)
59 stopifnot(!is.null(EBOut))
62 thre <- crit_fun(PP[, "PPEE"], fdr)
63 DEfound <- rownames(PP)[which(PP[, "PPDE"] >= thre)]
65 fc_res <- PostFC(EBOut)
67 results <- cbind(PP[DEfound, ], fc_res$GenePostFC[DEfound], fc_res$GeneRealFC[DEfound])
68 colnames(results) <- c("PPEE", "PPDE", "PostFC", "RealFC")
69 write.table(results, file = output_file)