X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=rsem-plot-model;h=e87226d224a946109c60cc3821984ec9ba1793e8;hp=c0eaa1b0b70faa4175478830b8e5d26fd4eff61c;hb=83ce658a4b9c5f04c081316314b66b94ad5ffbde;hpb=a42d21510e502fe5cdd21a5d7ad023b33d2e6b99 diff --git a/rsem-plot-model b/rsem-plot-model index c0eaa1b..e87226d 100755 --- a/rsem-plot-model +++ b/rsem-plot-model @@ -2,13 +2,26 @@ argv <- commandArgs(TRUE) if (length(argv) != 2) { - cat("Usage: rsem-plot-model modelF outF\n") + cat("Usage: rsem-plot-model sample_name output_plot_file\n") q(status = 1) } -con <- file(argv[1], open = "r") +strvec <- strsplit(argv[1], split = "/")[[1]] +token <- strvec[length(strvec)] + +stat.dir <- paste(argv[1], ".stat", sep = "") +if (!file.exists(stat.dir)) { + cat("Error: directory does not exist: ", stat.dir, "\n", sep = "") + cat(strwrap("This version of rsem-plot-model only works with the output of RSEM versions >= 1.1.8"), sep="\n") + q(status = 1) +} +modelF <- paste(stat.dir, "/", token, ".model", sep = "") +cntF <- paste(stat.dir, "/", token, ".cnt", sep = "") + pdf(argv[2]) +con <- file(modelF, open = "r") + # model type and forward probability model_type <- as.numeric(readLines(con, n = 4)[1]) @@ -20,10 +33,15 @@ x <- (vec[1] + 1) : vec[2] y <- as.numeric(strsplit(strvec[2], split = " ")[[1]]) mean <- weighted.mean(x, y) std <- sqrt(weighted.mean((x - mean)^2, y)) -plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability") +plot(x, y, type = "h", + main = "Fragment Length Distribution", + sub = paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)), + xlab = "Fragment Length", + ylab = "Probability") # mate length distribution -bval <- as.numeric(readLines(con, n = 1)[1]) +if (model_type == 0 || model_type == 1) bval <- as.numeric(readLines(con, n = 1)[1]) else bval <- 1 + if (bval == 1) { list <- strsplit(readLines(con, n = 2), split = " ") vec <- as.numeric(list[[1]]) @@ -32,7 +50,11 @@ if (bval == 1) { y <- as.numeric(list[[2]]) mean <- weighted.mean(x, y) std <- sqrt(weighted.mean((x - mean)^2, y)) - plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability") + plot(x, y, type = "h", + main = "Read Length Distribution", + sub=paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)), + xlab = "Read Length", + ylab = "Probability") } strvec <- readLines(con, n = 1) @@ -57,32 +79,39 @@ if (model_type == 1 || model_type == 3) { # QProfile readLines(con, n = 1) + x <- c() peA <- c() # probability of sequencing error given reference base is A peC <- c() peG <- c() peT <- c() - + for (i in 1 : N) { strvec <- readLines(con, n = 6) list <- strsplit(strvec[1:4], split = " ") + vecA <- as.numeric(list[[1]]) vecC <- as.numeric(list[[2]]) vecG <- as.numeric(list[[3]]) vecT <- as.numeric(list[[4]]) - if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break - peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecA[1]))) - peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecC[2]))) - peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecG[3]))) - peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecT[4]))) + + if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next + x <- c(x, (i - 1)) + peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, -10 * log10(1.0 - vecA[1]))) + peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, -10 * log10(1.0 - vecC[2]))) + peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, -10 * log10(1.0 - vecG[3]))) + peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, -10 * log10(1.0 - vecT[4]))) } - x <- 0 : (length(peA) - 1) - matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality") + matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, + main = "Observed Quality vs. Phred Quality Score", + xlab = "Phred Quality Score", + ylab = "Observed Quality") legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4) } else { # Profile readLines(con, n = 1) - + + x <- c() peA <- c() # probability of sequencing error given reference base is A peC <- c() peG <- c() @@ -91,21 +120,30 @@ if (model_type == 1 || model_type == 3) { for (i in 1: maxL) { strvec <- readLines(con, n = 6) list <- strsplit(strvec[1:4], split = " ") + vecA <- as.numeric(list[[1]]) vecC <- as.numeric(list[[2]]) vecG <- as.numeric(list[[3]]) vecT <- as.numeric(list[[4]]) - if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break - peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1])) - peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2])) - peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3])) - peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4])) + + if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next + x <- c(x, i) + peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, (1.0 - vecA[1]) * 100)) + peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, (1.0 - vecC[2]) * 100)) + peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, (1.0 - vecG[3]) * 100)) + peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, (1.0 - vecT[4]) * 100)) } - x <- 1 : length(peA) matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Position vs. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error") legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4) } -dev.off() close(con) + +pair <- read.table(file = cntF, skip = 3, sep = "\t") +barplot(pair[,2], names.arg = pair[,1], + xlab = "Alignments per fragment", + ylab = "Number of fragments", + main = "Histogram of Alignments per Fragment") + +dev.off.output <- dev.off()