X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rsem-plot-model;fp=rsem-plot-model;h=aa2c724aec31b3b143d7d10a64d8d343a53893bb;hb=029284bf74305b5540970f858d7194d05aac501b;hp=0000000000000000000000000000000000000000;hpb=a35ba4aedd7ef00cf33fc50a1e4c9454635d0f4a;p=rsem.git diff --git a/rsem-plot-model b/rsem-plot-model new file mode 100755 index 0000000..aa2c724 --- /dev/null +++ b/rsem-plot-model @@ -0,0 +1,109 @@ +#!/usr/bin/env Rscript + +argv <- commandArgs(TRUE) +if (length(argv) != 2) { + cat("Usage: rsem-plot-model modelF outF\n") + q(status = 1) +} + +con <- file(argv[1], open = "r") +pdf(argv[2]) + +# model type and forward probability +model_type <- as.numeric(readLines(con, n = 4)[1]) + +# fragment length distribution +strvec <- readLines(con, n = 3) +vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]]) +maxL <- vec[2] # maxL used for Profile +x <- (vec[1] + 1) : vec[2] +y <- as.numeric(strsplit(strvec[2], split = " ")[[1]]) +mean <- weighted.mean(x, y) +std <- sqrt(weighted.mean((x - mean)^2, y)) +plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability") + +# mate length distribution +bval <- as.numeric(readLines(con, n = 1)[1]) +if (bval == 1) { + list <- strsplit(readLines(con, n = 2), split = " ") + vec <- as.numeric(list[[1]]) + maxL <- vec[2] + x <- (vec[1] + 1) : vec[2] + y <- as.numeric(list[[2]]) + mean <- weighted.mean(x, y) + std <- sqrt(weighted.mean((x - mean)^2, y)) + plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability") +} +strvec <- readLines(con, n = 1) + +# RSPD +bval <- as.numeric(readLines(con, n = 1)[1]) +if (bval == 1) { + bin_size <- as.numeric(readLines(con, n = 1)[1]) + y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]]) + barplot(y, space = 0, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability") +} +strvec <- readLines(con, n = 1) + +# plot sequencing errors +if (model_type == 1 || model_type == 3) { + # skip QD + N <- as.numeric(readLines(con, n = 1)[1]) + readLines(con, n = N + 1) + readLines(con, n = 1) # for the blank line + + # QProfile + readLines(con, n = 1) + + peA <- c() # probability of sequencing error given reference base is A + peC <- c() + peG <- c() + peT <- c() + + for (i in 1 : N) { + strvec <- readLines(con, n = 6) + list <- strsplit(strvec[1:4], split = " ") + vecA <- as.numeric(list[[1]]) + vecC <- as.numeric(list[[2]]) + vecG <- as.numeric(list[[3]]) + vecT <- as.numeric(list[[4]]) + if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break + peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1])) + peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2])) + peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3])) + peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4])) + } + + x <- 0 : (length(peA) - 1) + matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Quality Score v.s. Percentage Sequence Error", xlab = "Quality Score", ylab = "Percentage of Sequencing Error") + legend("topright", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3) +} else { + # Profile + readLines(con, n = 1) + + peA <- c() # probability of sequencing error given reference base is A + peC <- c() + peG <- c() + peT <- c() + + for (i in 1: maxL) { + strvec <- readLines(con, n = 6) + list <- strsplit(strvec[1:4], split = " ") + vecA <- as.numeric(list[[1]]) + vecC <- as.numeric(list[[2]]) + vecG <- as.numeric(list[[3]]) + vecT <- as.numeric(list[[4]]) + if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break + peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1])) + peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2])) + peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3])) + peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4])) + } + + x <- 1 : length(peA) + matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Position v.s. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error") + legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3) +} + +dev.off() +close(con)