X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rsem-plot-model;fp=rsem-plot-model;h=8ff626a530db1808ac4067a91ac04297df423d76;hb=3ec78aa9af79921c44d62b65f88865a4b65880be;hp=f7b84fc0e3664d5441807281afc2ad276aef4b8b;hpb=58f823f5be6dfbe00896fc44ac3aac5e881e9c5c;p=rsem.git diff --git a/rsem-plot-model b/rsem-plot-model index f7b84fc..8ff626a 100755 --- a/rsem-plot-model +++ b/rsem-plot-model @@ -9,8 +9,8 @@ if (length(argv) != 2) { strvec <- strsplit(argv[1], split = "/")[[1]] token <- strvec[length(strvec)] -modelF <- paste(argv[1], ".stat/", token, ".model") -cntF <- paste(argv[1], ".stat/", token, ".cnt") +modelF <- paste(argv[1], ".stat/", token, ".model", sep = "") +cntF <- paste(argv[1], ".stat/", token, ".cnt", sep = "") pdf(argv[2]) @@ -65,32 +65,36 @@ if (model_type == 1 || model_type == 3) { # QProfile readLines(con, n = 1) + x <- c() peA <- c() # probability of sequencing error given reference base is A peC <- c() peG <- c() peT <- c() - + for (i in 1 : N) { strvec <- readLines(con, n = 6) list <- strsplit(strvec[1:4], split = " ") + vecA <- as.numeric(list[[1]]) vecC <- as.numeric(list[[2]]) vecG <- as.numeric(list[[3]]) vecT <- as.numeric(list[[4]]) - if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break - peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecA[1]))) - peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecC[2]))) - peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecG[3]))) - peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecT[4]))) + + if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next + x <- c(x, (i - 1)) + peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, -10 * log(1.0 - vecA[1]))) + peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, -10 * log(1.0 - vecC[2]))) + peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, -10 * log(1.0 - vecG[3]))) + peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, -10 * log(1.0 - vecT[4]))) } - x <- 0 : (length(peA) - 1) - matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality") + matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Phred Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality") legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4) } else { # Profile readLines(con, n = 1) - + + x <- c() peA <- c() # probability of sequencing error given reference base is A peC <- c() peG <- c() @@ -99,18 +103,20 @@ if (model_type == 1 || model_type == 3) { for (i in 1: maxL) { strvec <- readLines(con, n = 6) list <- strsplit(strvec[1:4], split = " ") + vecA <- as.numeric(list[[1]]) vecC <- as.numeric(list[[2]]) vecG <- as.numeric(list[[3]]) vecT <- as.numeric(list[[4]]) - if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break - peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecA[1]) * 100)) - peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecC[2]) * 100)) - peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecG[3]) * 100)) - peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecT[4]) * 100)) + + if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next + x <- c(x, i) + peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, (1.0 - vecA[1]) * 100)) + peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, (1.0 - vecC[2]) * 100)) + peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, (1.0 - vecG[3]) * 100)) + peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, (1.0 - vecT[4]) * 100)) } - x <- 1 : length(peA) matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Position vs. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error") legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4) } @@ -118,6 +124,6 @@ if (model_type == 1 || model_type == 3) { close(con) pair <- read.table(file = cntF, skip = 3, sep = "\t") -plot(pair[,1], pair[,2], xlab = "Number of Alignments", ylab = "Number of Reads", main = "Among alignable reads, distribution of # of alignments") +barplot(pair[,2], names.arg = pair[,1], xlab = "Number of Alignments", ylab = "Number of Reads", main = "Histogram of Read Alignments") dev.off()