# Entry point: collect the trailing command-line arguments and require
# exactly two of them (named sample_name and outF in the usage string).
# When the count is wrong the usage message is printed.
# NOTE(review): the embedded line numbers jump from 5 to 9, so the rest of
# this if-body (including its closing brace) is not visible in this extract.
3 argv <- commandArgs(TRUE)
4 if (length(argv) != 2) {
5 cat("Usage: rsem-plot-model sample_name outF\n")
# Derive the input file names from the sample name (argv[1]).
# token is the last "/"-separated component of the sample name.
9 strvec <- strsplit(argv[1], split = "/")[[1]]
10 token <- strvec[length(strvec)]
# Both inputs live under "<sample_name>.stat/" and are named after token:
# the .model file is parsed below, the .cnt file feeds the final histogram.
12 modelF <- paste(argv[1], ".stat/", token, ".model", sep = "")
13 cntF <- paste(argv[1], ".stat/", token, ".cnt", sep = "")
# Open the model file for reading. Every readLines(con, ...) call that
# follows consumes lines sequentially from the current position, so the
# order of the reads below mirrors the layout of the file.
# NOTE(review): no close(con) is visible in this extract - confirm the
# connection is closed once parsing is done.
17 con <- file(modelF, open = "r")
19 # model type and forward probability
# Consumes the first four lines of the model file; only the first one is
# kept, converted to a number, as the model type.
20 model_type <- as.numeric(readLines(con, n = 4)[1])
22 # fragment length distribution
# Consumes three lines: the first is split on spaces into numbers whose
# first two entries bound the length range, the second holds one weight per
# length, and the third is read but not used.
23 strvec <- readLines(con, n = 3)
24 vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]])
25 maxL <- vec[2] # maxL used for Profile
# Lengths covered by the weights: (first bound + 1) up to the second bound.
26 x <- (vec[1] + 1) : vec[2]
27 y <- as.numeric(strsplit(strvec[2], split = " ")[[1]])
# Weighted mean and weighted standard deviation of the lengths, using y as
# the weights.
# NOTE(review): the variable named mean shadows the base function name;
# calls such as weighted.mean() are unaffected, but a rename would be
# clearer.
28 mean <- weighted.mean(x, y)
29 std <- sqrt(weighted.mean((x - mean)^2, y))
# Vertical-line plot (type "h") with the summary statistics in the subtitle.
30 plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability")
32 # mate length distribution
# For model types 0 and 1 one more line is consumed and parsed as the
# numeric flag bval; for every other model type bval is set to 1 without
# reading anything.
33 if (model_type == 0 || model_type == 1) bval <- as.numeric(readLines(con, n = 1)[1]) else bval <- 1
# NOTE(review): the embedded numbering skips 34-35 and 38, so whatever tests
# bval (presumably a guard around this section) is not visible here.
# Consumes two lines, each split on spaces: range bounds first, then the
# weights - the same layout as the fragment length section.
# NOTE(review): the variable named list shadows the base function name.
36 list <- strsplit(readLines(con, n = 2), split = " ")
37 vec <- as.numeric(list[[1]])
# Lengths covered by the weights: (first bound + 1) up to the second bound.
39 x <- (vec[1] + 1) : vec[2]
40 y <- as.numeric(list[[2]])
# Weighted mean and weighted standard deviation, using y as the weights.
41 mean <- weighted.mean(x, y)
42 std <- sqrt(weighted.mean((x - mean)^2, y))
# Vertical-line plot (type "h") with the summary statistics in the subtitle.
43 plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability")
# Consume one line; the value stored in strvec is overwritten before any
# visible statement uses it (presumably a separator line - confirm).
45 strvec <- readLines(con, n = 1)
# Numeric flag parsed from the next line.
# NOTE(review): no visible statement tests this bval; the embedded numbering
# skips 49, so a guard around the lines below is presumably missing here.
48 bval <- as.numeric(readLines(con, n = 1)[1])
# Number of bins, followed by one line of space-separated per-bin values.
50 bin_size <- as.numeric(readLines(con, n = 1)[1])
51 y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]])
# Bars drawn with no gap between them (space = 0), labelled 1..bin_size.
53 barplot(y, space = 0, names.arg = 1:bin_size, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability")
# Consume one more line; again the value is not used by any visible
# statement.
56 strvec <- readLines(con, n = 1)
58 # plot sequencing errors
# Model types 1 and 3 take the branch that opens here.
59 if (model_type == 1 || model_type == 3) {
# N is parsed from the next line; the following N + 1 lines and one blank
# line are then read and discarded.
61 N <- as.numeric(readLines(con, n = 1)[1])
62 readLines(con, n = N + 1)
63 readLines(con, n = 1) # for the blank line
# NOTE(review): the embedded numbering jumps 63 -> 69 and 69 -> 75. The
# initialisation of x, peC, peG and peT and the header of the loop that the
# next statement below belongs to are not visible in this extract.
69 peA <- c() # probability of sequencing error given reference base is A
# Each record is six lines. The first four are split on spaces and parsed as
# the numeric vectors vecA, vecC, vecG and vecT; the last two are ignored.
75 strvec <- readLines(con, n = 6)
76 list <- strsplit(strvec[1:4], split = " ")
78 vecA <- as.numeric(list[[1]])
79 vecC <- as.numeric(list[[2]])
80 vecG <- as.numeric(list[[3]])
81 vecT <- as.numeric(list[[4]])
# Skip records whose four vectors together sum to (almost) zero.
83 if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next
# One value is appended per vector: NA when that vector sums to (almost)
# zero, otherwise -10 * log(1 - entry), where the entry is taken at the
# matching index (vecA[1], vecC[2], vecG[3], vecT[4]).
# NOTE(review): log() in R is the natural logarithm. If a Phred-scaled
# value (-10 * log10(p)) is intended for the "Observed Quality" axis, this
# should be log10 - confirm before changing.
# NOTE(review): ifelse() is applied to scalar conditions here; a plain
# if/else would be the conventional form.
85 peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, -10 * log(1.0 - vecA[1])))
86 peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, -10 * log(1.0 - vecC[2])))
87 peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, -10 * log(1.0 - vecG[3])))
88 peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, -10 * log(1.0 - vecT[4])))
# One series per accumulated vector plotted against x, with distinct line
# type, symbol and colour for each; the legend uses the same encodings.
91 matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Phred Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality")
92 legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4)
# NOTE(review): the embedded numbering jumps 92 -> 98 and 98 -> 104. What
# follows is presumably the alternative branch for the remaining model
# types; its else, accumulator initialisation and loop header are not
# visible in this extract.
98 peA <- c() # probability of sequencing error given reference base is A
# Same six-line record layout as above: four parsed vectors, two ignored
# lines.
104 strvec <- readLines(con, n = 6)
105 list <- strsplit(strvec[1:4], split = " ")
107 vecA <- as.numeric(list[[1]])
108 vecC <- as.numeric(list[[2]])
109 vecG <- as.numeric(list[[3]])
110 vecT <- as.numeric(list[[4]])
# Skip records whose four vectors together sum to (almost) zero.
112 if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next
# Same entry selection as in the first variant, but the appended value is
# (1 - entry) * 100, i.e. a percentage, or NA for an (almost) all-zero
# vector.
114 peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, (1.0 - vecA[1]) * 100))
115 peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, (1.0 - vecC[2]) * 100))
116 peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, (1.0 - vecG[3]) * 100))
117 peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, (1.0 - vecT[4]) * 100))
# One series per accumulated vector plotted against x, styled as in the
# first variant.
120 matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Position vs. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error")
121 legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4)
# Read the count file as a tab-separated table, skipping its first three
# lines.
126 pair <- read.table(file = cntF, skip = 3, sep = "\t")
# Bar chart with column 2 as the bar heights and column 1 as the bar labels.
127 barplot(pair[,2], names.arg = pair[,1], xlab = "Number of Alignments", ylab = "Number of Reads", main = "Histogram of Reads with Different Number of Alignments")