# Collect only the arguments supplied after the script name (trailingOnly = TRUE).
3 argv <- commandArgs(TRUE)
# Exactly two arguments are expected; the usage text below names them
# sample_name and output_plot_file.
# NOTE(review): the rest of this `if` body and its closing brace are not
# visible in this excerpt.
4 if (length(argv) != 2) {
5 cat("Usage: rsem-plot-model sample_name output_plot_file\n")
# Split the first argument on "/" and keep its last component as `token`,
# which is used below to build the model and count file names.
9 strvec <- strsplit(argv[1], split = "/")[[1]]
10 token <- strvec[length(strvec)]
# The statistics directory is the first argument with ".stat" appended.
12 stat.dir <- paste(argv[1], ".stat", sep = "")
# When that directory is missing, print an error naming it plus a wrapped hint
# about the required RSEM version.
# NOTE(review): the remainder of this `if` body and its closing brace are not
# visible in this excerpt.
13 if (!file.exists(stat.dir)) {
14 cat("Error: directory does not exist: ", stat.dir, "\n", sep = "")
15 cat(strwrap("This version of rsem-plot-model only works with the output of RSEM versions >= 1.1.8"), sep="\n")
# Input files inside the statistics directory:
# <stat.dir>/<token>.model and <stat.dir>/<token>.cnt.
18 modelF <- paste(stat.dir, "/", token, ".model", sep = "")
19 cntF <- paste(stat.dir, "/", token, ".cnt", sep = "")
# Open the model file as a read connection. Because the connection is opened
# explicitly, each readLines(con, ...) call continues from where the previous
# one stopped, so the order of the reads in this script matters.
23 con <- file(modelF, open = "r")
25 # model type and forward probability
# Consume four lines but keep only the first, parsed as the numeric model type;
# the other three lines are discarded here.
26 model_type <- as.numeric(readLines(con, n = 4)[1])
28 # fragment length distribution
# Consume three lines. The first is parsed as space-separated numbers whose
# first two entries bound the length range; the second holds one weight per
# length. The third line is read but not used by the visible code.
29 strvec <- readLines(con, n = 3)
30 vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]])
31 maxL <- vec[2] # maxL used for Profile
# Lengths covered by the distribution: vec[1] + 1 through vec[2].
32 x <- (vec[1] + 1) : vec[2]
33 y <- as.numeric(strsplit(strvec[2], split = " ")[[1]])
# Weighted mean and weighted standard deviation of the lengths, using y as the
# weights. `mean` here is a numeric variable that shares its name with the base
# function.
34 mean <- weighted.mean(x, y)
35 std <- sqrt(weighted.mean((x - mean)^2, y))
# Draw the distribution as vertical lines (type = "h"), reporting the rounded
# mean and standard deviation in the subtitle.
# NOTE(review): the remaining arguments and the closing parenthesis of this
# plot() call are not visible in this excerpt.
36 plot(x, y, type = "h",
37 main = "Fragment Length Distribution",
38 sub = paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)),
39 xlab = "Fragment Length",
42 # mate length distribution
# For model types 0 and 1 a numeric value is read from the next line of the
# model file into bval; for every other model type bval is set to 1 without
# reading anything.
# NOTE(review): how bval is tested afterwards is not visible in this excerpt.
43 if (model_type == 0 || model_type == 1) bval <- as.numeric(readLines(con, n = 1)[1]) else bval <- 1
# Consume two lines and split each on spaces: the first gives the range bounds,
# the second the per-length weights. `list` here is a local variable that
# shares its name with the base function.
46 list <- strsplit(readLines(con, n = 2), split = " ")
47 vec <- as.numeric(list[[1]])
# Lengths covered: vec[1] + 1 through vec[2].
49 x <- (vec[1] + 1) : vec[2]
50 y <- as.numeric(list[[2]])
# Weighted mean and weighted standard deviation of the read lengths.
51 mean <- weighted.mean(x, y)
52 std <- sqrt(weighted.mean((x - mean)^2, y))
# Same presentation as the fragment length plot: vertical lines with the
# rounded statistics in the subtitle.
# NOTE(review): the tail of this plot() call is not visible in this excerpt.
53 plot(x, y, type = "h",
54 main = "Read Length Distribution",
55 sub=paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)),
# Consume one line from the model file; its content is overwritten before any
# visible use (presumably a separator line — TODO confirm against the model
# file format).
59 strvec <- readLines(con, n = 1)
# Numeric value read from the next line.
# NOTE(review): any condition on bval guarding the plot below is not visible in
# this excerpt.
62 bval <- as.numeric(readLines(con, n = 1)[1])
# Number of bins, followed by one line of space-separated per-bin values.
64 bin_size <- as.numeric(readLines(con, n = 1)[1])
65 y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]])
# Bar chart with no gap between bars (space = 0), labelled 1..bin_size.
67 barplot(y, space = 0, names.arg = 1:bin_size, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability")
# Consume one more line; its content is not used by the visible code.
70 strvec <- readLines(con, n = 1)
72 # plot sequencing errors
# This branch runs for model types 1 and 3 and produces the plot titled
# "Observed Quality vs. Phred Quality Score".
# NOTE(review): the closing brace of this `if` is not visible in this excerpt.
73 if (model_type == 1 || model_type == 3) {
# Read the count N, then consume and discard the next N + 1 lines and one more
# blank line so the connection is positioned at the data parsed below.
75 N <- as.numeric(readLines(con, n = 1)[1])
76 readLines(con, n = N + 1)
77 readLines(con, n = 1) # for the blank line
# Accumulator for reference base A, grown by one value per processed entry.
# NOTE(review): the matching initialisation of peC, peG and peT and the loop
# header are not visible in this excerpt; the `next` below implies an enclosing
# loop.
83 peA <- c() # probability of sequencing error given reference base is A
# Consume six lines per entry; only the first four are parsed, as the
# space-separated rows for A, C, G and T in that order.
89 strvec <- readLines(con, n = 6)
90 list <- strsplit(strvec[1:4], split = " ")
92 vecA <- as.numeric(list[[1]])
93 vecC <- as.numeric(list[[2]])
94 vecG <- as.numeric(list[[3]])
95 vecT <- as.numeric(list[[4]])
# Skip entries whose four rows sum to (almost) zero.
97 if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next
# For each base, append -10 * log10(1 - p), where p is the row's entry at the
# base's own index (A = 1, C = 2, G = 3, T = 4), or NA when that row sums to
# (almost) zero. Presumably p is the probability of reading the reference base
# correctly, which makes the result a Phred-scaled observed quality — TODO
# confirm against the model file format.
99 peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, -10 * log10(1.0 - vecA[1])))
100 peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, -10 * log10(1.0 - vecC[2])))
101 peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, -10 * log10(1.0 - vecG[3])))
102 peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, -10 * log10(1.0 - vecT[4])))
# Plot the four per-base series against x, each with its own line type, symbol
# and colour, then add a matching legend.
# NOTE(review): the x used here is assigned in lines not visible in this
# excerpt.
105 matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4,
106 main = "Observed Quality vs. Phred Quality Score",
107 xlab = "Phred Quality Score",
108 ylab = "Observed Quality")
109 legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4)
# Consume and discard one line from the model file before the data parsed
# below.
# NOTE(review): the condition selecting this section and its loop header are
# not visible in this excerpt; the `next` below implies an enclosing loop.
112 readLines(con, n = 1)
# Accumulator for reference base A, grown by one value per processed entry
# (peC, peG and peT are initialised in lines not visible here).
115 peA <- c() # probability of sequencing error given reference base is A
# Consume six lines per entry; only the first four are parsed, as the
# space-separated rows for A, C, G and T in that order.
121 strvec <- readLines(con, n = 6)
122 list <- strsplit(strvec[1:4], split = " ")
124 vecA <- as.numeric(list[[1]])
125 vecC <- as.numeric(list[[2]])
126 vecG <- as.numeric(list[[3]])
127 vecT <- as.numeric(list[[4]])
# Skip entries whose four rows sum to (almost) zero.
129 if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next
# For each base, append (1 - p) * 100, where p is the row's entry at the base's
# own index (A = 1, C = 2, G = 3, T = 4), or NA when that row sums to (almost)
# zero. The result is plotted below as a percentage of sequencing error.
131 peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, (1.0 - vecA[1]) * 100))
132 peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, (1.0 - vecC[2]) * 100))
133 peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, (1.0 - vecG[3]) * 100))
134 peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, (1.0 - vecT[4]) * 100))
# Plot the four per-base series against x with distinct line types, symbols and
# colours, then add a matching legend.
# NOTE(review): the x used here is assigned in lines not visible in this
# excerpt.
137 matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Position vs. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error")
138 legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4)
# Read the count file as a tab-separated table, skipping its first three lines.
143 pair <- read.table(file = cntF, skip = 3, sep = "\t")
# Bar chart of the second column, with bars labelled by the first column; per
# the axis labels, column 1 is the number of alignments per fragment and
# column 2 the number of fragments.
144 barplot(pair[,2], names.arg = pair[,1],
145 xlab = "Alignments per fragment",
146 ylab = "Number of fragments",
147 main = "Histogram of Alignments per Fragment")