#### c) Visualize the model learned by RSEM
-RSEM provides an R script, plotModel.R, for visulazing the model learned.
+RSEM provides an R script, 'rsem-plot-model', for visulazing the model learned.
Usage:
- plotModel.R modelF outF
+ rsem-plot-model modelF outF
modelF: the sample_name.model file generated by RSEM
outF: the file name for plots generated from the model. It is a pdf file
may include fragment length distribution, mate length distribution,
read start position distribution (RSPD), quality score vs percentage
of sequecing error given the reference base, position vs percentage of
-sequencing errro given the reference base.
+sequencing error given the reference base.
## <a name="example"></a> Example
+++ /dev/null
-#!/usr/bin/env Rscript
-
-argv <- commandArgs(TRUE)
-if (length(argv) != 2) {
- cat("Usage: plotModel.R modelF outF\n")
- q(status = 1)
-}
-
-con <- file(argv[1], open = "r")
-pdf(argv[2])
-
-# model type and forward probability
-model_type <- as.numeric(readLines(con, n = 4)[1])
-
-# fragment length distribution
-strvec <- readLines(con, n = 3)
-vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]])
-maxL <- vec[2] # maxL used for Profile
-x <- (vec[1] + 1) : vec[2]
-y <- as.numeric(strsplit(strvec[2], split = " ")[[1]])
-mean <- weighted.mean(x, y)
-std <- sqrt(weighted.mean((x - mean)^2, y))
-plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability")
-
-# mate length distribution
-bval <- as.numeric(readLines(con, n = 1)[1])
-if (bval == 1) {
- list <- strsplit(readLines(con, n = 2), split = " ")
- vec <- as.numeric(list[[1]])
- maxL <- vec[2]
- x <- (vec[1] + 1) : vec[2]
- y <- as.numeric(list[[2]])
- mean <- weighted.mean(x, y)
- std <- sqrt(weighted.mean((x - mean)^2, y))
- plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability")
-}
-strvec <- readLines(con, n = 1)
-
-# RSPD
-bval <- as.numeric(readLines(con, n = 1)[1])
-if (bval == 1) {
- bin_size <- as.numeric(readLines(con, n = 1)[1])
- y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]])
- barplot(y, space = 0, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability")
-}
-strvec <- readLines(con, n = 1)
-
-# plot sequencing errors
-if (model_type == 1 || model_type == 3) {
- # skip QD
- N <- as.numeric(readLines(con, n = 1)[1])
- readLines(con, n = N + 1)
- readLines(con, n = 1) # for the blank line
-
- # QProfile
- readLines(con, n = 1)
-
- peA <- c() # probability of sequencing error given reference base is A
- peC <- c()
- peG <- c()
- peT <- c()
-
- for (i in 1 : N) {
- strvec <- readLines(con, n = 6)
- list <- strsplit(strvec[1:4], split = " ")
- vecA <- as.numeric(list[[1]])
- vecC <- as.numeric(list[[2]])
- vecG <- as.numeric(list[[3]])
- vecT <- as.numeric(list[[4]])
- if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
- peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
- peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
- peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
- peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
- }
-
- x <- 0 : (length(peA) - 1)
- matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Quality Score v.s. Percentage Sequence Error", xlab = "Quality Score", ylab = "Percentage of Sequencing Error")
- legend("topright", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)
-} else {
- # Profile
- readLines(con, n = 1)
-
- peA <- c() # probability of sequencing error given reference base is A
- peC <- c()
- peG <- c()
- peT <- c()
-
- for (i in 1: maxL) {
- strvec <- readLines(con, n = 6)
- list <- strsplit(strvec[1:4], split = " ")
- vecA <- as.numeric(list[[1]])
- vecC <- as.numeric(list[[2]])
- vecG <- as.numeric(list[[3]])
- vecT <- as.numeric(list[[4]])
- if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
- peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
- peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
- peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
- peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
- }
-
- x <- 1 : length(peA)
- matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Position v.s. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error")
- legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)
-}
-
-dev.off()
-close(con)
--- /dev/null
+#!/usr/bin/env Rscript
+
+argv <- commandArgs(TRUE)
+if (length(argv) != 2) {
+ cat("Usage: rsem-plot-model modelF outF\n")
+ q(status = 1)
+}
+
+con <- file(argv[1], open = "r")
+pdf(argv[2])
+
+# model type and forward probability
+model_type <- as.numeric(readLines(con, n = 4)[1])
+
+# fragment length distribution
+strvec <- readLines(con, n = 3)
+vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]])
+maxL <- vec[2] # maxL used for Profile
+x <- (vec[1] + 1) : vec[2]
+y <- as.numeric(strsplit(strvec[2], split = " ")[[1]])
+mean <- weighted.mean(x, y)
+std <- sqrt(weighted.mean((x - mean)^2, y))
+plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability")
+
+# mate length distribution
+bval <- as.numeric(readLines(con, n = 1)[1])
+if (bval == 1) {
+ list <- strsplit(readLines(con, n = 2), split = " ")
+ vec <- as.numeric(list[[1]])
+ maxL <- vec[2]
+ x <- (vec[1] + 1) : vec[2]
+ y <- as.numeric(list[[2]])
+ mean <- weighted.mean(x, y)
+ std <- sqrt(weighted.mean((x - mean)^2, y))
+ plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability")
+}
+strvec <- readLines(con, n = 1)
+
+# RSPD
+bval <- as.numeric(readLines(con, n = 1)[1])
+if (bval == 1) {
+ bin_size <- as.numeric(readLines(con, n = 1)[1])
+ y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]])
+ barplot(y, space = 0, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability")
+}
+strvec <- readLines(con, n = 1)
+
+# plot sequencing errors
+if (model_type == 1 || model_type == 3) {
+ # skip QD
+ N <- as.numeric(readLines(con, n = 1)[1])
+ readLines(con, n = N + 1)
+ readLines(con, n = 1) # for the blank line
+
+ # QProfile
+ readLines(con, n = 1)
+
+ peA <- c() # probability of sequencing error given reference base is A
+ peC <- c()
+ peG <- c()
+ peT <- c()
+
+ for (i in 1 : N) {
+ strvec <- readLines(con, n = 6)
+ list <- strsplit(strvec[1:4], split = " ")
+ vecA <- as.numeric(list[[1]])
+ vecC <- as.numeric(list[[2]])
+ vecG <- as.numeric(list[[3]])
+ vecT <- as.numeric(list[[4]])
+ if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
+ peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
+ peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
+ peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
+ peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
+ }
+
+ x <- 0 : (length(peA) - 1)
+ matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Quality Score v.s. Percentage Sequence Error", xlab = "Quality Score", ylab = "Percentage of Sequencing Error")
+ legend("topright", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)
+} else {
+ # Profile
+ readLines(con, n = 1)
+
+ peA <- c() # probability of sequencing error given reference base is A
+ peC <- c()
+ peG <- c()
+ peT <- c()
+
+ for (i in 1: maxL) {
+ strvec <- readLines(con, n = 6)
+ list <- strsplit(strvec[1:4], split = " ")
+ vecA <- as.numeric(list[[1]])
+ vecC <- as.numeric(list[[2]])
+ vecG <- as.numeric(list[[3]])
+ vecT <- as.numeric(list[[4]])
+ if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
+ peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
+ peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
+ peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
+ peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
+ }
+
+ x <- 1 : length(peA)
+ matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Position v.s. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error")
+ legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)
+}
+
+dev.off()
+close(con)