#!/usr/bin/env Rscript argv <- commandArgs(TRUE) if (length(argv) != 2) { cat("Usage: rsem-plot-model modelF outF\n") q(status = 1) } con <- file(argv[1], open = "r") pdf(argv[2]) # model type and forward probability model_type <- as.numeric(readLines(con, n = 4)[1]) # fragment length distribution strvec <- readLines(con, n = 3) vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]]) maxL <- vec[2] # maxL used for Profile x <- (vec[1] + 1) : vec[2] y <- as.numeric(strsplit(strvec[2], split = " ")[[1]]) mean <- weighted.mean(x, y) std <- sqrt(weighted.mean((x - mean)^2, y)) plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability") # mate length distribution bval <- as.numeric(readLines(con, n = 1)[1]) if (bval == 1) { list <- strsplit(readLines(con, n = 2), split = " ") vec <- as.numeric(list[[1]]) maxL <- vec[2] x <- (vec[1] + 1) : vec[2] y <- as.numeric(list[[2]]) mean <- weighted.mean(x, y) std <- sqrt(weighted.mean((x - mean)^2, y)) plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability") } strvec <- readLines(con, n = 1) # RSPD bval <- as.numeric(readLines(con, n = 1)[1]) if (bval == 1) { bin_size <- as.numeric(readLines(con, n = 1)[1]) y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]]) barplot(y, space = 0, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability") } strvec <- readLines(con, n = 1) # plot sequencing errors if (model_type == 1 || model_type == 3) { # skip QD N <- as.numeric(readLines(con, n = 1)[1]) readLines(con, n = N + 1) readLines(con, n = 1) # for the blank line # QProfile readLines(con, n = 1) peA <- c() # probability of sequencing error given reference base is A peC <- c() peG <- c() peT <- c() for (i in 1 : N) { strvec <- readLines(con, n = 6) list <- strsplit(strvec[1:4], split = " ") vecA <- as.numeric(list[[1]]) vecC <- as.numeric(list[[2]]) vecG <- as.numeric(list[[3]]) vecT <- as.numeric(list[[4]]) if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1])) peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2])) peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3])) peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4])) } x <- 0 : (length(peA) - 1) matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Quality Score v.s. Percentage Sequence Error", xlab = "Quality Score", ylab = "Percentage of Sequencing Error") legend("topright", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3) } else { # Profile readLines(con, n = 1) peA <- c() # probability of sequencing error given reference base is A peC <- c() peG <- c() peT <- c() for (i in 1: maxL) { strvec <- readLines(con, n = 6) list <- strsplit(strvec[1:4], split = " ") vecA <- as.numeric(list[[1]]) vecC <- as.numeric(list[[2]]) vecG <- as.numeric(list[[3]]) vecT <- as.numeric(list[[4]]) if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1])) peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2])) peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3])) peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4])) } x <- 1 : length(peA) matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Position v.s. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error") legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3) } dev.off() close(con)