RSEM v1.1.8

[rsem.git] / rsem-plot-model
diff --git a/rsem-plot-model b/rsem-plot-model

index f7b84fc0e3664d5441807281afc2ad276aef4b8b..8ff626a530db1808ac4067a91ac04297df423d76 100755 (executable)
--- a/rsem-plot-model
+++ b/rsem-plot-model
@@ -9,8 +9,8 @@ if (length(argv) != 2) {
  strvec <- strsplit(argv[1], split = "/")[[1]]
  token <- strvec[length(strvec)]
  
-modelF <- paste(argv[1], ".stat/", token, ".model")
-cntF <- paste(argv[1], ".stat/", token, ".cnt")
+modelF <- paste(argv[1], ".stat/", token, ".model", sep = "")
+cntF <- paste(argv[1], ".stat/", token, ".cnt", sep = "")
  
  pdf(argv[2])
  
@@ -65,32 +65,36 @@ if (model_type == 1 || model_type == 3) {
    # QProfile
    readLines(con, n = 1)
  
+  x <- c()
    peA <- c() # probability of sequencing error given reference base is A
    peC <- c()
    peG <- c()
    peT <- c()
-
+  
    for (i in 1 : N) {
      strvec <- readLines(con, n = 6)
      list <- strsplit(strvec[1:4], split = " ")
+
      vecA <- as.numeric(list[[1]])
      vecC <- as.numeric(list[[2]])
      vecG <- as.numeric(list[[3]])
      vecT <- as.numeric(list[[4]])
-    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
-    peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecA[1])))
-    peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecC[2])))
-    peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecG[3])))
-    peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, -10 * log(1.0 - vecT[4])))
+
+    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next
+    x <- c(x, (i - 1))
+    peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, -10 * log(1.0 - vecA[1])))
+    peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, -10 * log(1.0 - vecC[2])))
+    peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, -10 * log(1.0 - vecG[3])))
+    peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, -10 * log(1.0 - vecT[4])))
    }
  
-  x <- 0 : (length(peA) - 1)
-  matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality")
+  matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Phred Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality")
    legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4)
  } else {
    # Profile
    readLines(con, n = 1)
-  
+
+  x <- c()  
    peA <- c() # probability of sequencing error given reference base is A
    peC <- c()
    peG <- c()
@@ -99,18 +103,20 @@ if (model_type == 1 || model_type == 3) {
    for (i in 1: maxL) {
      strvec <- readLines(con, n = 6)
      list <- strsplit(strvec[1:4], split = " ")
+
      vecA <- as.numeric(list[[1]])
      vecC <- as.numeric(list[[2]])
      vecG <- as.numeric(list[[3]])
      vecT <- as.numeric(list[[4]])
-    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
-    peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecA[1]) * 100))
-    peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecC[2]) * 100))
-    peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecG[3]) * 100))
-    peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, (1.0 - vecT[4]) * 100))
+
+    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) next
+    x <- c(x, i)
+    peA <- c(peA, ifelse(sum(vecA) < 1e-8, NA, (1.0 - vecA[1]) * 100))
+    peC <- c(peC, ifelse(sum(vecC) < 1e-8, NA, (1.0 - vecC[2]) * 100))
+    peG <- c(peG, ifelse(sum(vecG) < 1e-8, NA, (1.0 - vecG[3]) * 100))
+    peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, (1.0 - vecT[4]) * 100))
    }
  
-  x <- 1 : length(peA)
    matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Position vs. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error")
    legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4)       
  }
@@ -118,6 +124,6 @@ if (model_type == 1 || model_type == 3) {
  close(con)
  
  pair <- read.table(file = cntF, skip = 3, sep = "\t")
-plot(pair[,1], pair[,2], xlab = "Number of Alignments", ylab = "Number of Reads", main = "Among alignable reads, distribution of # of alignments")
+barplot(pair[,2], names.arg = pair[,1], xlab = "Number of Alignments", ylab = "Number of Reads", main = "Histogram of Read Alignments")
  
  dev.off()