rename plotModel.R to rsem-plot-model

author Bo Li <bli@cs.wisc.edu>

Fri, 11 Mar 2011 04:00:40 +0000 (22:00 -0600)

committer Bo Li <bli@cs.wisc.edu>

Fri, 11 Mar 2011 04:00:40 +0000 (22:00 -0600)
author Bo Li <bli@cs.wisc.edu>
Fri, 11 Mar 2011 04:00:40 +0000 (22:00 -0600)
committer Bo Li <bli@cs.wisc.edu>
Fri, 11 Mar 2011 04:00:40 +0000 (22:00 -0600)
diff --git a/README.md b/README.md

index 0c74b6eb387158835f7cbea1005c1273ecab7d87..b95c4ea51cffc471d5b211ab84fc07aabeeb0e64 100644 (file)
--- a/README.md
+++ b/README.md
@@ -125,11 +125,11 @@ Refer to the [UCSC custom track help page](http://genome.ucsc.edu/goldenPath/hel
  
  #### c) Visualize the model learned by RSEM
  
-RSEM provides an R script, plotModel.R, for visulazing the model learned.
+RSEM provides an R script, 'rsem-plot-model', for visulazing the model learned.
  
  Usage:
      
-    plotModel.R modelF outF
+    rsem-plot-model modelF outF
  
  modelF: the sample_name.model file generated by RSEM    
  outF: the file name for plots generated from the model. It is a pdf file    
@@ -138,7 +138,7 @@ The plots generated depends on read type and user configuration. It
  may include fragment length distribution, mate length distribution,
  read start position distribution (RSPD), quality score vs percentage
  of sequecing error given the reference base, position vs percentage of
-sequencing errro given the reference base.
+sequencing error given the reference base.
  
  ## <a name="example"></a> Example
  
diff --git a/plotModel.R b/plotModel.R

deleted file mode 100755 (executable)

index ce0a8dc..0000000
--- a/plotModel.R
+++ /dev/null
@@ -1,109 +0,0 @@
-#!/usr/bin/env Rscript
-
-argv <- commandArgs(TRUE)
-if (length(argv) != 2) {
-  cat("Usage: plotModel.R modelF outF\n")
-  q(status = 1)
-}
-
-con <- file(argv[1], open = "r")       
-pdf(argv[2])
-
-# model type and forward probability
-model_type <- as.numeric(readLines(con, n = 4)[1])  
-
-# fragment length distribution
-strvec <- readLines(con, n = 3)
-vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]])
-maxL <- vec[2] # maxL used for Profile
-x <- (vec[1] + 1) : vec[2]
-y <- as.numeric(strsplit(strvec[2], split = " ")[[1]])
-mean <- weighted.mean(x, y)
-std <- sqrt(weighted.mean((x - mean)^2, y))
-plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability")  
-
-# mate length distribution
-bval <- as.numeric(readLines(con, n = 1)[1])
-if (bval == 1) {
-  list <- strsplit(readLines(con, n = 2), split = " ")
-  vec <- as.numeric(list[[1]])
-  maxL <- vec[2]
-  x <- (vec[1] + 1) : vec[2]
-  y <- as.numeric(list[[2]])
-  mean <- weighted.mean(x, y)
-  std <- sqrt(weighted.mean((x - mean)^2, y))
-  plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability")  
-}
-strvec <- readLines(con, n = 1)
-
-# RSPD
-bval <- as.numeric(readLines(con, n = 1)[1])
-if (bval == 1) {
-  bin_size <- as.numeric(readLines(con, n = 1)[1])
-  y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]])
-  barplot(y, space = 0, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability")
-}
-strvec <- readLines(con, n = 1)
-
-# plot sequencing errors
-if (model_type == 1 || model_type == 3) {
-  # skip QD
-  N <- as.numeric(readLines(con, n = 1)[1])
-  readLines(con, n = N + 1)
-  readLines(con, n = 1) # for the blank line
-  
-  # QProfile
-  readLines(con, n = 1)
-
-  peA <- c() # probability of sequencing error given reference base is A
-  peC <- c()
-  peG <- c()
-  peT <- c()
-
-  for (i in 1 : N) {
-    strvec <- readLines(con, n = 6)
-    list <- strsplit(strvec[1:4], split = " ")
-    vecA <- as.numeric(list[[1]])
-    vecC <- as.numeric(list[[2]])
-    vecG <- as.numeric(list[[3]])
-    vecT <- as.numeric(list[[4]])
-    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
-    peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
-    peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
-    peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
-    peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
-  }
-
-  x <- 0 : (length(peA) - 1)
-  matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Quality Score v.s. Percentage Sequence Error", xlab = "Quality Score", ylab = "Percentage of Sequencing Error")
-  legend("topright", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)
-} else {
-  # Profile
-  readLines(con, n = 1)
-  
-  peA <- c() # probability of sequencing error given reference base is A
-  peC <- c()
-  peG <- c()
-  peT <- c()
-
-  for (i in 1: maxL) {
-    strvec <- readLines(con, n = 6)
-    list <- strsplit(strvec[1:4], split = " ")
-    vecA <- as.numeric(list[[1]])
-    vecC <- as.numeric(list[[2]])
-    vecG <- as.numeric(list[[3]])
-    vecT <- as.numeric(list[[4]])
-    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
-    peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
-    peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
-    peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
-    peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
-  }
-
-  x <- 1 : length(peA)
-  matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Position v.s. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error")
-  legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)       
-}
-
-dev.off()
-close(con)
diff --git a/rsem-plot-model b/rsem-plot-model

new file mode 100755 (executable)

index 0000000..aa2c724
--- /dev/null
+++ b/rsem-plot-model
@@ -0,0 +1,109 @@
+#!/usr/bin/env Rscript
+
+argv <- commandArgs(TRUE)
+if (length(argv) != 2) {
+  cat("Usage: rsem-plot-model modelF outF\n")
+  q(status = 1)
+}
+
+con <- file(argv[1], open = "r")       
+pdf(argv[2])
+
+# model type and forward probability
+model_type <- as.numeric(readLines(con, n = 4)[1])  
+
+# fragment length distribution
+strvec <- readLines(con, n = 3)
+vec <- as.numeric(strsplit(strvec[1], split = " ")[[1]])
+maxL <- vec[2] # maxL used for Profile
+x <- (vec[1] + 1) : vec[2]
+y <- as.numeric(strsplit(strvec[2], split = " ")[[1]])
+mean <- weighted.mean(x, y)
+std <- sqrt(weighted.mean((x - mean)^2, y))
+plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability")  
+
+# mate length distribution
+bval <- as.numeric(readLines(con, n = 1)[1])
+if (bval == 1) {
+  list <- strsplit(readLines(con, n = 2), split = " ")
+  vec <- as.numeric(list[[1]])
+  maxL <- vec[2]
+  x <- (vec[1] + 1) : vec[2]
+  y <- as.numeric(list[[2]])
+  mean <- weighted.mean(x, y)
+  std <- sqrt(weighted.mean((x - mean)^2, y))
+  plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability")  
+}
+strvec <- readLines(con, n = 1)
+
+# RSPD
+bval <- as.numeric(readLines(con, n = 1)[1])
+if (bval == 1) {
+  bin_size <- as.numeric(readLines(con, n = 1)[1])
+  y <- as.numeric(strsplit(readLines(con, n = 1), split = " ")[[1]])
+  barplot(y, space = 0, main = "Read Start Position Distribution", xlab = "Bin #", ylab = "Probability")
+}
+strvec <- readLines(con, n = 1)
+
+# plot sequencing errors
+if (model_type == 1 || model_type == 3) {
+  # skip QD
+  N <- as.numeric(readLines(con, n = 1)[1])
+  readLines(con, n = N + 1)
+  readLines(con, n = 1) # for the blank line
+  
+  # QProfile
+  readLines(con, n = 1)
+
+  peA <- c() # probability of sequencing error given reference base is A
+  peC <- c()
+  peG <- c()
+  peT <- c()
+
+  for (i in 1 : N) {
+    strvec <- readLines(con, n = 6)
+    list <- strsplit(strvec[1:4], split = " ")
+    vecA <- as.numeric(list[[1]])
+    vecC <- as.numeric(list[[2]])
+    vecG <- as.numeric(list[[3]])
+    vecT <- as.numeric(list[[4]])
+    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
+    peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
+    peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
+    peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
+    peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
+  }
+
+  x <- 0 : (length(peA) - 1)
+  matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Quality Score v.s. Percentage Sequence Error", xlab = "Quality Score", ylab = "Percentage of Sequencing Error")
+  legend("topright", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)
+} else {
+  # Profile
+  readLines(con, n = 1)
+  
+  peA <- c() # probability of sequencing error given reference base is A
+  peC <- c()
+  peG <- c()
+  peT <- c()
+
+  for (i in 1: maxL) {
+    strvec <- readLines(con, n = 6)
+    list <- strsplit(strvec[1:4], split = " ")
+    vecA <- as.numeric(list[[1]])
+    vecC <- as.numeric(list[[2]])
+    vecG <- as.numeric(list[[3]])
+    vecT <- as.numeric(list[[4]])
+    if (sum(c(vecA, vecC, vecG, vecT)) < 1e-8) break
+    peA <- c(peA, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecA[1]))
+    peC <- c(peC, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecC[2]))
+    peG <- c(peG, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecG[3]))
+    peT <- c(peT, ifelse(sum(vec) < 1e-8, NA, 1.0 - vecT[4]))
+  }
+
+  x <- 1 : length(peA)
+  matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, main = "Position v.s. Percentage Sequence Error", xlab = "Position", ylab = "Percentage of Sequencing Error")
+  legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3)       
+}
+
+dev.off()
+close(con)
author	Bo Li <bli@cs.wisc.edu>
	Fri, 11 Mar 2011 04:00:40 +0000 (22:00 -0600)
committer	Bo Li <bli@cs.wisc.edu>
	Fri, 11 Mar 2011 04:00:40 +0000 (22:00 -0600)
README.md		patch \| blob \| history
plotModel.R	[deleted file]	patch \| blob \| history
rsem-plot-model	[new file with mode: 0755]	patch \| blob