X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=rsem-gen-transcript-plots;h=5b8cc98434d70083c1573868b6162b612ed1a572;hp=4ea1d87ba9ce14348dbb9d0b37173df66ee52149;hb=refs%2Fheads%2Fupstream;hpb=1d786dca8b4c9c1e2176a28a669d2d16cd93e395 diff --git a/rsem-gen-transcript-plots b/rsem-gen-transcript-plots index 4ea1d87..5b8cc98 100755 --- a/rsem-gen-transcript-plots +++ b/rsem-gen-transcript-plots @@ -2,83 +2,89 @@ nrow_per_page <- 3 # if input_list is composed of transcript ids ncol_per_page <- 2 # if input_list is composed of transcript ids -num_plots_per_page <- nrow_per_page * ncol_per_page # if input_list is composed of transcript ids - +num_plots_per_page <- nrow_per_page * ncol_per_page # if input_list is composed of transcript/allele ids exit_with_error <- function(errmsg) { cat(errmsg, "\n", sep = "", file = stderr()) quit(save = "no", status = 1) } - args <- commandArgs(TRUE) -if (length(args) != 5) - exit_with_error("Usage: rsem-gen-transcript-plots sample_name input_list is_gene show_uniq output_plot_file") +if (length(args) != 6) + exit_with_error("Usage: rsem-gen-transcript-plots sample_name input_list is_allele_specific id_type<0,allele;1,isoform;2,gene> show_uniq output_plot_file") sample_name <- args[1] input_list <- args[2] -is_gene <- as.numeric(args[3]) -show_uniq <- as.numeric(args[4]) -output_plot_file <- args[5] +alleleS <- as.numeric(args[3]) +id_type <- as.numeric(args[4]) +show_uniq <- as.numeric(args[5]) +output_plot_file <- args[6] +is_composite <- (!alleleS && (id_type == 2)) || (alleleS && (id_type > 0)) + load_readdepth_file <- function(filename) { data <- read.table(file = filename, sep = "\t", stringsAsFactors = FALSE) readdepth <- split(data[, 2:3], data[, 1]) } -build_t2gmap <- function(filename) { - tpos <- 1 # the position of transcript_id - gpos <- 2 # the position of gene_id +build_map <- function(filename) { + value_pos <- 1 + if (!alleleS || (alleleS && id_type == 1)) { + key_pos <- 2 + } else { + key_pos <- 3 + } data <- read.delim(file = filename, sep = "\t", stringsAsFactors = FALSE) - tmp <- aggregate(data[tpos], data[gpos], function(x) x) - t2gmap <- tmp[,2] - names(t2gmap) <- tmp[,1] + tmp <- aggregate(data[value_pos], data[key_pos], function(x) x) + map <- tmp[,2] + names(map) <- tmp[,1] - t2gmap + map } -make_a_plot <- function(tid) { - vec <- readdepth[[tid]] - if (is.null(vec)) exit_with_error(paste("Unknown transcript detected,", tid, "is not included in RSEM's indices.")) +make_a_plot <- function(id) { + vec <- readdepth[[id]] + if (is.null(vec)) exit_with_error(sprintf("Unknown %s detected, %s is not included in RSEM's indices.", ifelse(alleleS, "allele-specific transcript", "transcript"), id)) if (is.na(vec[[2]])) wiggle <- rep(0, vec[[1]]) else wiggle <- as.numeric(unlist(strsplit(vec[[2]], split = " "))) len <- length(wiggle) if (!show_uniq) { plot(wiggle, type = "h") } else { - vec <- readdepth_uniq[[tid]] + vec <- readdepth_uniq[[id]] stopifnot(!is.null(vec)) if (is.na(vec[[2]])) wiggle_uniq <- rep(0, vec[[1]]) else wiggle_uniq <- as.numeric(unlist(strsplit(vec[[2]], split = " "))) stopifnot(len == length(wiggle_uniq)) if (len != sum(wiggle >= wiggle_uniq)) { - cat("Warning: transcript ", tid, " has position(s) that read covarege with multireads is smaller than read covarge without multireads.\n", " The 1-based position(s) is(are) : ", which(wiggle < wiggle_uniq), ".\n", " This may be due to floating point arithmetics.\n", sep = "") + cat("Warning: ", ifelse(alleleS, "allele-specific transcript", "transcript"), " ", id, " has position(s) that read covarege with multireads is smaller than read covarge without multireads.\n", " The 1-based position(s) is(are) : ", which(wiggle < wiggle_uniq), ".\n", " This may be due to floating point arithmetics.\n", sep = "") } heights <- rbind(wiggle_uniq, wiggle - wiggle_uniq) barplot(heights, space = 0, border = NA, names.arg = 1:len, col = c("black", "red")) } - title(main = tid) #, xlab = "Position in transcript", ylab = "Read depth") + title(main = id) } -generate_a_page <- function(tids, gene_id = NULL) { - n <- length(tids) - ncol <- ifelse(is_gene, floor(sqrt(n)), ncol_per_page) - nrow <- ifelse(is_gene, ceiling(n / ncol), nrow_per_page) +generate_a_page <- function(ids, title = NULL) { + n <- length(ids) + ncol <- ifelse(is_composite, floor(sqrt(n)), ncol_per_page) + nrow <- ifelse(is_composite, ceiling(n / ncol), nrow_per_page) par(mfrow = c(nrow, ncol), mar = c(2, 2, 2, 2)) - if (is_gene) par(oma = c(0, 0, 3, 0)) - sapply(tids, make_a_plot) - if (is_gene) mtext(gene_id, outer = TRUE, line = 1) + if (is_composite) par(oma = c(0, 0, 3, 0)) + sapply(ids, make_a_plot) + if (is_composite) mtext(title, outer = TRUE, line = 1) } -plot_a_transcript <- function(i) { +plot_individual <- function(i) { fr <- (i - 1) * num_plots_per_page + 1 to <- min(i * num_plots_per_page, n) generate_a_page(ids[fr:to]) } -plot_a_gene <- function(gene_id) { - if (is.null(t2gmap[[gene_id]])) exit_with_error(paste("Unknown gene detected,", gene_id, "is not included in RSEM's in indices.")) - generate_a_page(t2gmap[[gene_id]], gene_id) +# cid, composite id, can be either a gene id or transcript id (for allele-specific expression only) +plot_composite <- function(cid) { + if (is.null(map[[cid]])) exit_with_error(sprintf("Unknown %s detected, %s is not included in RSEM's indices.", ifelse(alleleS && id_type == 1, "transcript", "gene"), cid)) + generate_a_page(map[[cid]], cid) } readdepth <- load_readdepth_file(paste(sample_name, ".transcript.readdepth", sep = "")) @@ -91,24 +97,22 @@ ids <- scan(file = input_list, what = "", sep = "\n") cat("Loading files is done!\n") -if (is_gene) { - t2gmap <- build_t2gmap(paste(sample_name, ".isoforms.results", sep = "")) +if (is_composite) { + file_name <- sprintf("%s.%s.results", sample_name, ifelse(alleleS, "alleles", "isoforms")) + map <- build_map(file_name) cat("Building transcript to gene map is done!\n") } pdf(output_plot_file) -if (!is_gene) { +if (!is_composite) { n <- length(ids) ub <- (n - 1) %/% num_plots_per_page + 1 - tmp <- sapply(1:ub, plot_a_transcript) + dumbvar <- sapply(1:ub, plot_individual) } else { - tmp <- sapply(ids, plot_a_gene) + dumbvar <- sapply(ids, plot_composite) } cat("Plots are generated!\n") dev.off.output <- dev.off() - - -