From 68a2be089a876aba126e384837559aaab40431bf Mon Sep 17 00:00:00 2001 From: Bo Li Date: Mon, 23 May 2011 23:52:56 -0500 Subject: [PATCH] rsem-plot-model updated --- PairedEndModel.h | 5 ++--- PairedEndQModel.h | 7 ++----- SingleModel.h | 5 ++--- SingleQModel.h | 5 ++--- rsem-calculate-expression | 40 +++++++++++++++++++++++++++++++++++---- rsem-plot-model | 34 ++++++++++++++++++++++++++------- simulation.cpp | 6 +++--- 7 files changed, 74 insertions(+), 28 deletions(-) diff --git a/PairedEndModel.h b/PairedEndModel.h index 6e0f1a4..7c87374 100644 --- a/PairedEndModel.h +++ b/PairedEndModel.h @@ -385,9 +385,8 @@ bool PairedEndModel::simulate(int rid, PairedEndRead& read, int& sid) { readseq2 = pro->simulate(sampler, mateL2, m2pos, m2dir, ref); } - std::ostringstream stdout; - stdout<simulate(sampler, mateL2, m2pos, m2dir, qual2, ref); } - std::ostringstream stdout; - stdout< \$keep_intermediate_files, "no-qualities" => \$no_qual, "paired-end" => \$paired_end, @@ -82,6 +85,7 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files, "out-bam" => \$genBamF, "calc-ci" => \$calcCI, "ci-memory=i" => \$NMB, + "time" => \$mTime, "q|quiet" => \$quiet, "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2); @@ -189,7 +193,13 @@ if (!$is_sam && !$is_bam) { $command .= " | gzip > $imdName.sam.gz"; print "$command\n"; + + if ($mTime) { $time_start = time(); } + $status = system($command); + + if ($mTime) { $time_end = time(); $time_alignment = $time_end - $time_start; } + if ($status != 0) { print "bowtie failed! Please check if you provide correct parameters/options for the pipeline!\n"; exit(-1); @@ -200,6 +210,8 @@ if (!$is_sam && !$is_bam) { $is_sam = 1; # output of bowtie is a sam file } +if ($mTime) { $time_start = time(); } + $command = $dir."rsem-parse-alignments $refName $sampleName $sampleToken"; my $samInpType; @@ -234,8 +246,8 @@ if ($status != 0) { } print "\n"; -$status = open(OUTPUT, ">$imdName.mparams"); -if ($status == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); } +my $doesOpen = open(OUTPUT, ">$imdName.mparams"); +if ($doesOpen == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); } print OUTPUT "$minL $maxL\n"; print OUTPUT "$probF\n"; print OUTPUT "$estRSPD\n"; @@ -284,6 +296,10 @@ if ($genBamF) { &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level +if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; } + +if ($mTime) { $time_start = time(); } + if ($calcCI) { $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $CHAINLEN $SAMPLEGAP"; # $command .= " -p $nThreads"; @@ -317,14 +333,30 @@ if ($calcCI) { &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level } +if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; } + +if ($mTime) { $time_start = time(); } + if (!$keep_intermediate_files) { - $status = system ("rm -rf $temp_dir"); + $status = system("rm -rf $temp_dir"); if ($status != 0) { print "Fail to delete the temporary folder!\n"; exit(-1); } } +if ($mTime) { $time_end = time(); } + +if ($mTime) { + open(OUTPUT, ">$sampleName.time"); + print OUTPUT "Alignment: $time_alignment s.\n"; + print OUTPUT "RSEM: $time_rsem s.\n"; + print OUTPUT "CI: $time_ci s.\n"; + my $time_del = $time_end - $time_start; + print OUTPUT "Delete: $time_del s.\n"; + close(OUTPUT); +} + # inpF, outF sub collectResults { my $local_status; diff --git a/rsem-plot-model b/rsem-plot-model index c15a75f..225d08f 100755 --- a/rsem-plot-model +++ b/rsem-plot-model @@ -2,15 +2,21 @@ argv <- commandArgs(TRUE) if (length(argv) != 2) { - cat("Usage: rsem-plot-model sample_name outF\n") + cat("Usage: rsem-plot-model sample_name output_plot_file\n") q(status = 1) } strvec <- strsplit(argv[1], split = "/")[[1]] token <- strvec[length(strvec)] -modelF <- paste(argv[1], ".stat/", token, ".model", sep = "") -cntF <- paste(argv[1], ".stat/", token, ".cnt", sep = "") +stat.dir <- paste(argv[1], ".stat", sep = "") +if (!file.exists(stat.dir)) { + cat("Error: directory does not exist: ", stat.dir, "\n", sep = "") + cat(strwrap("This version of rsem-plot-model only works with the output of RSEM versions >= 1.1.8"), sep="\n") + q(status = 1) +} +modelF <- paste(stat.dir, "/", token, ".model", sep = "") +cntF <- paste(stat.dir, "/", token, ".cnt", sep = "") pdf(argv[2]) @@ -27,7 +33,11 @@ x <- (vec[1] + 1) : vec[2] y <- as.numeric(strsplit(strvec[2], split = " ")[[1]]) mean <- weighted.mean(x, y) std <- sqrt(weighted.mean((x - mean)^2, y)) -plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability") +plot(x, y, type = "h", + main = "Fragment Length Distribution", + sub = paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)), + xlab = "Fragment Length", + ylab = "Probability") # mate length distribution if (model_type == 0 || model_type == 1) bval <- as.numeric(readLines(con, n = 1)[1]) else bval <- 1 @@ -40,7 +50,11 @@ if (bval == 1) { y <- as.numeric(list[[2]]) mean <- weighted.mean(x, y) std <- sqrt(weighted.mean((x - mean)^2, y)) - plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability") + plot(x, y, type = "h", + main = "Read Length Distribution", + sub=paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)), + xlab = "Read Length", + ylab = "Probability") } strvec <- readLines(con, n = 1) @@ -88,7 +102,10 @@ if (model_type == 1 || model_type == 3) { peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, -10 * log10(1.0 - vecT[4]))) } - matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Phred Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality") + matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, + main = "Observed Quality vs. Phred Quality Score", + xlab = "Phred Quality Score", + ylab = "Observed Quality") legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4) } else { # Profile @@ -124,6 +141,9 @@ if (model_type == 1 || model_type == 3) { close(con) pair <- read.table(file = cntF, skip = 3, sep = "\t") -barplot(pair[,2], names.arg = pair[,1], xlab = "Number of Alignments", ylab = "Number of Reads", main = "Histogram of Reads with Different Number of Alignments") +barplot(pair[,2], names.arg = pair[,1], + xlab = "Alignments per fragment", + ylab = "Number of fragments", + main = "Histogram of Alignments per Fragment") dev.off() diff --git a/simulation.cpp b/simulation.cpp index 19f2b92..e516cc1 100644 --- a/simulation.cpp +++ b/simulation.cpp @@ -138,19 +138,19 @@ void simulate(char* modelF, char* resultsF) { fin.close(); for (int i = 1; i <= M; i++) theta[i] = theta[i] / denom * (1.0 - theta[0]); - int tmp2140 = 0; + int resimulation_count = 0; //simulating... model.startSimulation(&sampler, theta); for (int i = 0; i < N; i++) { - while (!model.simulate(i, read, sid)) { ++tmp2140; }//; + while (!model.simulate(i, read, sid)) { ++resimulation_count; } read.write(n_os, os); ++counts[sid]; if ((i + 1) % 1000000 == 0 && verbose) printf("GEN %d\n", i + 1); } model.finishSimulation(); - printf("Total number of resimulation is %d\n", tmp2140); + printf("Total number of resimulation is %d\n", resimulation_count); } void writeResFiles(char* outFN) { -- 2.39.2