readseq2 = pro->simulate(sampler, mateL2, m2pos, m2dir, ref);
}
- std::ostringstream stdout;
- stdout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos<<"_"<<insertL;
- name = stdout.str();
+ strout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos<<"_"<<insertL;
+ name = strout.str();
read = PairedEndRead(SingleRead(name + "/1", readseq1), SingleRead(name + "/2", readseq2));
}
int getModelType() const { return model_type; }
-
- bool simulate(int, PairedEndReadQ&, SingleReadQ&, int&);
private:
static const int model_type = 3;
readseq2 = qpro->simulate(sampler, mateL2, m2pos, m2dir, qual2, ref);
}
- std::ostringstream stdout;
- stdout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos<<"_"<<insertL;
- name = stdout.str();
+ strout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos<<"_"<<insertL;
+ name = strout.str();
read = PairedEndReadQ(SingleReadQ(name + "/1", readseq1, qual1), SingleReadQ(name + "/2", readseq2, qual2));
}
}
- std::ostringstream stdout;
- stdout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos;
- name = stdout.str();
+ strout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos;
+ name = strout.str();
read = SingleRead(name, readseq);
}
}
- std::ostringstream stdout;
- stdout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos;
- name = stdout.str();
+ strout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos;
+ name = strout.str();
read = SingleReadQ(name, readseq, qual);
my $NMB = 1024; # default
-my $status;
+my $status = 0;
my $read_type = 1; # default, single end with qual
my $strand_specific = 0;
+my $mTime = 0;
+my ($time_start, $time_end, $time_alignment, $time_rsem, $time_ci) = (0, 0, 0, 0, 0);
+
GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
"no-qualities" => \$no_qual,
"paired-end" => \$paired_end,
"out-bam" => \$genBamF,
"calc-ci" => \$calcCI,
"ci-memory=i" => \$NMB,
+ "time" => \$mTime,
"q|quiet" => \$quiet,
"h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
$command .= " | gzip > $imdName.sam.gz";
print "$command\n";
+
+ if ($mTime) { $time_start = time(); }
+
$status = system($command);
+
+ if ($mTime) { $time_end = time(); $time_alignment = $time_end - $time_start; }
+
if ($status != 0) {
print "bowtie failed! Please check if you provide correct parameters/options for the pipeline!\n";
exit(-1);
$is_sam = 1; # output of bowtie is a sam file
}
+if ($mTime) { $time_start = time(); }
+
$command = $dir."rsem-parse-alignments $refName $sampleName $sampleToken";
my $samInpType;
}
print "\n";
-$status = open(OUTPUT, ">$imdName.mparams");
-if ($status == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
+my $doesOpen = open(OUTPUT, ">$imdName.mparams");
+if ($doesOpen == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
print OUTPUT "$minL $maxL\n";
print OUTPUT "$probF\n";
print OUTPUT "$estRSPD\n";
&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
+
if ($calcCI) {
$command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $CHAINLEN $SAMPLEGAP";
# $command .= " -p $nThreads";
&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
}
+if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
+
if (!$keep_intermediate_files) {
- $status = system ("rm -rf $temp_dir");
+ $status = system("rm -rf $temp_dir");
if ($status != 0) {
print "Fail to delete the temporary folder!\n";
exit(-1);
}
}
+if ($mTime) { $time_end = time(); }
+
+if ($mTime) {
+ open(OUTPUT, ">$sampleName.time");
+ print OUTPUT "Alignment: $time_alignment s.\n";
+ print OUTPUT "RSEM: $time_rsem s.\n";
+ print OUTPUT "CI: $time_ci s.\n";
+ my $time_del = $time_end - $time_start;
+ print OUTPUT "Delete: $time_del s.\n";
+ close(OUTPUT);
+}
+
# inpF, outF
sub collectResults {
my $local_status;
argv <- commandArgs(TRUE)
if (length(argv) != 2) {
- cat("Usage: rsem-plot-model sample_name outF\n")
+ cat("Usage: rsem-plot-model sample_name output_plot_file\n")
q(status = 1)
}
strvec <- strsplit(argv[1], split = "/")[[1]]
token <- strvec[length(strvec)]
-modelF <- paste(argv[1], ".stat/", token, ".model", sep = "")
-cntF <- paste(argv[1], ".stat/", token, ".cnt", sep = "")
+stat.dir <- paste(argv[1], ".stat", sep = "")
+if (!file.exists(stat.dir)) {
+ cat("Error: directory does not exist: ", stat.dir, "\n", sep = "")
+ cat(strwrap("This version of rsem-plot-model only works with the output of RSEM versions >= 1.1.8"), sep="\n")
+ q(status = 1)
+}
+modelF <- paste(stat.dir, "/", token, ".model", sep = "")
+cntF <- paste(stat.dir, "/", token, ".cnt", sep = "")
pdf(argv[2])
y <- as.numeric(strsplit(strvec[2], split = " ")[[1]])
mean <- weighted.mean(x, y)
std <- sqrt(weighted.mean((x - mean)^2, y))
-plot(x, y, type = "h", main = "Fragment Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Fragment Length", ylab = "Probability")
+plot(x, y, type = "h",
+ main = "Fragment Length Distribution",
+ sub = paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)),
+ xlab = "Fragment Length",
+ ylab = "Probability")
# mate length distribution
if (model_type == 0 || model_type == 1) bval <- as.numeric(readLines(con, n = 1)[1]) else bval <- 1
y <- as.numeric(list[[2]])
mean <- weighted.mean(x, y)
std <- sqrt(weighted.mean((x - mean)^2, y))
- plot(x, y, type = "h", main = "Mate Length Distribution", sub = paste("Mean = ", mean, ", Std = ", std), xlab = "Mate Length", ylab = "Probability")
+ plot(x, y, type = "h",
+ main = "Read Length Distribution",
+ sub=paste("Mean = ", round(mean, 1), ", Std = ", round(std, 1)),
+ xlab = "Read Length",
+ ylab = "Probability")
}
strvec <- readLines(con, n = 1)
peT <- c(peT, ifelse(sum(vecT) < 1e-8, NA, -10 * log10(1.0 - vecT[4])))
}
- matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4, main = "Phred Quality Score vs. Observed Quality", xlab = "Quality Score", ylab = "Observed Quality")
+ matplot(x, cbind(peA, peC, peG, peT), type = "b", lty = 1:4, pch = 0:3, col = 1:4,
+ main = "Observed Quality vs. Phred Quality Score",
+ xlab = "Phred Quality Score",
+ ylab = "Observed Quality")
legend("topleft", c("A", "C", "G", "T"), lty = 1:4, pch = 0:3, col = 1:4)
} else {
# Profile
close(con)
pair <- read.table(file = cntF, skip = 3, sep = "\t")
-barplot(pair[,2], names.arg = pair[,1], xlab = "Number of Alignments", ylab = "Number of Reads", main = "Histogram of Reads with Different Number of Alignments")
+barplot(pair[,2], names.arg = pair[,1],
+ xlab = "Alignments per fragment",
+ ylab = "Number of fragments",
+ main = "Histogram of Alignments per Fragment")
dev.off()
fin.close();
for (int i = 1; i <= M; i++) theta[i] = theta[i] / denom * (1.0 - theta[0]);
- int tmp2140 = 0;
+ int resimulation_count = 0;
//simulating...
model.startSimulation(&sampler, theta);
for (int i = 0; i < N; i++) {
- while (!model.simulate(i, read, sid)) { ++tmp2140; }//;
+ while (!model.simulate(i, read, sid)) { ++resimulation_count; }
read.write(n_os, os);
++counts[sid];
if ((i + 1) % 1000000 == 0 && verbose) printf("GEN %d\n", i + 1);
}
model.finishSimulation();
- printf("Total number of resimulation is %d\n", tmp2140);
+ printf("Total number of resimulation is %d\n", resimulation_count);
}
void writeResFiles(char* outFN) {