my $NMB = 1024; # default
-my $status;
+my $status = 0;
my $read_type = 1; # default, single end with qual
my $strand_specific = 0;
+my $mTime = 0;
+my ($time_start, $time_end, $time_alignment, $time_rsem, $time_ci) = (0, 0, 0, 0, 0);
+
GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
"no-qualities" => \$no_qual,
"paired-end" => \$paired_end,
"out-bam" => \$genBamF,
"calc-ci" => \$calcCI,
"ci-memory=i" => \$NMB,
+ "time" => \$mTime,
"q|quiet" => \$quiet,
"h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
my $mate2_list = "";
my $inpF = "";
-my ($refName, $taskName, $tmp_dir, $imdName) = ();
+my ($refName, $sampleName, $sampleToken, $temp_dir, $stat_dir, $imdName) = ();
my $gap = 32;
if ($paired_end) {
if ($is_sam || $is_bam) { $inpF = $ARGV[0]; }
else {$mate1_list = $ARGV[0]; }
$refName = $ARGV[1];
- $taskName = $ARGV[2];
+ $sampleName = $ARGV[2];
}
else {
$mate1_list = $ARGV[0];
$mate2_list = $ARGV[1];
$refName = $ARGV[2];
- $taskName = $ARGV[3];
+ $sampleName = $ARGV[3];
}
-$tmp_dir = $taskName.".temp";
-my $pos = rindex($taskName, '/');
-if ($pos < 0) {
- $imdName = "$tmp_dir/$taskName";
-}
-else {
- $imdName = $tmp_dir."/".substr($taskName, $pos + 1);
-}
+my $pos = rindex($sampleName, '/');
+if ($pos < 0) { $sampleToken = $sampleName; }
+else { $sampleToken = substr($sampleName, $pos + 1); }
+
+$temp_dir = "$sampleName.temp";
+$stat_dir = "$sampleName.stat";
+
+if (!(-d $temp_dir) && !mkdir($temp_dir)) { print "Fail to create folder $temp_dir.\n"; exit(-1); }
+if (!(-d $stat_dir) && !mkdir($stat_dir)) { print "Fail to create folder $stat_dir.\n"; exit(-1); }
+
+$imdName = "$temp_dir/$sampleToken";
if (!$is_sam && !$is_bam && $phred33 + $phred64 + $solexa == 0) { $phred33 = 1; }
my ($mate_minL, $mate_maxL) = (1, $maxL);
-if (!(-d $tmp_dir) && !mkdir($tmp_dir)) { print "Fail to create the directory.\n"; exit(-1); }
-
if ($bowtie_path ne "") { $bowtie_path .= "/"; }
my ($fn, $dir, $suf) = fileparse($0);
if ($read_type == 2 || $read_type == 3) { $command .= " -I $minL -X $maxL"; }
if ($strand_specific || $probF == 1.0) { $command .= " --norc"; }
- elsif ($probF = 0.0) { $command .= " --nofw"; }
+ elsif ($probF == 0.0) { $command .= " --nofw"; }
$command .= " -p $nThreads -a -m $maxHits -S";
if ($quiet) { $command .= " --quiet"; }
$command .= " | gzip > $imdName.sam.gz";
print "$command\n";
+
+ if ($mTime) { $time_start = time(); }
+
$status = system($command);
+
+ if ($mTime) { $time_end = time(); $time_alignment = $time_end - $time_start; }
+
if ($status != 0) {
print "bowtie failed! Please check if you provide correct parameters/options for the pipeline!\n";
exit(-1);
$is_sam = 1; # output of bowtie is a sam file
}
-$command = $dir."rsem-parse-alignments $refName $imdName";
+if ($mTime) { $time_start = time(); }
+
+$command = $dir."rsem-parse-alignments $refName $sampleName $sampleToken";
my $samInpType;
if ($is_sam) { $samInpType = "s"; }
}
print "\n";
-$status = open(OUTPUT, ">$imdName.mparams");
-if ($status == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
+my $doesOpen = open(OUTPUT, ">$imdName.mparams");
+if ($doesOpen == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
print OUTPUT "$minL $maxL\n";
print OUTPUT "$probF\n";
print OUTPUT "$estRSPD\n";
print OUTPUT "$L\n";
close(OUTPUT);
-$command = $dir."rsem-run-em $refName $read_type $imdName $taskName -p $nThreads";
+$command = $dir."rsem-run-em $refName $read_type $sampleName $sampleToken -p $nThreads";
if ($genBamF) {
$command .= " -b $samInpType $inpF";
if ($fn_list ne "") { $command .= " 1 $fn_list"; }
print "\n";
if ($genBamF) {
- $command = $dir."sam/samtools sort $taskName.bam $taskName.sorted";
+ $command = $dir."sam/samtools sort $sampleName.bam $sampleName.sorted";
print "$command\n";
$status = system($command);
if ($status != 0) {
exit(-1);
}
print "\n";
- $command = $dir."sam/samtools index $taskName.sorted.bam";
+ $command = $dir."sam/samtools index $sampleName.sorted.bam";
print "$command\n";
$status = system($command);
if ($status != 0) {
print "\n";
}
-&collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
-&collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
+&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+
+if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
if ($calcCI) {
- $command = $dir."rsem-run-gibbs $refName $taskName $imdName $BURNIN $CHAINLEN $SAMPLEGAP";
+ $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $CHAINLEN $SAMPLEGAP";
+# $command .= " -p $nThreads";
if ($quiet) { $command .= " -q"; }
print "$command\n";
$status = system($command);
}
print "\n";
- system("mv $taskName.isoforms.results $imdName.isoforms.results.bak1");
- system("mv $taskName.genes.results $imdName.genes.results.bak1");
- &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
- &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
+ system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
+ system("mv $sampleName.genes.results $imdName.genes.results.bak1");
+ &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+ &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
- $command = $dir."rsem-calculate-credibility-intervals $refName $taskName $imdName $CONFIDENCE $NSPC $NMB";
+ $command = $dir."rsem-calculate-credibility-intervals $refName $sampleName $sampleToken $CONFIDENCE $NSPC $NMB";
if ($quiet) { $command .= " -q"; }
print "$command\n";
$status = system($command);
}
print "\n";
- system("mv $taskName.isoforms.results $imdName.isoforms.results.bak2");
- system("mv $taskName.genes.results $imdName.genes.results.bak2");
- &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
- &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
+ system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2");
+ system("mv $sampleName.genes.results $imdName.genes.results.bak2");
+ &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+ &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
}
+if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
+
if (!$keep_intermediate_files) {
- $status = system ("rm -rf $tmp_dir");
+ $status = system("rm -rf $temp_dir");
if ($status != 0) {
print "Fail to delete the temporary folder!\n";
exit(-1);
}
}
+if ($mTime) { $time_end = time(); }
+
+if ($mTime) {
+ open(OUTPUT, ">$sampleName.time");
+ print OUTPUT "Alignment: $time_alignment s.\n";
+ print OUTPUT "RSEM: $time_rsem s.\n";
+ print OUTPUT "CI: $time_ci s.\n";
+ my $time_del = $time_end - $time_start;
+ print OUTPUT "Delete: $time_del s.\n";
+ close(OUTPUT);
+}
+
# inpF, outF
sub collectResults {
my $local_status;
'rsem-prepare-reference', there will be no tab after the
tau_value field.
-=item B<sample_name.model> and B<sample_name.theta>
-
-Output files used by RSEM internally for tasks like simulation,
-compute credibility intervals etc.
-
=item B<sample_name.bam, sample_name.sorted.bam and sample_name.sorted.bam.bai>
Only generated when --out-bam is specified.
genomic coordinates. Alignments of reads that have identical genomic
coordinates (i.e., alignments to different isoforms that share the
same genomic region) are collapsed into one alignment. The MAPQ field
-of each alignment is set to max(100, floor(-10 * log10(1.0 - w) +
+of each alignment is set to min(100, floor(-10 * log10(1.0 - w) +
0.5)), where w is the posterior probability of that alignment being
the true mapping of a read. In addition, RSEM pads a new tag
ZW:f:value, where value is a single precision floating number
'sample_name.sorted.bam' and 'sample_name.sorted.bam.bai' are the
sorted BAM file and indices generated by samtools (included in RSEM package).
+=item B<sample_name.stat>
+
+This is a folder instead of a file. All model related statistics are stored in this folder. Use 'rsem-plot-model' can generate plots using this folder.
=back
mmliver_paired_end_quals
=cut
+
+# LocalWords: usr