my $NMB = 1024; # default
-my $status;
+my $status = 0;
my $read_type = 1; # default, single end with qual
my $strand_specific = 0;
+my $mTime = 0;
+my ($time_start, $time_end, $time_alignment, $time_rsem, $time_ci) = (0, 0, 0, 0, 0);
+
GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
"no-qualities" => \$no_qual,
"paired-end" => \$paired_end,
"out-bam" => \$genBamF,
"calc-ci" => \$calcCI,
"ci-memory=i" => \$NMB,
+ "time" => \$mTime,
"q|quiet" => \$quiet,
"h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
my $mate2_list = "";
my $inpF = "";
-my ($refName, $taskName, $tmp_dir, $imdName) = ();
+my ($refName, $sampleName, $sampleToken, $temp_dir, $stat_dir, $imdName) = ();
my $gap = 32;
if ($paired_end) {
if ($is_sam || $is_bam) { $inpF = $ARGV[0]; }
else {$mate1_list = $ARGV[0]; }
$refName = $ARGV[1];
- $taskName = $ARGV[2];
+ $sampleName = $ARGV[2];
}
else {
$mate1_list = $ARGV[0];
$mate2_list = $ARGV[1];
$refName = $ARGV[2];
- $taskName = $ARGV[3];
+ $sampleName = $ARGV[3];
}
-$tmp_dir = $taskName.".temp";
-my $pos = rindex($taskName, '/');
-if ($pos < 0) {
- $imdName = "$tmp_dir/$taskName";
-}
-else {
- $imdName = $tmp_dir."/".substr($taskName, $pos + 1);
-}
+my $pos = rindex($sampleName, '/');
+if ($pos < 0) { $sampleToken = $sampleName; }
+else { $sampleToken = substr($sampleName, $pos + 1); }
+
+$temp_dir = "$sampleName.temp";
+$stat_dir = "$sampleName.stat";
+
+if (!(-d $temp_dir) && !mkdir($temp_dir)) { print "Fail to create folder $temp_dir.\n"; exit(-1); }
+if (!(-d $stat_dir) && !mkdir($stat_dir)) { print "Fail to create folder $stat_dir.\n"; exit(-1); }
+
+$imdName = "$temp_dir/$sampleToken";
if (!$is_sam && !$is_bam && $phred33 + $phred64 + $solexa == 0) { $phred33 = 1; }
my ($mate_minL, $mate_maxL) = (1, $maxL);
-if (!(-d $tmp_dir) && !mkdir($tmp_dir)) { print "Fail to create the directory.\n"; exit(-1); }
-
if ($bowtie_path ne "") { $bowtie_path .= "/"; }
my ($fn, $dir, $suf) = fileparse($0);
if ($read_type == 2 || $read_type == 3) { $command .= " -I $minL -X $maxL"; }
if ($strand_specific || $probF == 1.0) { $command .= " --norc"; }
- elsif ($probF = 0.0) { $command .= " --nofw"; }
+ elsif ($probF == 0.0) { $command .= " --nofw"; }
$command .= " -p $nThreads -a -m $maxHits -S";
if ($quiet) { $command .= " --quiet"; }
$command .= " | gzip > $imdName.sam.gz";
print "$command\n";
+
+ if ($mTime) { $time_start = time(); }
+
$status = system($command);
+
+ if ($mTime) { $time_end = time(); $time_alignment = $time_end - $time_start; }
+
if ($status != 0) {
print "bowtie failed! Please check if you provide correct parameters/options for the pipeline!\n";
exit(-1);
$is_sam = 1; # output of bowtie is a sam file
}
-$command = $dir."rsem-parse-alignments $refName $imdName";
+if ($mTime) { $time_start = time(); }
+
+$command = $dir."rsem-parse-alignments $refName $sampleName $sampleToken";
my $samInpType;
if ($is_sam) { $samInpType = "s"; }
}
print "\n";
-$status = open(OUTPUT, ">$imdName.mparams");
-if ($status == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
+my $doesOpen = open(OUTPUT, ">$imdName.mparams");
+if ($doesOpen == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
print OUTPUT "$minL $maxL\n";
print OUTPUT "$probF\n";
print OUTPUT "$estRSPD\n";
print OUTPUT "$L\n";
close(OUTPUT);
-$command = $dir."rsem-run-em $refName $read_type $imdName $taskName -p $nThreads";
+$command = $dir."rsem-run-em $refName $read_type $sampleName $sampleToken -p $nThreads";
if ($genBamF) {
$command .= " -b $samInpType $inpF";
if ($fn_list ne "") { $command .= " 1 $fn_list"; }
print "\n";
if ($genBamF) {
- $command = $dir."sam/samtools sort $taskName.bam $taskName.sorted";
+ $command = $dir."sam/samtools sort $sampleName.bam $sampleName.sorted";
print "$command\n";
$status = system($command);
if ($status != 0) {
exit(-1);
}
print "\n";
- $command = $dir."sam/samtools index $taskName.sorted.bam";
+ $command = $dir."sam/samtools index $sampleName.sorted.bam";
print "$command\n";
$status = system($command);
if ($status != 0) {
print "\n";
}
-&collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
-&collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
+&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+
+if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
if ($calcCI) {
- $command = $dir."rsem-run-gibbs $refName $taskName $imdName $BURNIN $CHAINLEN $SAMPLEGAP";
+ $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $CHAINLEN $SAMPLEGAP";
+# $command .= " -p $nThreads";
if ($quiet) { $command .= " -q"; }
print "$command\n";
$status = system($command);
}
print "\n";
- system("mv $taskName.isoforms.results $imdName.isoforms.results.bak1");
- system("mv $taskName.genes.results $imdName.genes.results.bak1");
- &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
- &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
+ system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
+ system("mv $sampleName.genes.results $imdName.genes.results.bak1");
+ &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+ &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
- $command = $dir."rsem-calculate-credibility-intervals $refName $taskName $imdName $CONFIDENCE $NSPC $NMB";
+ $command = $dir."rsem-calculate-credibility-intervals $refName $sampleName $sampleToken $CONFIDENCE $NSPC $NMB";
if ($quiet) { $command .= " -q"; }
print "$command\n";
$status = system($command);
}
print "\n";
- system("mv $taskName.isoforms.results $imdName.isoforms.results.bak2");
- system("mv $taskName.genes.results $imdName.genes.results.bak2");
- &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
- &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
+ system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2");
+ system("mv $sampleName.genes.results $imdName.genes.results.bak2");
+ &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+ &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
}
+if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
+
if (!$keep_intermediate_files) {
- $status = system ("rm -rf $tmp_dir");
+ $status = system("rm -rf $temp_dir");
if ($status != 0) {
print "Fail to delete the temporary folder!\n";
exit(-1);
}
}
+if ($mTime) { $time_end = time(); }
+
+if ($mTime) {
+ open(OUTPUT, ">$sampleName.time");
+ print OUTPUT "Alignment: $time_alignment s.\n";
+ print OUTPUT "RSEM: $time_rsem s.\n";
+ print OUTPUT "CI: $time_ci s.\n";
+ my $time_del = $time_end - $time_start;
+ print OUTPUT "Delete: $time_del s.\n";
+ close(OUTPUT);
+}
+
# inpF, outF
sub collectResults {
my $local_status;
=item B<input>
-SAM/BAM formatted input file. If "-" is specified for the filename, SAM/BAM input is instead assumed to come from standard input. SAM/BAM format used is SAM Spec v1.2. RSEM requires all alignments of the same read group together. For paired-end reads, RSEM also requires the two mates of any alignment be adjacent. See Description section for how to make input file obey RSEM's requirements.
+SAM/BAM formatted input file. If "-" is specified for the filename, SAM/BAM input is instead assumed to come from standard input. RSEM requires all alignments of the same read group together. For paired-end reads, RSEM also requires the two mates of any alignment be adjacent. See Description section for how to make input file obey RSEM's requirements.
=item B<reference_name>
sort -k 1,1 -s input.sam > input.sorted.sam
-The SAM/BAM format RSEM uses is v1.2.
+The SAM/BAM format RSEM uses is v1.3. However, it is compatible with old SAM/BAM format.
The user must run 'rsem-prepare-reference' with the appropriate reference before using this program.
'rsem-prepare-reference', there will be no tab after the
tau_value field.
-=item B<sample_name.model> and B<sample_name.theta>
-
-Output files used by RSEM internally for tasks like simulation,
-compute credibility intervals etc.
-
=item B<sample_name.bam, sample_name.sorted.bam and sample_name.sorted.bam.bai>
Only generated when --out-bam is specified.
'sample_name.sorted.bam' and 'sample_name.sorted.bam.bai' are the
sorted BAM file and indices generated by samtools (included in RSEM package).
+=item B<sample_name.stat>
+
+This is a folder instead of a file. All model related statistics are stored in this folder. Use 'rsem-plot-model' can generate plots using this folder.
=back
mmliver_paired_end_quals
=cut
+
+# LocalWords: usr