#const
my $BURNIN = 200;
-my $NSAMPLES = 1000;
+my $CHAINLEN = 1000;
my $SAMPLEGAP = 1;
my $CONFIDENCE = 0.95;
my $NSPC = 50;
my $NMB = 1024; # default
-my $status;
+my $status = 0;
my $read_type = 1; # default, single end with qual
my $E = 99999999;
my $L = 25;
my $maxHits = 200;
+my $chunkMbs = 0; # 0 = use bowtie default
my $phred33 = 0;
my $phred64 = 0;
my $solexa = 0;
my $strand_specific = 0;
+my $mTime = 0;
+my ($time_start, $time_end, $time_alignment, $time_rsem, $time_ci) = (0, 0, 0, 0, 0);
+
GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
"no-qualities" => \$no_qual,
"paired-end" => \$paired_end,
"bowtie-n=i" => \$C,
"bowtie-e=i" => \$E,
"bowtie-m=i" => \$maxHits,
+ "bowtie-chunkmbs=i" => \$chunkMbs,
"phred33-quals" => \$phred33,
"phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64,
"solexa-quals" => \$solexa,
"out-bam" => \$genBamF,
"calc-ci" => \$calcCI,
"ci-memory=i" => \$NMB,
+ "time" => \$mTime,
"q|quiet" => \$quiet,
"h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL);
pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1);
+pod2usage(-msg => "Seed length should be at least 25!\n", -exitval => 2, -verbose => 2) if ($L < 25);
if ($strand_specific) { $probF = 1.0; }
$command .= " -p $nThreads -a -m $maxHits -S";
if ($quiet) { $command .= " --quiet"; }
+ $command .= " --chunkmbs $chunkMbs" if $chunkMbs > 0;
+
$command .= " $refName";
if ($read_type == 0 || $read_type == 1) {
$command .= " $mate1_list";
$command .= " | gzip > $imdName.sam.gz";
print "$command\n";
+
+ if ($mTime) { $time_start = time(); }
+
$status = system($command);
+
+ if ($mTime) { $time_end = time(); $time_alignment = $time_end - $time_start; }
+
if ($status != 0) {
print "bowtie failed! Please check if you provide correct parameters/options for the pipeline!\n";
exit(-1);
$is_sam = 1; # output of bowtie is a sam file
}
+if ($mTime) { $time_start = time(); }
+
$command = $dir."rsem-parse-alignments $refName $sampleName $sampleToken";
my $samInpType;
}
print "\n";
-$status = open(OUTPUT, ">$imdName.mparams");
-if ($status == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
+my $doesOpen = open(OUTPUT, ">$imdName.mparams");
+if ($doesOpen == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
print OUTPUT "$minL $maxL\n";
print OUTPUT "$probF\n";
print OUTPUT "$estRSPD\n";
&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
+if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
+
if ($calcCI) {
- $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $NSAMPLES $SAMPLEGAP";
+ $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $CHAINLEN $SAMPLEGAP";
# $command .= " -p $nThreads";
if ($quiet) { $command .= " -q"; }
print "$command\n";
&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level
}
+if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; }
+
+if ($mTime) { $time_start = time(); }
+
if (!$keep_intermediate_files) {
- $status = system ("rm -rf $temp_dir");
+ $status = system("rm -rf $temp_dir");
if ($status != 0) {
print "Fail to delete the temporary folder!\n";
exit(-1);
}
}
+if ($mTime) { $time_end = time(); }
+
+if ($mTime) {
+ open(OUTPUT, ">$sampleName.time");
+ print OUTPUT "Alignment: $time_alignment s.\n";
+ print OUTPUT "RSEM: $time_rsem s.\n";
+ print OUTPUT "CI: $time_ci s.\n";
+ my $time_del = $time_end - $time_start;
+ print OUTPUT "Delete: $time_del s.\n";
+ close(OUTPUT);
+}
+
# inpF, outF
sub collectResults {
my $local_status;
=item B<--seed-length> <int>
-Seed length used by the read aligner. Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. If RSEM runs Bowtie, it uses this value for Bowtie's seed length parameter. (Default: 25)
+Seed length used by the read aligner. Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. If RSEM runs Bowtie, it uses this value for Bowtie's seed length parameter. The minimum value is 25. Any read with its or at least one of its mates' (for paired-end reads) length less than 25 will be ignored. (Default: 25)
=item B<--tag> <string>
(Bowtie parameter) suppress all alignments for a read if > <int> valid alignments exist. (Default: 200)
+=item B<--bowtie-chunkmbs> <int>
+
+(Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use bowtie's default)
+
=item B<--phred33-quals>
Input quality scores are encoded as Phred+33. (Default: on)
genomic coordinates. Alignments of reads that have identical genomic
coordinates (i.e., alignments to different isoforms that share the
same genomic region) are collapsed into one alignment. The MAPQ field
-of each alignment is set to max(100, floor(-10 * log10(1.0 - w) +
+of each alignment is set to min(100, floor(-10 * log10(1.0 - w) +
0.5)), where w is the posterior probability of that alignment being
the true mapping of a read. In addition, RSEM pads a new tag
ZW:f:value, where value is a single precision floating number
mmliver_paired_end_quals
=cut
+