X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=rsem-calculate-expression;h=2a09c1570ce7cd45a560b7587568ff106766fd75;hp=00f26cee381fdf03f6028748bda1caa1aa9190ae;hb=9210a5fece7ec2854eb834d5b2dcbe2d12fbebf1;hpb=757263aa0ff5367f826468db30d983282e51e7f0 diff --git a/rsem-calculate-expression b/rsem-calculate-expression index 00f26ce..2a09c15 100755 --- a/rsem-calculate-expression +++ b/rsem-calculate-expression @@ -49,6 +49,7 @@ my $genBamF = 1; # default is generating transcript bam file my $genGenomeBamF = 0; my $sampling = 0; my $calcCI = 0; +my $var_opt = 0; # temporarily, only for internal use my $quiet = 0; my $help = 0; @@ -86,8 +87,10 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files, "estimate-rspd" => \$estRSPD, "num-rspd-bins=i" => \$B, "p|num-threads=i" => \$nThreads, + "no-bam-output" => sub { $genBamF = 0; }, "output-genome-bam" => \$genGenomeBamF, "sampling-for-bam" => \$sampling, + "var" => \$var_opt, "calc-ci" => \$calcCI, "ci-memory=i" => \$NMB, "time" => \$mTime, @@ -116,7 +119,8 @@ pod2usage(-msg => "Min fragment length should be smaller or equal to max fragmen pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1); pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1); pod2usage(-msg => "Seed length should be at least 5!\n", -exitval => 2, -verbose => 2) if ($L < 5); -pod2usage(-msg => "--sampling-for-bam cannot be specified if --out-bam is not specified!\n", -exitval => 2, -verbose => 2) if ($sampling && !$genBamF); +pod2usage(-msg => "--sampling-for-bam cannot be specified if --no-bam-output is specified!\n", -exitval => 2, -verbose => 2) if ($sampling && !$genBamF); +pod2usage(-msg => "--output-genome-bam cannot be specified if --no-bam-output is specified!\n", -exitval => 2, -verbose => 2) if ($genGenomeBamF && !$genBamF); if ($L < 25) { print "Warning: the seed length set is less than 25! This is only allowed if the references are not added poly(A) tails.\n"; } @@ -171,7 +175,7 @@ if (!(-d $stat_dir) && !mkdir($stat_dir)) { print "Fail to create folder $stat_d $imdName = "$temp_dir/$sampleToken"; -if (!$is_sam && !$is_bam && $phred33 + $phred64 + $solexa == 0) { $phred33 = 1; } +if (!$is_sam && !$is_bam && !$no_qual && ($phred33 + $phred64 + $solexa == 0)) { $phred33 = 1; } my ($mate_minL, $mate_maxL) = (1, $maxL); @@ -182,13 +186,12 @@ my $command = ""; if (!$is_sam && !$is_bam) { $command = $bowtie_path."bowtie"; - if ($read_type == 0 || $read_type == 2) { $command .= " -f"; } + if ($no_qual) { $command .= " -f"; } else { $command .= " -q"; } if ($phred33) { $command .= " --phred33-quals"; } elsif ($phred64) { $command .= " --phred64-quals"; } elsif ($solexa) { $command .= " --solexa-quals"; } - else { print "Oh, no!!!"; exit(2); } $command .= " -n $C -e $E -l $L"; if ($read_type == 2 || $read_type == 3) { $command .= " -I $minL -X $maxL"; } @@ -290,12 +293,15 @@ if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; } if ($mTime) { $time_start = time(); } -if ($calcCI) { +if ($calcCI || $var_opt) { $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $NCV $SAMPLEGAP"; $command .= " -p $nThreads"; + if ($var_opt) { $command .= " --var"; } if ($quiet) { $command .= " -q"; } &runCommand($command); +} +if ($calcCI) { system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1"); system("mv $sampleName.genes.results $imdName.genes.results.bak1"); &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level @@ -463,6 +469,10 @@ RSEM reads header information from input by default. If this option is on, heade Number of threads to use. Both Bowtie and expression estimation will use this many threads. (Default: 1) +=item B<--no-bam-output> + +Do not output any BAM file. (Default: off) + =item B<--output-genome-bam> Generate a BAM file, 'sample_name.genome.bam', with alignments mapped to genomic coordinates and annotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM package) to sort and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai' will be generated. (Default: off) @@ -621,6 +631,8 @@ provided, 'gene_id' and 'transcript_id' are the same. =item B +Only generated when --no-bam-output is not specified. + 'sample_name.transcript.bam' is a BAM-formatted file of read alignments in transcript coordinates. The MAPQ field of each alignment is set to min(100, floor(-10 * log10(1.0 - w) + 0.5)), where w is the @@ -635,7 +647,7 @@ indices generated by samtools (included in RSEM package). =item B -Only generated when --output-genome-bam is specified. +Only generated when --no-bam-output is not specified and --output-genome-bam is specified. 'sample_name.genome.bam' is a BAM-formatted file of read alignments in genomic coordinates. Alignments of reads that have identical genomic