Added some instructions on how to visualize transcript coordinate BAM/WIG files using IGV

[rsem.git] / rsem-calculate-expression
diff --git a/rsem-calculate-expression b/rsem-calculate-expression

index 00f26cee381fdf03f6028748bda1caa1aa9190ae..2a09c1570ce7cd45a560b7587568ff106766fd75 100755 (executable)
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -49,6 +49,7 @@ my $genBamF = 1;  # default is generating transcript bam file
  my $genGenomeBamF = 0;
  my $sampling = 0;
  my $calcCI = 0;
+my $var_opt = 0; # temporarily, only for internal use
  my $quiet = 0;
  my $help = 0;
  
@@ -86,8 +87,10 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
            "estimate-rspd" => \$estRSPD,
            "num-rspd-bins=i" => \$B,
            "p|num-threads=i" => \$nThreads,
+          "no-bam-output" => sub { $genBamF = 0; },
            "output-genome-bam" => \$genGenomeBamF,
            "sampling-for-bam" => \$sampling,
+          "var" => \$var_opt,
            "calc-ci" => \$calcCI,
            "ci-memory=i" => \$NMB,
            "time" => \$mTime,
@@ -116,7 +119,8 @@ pod2usage(-msg => "Min fragment length should be smaller or equal to max fragmen
  pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
  pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1);
  pod2usage(-msg => "Seed length should be at least 5!\n", -exitval => 2, -verbose => 2) if ($L < 5);
-pod2usage(-msg => "--sampling-for-bam cannot be specified if --out-bam is not specified!\n", -exitval => 2, -verbose => 2) if ($sampling && !$genBamF);
+pod2usage(-msg => "--sampling-for-bam cannot be specified if --no-bam-output is specified!\n", -exitval => 2, -verbose => 2) if ($sampling && !$genBamF);
+pod2usage(-msg => "--output-genome-bam cannot be specified if --no-bam-output is specified!\n", -exitval => 2, -verbose => 2) if ($genGenomeBamF && !$genBamF);
  
  if ($L < 25) { print "Warning: the seed length set is less than 25! This is only allowed if the references are not added poly(A) tails.\n"; }
  
@@ -171,7 +175,7 @@ if (!(-d $stat_dir) && !mkdir($stat_dir)) { print "Fail to create folder $stat_d
  
  $imdName = "$temp_dir/$sampleToken";
  
-if (!$is_sam && !$is_bam && $phred33 + $phred64 + $solexa == 0) { $phred33 = 1; }
+if (!$is_sam && !$is_bam && !$no_qual && ($phred33 + $phred64 + $solexa == 0)) { $phred33 = 1; }
  
  my ($mate_minL, $mate_maxL) = (1, $maxL);
  
@@ -182,13 +186,12 @@ my $command = "";
  
  if (!$is_sam && !$is_bam) {
      $command = $bowtie_path."bowtie";
-    if ($read_type == 0 || $read_type == 2) { $command .= " -f"; }
+    if ($no_qual) { $command .= " -f"; }
      else { $command .= " -q"; }
      
      if ($phred33) { $command .= " --phred33-quals"; }
      elsif ($phred64) { $command .= " --phred64-quals"; }
      elsif ($solexa) { $command .= " --solexa-quals"; }
-    else { print "Oh, no!!!"; exit(2); }
      
      $command .= " -n $C -e $E -l $L";
      if ($read_type == 2 || $read_type == 3) { $command .= " -I $minL -X $maxL"; }
@@ -290,12 +293,15 @@ if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; }
  
  if ($mTime) { $time_start = time(); }
  
-if ($calcCI) {
+if ($calcCI || $var_opt) {
      $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $NCV $SAMPLEGAP";
      $command .= " -p $nThreads";
+    if ($var_opt) { $command .= " --var"; }
      if ($quiet) { $command .= " -q"; }
      &runCommand($command);
+}
  
+if ($calcCI) {
      system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
      system("mv $sampleName.genes.results $imdName.genes.results.bak1");
      &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
@@ -463,6 +469,10 @@ RSEM reads header information from input by default. If this option is on, heade
  
  Number of threads to use. Both Bowtie and expression estimation will use this many threads. (Default: 1)
  
+=item B<--no-bam-output>
+
+Do not output any BAM file. (Default: off)
+
  =item B<--output-genome-bam>
  
  Generate a BAM file, 'sample_name.genome.bam', with alignments mapped to genomic coordinates and annotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM package) to sort and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai' will be generated. (Default: off)
@@ -621,6 +631,8 @@ provided, 'gene_id' and 'transcript_id' are the same.
  
  =item B<sample_name.transcript.bam, sample_name.transcript.sorted.bam and sample_name.transcript.sorted.bam.bai>
  
+Only generated when --no-bam-output is not specified.
+
  'sample_name.transcript.bam' is a BAM-formatted file of read
  alignments in transcript coordinates. The MAPQ field of each alignment
  is set to min(100, floor(-10 * log10(1.0 - w) + 0.5)), where w is the
@@ -635,7 +647,7 @@ indices generated by samtools (included in RSEM package).
  
  =item B<sample_name.genome.bam, sample_name.genome.sorted.bam and sample_name.genome.sorted.bam.bai>
  
-Only generated when --output-genome-bam is specified.
+Only generated when --no-bam-output is not specified and --output-genome-bam is specified.
  
  'sample_name.genome.bam' is a BAM-formatted file of read alignments in
  genomic coordinates. Alignments of reads that have identical genomic