rsem-calculate-expression

   1 #!/usr/bin/perl
   2
   3 use Getopt::Long;
   4 use Pod::Usage;
   5 use File::Basename;
   6 use Switch;
   7 use strict;
   8
   9 #const
  10 my $BURNIN = 200;
  11 my $CHAINLEN = 1000;
  12 my $SAMPLEGAP = 1;
  13 my $CONFIDENCE = 0.95;
  14 my $NSPC = 50;
  15
  16 my $NMB = 1024; # default
  17
  18 my $status;
  19
  20 my $read_type = 1; # default, single end with qual
  21
  22 my $bowtie_path = "";
  23 my $C = 2;
  24 my $E = 99999999;
  25 my $L = 25;
  26 my $maxHits = 200;
  27 my $phred33 = 0;
  28 my $phred64 = 0;
  29 my $solexa = 0;
  30
  31 my $is_sam = 0;
  32 my $is_bam = 0;
  33 my $fn_list = "";
  34 my $tagName = "XM";
  35
  36 my $probF = 0.5;
  37
  38 my $minL = 1;
  39 my $maxL = 1000;
  40 my $mean = -1;
  41 my $sd = 0;
  42
  43 my $estRSPD = 0;
  44 my $B = 20;
  45
  46 my $nThreads = 1;
  47 my $genBamF = 0;
  48 my $calcCI = 0;
  49 my $quiet = 0;
  50 my $help = 0;
  51
  52 my $paired_end = 0;
  53 my $no_qual = 0;
  54 my $keep_intermediate_files = 0;
  55
  56 my $strand_specific = 0;
  57
  58 GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
  59            "no-qualities" => \$no_qual,
  60            "paired-end" => \$paired_end,
  61            "strand-specific" => \$strand_specific,
  62            "sam" => \$is_sam,
  63            "bam" => \$is_bam,
  64            "sam-header-info=s" => \$fn_list,
  65            "tag=s" => \$tagName,
  66            "seed-length=i" => \$L,
  67            "bowtie-path=s" => \$bowtie_path,
  68            "bowtie-n=i" => \$C,
  69            "bowtie-e=i" => \$E,
  70            "bowtie-m=i" => \$maxHits,
  71            "phred33-quals" => \$phred33,
  72            "phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64,
  73            "solexa-quals" => \$solexa,
  74            "forward-prob=f" => \$probF,
  75            "fragment-length-min=i" => \$minL,
  76            "fragment-length-max=i" => \$maxL,
  77            "fragment-length-mean=f" => \$mean,
  78            "fragment-length-sd=f" => \$sd,
  79            "estimate-rspd" => \$estRSPD,
  80            "num-rspd-bins=i" => \$B,
  81            "p|num-threads=i" => \$nThreads,
  82            "out-bam" => \$genBamF,
  83            "calc-ci" => \$calcCI,
  84            "ci-memory=i" => \$NMB,
  85            "q|quiet" => \$quiet,
  86            "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
  87
  88 pod2usage(-verbose => 2) if ($help == 1);
  89
  90
  91 #check parameters and options
  92
  93 if ($is_sam || $is_bam) {
  94     pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 3);
  95     pod2usage(-msg => "--sam and --bam cannot be active at the same time!", -exitval => 2, -verbose => 2) if ($is_sam == 1&& $is_bam == 1);
  96     pod2usage(-msg => "--bowtie-path, --bowtie-n, --bowtie-e, --bowtie-m, --phred33-quals, --phred64-quals or --solexa-quals cannot be set if input is SAM/BAM format!", -exitval => 2, -verbose => 2) if ($bowtie_path ne "" || $C != 2 || $E != 99999999 || $maxHits != 200 || $phred33 || $phred64 || $solexa);
  97 }
  98 else {
  99     pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (!$paired_end && scalar(@ARGV) != 3 || $paired_end && scalar(@ARGV) != 4);
 100     pod2usage(-msg => "Only one of --phred33-quals --phred64-quals/--solexa1.3-quals --solexa-suqls can be active!", -exitval => 2, -verbose => 2) if ($phred33 + $phred64 + $solexa > 1);
 101     podwusage(-msg => "--sam , --bam or --sam-header-info cannot be set if use bowtie aligner to produce alignments!", -exitval => 2, -verbose => 2) if ($is_sam || $is_bam || $fn_list ne "");
 102 }
 103
 104 pod2usage(-msg => "Forward probability should be in [0, 1]!", -exitval => 2, -verbose => 2) if ($probF < 0 || $probF > 1);
 105 pod2usage(-msg => "Min fragment length should be at least 1!", -exitval => 2, -verbose => 2) if ($minL < 1);
 106 pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL);
 107 pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
 108 pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1);
 109
 110 if ($strand_specific) { $probF = 1.0; }
 111
 112 my $mate1_list = "";
 113 my $mate2_list = "";
 114 my $inpF = "";
 115
 116 my ($refName, $taskName, $tmp_dir, $imdName) = ();
 117 my $gap = 32;
 118
 119 if ($paired_end) {
 120     if ($no_qual) { $read_type = 2; }
 121     else { $read_type = 3; }
 122 }
 123 else {
 124     if ($no_qual) { $read_type = 0; }
 125     else { $read_type = 1; }
 126 }
 127
 128 if (scalar(@ARGV) == 3) {
 129     if ($is_sam || $is_bam) { $inpF = $ARGV[0]; }
 130     else {$mate1_list = $ARGV[0]; }
 131     $refName = $ARGV[1];
 132     $taskName = $ARGV[2];
 133 }
 134 else {
 135     $mate1_list = $ARGV[0];
 136     $mate2_list = $ARGV[1];
 137     $refName = $ARGV[2];
 138     $taskName = $ARGV[3];
 139 }
 140
 141 $tmp_dir = $taskName.".temp";
 142 my $pos = rindex($taskName, '/');
 143 if ($pos < 0) {
 144     $imdName = "$tmp_dir/$taskName";
 145 }
 146 else {
 147     $imdName = $tmp_dir."/".substr($taskName, $pos + 1);
 148 }
 149
 150 if (!$is_sam && !$is_bam && $phred33 + $phred64 + $solexa == 0) { $phred33 = 1; }
 151
 152 my ($mate_minL, $mate_maxL) = (1, $maxL);
 153
 154 if (!(-d $tmp_dir) && !mkdir($tmp_dir)) { print "Fail to create the directory.\n"; exit(-1); }
 155
 156 if ($bowtie_path ne "") { $bowtie_path .= "/"; }
 157
 158 my ($fn, $dir, $suf) = fileparse($0);
 159 my $command = "";
 160
 161 if (!$is_sam && !$is_bam) {
 162     $command = $bowtie_path."bowtie";
 163     if ($read_type == 0 || $read_type == 2) { $command .= " -f"; }
 164     else { $command .= " -q"; }
 165
 166     if ($phred33) { $command .= " --phred33-quals"; }
 167     elsif ($phred64) { $command .= " --phred64-quals"; }
 168     elsif ($solexa) { $command .= " --solexa-quals"; }
 169     else { print "Oh, no!!!"; exit(2); }
 170
 171     $command .= " -n $C -e $E -l $L";
 172
 173     if ($read_type == 2 || $read_type == 3) { $command .= " -I $minL -X $maxL"; }
 174
 175     if ($strand_specific || $probF == 1.0) { $command .= " --norc"; }
 176     elsif ($probF = 0.0) { $command .= " --nofw"; }
 177
 178     $command .= " -p $nThreads -a -m $maxHits -S";
 179     if ($quiet) { $command .= " --quiet"; }
 180
 181     $command .= " $refName";
 182     if ($read_type == 0 || $read_type == 1) {
 183         $command .= " $mate1_list";
 184     }
 185     else {
 186         $command .= " -1 $mate1_list -2 $mate2_list";
 187     }
 188
 189     $command .= " | gzip > $imdName.sam.gz";
 190     print "$command\n";
 191     $status = system($command);
 192     if ($status != 0) {
 193         print "bowtie failed! Please check if you provide correct parameters/options for the pipeline!\n";
 194         exit(-1);
 195     }
 196     print "\n";
 197
 198     $inpF = "$imdName.sam.gz";
 199     $is_sam = 1; # output of bowtie is a sam file
 200 }
 201
 202 $command = $dir."rsem-parse-alignments $refName $imdName";
 203
 204 my $samInpType;
 205 if ($is_sam) { $samInpType = "s"; }
 206 elsif ($is_bam) { $samInpType = "b"; }
 207
 208 $command .= " $samInpType $inpF -t $read_type";
 209 if ($fn_list ne "") { $command .= " -l $fn_list"; }
 210 if ($tagName ne "") { $command .= " -tag $tagName"; }
 211 if ($quiet) { $command .= " -q"; }
 212
 213 print "$command\n";
 214 $status = system($command);
 215 if ($status != 0) {
 216     print "rsem-parse-alignments failed! Please check if you provide correct parameters/options for the pipeline!\n";
 217     exit(-1);
 218 }
 219 print "\n";
 220
 221 $command = $dir."rsem-build-read-index $gap";
 222 switch($read_type) {
 223     case 0  { $command .= " 0 $quiet $imdName\_alignable.fa"; }
 224     case 1  { $command .= " 1 $quiet $imdName\_alignable.fq"; }
 225     case 2  { $command .= " 0 $quiet $imdName\_alignable_1.fa $imdName\_alignable_2.fa"; }
 226     case 3  { $command .= " 1 $quiet $imdName\_alignable_1.fq $imdName\_alignable_2.fq"; }
 227 }
 228 print "$command\n";
 229 $status = system($command);
 230 if ($status != 0) {
 231     print "rsem-build-read-index failed! Please check if you provide correct parameters/options for the pipeline!\n";
 232     exit(-1);
 233 }
 234 print "\n";
 235
 236 $status = open(OUTPUT, ">$imdName.mparams");
 237 if ($status == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); }
 238 print OUTPUT "$minL $maxL\n";
 239 print OUTPUT "$probF\n";
 240 print OUTPUT "$estRSPD\n";
 241 print OUTPUT "$B\n";
 242 print OUTPUT "$mate_minL $mate_maxL\n";
 243 print OUTPUT "$mean $sd\n";
 244 print OUTPUT "$L\n";
 245 close(OUTPUT);
 246
 247 $command = $dir."rsem-run-em $refName $read_type $imdName $taskName -p $nThreads";
 248 if ($genBamF) {
 249     $command .= " -b $samInpType $inpF";
 250     if ($fn_list ne "") { $command .= " 1 $fn_list"; }
 251     else { $command .= " 0"; }
 252 }
 253 if ($calcCI) { $command .= " --gibbs-out"; }
 254 if ($quiet) { $command .= " -q"; }
 255
 256 print "$command\n";
 257 $status = system($command);
 258 if ($status != 0) {
 259     print "rsem-run-em failed! Please check if you provide correct parameters/options for the pipeline!\n";
 260     exit(-1);
 261 }
 262 print "\n";
 263
 264 if ($genBamF) {
 265     $command = $dir."sam/samtools sort $taskName.bam $taskName.sorted";
 266     print "$command\n";
 267     $status = system($command);
 268     if ($status != 0) {
 269         print "sam/samtools sort failed! Please check if you provide correct parameters/options for the pipeline!\n";
 270         exit(-1);
 271     }
 272     print "\n";
 273     $command = $dir."sam/samtools index $taskName.sorted.bam";
 274     print "$command\n";
 275     $status = system($command);
 276     if ($status != 0) {
 277         print "sam/samtools index failed! Please check if you provide correct parameters/options for the pipeline!\n";
 278         exit(-1);
 279     }
 280     print "\n";
 281 }
 282
 283 &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
 284 &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
 285
 286 if ($calcCI) {
 287     $command = $dir."rsem-run-gibbs $refName $taskName $imdName $BURNIN $CHAINLEN $SAMPLEGAP";
 288     if ($quiet) { $command .= " -q"; }
 289     print "$command\n";
 290     $status = system($command);
 291     if ($status != 0) {
 292         print "rsem-run-gibbs failed! Please check if you provide correct parameters/options for the pipeline!\n";
 293         exit(-1);
 294     }
 295     print "\n";
 296
 297     system("mv $taskName.isoforms.results $imdName.isoforms.results.bak1");
 298     system("mv $taskName.genes.results $imdName.genes.results.bak1");
 299     &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
 300     &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
 301
 302     $command = $dir."rsem-calculate-credibility-intervals $refName $taskName $imdName $CONFIDENCE $NSPC $NMB";
 303     if ($quiet) { $command .= " -q"; }
 304     print "$command\n";
 305     $status = system($command);
 306     if ($status != 0) {
 307         print "rsem-calculate-credibility-intervals failed! Please check if you provide correct parameters/options for the pipeline!\n";
 308         exit(-1);
 309     }
 310     print "\n";
 311
 312     system("mv $taskName.isoforms.results $imdName.isoforms.results.bak2");
 313     system("mv $taskName.genes.results $imdName.genes.results.bak2");
 314     &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level
 315     &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level
 316 }
 317
 318 if (!$keep_intermediate_files) {
 319     $status = system ("rm -rf $tmp_dir");
 320     if ($status != 0) {
 321         print "Fail to delete the temporary folder!\n";
 322         exit(-1);
 323     }
 324 }
 325
 326 # inpF, outF
 327 sub collectResults {
 328     my $local_status;
 329     my ($inpF, $outF);
 330     my (@results, @comment) = ();
 331     my $line;
 332     my $cnt;
 333
 334     $inpF = $_[0];
 335     $outF = $_[1];
 336
 337     $local_status = open(INPUT, $inpF);
 338     if ($local_status == 0) { print "Fail to open file $inpF!\n"; exit(-1); }
 339
 340     $cnt = 0;
 341     @results = ();
 342
 343     while ($line = <INPUT>) {
 344         ++$cnt;
 345         chomp($line);
 346         my @local_arr = split(/\t/, $line);
 347         if ($cnt == 3) { @comment = @local_arr; }
 348         else { push(@results, \@local_arr); }
 349     }
 350
 351     push(@results, \@comment);
 352     close(INPUT);
 353
 354     $local_status = open(OUTPUT, ">$outF");
 355     if ($local_status == 0) { print "Fail to create file $outF!\n"; exit(-1); }
 356
 357     my $n = scalar(@results);
 358     my $m = scalar(@{$results[0]});
 359     for (my $i = 0; $i < $m; $i++) {
 360         my @out_arr = ();
 361         for (my $j = 0; $j < $n; $j++) { push(@out_arr, $results[$j][$i]); }
 362         $" = "\t";
 363         print OUTPUT "@out_arr\n";
 364     }
 365     close(OUTPUT);
 366 }
 367
 368
 369 __END__
 370
 371 =head1 NAME
 372
 373 rsem-calculate-expression
 374
 375 =head1 SYNOPSIS
 376
 377 =over
 378
 379  rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
 380  rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name
 381  rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name
 382
 383 =back
 384
 385 =head1 ARGUMENTS
 386
 387 =over
 388
 389 =item B<upstream_read_files(s)>
 390
 391 Comma-separated list of files containing single-end reads or upstream reads for paired-end data.  By default, these files are assumed to be in FASTQ format.  If the --no-qualities option is specified, then FASTA format is expected.
 392
 393 =item B<downstream_read_file(s)>
 394
 395 Comma-separated list of files containing downstream reads which are paired with the upstream reads.  By default, these files are assumed to be in FASTQ format.  If the --no-qualities option is specified, then FASTA format is expected.
 396
 397 =item B<input>
 398
 399 SAM/BAM formatted input file.  If "-" is specified for the filename, SAM/BAM input is instead assumed to come from standard input. SAM/BAM format used is SAM Spec v1.2. RSEM requires all alignments of the same read group together. For paired-end reads, RSEM also requires the two mates of any alignment be adjacent. See Description section for how to make input file obey RSEM's requirements.
 400
 401 =item B<reference_name>
 402
 403 The name of the reference used.  The user must have run 'rsem-prepare-reference' with this reference_name before running this program.
 404
 405 =item B<sample_name>
 406
 407 The name of the sample analyzed. All output files are prefixed by this name (e.g., sample_name.genes.results)
 408
 409 =back
 410
 411 =head1 OPTIONS
 412
 413 =over
 414
 415 =item B<--paired-end>
 416
 417 Input reads are paired-end reads. (Default: off)
 418
 419 =item B<--no-qualities>
 420
 421 Input reads do not contain quality scores. (Default: off)
 422
 423 =item B<--strand-specific>
 424
 425 The RNA-Seq protocol used to generate the reads is strand specific, i.e., all (upstream) reads are derived from the forward strand.  This option is equivalent to --forward-prob=1.0.  With this option set, if RSEM runs the Bowtie aligner, the '--norc' Bowtie option will be used, which disables alignment to the reverse strand of transcripts.  (Default: off)
 426
 427 =item B<--sam>
 428
 429 Input file is in SAM format. (Default: off)
 430
 431 =item B<--bam>
 432
 433 Input file is in BAM format. (Default: off)
 434
 435 =item B<--sam-header-info> <file>
 436
 437 RSEM reads header information from input by default. If this option is on, header information is read from the specified file. For the format of the file, please see SAM official website. (Default: "")
 438
 439 =item B<-p/--num-threads> <int>
 440
 441 Number of threads to use. Both Bowtie and expression estimation will use this many threads. (Default: 1)
 442
 443 =item B<--out-bam>
 444
 445 Generate a BAM file, 'sample_name.bam', with alignments mapped to genomic coordinates and annotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM package) to sort and index the bam file. 'sample_name.sorted.bam' and 'sample_name.sorted.bam.bai' will be generated. (Default: off)
 446
 447 =item B<--calc-ci>
 448
 449 Calculate 95% credibility intervals and posterior mean estimates.  (Default: off)
 450
 451 =item B<--seed-length> <int>
 452
 453 Seed length used by the read aligner.  Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads.  If RSEM runs Bowtie, it uses this value for Bowtie's seed length parameter.  (Default: 25)
 454
 455 =item B<--tag> <string>
 456
 457 The name of the optional field used in the SAM input for identifying a read with too many valid alignments. The field should have the format <tagName>:i:<value>, where a <value> bigger than 0 indicates a read with too many alignments. (Default: "")
 458
 459 =item B<--bowtie-path> <path>
 460
 461 The path to the bowtie executables. (Default: the path to the bowtie executables is assumed to be in the user's PATH environment variable)
 462
 463 =item B<--bowtie-n> <int>
 464
 465 (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3, Default: 2)
 466
 467 =item B<--bowtie-e> <int>
 468
 469 (Bowtie parameter) max sum of mismatch quality scores across the alignment. (Default: 99999999)
 470
 471 =item B<--bowtie-m> <int>
 472
 473 (Bowtie parameter) suppress all alignments for a read if > <int> valid alignments exist. (Default: 200)
 474
 475 =item B<--phred33-quals>
 476
 477 Input quality scores are encoded as Phred+33. (Default: on)
 478
 479 =item B<--phred64-quals>
 480
 481 Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). (Default: off)
 482
 483 =item B<--solexa-quals>
 484
 485 Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). (Default: off)
 486
 487 =item B<--forward-prob> <double>
 488
 489 Probability of generating a read from the forward strand of a transcript. Set to 1 for a strand-specific protocol where all (upstream) reads are derived from the forward strand, 0 for a strand-specific protocol where all (upstream) read are derived from the reverse strand, or 0.5 for a non-strand-specific protocol. (Default: 0.5)
 490
 491 =item B<--fragment-length-min> <int>
 492
 493 Minimum read/insert length allowed. This is also the value for the bowtie -I option. (Default: 1)
 494
 495 =item B<--fragment-length-max> <int>
 496
 497 Maximum read/insert length allowed. This is also the value for the bowtie -X option. (Default: 1000)
 498
 499 =item B<--fragment-length-mean> <double>
 500
 501 (single-end data only) The mean of the fragment length distribution, which is assumed to be a Gaussian. (Default: -1, which disables use of the fragment length distribution)
 502
 503 =item B<--fragment-length-sd> <double>
 504
 505 (single-end data only) The standard deviation of the fragment length distribution, which is assumed to be a Gaussian.  (Default: 0, which assumes that all fragments are of the same length, given by the rounded value of B<--fragment-length-mean>)
 506
 507 =item B<--estimate-rspd>
 508
 509 Set this option if you want to estimate the read start position distribution (RSPD) from data. Otherwise, RSEM will use a uniform RSPD. (Default: off)
 510
 511 =item B<--num-rspd-bins> <int>
 512
 513 Number of bins in the RSPD. Only relevant when '--estimate-rspd' is specified.  Use of the default setting is recommended. (Default: 20)
 514
 515 =item B<--ci-memory> <int>
 516
 517 Amount of memory (in MB) RSEM is allowed to use for computing credibility intervals. (Default: 1024)
 518
 519 =item B<--keep-intermediate-files>
 520
 521 Keep temporary files generated by RSEM.  RSEM creates a temporary directory, 'sample_name.temp', into which it puts all intermediate output files. If this directory already exists, RSEM overwrites all files generated by previous RSEM runs inside of it. By default, after RSEM finishes, the temporary directory is deleted.  Set this option to prevent the deletion of this directory and the intermediate files inside of it. (Default: off)
 522
 523 =item B<-q/--quiet>
 524
 525 Suppress the output of logging information. (Default: off)
 526
 527 =item B<-h/--help>
 528
 529 Show help information.
 530
 531 =back
 532
 533 =head1 DESCRIPTION
 534
 535 In its default mode, this program aligns input reads against a reference transcriptome with Bowtie and calculates expression values using the alignments.  RSEM assumes the data are single-end reads with quality scores, unless the '--paired-end' or '--no-qualities' options are specified.  Users may use an alternative aligner by specifying one of the --sam and --bam options, and providing an alignment file in the specified format. However, users should make sure the alignment file satisfies the requirements mentioned in ARGUMENTS section.
 536
 537 One simple way to make the alignment file (e.g. input.sam) satisfying RSEM's requirements (assuming the aligner used put mates in a paired-end read adjacent) is to use the following command:
 538
 539   sort -k 1,1 -s input.sam > input.sorted.sam
 540
 541 The SAM/BAM format RSEM uses is v1.2.
 542
 543 The user must run 'rsem-prepare-reference' with the appropriate reference before using this program.
 544
 545 For single-end data, it is strongly recommended that the user provide the fragment length distribution parameters (--fragment-length-mean and --fragment-length-sd).  For paired-end data, RSEM will automatically learn a fragment length distribution from the data.
 546
 547 Please note that some of the default values for the Bowtie parameters are not the same as those defined for Bowtie itself.
 548
 549 The temporary directory and all intermediate files will be removed when RSEM finishes unless '--keep-intermediate-files' is specified.
 550
 551 With the "--calc-ci" option, 95% credibility intervals and posterior mean estimates will be calculated in addition to maximum likelihood estimates.
 552
 553 =head1 OUTPUT
 554
 555 =over
 556
 557 =item B<sample_name.genes.results>
 558
 559 File containing gene level expression estimates. The format of each
 560 line in this file is:
 561
 562 gene_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] transcript_id_list
 563
 564 Fields are separated by the tab character. Fields within "[]" are only
 565 presented if '--calc-ci' is set. pme stands for posterior mean
 566 estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
 567 means the lower bound of the credibility intervals, ci_upper_bound(u)
 568 means the upper bound of the credibility intervals. So the credibility
 569 interval is [l, u]. 'transcript_id_list' is a space-separated list of
 570 transcript_ids belonging to the gene.
 571
 572 =item B<sample_name.isoforms.results>
 573
 574 File containing isoform level expression values. The format of each
 575 line in this file is:
 576
 577 transcript_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] other_attributes
 578
 579 Fields are separated by the tab character. 'other_attributes' are all
 580 other attributes after attribute 'transcript_id' field in the GTF
 581 file. If no other attributes are given or no GTF file is provided in
 582 'rsem-prepare-reference', there will be no tab after the
 583 tau_value field.
 584
 585 =item B<sample_name.model> and B<sample_name.theta>
 586
 587 Output files used by RSEM internally for tasks like simulation,
 588 compute credibility intervals etc.
 589
 590 =item B<sample_name.bam, sample_name.sorted.bam and sample_name.sorted.bam.bai>
 591
 592 Only generated when --out-bam is specified.
 593
 594 'sample_name.bam' is a BAM-formatted file of read alignments in
 595 genomic coordinates. Alignments of reads that have identical genomic
 596 coordinates (i.e., alignments to different isoforms that share the
 597 same genomic region) are collapsed into one alignment.  The MAPQ field
 598 of each alignment is set to max(100, floor(-10 * log10(1.0 - w) +
 599 0.5)), where w is the posterior probability of that alignment being
 600 the true mapping of a read.  In addition, RSEM pads a new tag
 601 ZW:f:value, where value is a single precision floating number
 602 representing the posterior probability.
 603
 604 'sample_name.sorted.bam' and 'sample_name.sorted.bam.bai' are the
 605 sorted BAM file and indices generated by samtools (included in RSEM package).
 606
 607
 608 =back
 609
 610 =head1 EXAMPLES
 611
 612 Assume the path to the bowtie executables is in the user's PATH environment variable. Reference files are under '/ref' with name 'mm9'.
 613
 614 1) '/data/mmliver.fq', single-end reads with quality scores. Quality scores are encoded as for 'GA pipeline version >= 1.3'. We want to use 8 threads and generate a BAM file:
 615
 616  rsem-calculate-expression --phred64-quals \
 617                            -p 8 \
 618                            --out-bam \
 619                            /data/mmliver.fq \
 620                            /ref/mm9 \
 621                            mmliver_single_quals
 622
 623 2) '/data/mmliver_1.fq' and '/data/mmliver_2.fq', paired-end reads with quality scores. Quality scores are in SANGER format. We want to use 8 threads and do not generate a BAM file:
 624
 625  rsem-calculate-expression -p 8 \
 626                            --paired-end \
 627                            /data/mmliver_1.fq \
 628                            /data/mmliver_2.fq \
 629                            /ref/mm9 \
 630                            mmliver_paired_end_quals
 631
 632 3) '/data/mmliver.fa', single-end reads without quality scores. We want to use 8 threads and generate a BAM file:
 633
 634  rsem-calculate-expression -p 8 \
 635                            --no-qualities \
 636                            /data/mmliver.fa \
 637                            /ref/mm9 \
 638                            mmliver_single_without_quals
 639
 640 4) Data are the same as 1). We want to take a fragment length distribution into consideration. We set the fragment length mean to 150 and the standard deviation to 35. In addition to a BAM file, we also want to generate credibility intervals.  We allow RSEM to use 1GB of memory for CI calculation.
 641
 642  rsem-calculate-expression --bowtie-path /sw/bowtie \
 643                            --phred64-quals \
 644                            --fragment-length-mean 150.0 \
 645                            --fragment-length-sd 35.0 \
 646                            -p 8 \
 647                            --out-bam \
 648                            --calc-ci \
 649                            --ci-memory 1024 \
 650                            /data/mmliver.fq \
 651                            /ref/mm9 \
 652                            mmliver_single_quals
 653
 654 5) '/data/mmliver_paired_end_quals.bam', paired-end reads with quality scores.  We want to use 8 threads and do not generate a BAM file:
 655
 656  rsem-calculate-expression --paired-end \
 657                            --bam \
 658                            -p 8 \
 659                            /data/mmliver_paired_end_quals.bam \
 660                            /ref/mm9 \
 661                            mmliver_paired_end_quals
 662
 663 =cut