Updated samtools to 0.1.19

[rsem.git] / rsem-calculate-expression
diff --git a/rsem-calculate-expression b/rsem-calculate-expression

index 4d1b70a8a4dca169ccb0702caf32d2bb30caaa2f..3f2817d083fac4605cbcb0ded7baaa6e6900db99 100755 (executable)
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -2,9 +2,12 @@
  
  use Getopt::Long;
  use Pod::Usage;
-use File::Basename;
+use FindBin;
+use lib $FindBin::Bin;
  use strict;
  
+use rsem_perl_utils qw(runCommand collectResults showVersionInfo);
+
  #const
  my $BURNIN = 200;
  my $NCV = 1000;
@@ -18,10 +21,6 @@ my $status = 0;
  
  my $read_type = 1; # default, single end with qual
  
-my @transcript_title = ("transcript_id", "gene_id", "length", "effective_length", "expected_count", "TPM", "FPKM", "IsoPct", "pme_expected_count", "pme_TPM", "pme_FPKM", "IsoPct_from_pme_TPM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
-
-my @gene_title = ("gene_id", "transcript_id(s)", "length", "effective_length", "expected_count", "TPM", "FPKM", "pme_expected_count", "pme_TPM", "pme_FPKM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
-
  my $bowtie_path = "";
  my $C = 2;
  my $E = 99999999;
@@ -89,9 +88,9 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
            "bowtie-e=i" => \$E,
            "bowtie-m=i" => \$maxHits,
            "bowtie-chunkmbs=i" => \$chunkMbs,
-          "phred33-quals" => \$phred33,
-          "phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64,
-          "solexa-quals" => \$solexa,
+          "bowtie-phred33-quals" => \$phred33,
+          "bowtie-phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64,
+          "bowtie-solexa-quals" => \$solexa,
            "forward-prob=f" => \$probF,
            "fragment-length-min=i" => \$minL,
            "fragment-length-max=i" => \$maxL,
@@ -111,7 +110,7 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
            "q|quiet" => \$quiet,
            "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
  
-my ($fn, $dir, $suf) = fileparse($0);
+my $dir = "$FindBin::Bin/";
  
  pod2usage(-verbose => 2) if ($help == 1);
  &showVersionInfo($dir) if ($version == 1);
@@ -347,78 +346,6 @@ if ($mTime) {
      close(OUTPUT);
  }
  
-# command, {err_msg}
-sub runCommand {
-    print $_[0]."\n";
-    my $status = system($_[0]);
-    if ($status != 0) {
-        my $errmsg = "";
-        if (scalar(@_) > 1) { $errmsg .= $_[1]."\n"; }
-        $errmsg .= "\"$_[0]\" failed! Please check if you provide correct parameters/options for the pipeline!\n";
-        print $errmsg;
-        exit(-1);
-    }
-    print "\n";
-}
- 
-# inpF, outF
-sub collectResults {
-    my $local_status;
-    my ($inpF, $outF);
-    my @results = ();
-    my $line;
-
-    $inpF = $_[1];
-    $outF = $_[2];
-
-    $local_status = open(INPUT, $inpF);
-    if ($local_status == 0) { print "Fail to open file $inpF!\n"; exit(-1); }
-    
-    @results = ();
-    
-    while ($line = <INPUT>) {
-       chomp($line);
-       my @local_arr = split(/\t/, $line);
-       push(@results, \@local_arr); 
-    }
-
-    close(INPUT);
-
-    $local_status = open(OUTPUT, ">$outF");
-    if ($local_status == 0) { print "Fail to create file $outF!\n"; exit(-1); }
-
-    my $n = scalar(@results);
-    my $m = scalar(@{$results[0]});
-
-    $" = "\t";
-
-    my @out_arr = ();
-    for (my $i = 0; $i < $n; $i++) {
-       if ($_[0] eq "isoform") { push(@out_arr, $transcript_title[$i]); }
-       elsif ($_[0] eq "gene") { push(@out_arr, $gene_title[$i]); }
-       else { print "A bug on 'collectResults' is detected!\n"; exit(-1); }
-    }
-    print OUTPUT "@out_arr\n";
-
-    for (my $i = 0; $i < $m; $i++) {
-       @out_arr = ();
-       for (my $j = 0; $j < $n; $j++) { push(@out_arr, $results[$j][$i]); }
-       print OUTPUT "@out_arr\n"; 
-    }
-
-    close(OUTPUT);
-}
-
-# dir
-sub showVersionInfo {
-    open(INPUT, "$_[0]\WHAT_IS_NEW");
-    my $line = <INPUT>;
-    chomp($line);
-    close(INPUT);
-    print "$line\n";
-    exit(0);
-}
-
  __END__
  
  =head1 NAME
@@ -427,14 +354,10 @@ rsem-calculate-expression
  
  =head1 SYNOPSIS
  
-=over
-
- rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
- rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name
+ rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name 
+ rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name 
   rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name
  
-=back
-
  =head1 ARGUMENTS
  
  =over
@@ -537,17 +460,17 @@ The path to the bowtie executables. (Default: the path to the bowtie executables
  
  (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use bowtie's default)
  
-=item B<--phred33-quals>
+=item B<--bowtie-phred33-quals>
  
-Input quality scores are encoded as Phred+33. (Default: on)
+(Bowtie parameter) Input quality scores are encoded as Phred+33. (Default: on)
  
-=item B<--phred64-quals>
+=item B<--bowtie-phred64-quals>
  
-Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). (Default: off)
+(Bowtie parameter) Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). (Default: off)
  
-=item B<--solexa-quals>
+=item B<--bowtie-solexa-quals>
  
-Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). (Default: off)
+(Bowtie parameter) Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). (Default: off)
  
  =item B<--forward-prob> <double>
  
@@ -601,6 +524,10 @@ Suppress the output of logging information. (Default: off)
  
  Show help information.
  
+=item B<--version>
+
+Show version information.
+
  =back
  
  =head1 DESCRIPTION
@@ -791,7 +718,7 @@ Assume the path to the bowtie executables is in the user's PATH environment vari
                             /ref/mouse_125 \
                             mmliver_single_without_quals
  
-4) Data are the same as 1). We want to take a fragment length distribution into consideration. We set the fragment length mean to 150 and the standard deviation to 35. In addition to a BAM file, we also want to generate credibility intervals.  We allow RSEM to use 1GB of memory for CI calculation:
+4) Data are the same as 1). This time we assume the bowtie executables are under '/sw/bowtie'. We want to take a fragment length distribution into consideration. We set the fragment length mean to 150 and the standard deviation to 35. In addition to a BAM file, we also want to generate credibility intervals.  We allow RSEM to use 1GB of memory for CI calculation:
  
   rsem-calculate-expression --bowtie-path /sw/bowtie \
                             --phred64-quals \