X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rsem-calculate-expression;h=3f2817d083fac4605cbcb0ded7baaa6e6900db99;hb=58d504aaf36ae486b1dba6d03e0e9f1c25855037;hp=f16975545d37dd64e589197b5a2be70d39b596dc;hpb=102cd7714dbd1c1c13d0f4736ec1b1523b359978;p=rsem.git
diff --git a/rsem-calculate-expression b/rsem-calculate-expression
index f169755..3f2817d 100755
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -2,9 +2,12 @@
use Getopt::Long;
use Pod::Usage;
-use File::Basename;
+use FindBin;
+use lib $FindBin::Bin;
use strict;
+use rsem_perl_utils qw(runCommand collectResults showVersionInfo);
+
#const
my $BURNIN = 200;
my $NCV = 1000;
@@ -18,10 +21,6 @@ my $status = 0;
my $read_type = 1; # default, single end with qual
-my @transcript_title = ("transcript_id", "gene_id", "length", "effective_length", "expected_count", "TPM", "FPKM", "IsoPct", "pme_expected_count", "pme_TPM", "pme_FPKM", "IsoPct_from_pme_TPM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
-
-my @gene_title = ("gene_id", "transcript_id(s)", "length", "effective_length", "expected_count", "TPM", "FPKM", "pme_expected_count", "pme_TPM", "pme_FPKM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
-
my $bowtie_path = "";
my $C = 2;
my $E = 99999999;
@@ -62,6 +61,8 @@ my $keep_intermediate_files = 0;
my $strand_specific = 0;
+my $version = 0;
+
my $mTime = 0;
my ($time_start, $time_end, $time_alignment, $time_rsem, $time_ci) = (0, 0, 0, 0, 0);
@@ -87,9 +88,9 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
"bowtie-e=i" => \$E,
"bowtie-m=i" => \$maxHits,
"bowtie-chunkmbs=i" => \$chunkMbs,
- "phred33-quals" => \$phred33,
- "phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64,
- "solexa-quals" => \$solexa,
+ "bowtie-phred33-quals" => \$phred33,
+ "bowtie-phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64,
+ "bowtie-solexa-quals" => \$solexa,
"forward-prob=f" => \$probF,
"fragment-length-min=i" => \$minL,
"fragment-length-max=i" => \$maxL,
@@ -105,11 +106,14 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
"calc-ci" => \$calcCI,
"ci-memory=i" => \$NMB,
"time" => \$mTime,
+ "version" => \$version,
"q|quiet" => \$quiet,
"h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
-pod2usage(-verbose => 2) if ($help == 1);
+my $dir = "$FindBin::Bin/";
+pod2usage(-verbose => 2) if ($help == 1);
+&showVersionInfo($dir) if ($version == 1);
#check parameters and options
@@ -186,7 +190,6 @@ my ($mate_minL, $mate_maxL) = (1, $maxL);
if ($bowtie_path ne "") { $bowtie_path .= "/"; }
-my ($fn, $dir, $suf) = fileparse($0);
my $command = "";
if (!$is_sam && !$is_bam) {
@@ -343,68 +346,6 @@ if ($mTime) {
close(OUTPUT);
}
-# command, {err_msg}
-sub runCommand {
- print $_[0]."\n";
- my $status = system($_[0]);
- if ($status != 0) {
- my $errmsg = "";
- if (scalar(@_) > 1) { $errmsg .= $_[1]."\n"; }
- $errmsg .= "\"$_[0]\" failed! Plase check if you provide correct parameters/options for the pipeline!\n";
- print $errmsg;
- exit(-1);
- }
- print "\n";
-}
-
-# inpF, outF
-sub collectResults {
- my $local_status;
- my ($inpF, $outF);
- my @results = ();
- my $line;
-
- $inpF = $_[1];
- $outF = $_[2];
-
- $local_status = open(INPUT, $inpF);
- if ($local_status == 0) { print "Fail to open file $inpF!\n"; exit(-1); }
-
- @results = ();
-
- while ($line = ) {
- chomp($line);
- my @local_arr = split(/\t/, $line);
- push(@results, \@local_arr);
- }
-
- close(INPUT);
-
- $local_status = open(OUTPUT, ">$outF");
- if ($local_status == 0) { print "Fail to create file $outF!\n"; exit(-1); }
-
- my $n = scalar(@results);
- my $m = scalar(@{$results[0]});
-
- $" = "\t";
-
- my @out_arr = ();
- for (my $i = 0; $i < $n; $i++) {
- if ($_[0] eq "isoform") { push(@out_arr, $transcript_title[$i]); }
- elsif ($_[0] eq "gene") { push(@out_arr, $gene_title[$i]); }
- else { print "A bug on 'collectResults' is detected!\n"; exit(-1); }
- }
- print OUTPUT "@out_arr\n";
-
- for (my $i = 0; $i < $m; $i++) {
- @out_arr = ();
- for (my $j = 0; $j < $n; $j++) { push(@out_arr, $results[$j][$i]); }
- print OUTPUT "@out_arr\n";
- }
-
- close(OUTPUT);
-}
-
__END__
=head1 NAME
@@ -413,14 +354,10 @@ rsem-calculate-expression
=head1 SYNOPSIS
-=over
-
- rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
- rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name
+ rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
+ rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name
rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name
-=back
-
=head1 ARGUMENTS
=over
@@ -523,17 +460,17 @@ The path to the bowtie executables. (Default: the path to the bowtie executables
(Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use bowtie's default)
-=item B<--phred33-quals>
+=item B<--bowtie-phred33-quals>
-Input quality scores are encoded as Phred+33. (Default: on)
+(Bowtie parameter) Input quality scores are encoded as Phred+33. (Default: on)
-=item B<--phred64-quals>
+=item B<--bowtie-phred64-quals>
-Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). (Default: off)
+(Bowtie parameter) Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). (Default: off)
-=item B<--solexa-quals>
+=item B<--bowtie-solexa-quals>
-Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). (Default: off)
+(Bowtie parameter) Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). (Default: off)
=item B<--forward-prob>
@@ -587,6 +524,10 @@ Suppress the output of logging information. (Default: off)
Show help information.
+=item B<--version>
+
+Show version information.
+
=back
=head1 DESCRIPTION
@@ -777,7 +718,7 @@ Assume the path to the bowtie executables is in the user's PATH environment vari
/ref/mouse_125 \
mmliver_single_without_quals
-4) Data are the same as 1). We want to take a fragment length distribution into consideration. We set the fragment length mean to 150 and the standard deviation to 35. In addition to a BAM file, we also want to generate credibility intervals. We allow RSEM to use 1GB of memory for CI calculation:
+4) Data are the same as 1). This time we assume the bowtie executables are under '/sw/bowtie'. We want to take a fragment length distribution into consideration. We set the fragment length mean to 150 and the standard deviation to 35. In addition to a BAM file, we also want to generate credibility intervals. We allow RSEM to use 1GB of memory for CI calculation:
rsem-calculate-expression --bowtie-path /sw/bowtie \
--phred64-quals \