X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rsem-calculate-expression;h=3f2817d083fac4605cbcb0ded7baaa6e6900db99;hb=58d504aaf36ae486b1dba6d03e0e9f1c25855037;hp=f16975545d37dd64e589197b5a2be70d39b596dc;hpb=102cd7714dbd1c1c13d0f4736ec1b1523b359978;p=rsem.git diff --git a/rsem-calculate-expression b/rsem-calculate-expression index f169755..3f2817d 100755 --- a/rsem-calculate-expression +++ b/rsem-calculate-expression @@ -2,9 +2,12 @@ use Getopt::Long; use Pod::Usage; -use File::Basename; +use FindBin; +use lib $FindBin::Bin; use strict; +use rsem_perl_utils qw(runCommand collectResults showVersionInfo); + #const my $BURNIN = 200; my $NCV = 1000; @@ -18,10 +21,6 @@ my $status = 0; my $read_type = 1; # default, single end with qual -my @transcript_title = ("transcript_id", "gene_id", "length", "effective_length", "expected_count", "TPM", "FPKM", "IsoPct", "pme_expected_count", "pme_TPM", "pme_FPKM", "IsoPct_from_pme_TPM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound"); - -my @gene_title = ("gene_id", "transcript_id(s)", "length", "effective_length", "expected_count", "TPM", "FPKM", "pme_expected_count", "pme_TPM", "pme_FPKM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound"); - my $bowtie_path = ""; my $C = 2; my $E = 99999999; @@ -62,6 +61,8 @@ my $keep_intermediate_files = 0; my $strand_specific = 0; +my $version = 0; + my $mTime = 0; my ($time_start, $time_end, $time_alignment, $time_rsem, $time_ci) = (0, 0, 0, 0, 0); @@ -87,9 +88,9 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files, "bowtie-e=i" => \$E, "bowtie-m=i" => \$maxHits, "bowtie-chunkmbs=i" => \$chunkMbs, - "phred33-quals" => \$phred33, - "phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64, - "solexa-quals" => \$solexa, + "bowtie-phred33-quals" => \$phred33, + "bowtie-phred64-quals" => \$phred64, #solexa1.3-quals" => \$phred64, + "bowtie-solexa-quals" => \$solexa, "forward-prob=f" => \$probF, "fragment-length-min=i" => \$minL, "fragment-length-max=i" => \$maxL, @@ -105,11 +106,14 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files, "calc-ci" => \$calcCI, "ci-memory=i" => \$NMB, "time" => \$mTime, + "version" => \$version, "q|quiet" => \$quiet, "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2); -pod2usage(-verbose => 2) if ($help == 1); +my $dir = "$FindBin::Bin/"; +pod2usage(-verbose => 2) if ($help == 1); +&showVersionInfo($dir) if ($version == 1); #check parameters and options @@ -186,7 +190,6 @@ my ($mate_minL, $mate_maxL) = (1, $maxL); if ($bowtie_path ne "") { $bowtie_path .= "/"; } -my ($fn, $dir, $suf) = fileparse($0); my $command = ""; if (!$is_sam && !$is_bam) { @@ -343,68 +346,6 @@ if ($mTime) { close(OUTPUT); } -# command, {err_msg} -sub runCommand { - print $_[0]."\n"; - my $status = system($_[0]); - if ($status != 0) { - my $errmsg = ""; - if (scalar(@_) > 1) { $errmsg .= $_[1]."\n"; } - $errmsg .= "\"$_[0]\" failed! Plase check if you provide correct parameters/options for the pipeline!\n"; - print $errmsg; - exit(-1); - } - print "\n"; -} - -# inpF, outF -sub collectResults { - my $local_status; - my ($inpF, $outF); - my @results = (); - my $line; - - $inpF = $_[1]; - $outF = $_[2]; - - $local_status = open(INPUT, $inpF); - if ($local_status == 0) { print "Fail to open file $inpF!\n"; exit(-1); } - - @results = (); - - while ($line = ) { - chomp($line); - my @local_arr = split(/\t/, $line); - push(@results, \@local_arr); - } - - close(INPUT); - - $local_status = open(OUTPUT, ">$outF"); - if ($local_status == 0) { print "Fail to create file $outF!\n"; exit(-1); } - - my $n = scalar(@results); - my $m = scalar(@{$results[0]}); - - $" = "\t"; - - my @out_arr = (); - for (my $i = 0; $i < $n; $i++) { - if ($_[0] eq "isoform") { push(@out_arr, $transcript_title[$i]); } - elsif ($_[0] eq "gene") { push(@out_arr, $gene_title[$i]); } - else { print "A bug on 'collectResults' is detected!\n"; exit(-1); } - } - print OUTPUT "@out_arr\n"; - - for (my $i = 0; $i < $m; $i++) { - @out_arr = (); - for (my $j = 0; $j < $n; $j++) { push(@out_arr, $results[$j][$i]); } - print OUTPUT "@out_arr\n"; - } - - close(OUTPUT); -} - __END__ =head1 NAME @@ -413,14 +354,10 @@ rsem-calculate-expression =head1 SYNOPSIS -=over - - rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name - rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name + rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name + rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name -=back - =head1 ARGUMENTS =over @@ -523,17 +460,17 @@ The path to the bowtie executables. (Default: the path to the bowtie executables (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use bowtie's default) -=item B<--phred33-quals> +=item B<--bowtie-phred33-quals> -Input quality scores are encoded as Phred+33. (Default: on) +(Bowtie parameter) Input quality scores are encoded as Phred+33. (Default: on) -=item B<--phred64-quals> +=item B<--bowtie-phred64-quals> -Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). (Default: off) +(Bowtie parameter) Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). (Default: off) -=item B<--solexa-quals> +=item B<--bowtie-solexa-quals> -Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). (Default: off) +(Bowtie parameter) Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). (Default: off) =item B<--forward-prob> @@ -587,6 +524,10 @@ Suppress the output of logging information. (Default: off) Show help information. +=item B<--version> + +Show version information. + =back =head1 DESCRIPTION @@ -777,7 +718,7 @@ Assume the path to the bowtie executables is in the user's PATH environment vari /ref/mouse_125 \ mmliver_single_without_quals -4) Data are the same as 1). We want to take a fragment length distribution into consideration. We set the fragment length mean to 150 and the standard deviation to 35. In addition to a BAM file, we also want to generate credibility intervals. We allow RSEM to use 1GB of memory for CI calculation: +4) Data are the same as 1). This time we assume the bowtie executables are under '/sw/bowtie'. We want to take a fragment length distribution into consideration. We set the fragment length mean to 150 and the standard deviation to 35. In addition to a BAM file, we also want to generate credibility intervals. We allow RSEM to use 1GB of memory for CI calculation: rsem-calculate-expression --bowtie-path /sw/bowtie \ --phred64-quals \