Added support for DE analysis on multiple conditions via running EBSeq

[rsem.git] / rsem-prepare-reference
diff --git a/rsem-prepare-reference b/rsem-prepare-reference

index 78743e946f2b2f1b6b02ecdb6cbeeaaebdf3c70e..8ff37c7ffc79dcc894cbbd2e3fb0b402fbb89570 100755 (executable)
--- a/rsem-prepare-reference
+++ b/rsem-prepare-reference
@@ -2,9 +2,12 @@
  
  use Getopt::Long;
  use Pod::Usage;        
-use File::Basename;
+use FindBin;
+use lib $FindBin::Bin;
  use strict;
  
+use rsem_perl_utils;
+
  my $status;
  
  my $gtfF = "";
@@ -58,7 +61,7 @@ elsif ($subsetFile ne "") { $polyAChoice = 2; }
  
  if ($bowtie_path ne "") { $bowtie_path .= "/"; }
  
-my ($fn, $dir, $suf) = fileparse($0); 
+my $dir = "$FindBin::Bin/";
  my $command = "";
  
  if ($type == 0) {
@@ -93,20 +96,6 @@ if (!$no_bowtie) {
      &runCommand($command);
  }
  
-# command, {err_msg}
-sub runCommand {
-    print $_[0]."\n";
-    my $status = system($_[0]);
-    if ($status != 0) { 
-       my $errmsg;
-       if (scalar(@_) > 1) { $errmsg = $_[1]; }
-       else { $errmsg = "\"$command\" failed! Plase check if you provide correct parameters/options for the pipeline!"; }
-       print $errmsg."\n";
-       exit(-1);
-    }
-    print "\n";
-}
-
  __END__
  
  =head1 NAME
@@ -115,11 +104,7 @@ rsem-prepare-reference
  
  =head1 SYNOPSIS
  
-=over
-
- rsem-prepare-reference [options] reference_fasta_file(s) reference_name
-
-=back
+rsem-prepare-reference [options] reference_fasta_file(s) reference_name
  
  =head1 ARGUMENTS
  
@@ -155,7 +140,7 @@ Each line of <file> should be of the form:
  gene_id transcript_id
  
  with the two fields separated by a tab character.
- 
+
  If you are using a GTF file for the "UCSC Genes" gene set from the UCSC Genome Browser, then the "knownIsoforms.txt" file (obtained from the "Downloads" section of the UCSC Genome Browser site) is of this format.
  
  If this option is off, then the mapping of isoforms to genes depends on whether the --gtf option is specified.  If --gtf is specified, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file.  Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene.
@@ -164,7 +149,7 @@ If this option is off, then the mapping of isoforms to genes depends on whether
  
  =item B<--no-polyA>
  
-Do not add poly(A) tails to the end of reference isoforms. (Default: add poly(A) tails to all transcripts)
+Do not add poly(A) tails to the end of reference isoforms. (Default: poly(A) tails are added to all transcripts)
  
  =item B<--no-polyA-subset> <file>
  
@@ -207,7 +192,7 @@ This program will generate 'reference_name.grp', 'reference_name.ti', 'reference
  
  'reference_name.grp', 'reference_name.ti', 'reference_name.seq', 'reference_name.idx.fa', and 'reference_name.chrlist' are used by RSEM internally.
  
-B<'reference_name.transcripts.fa'> contains the extracted reference transcripts in FASTA format. Poly(A) tails are not added.
+B<'reference_name.transcripts.fa'> contains the extracted reference transcripts in FASTA format. Poly(A) tails are added unless '--no-polyA' is set.
  
  =head1 EXAMPLES