add new files

[rsem.git] / rsem-calculate-expression
diff --git a/rsem-calculate-expression b/rsem-calculate-expression

index 6f9706e9e55e6c487c2aeed023fc4a52d31795c1..774065a33aced3b9003285be20c52b3cc7017832 100755 (executable)
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -334,7 +334,7 @@ sub runCommand {
      if ($status != 0) { 
         my $errmsg;
         if (scalar(@_) > 1) { $errmsg = $_[1]; }
-       else { $errmsg = "$command failed! Plase check if you provide correct parameters/options for the pipeline!"; }
+       else { $errmsg = "\"$command\" failed! Plase check if you provide correct parameters/options for the pipeline!"; }
         print $errmsg."\n";
         exit(-1);
      }
@@ -345,7 +345,7 @@ sub runCommand {
  sub collectResults {
      my $local_status;
      my ($inpF, $outF);
-    my (@results, @comment) = ();
+    my (@results, @ids) = ();
      my $line;
      my $cnt;
  
@@ -362,11 +362,11 @@ sub collectResults {
         ++$cnt;
         chomp($line);
         my @local_arr = split(/\t/, $line);
-       if ($cnt == 4) { @comment = @local_arr; }
+       if ($cnt == 4) { @ids = @local_arr; }
         else { push(@results, \@local_arr); }
      }
      
-    push(@results, \@comment);
+    push(@results, \@ids);
      close(INPUT);
  
      $local_status = open(OUTPUT, ">$outF");
@@ -589,20 +589,20 @@ estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
  means the lower bound of the credibility intervals, ci_upper_bound(u)
  means the upper bound of the credibility intervals. So the credibility
  interval is [l, u]. 'transcript_id_list' is a space-separated list of
-transcript_ids belonging to the gene.
+transcript_ids belonging to the gene. If no gene information is
+provided, this file has the same content as
+'sample_name.isoforms.results'.
  
  =item B<sample_name.isoforms.results> 
  
  File containing isoform level expression values. The format of each
  line in this file is:
  
-transcript_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] other_attributes
+transcript_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] gene_id
  
-Fields are separated by the tab character. 'other_attributes' are all
-other attributes after attribute 'transcript_id' field in the GTF
-file. If no other attributes are given or no GTF file is provided in
-'rsem-prepare-reference', there will be no tab after the
-tau_value field.
+Fields are separated by the tab character. 'gene_id' is the gene_id of
+the gene to which this transcript belongs. If no gene information is
+provided, 'gene_id' and 'transcript_id' are the same.
  
  =item B<sample_name.transcript.bam, sample_name.transcript.sorted.bam and sample_name.transcript.sorted.bam.bai>
  
@@ -630,7 +630,9 @@ of each alignment is set to min(100, floor(-10 * log10(1.0 - w) +
  0.5)), where w is the posterior probability of that alignment being
  the true mapping of a read.  In addition, RSEM pads a new tag
  ZW:f:value, where value is a single precision floating number
-representing the posterior probability.
+representing the posterior probability. If an alignment is spliced, an
+XS:A:value tag is also added, where value is either '+' or '-'
+indicating the strand of the transcript it aligns to.
  
  'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai' are the
  sorted BAM file and indices generated by samtools (included in RSEM package).