rsem v1.1.14

author Bo Li <bli@cs.wisc.edu>

Wed, 18 Jan 2012 05:36:57 +0000 (23:36 -0600)

committer Bo Li <bli@cs.wisc.edu>

Wed, 18 Jan 2012 05:36:57 +0000 (23:36 -0600)
author Bo Li <bli@cs.wisc.edu>
Wed, 18 Jan 2012 05:36:57 +0000 (23:36 -0600)
committer Bo Li <bli@cs.wisc.edu>
Wed, 18 Jan 2012 05:36:57 +0000 (23:36 -0600)
diff --git a/GTFItem.h b/GTFItem.h

index 4e502706b88bb55767ff4f7ddf98797bdf8680c7..a908375c9296ca1ba9ce0717969b11e30bffd109 100644 (file)
--- a/GTFItem.h
+++ b/GTFItem.h
@@ -10,114 +10,114 @@
  #include "utils.h"
  
  class GTFItem {
- public:
-
-  GTFItem() {
-    seqname = source = feature = "";
-    score = "";
-    start = end = 0;
-    strand = 0; //strand is a char variable
-    frame = "";
-    gene_id = transcript_id = "";
-    left = "";
-  }
-
-  bool operator<(const GTFItem& o) const {
-         if (gene_id != o.gene_id) return gene_id < o.gene_id;
-         if (transcript_id != o.transcript_id) return transcript_id < o.transcript_id;
-         return start < o.start;
-  }
-
-  void my_assert(char value, std::string& line, const std::string& msg) {
-         if (!value) {
-                 fprintf(stderr, ".gtf file might be corrupted!\n");
-                 fprintf(stderr, "Stop at line : %s\n", line.c_str());
-                 fprintf(stderr, "Error Message: %s\n", msg.c_str());
-                 exit(-1);
-         }
-  }
-
-  void parse(std::string line) {
-    std::istringstream strin(line);
-    std::string tmp;
-
-    getline(strin, seqname, '\t');
-    getline(strin, source, '\t');
-    getline(strin, feature, '\t');
-    getline(strin, tmp, '\t');
-    start = atoi(tmp.c_str());
-    getline(strin, tmp, '\t');
-    end = atoi(tmp.c_str());
-    getline(strin, score, '\t');
-    getline(strin, tmp, '\t');
-    my_assert((tmp.length() == 1 && (tmp[0] == '+' || tmp[0] == '-')), line, "Strand is neither '+' nor '-'!");
-    strand = tmp[0];
-    getline(strin, frame, '\t');
-
-    getline(strin, left); // assign attributes and possible comments into "left"
-
-    strin.clear(); strin.str(left);
-    bool find_gene_id = false, find_transcript_id = false;
-
-    while (getline(strin, tmp, ';') && (!find_gene_id || !find_transcript_id)) {
-       tmp = cleanStr(tmp);
-       size_t pos = tmp.find(' ');
-       my_assert((pos != std::string::npos), line, "Cannot separate the identifier from the value for attribute " + tmp + "!");
-       std::string identifier = tmp.substr(0, pos);
-
-       if (identifier == "gene_id") {
-               my_assert(!find_gene_id, line, "gene_id appear more than once!");
-           tmp = cleanStr(tmp.substr(pos));
-           my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
-           gene_id = tmp.substr(1, tmp.length() - 2);
-           find_gene_id = true;
-       } else if (identifier == "transcript_id") {
-               my_assert(!find_transcript_id, line, "transcript_id appear more than once!");
-           tmp = cleanStr(tmp.substr(pos));
-           my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
-           transcript_id = tmp.substr(1, tmp.length() - 2);
-           find_transcript_id = true;
-       }
-    }
-
-    my_assert(find_gene_id, line, "Cannot find gene_id!");
-    my_assert(find_transcript_id, line, "Cannot find transcript_id!");
-  }
-
-  std::string getSeqName() { return seqname; }
-  std::string getSource() { return source; }
-  std::string getFeature() { return feature; }
-  int getStart() { return start; }
-  int getEnd() { return end; }
-  char getStrand() { return strand; }
-  std::string getScore() { return score; }  // float, integer or "." ; let downstream programs parse it
-  std::string getFrame() { return frame; }  // 0, 1, 2, or "."; let downstream programs parse it
-  std::string getGeneID() { return gene_id; }
-  std::string getTranscriptID() { return transcript_id; }
-  std::string getLeft() { return left; }
-
-  void setGeneID(const std::string& gene_id) {
-    this->gene_id = gene_id;
-  }
-
-  std::string toString() {
-    std::string val;
-    std::ostringstream strout;
-    strout<<seqname<<'\t'<<source<<'\t'<<feature<<'\t'<<start<<'\t'<<end<<'\t'<<score<<'\t'<<strand<<'\t'<<frame<<'\t';
-    strout<<"gene_id \""<<gene_id<<"\"; transcript_id \""<<transcript_id<<"\";"<<left;
-    val = strout.str();
-
-    return val;
-  }
-
- private:
-  std::string seqname, source, feature;
-  std::string score;
-  int start, end;
-  char strand;
-  std::string frame;
-  std::string gene_id, transcript_id;
-  std::string left;
+public:
+
+       GTFItem() {
+               seqname = source = feature = "";
+               score = "";
+               start = end = 0;
+               strand = 0; //strand is a char variable
+               frame = "";
+               gene_id = transcript_id = "";
+               left = "";
+       }
+
+       bool operator<(const GTFItem& o) const {
+               if (gene_id != o.gene_id) return gene_id < o.gene_id;
+               if (transcript_id != o.transcript_id) return transcript_id < o.transcript_id;
+               return start < o.start;
+       }
+
+       void my_assert(char value, std::string& line, const std::string& msg) {
+               if (!value) {
+                       fprintf(stderr, ".gtf file might be corrupted!\n");
+                       fprintf(stderr, "Stop at line : %s\n", line.c_str());
+                       fprintf(stderr, "Error Message: %s\n", msg.c_str());
+                       exit(-1);
+               }
+       }
+
+       void parse(std::string line) {
+               std::istringstream strin(line);
+               std::string tmp;
+
+               getline(strin, seqname, '\t');
+               getline(strin, source, '\t');
+               getline(strin, feature, '\t');
+               getline(strin, tmp, '\t');
+               start = atoi(tmp.c_str());
+               getline(strin, tmp, '\t');
+               end = atoi(tmp.c_str());
+               getline(strin, score, '\t');
+               getline(strin, tmp, '\t');
+               my_assert((tmp.length() == 1 && (tmp[0] == '+' || tmp[0] == '-')), line, "Strand is neither '+' nor '-'!");
+               strand = tmp[0];
+               getline(strin, frame, '\t');
+
+               getline(strin, left); // assign attributes and possible comments into "left"
+
+               strin.clear(); strin.str(left);
+               bool find_gene_id = false, find_transcript_id = false;
+
+               while (getline(strin, tmp, ';') && (!find_gene_id || !find_transcript_id)) {
+                       tmp = cleanStr(tmp);
+                       size_t pos = tmp.find(' ');
+                       my_assert((pos != std::string::npos), line, "Cannot separate the identifier from the value for attribute " + tmp + "!");
+                       std::string identifier = tmp.substr(0, pos);
+
+                       if (identifier == "gene_id") {
+                               my_assert(!find_gene_id, line, "gene_id appear more than once!");
+                               tmp = cleanStr(tmp.substr(pos));
+                               my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
+                               gene_id = tmp.substr(1, tmp.length() - 2);
+                               find_gene_id = true;
+                       } else if (identifier == "transcript_id") {
+                               my_assert(!find_transcript_id, line, "transcript_id appear more than once!");
+                               tmp = cleanStr(tmp.substr(pos));
+                               my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
+                               transcript_id = tmp.substr(1, tmp.length() - 2);
+                               find_transcript_id = true;
+                       }
+               }
+
+               my_assert(find_gene_id, line, "Cannot find gene_id!");
+               my_assert(find_transcript_id, line, "Cannot find transcript_id!");
+       }
+
+       std::string getSeqName() { return seqname; }
+       std::string getSource() { return source; }
+       std::string getFeature() { return feature; }
+       int getStart() { return start; }
+       int getEnd() { return end; }
+       char getStrand() { return strand; }
+       std::string getScore() { return score; }  // float, integer or "." ; let downstream programs parse it
+       std::string getFrame() { return frame; }  // 0, 1, 2, or "."; let downstream programs parse it
+       std::string getGeneID() { return gene_id; }
+       std::string getTranscriptID() { return transcript_id; }
+       std::string getLeft() { return left; }
+
+       void setGeneID(const std::string& gene_id) {
+               this->gene_id = gene_id;
+       }
+
+       std::string toString() {
+               std::string val;
+               std::ostringstream strout;
+               strout<<seqname<<'\t'<<source<<'\t'<<feature<<'\t'<<start<<'\t'<<end<<'\t'<<score<<'\t'<<strand<<'\t'<<frame<<'\t';
+               strout<<"gene_id \""<<gene_id<<"\"; transcript_id \""<<transcript_id<<"\";"<<left;
+               val = strout.str();
+
+               return val;
+       }
+
+private:
+       std::string seqname, source, feature;
+       std::string score;
+       int start, end;
+       char strand;
+       std::string frame;
+       std::string gene_id, transcript_id;
+       std::string left;
  };
  
  #endif
diff --git a/README.md b/README.md

index 6076ce665d58704f638be04675ce20c2e57c959d..bb66d5498be3014ccab93ef382d6d05536e6e047 100644 (file)
--- a/README.md
+++ b/README.md
@@ -22,15 +22,21 @@ Table of Contents
  ## <a name="introduction"></a> Introduction
  
  RSEM is a software package for estimating gene and isoform expression
-levels from RNA-Seq data.  The new RSEM package (rsem-1.x) provides an
-user-friendly interface, supports threads for parallel computation of
-the EM algorithm, single-end and paired-end read data, quality scores,
-variable-length reads and RSPD estimation. It can also generate
-genomic-coordinate BAM files and UCSC wiggle files for
-visualization. In addition, it provides posterior mean and 95%
-credibility interval estimates for expression levels. For
-visualization, it can also generate transcript-coordinate BAM files
-and visualize them and also models learned.
+levels from RNA-Seq data. The RSEM package provides an user-friendly
+interface, supports threads for parallel computation of the EM
+algorithm, single-end and paired-end read data, quality scores,
+variable-length reads and RSPD estimation. In addition, it provides
+posterior mean and 95% credibility interval estimates for expression
+levels. For visualization, It can generate BAM and Wiggle files in
+both transcript-coordinate and genomic-coordinate. Genomic-coordinate
+files can be visualized by both UCSC Genome browser and Broad
+Institute's Integrative Genomics Viewer (IGV). Transcript-coordinate
+files can be visualized by IGV. RSEM also has its own scripts to
+generate transcript read depth plots in pdf format. The unique feature
+of RSEM is, the read depth plots can be stacked, with read depth
+contributed to unique reads shown in black and contributed to
+multi-reads shown in red. In addition, models learned from data can
+also be visualized. Last but not least, RSEM contains a simulator.
  
  ## <a name="compilation"></a> Compilation & Installation
  
@@ -152,7 +158,7 @@ wiggle_name: the name the user wants to use for this wiggle plot
  
  For UCSC genome browser, please refer to the [UCSC custom track help page](http://genome.ucsc.edu/goldenPath/help/customTrack.html).
  
-For integrative genomics viewer, please refer to the [IGV home page](http://www.broadinstitute.org/software/igv/home).
+For integrative genomics viewer, please refer to the [IGV home page](http://www.broadinstitute.org/software/igv/home). Note: Although IGV can generate read depth plot from the BAM file given, it cannot recognize "ZW" tag RSEM puts. Therefore IGV counts each alignment as weight 1 instead of the expected weight for the plot it generates. So we recommend to use the wiggle file generated by RSEM for read depth visualization.
  
  #### c) Generating Transcript Wiggle Plots
  
@@ -258,6 +264,8 @@ map_file: transcript-to-gene-map file's name.
  RSEM uses the [Boost C++](http://www.boost.org) and
  [samtools](http://samtools.sourceforge.net) libraries.
  
+We thank earonesty for contributing patches.
+
  ## <a name="license"></a> License
  
  RSEM is licensed under the [GNU General Public License v3](http://www.gnu.org/licenses/gpl-3.0.html).
diff --git a/convert-sam-for-rsem b/convert-sam-for-rsem

new file mode 100755 (executable)

index 0000000..68a42d7
--- /dev/null
+++ b/convert-sam-for-rsem
@@ -0,0 +1,184 @@
+#!/usr/bin/perl
+
+use Getopt::Long;
+use Pod::Usage;
+use strict;
+
+my $standard_output = "&STDOUT";
+
+my $out_file = $standard_output;
+my @tmp_dirs = ();
+my $help = 0;
+
+GetOptions("o=s" => \$out_file,
+          "T|temporary-directory=s" => \@tmp_dirs,
+          "h|help" => \$help) or pd2usage(-exitval => 2, -verbose => 2);
+
+          
+pod2usage(-verbose => 2) if ($help == 1);
+pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 2);
+
+my $command;
+my (@fields, @header) = ();
+my $M;
+
+# Load fields
+my @lines = ();
+my $type;
+
+open(INPUT, "$ARGV[0].ti");
+@lines = <INPUT>;
+chomp(@lines);
+close(INPUT);
+
+@fields = ();
+($M, $type) = split(/ /, $lines[0]);
+for (my $i = 0; $i < $M; $i++) {
+    push(@fields, "SN:$lines[$i * 6 + 1]");
+}
+
+
+# Reorder header
+my $line;
+
+open(INPUT, $ARGV[1]);
+@header = ();
+while (($line = <INPUT>) && substr($line, 0, 1) eq '@') {
+    chomp($line);
+    push(@header, $line);
+}
+close(INPUT);
+
+my $n = scalar(@header);
+if ($n > 0) {
+    my %hash = ();
+    my @ktable = ();
+
+    my $tid = 0;
+
+    for (my $i = 0; $i < $n; $i++) {
+       my @arr = split(/\t/, $header[$i]);
+       if ($arr[0] ne "\@SQ") { push(@ktable, ""); next; }
+       my $hasSN = 0;
+       foreach my $key (@arr) {
+           if (substr($key, 0, 3) eq "SN:") {
+               $hash{$key} = $i;
+               $hasSN = 1;
+               last;
+           }
+       }
+       if (!$hasSN) { print STDERR "\"$header[$i]\" does not have a SN tag!\n"; exit(-1); }
+       push(@ktable, $fields[$tid++]);
+    }
+
+    if ($tid != $M) { print STDERR "Number of \@SQ lines is not correct!\n"; exit(-1); }
+
+    open(OUTPUT, ">$out_file");
+    for (my $i = 0; $i < $n; $i++) {
+       if ($ktable[$i] eq "") { print OUTPUT $header[$i]; }
+       else { print OUTPUT $header[$hash{$ktable[$i]}]; }
+       print OUTPUT "\n";
+    }
+    close(OUTPUT);
+}
+
+
+# extract alignment section
+$command = "grep ^[^@] $ARGV[1] > $ARGV[1].__temp";
+&runCommand($command);
+
+# sort and output the alignment section
+$command = "sort -k 1,1 -s";
+if (scalar(@tmp_dirs) > 0) { $" = " -T "; $command .= " -T @tmp_dirs"; }
+$command .= " $ARGV[1].__temp";
+if ($out_file ne $standard_output) { $command .= " >> $out_file"; }
+&runCommand($command);
+
+# delete temporary files
+$command = "rm -f $ARGV[1].__temp";
+&runCommand($command);
+
+# finish
+print STDERR "Conversion is completed successfully!\n";
+
+# command, {err_msg}
+sub runCommand {
+    print STDERR $_[0]."\n";
+    my $status = system($_[0]);
+    if ($status != 0) { 
+       my $errmsg;
+       if (scalar(@_) > 1) { $errmsg = $_[1]; }
+       else { $errmsg = "\"$command\" failed! Plase check if you provide correct parameters/options for the script!"; }
+       print STDERR $errmsg."\n";
+       exit(-1);
+    }
+    print STDERR "\n";
+}
+
+__END__
+
+=head1 NAME
+
+convert-sam-for-rsem
+
+=head1 SYNOPSIS
+
+=over
+
+ convert-sam-for-rsem [options] reference_name input_sam
+
+=back
+
+=head1 ARGUMENTS
+
+=over
+
+=item B<reference_name>
+
+The name of the reference used. This should be the same name used by 'rsem-prepare-reference'.
+
+=item B<input_sam>
+
+The SAM file (*.sam) generated by user's aligner. If the aligner produces a BAM file, please use samtools to convert it to a SAM file (with header information). 
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item B<-o> <file>
+
+Output the converted SAM file into <file>. (Default: STDOUT)
+
+=item B<-T/--temporary-directory> <directory>
+
+'convert-sam-for-rsem' will call 'sort' command and this is the '-T/--temporary-directory' option of 'sort' command. The following is the description from 'sort' : "use DIR for temporaries, not $TMPDIR or /tmp; multiple options specify multiple directories". 
+
+=item B<-h/--help>
+
+Show help information.
+
+=back
+
+=head1 DESCRIPTION
+
+This program converts the SAM file generated by user's aligner into a SAM file which RSEM can process. However, users should make sure their aligners use 'reference_name.idx.fa' generated by 'rsem-prepare-reference' as their references. In addition, their aligners should output header information and make two mates of the same alignment adjacent to each other for paired-end data. This program will output the converted file into standard output by default for the purpose of piping. By setting '-o' option, users can make the converted file written into disk.
+
+Note: You do not need to run this script if Bowtie (not Bowtie 2) is used, or the order of @SQ lines is the same as 'reference_name.idx.fa' and the alignment lines of a same read group together and the mates of the same alignment are adjacent each other for paired-end reads.
+
+Note: This program can only recognize SAM files. See ARGUMENTS section.
+
+=head1 EXAMPLES
+
+Suppose reference_name and input_sam are set to '/ref/mouse_125' and 'input.sam'. 
+
+1) Output to standard output and gzip the output to 'input_for_rsem.sam.gz':
+
+ convert-sam-for-rsem /ref/mouse_125 input.sam | gzip > input_for_rsem.sam.gz
+
+2) Output to 'input_for_rsem.sam' directly:
+
+ convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam
+
+=cut
diff --git a/convert_sam_for_rsem b/convert_sam_for_rsem

deleted file mode 100755 (executable)

index a6bc124..0000000
--- a/convert_sam_for_rsem
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/perl
-
-use Getopt::Long;
-use Pod::Usage;
-use strict;
-
-my $standard_output = "&STDOUT";
-
-my $out_file = $standard_output;
-my @tmp_dirs = ();
-my $help = 0;
-
-GetOptions("o=s" => \$out_file,
-          "T|temporary-directory=s" => \@tmp_dirs,
-          "h|help" => \$help) or pd2usage(-exitval => 2, -verbose => 2);
-
-          
-pod2usage(-verbose => 2) if ($help == 1);
-pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 2);
-
-my $command;
-my (@fields, @header) = ();
-my $M;
-
-# Load fields
-my @lines = ();
-my $type;
-
-open(INPUT, "$ARGV[0].ti");
-@lines = <INPUT>;
-chomp(@lines);
-close(INPUT);
-
-@fields = ();
-($M, $type) = split(/ /, $lines[0]);
-for (my $i = 0; $i < $M; $i++) {
-    push(@fields, "SN:$lines[$i * 6 + 1]");
-}
-
-
-# Reorder header
-my $line;
-
-open(INPUT, $ARGV[1]);
-@header = ();
-while (($line = <INPUT>) && substr($line, 0, 1) eq '@') {
-    chomp($line);
-    push(@header, $line);
-}
-close(INPUT);
-
-my $n = scalar(@header);
-if ($n > 0) {
-    my %hash = ();
-    my @ktable = ();
-
-    my $tid = 0;
-
-    for (my $i = 0; $i < $n; $i++) {
-       my @arr = split(/\t/, $header[$i]);
-       if ($arr[0] ne "\@SQ") { push(@ktable, ""); next; }
-       my $hasSN = 0;
-       foreach my $key (@arr) {
-           if (substr($key, 0, 3) eq "SN:") {
-               $hash{$key} = $i;
-               $hasSN = 1;
-               last;
-           }
-       }
-       if (!$hasSN) { print STDERR "\"$header[$i]\" does not have a SN tag!\n"; exit(-1); }
-       push(@ktable, $fields[$tid++]);
-    }
-
-    if ($tid != $M) { print STDERR "Number of \@SQ lines is not correct!\n"; exit(-1); }
-
-    open(OUTPUT, ">$out_file");
-    for (my $i = 0; $i < $n; $i++) {
-       if ($ktable[$i] eq "") { print OUTPUT $header[$i]; }
-       else { print OUTPUT $header[$hash{$ktable[$i]}]; }
-       print OUTPUT "\n";
-    }
-    close(OUTPUT);
-}
-
-
-# extract alignment section
-$command = "grep ^[^@] $ARGV[1] > $ARGV[1].__temp";
-&runCommand($command);
-
-# sort and output the alignment section
-$command = "sort -k 1,1 -s";
-if (scalar(@tmp_dirs) > 0) { $" = " -T "; $command .= " -T @tmp_dirs"; }
-$command .= " $ARGV[1].__temp";
-if ($out_file ne $standard_output) { $command .= " >> $out_file"; }
-&runCommand($command);
-
-# delete temporary files
-$command = "rm -f $ARGV[1].__temp";
-&runCommand($command);
-
-# finish
-print STDERR "Conversion is completed successfully!\n";
-
-# command, {err_msg}
-sub runCommand {
-    print STDERR $_[0]."\n";
-    my $status = system($_[0]);
-    if ($status != 0) { 
-       my $errmsg;
-       if (scalar(@_) > 1) { $errmsg = $_[1]; }
-       else { $errmsg = "\"$command\" failed! Plase check if you provide correct parameters/options for the script!"; }
-       print STDERR $errmsg."\n";
-       exit(-1);
-    }
-    print STDERR "\n";
-}
-
-__END__
-
-=head1 NAME
-
-convert_sam_for_rsem
-
-=head1 SYNOPSIS
-
-=over
-
- convert_sam_for_rsem [options] reference_name input_sam
-
-=back
-
-=head1 ARGUMENTS
-
-=over
-
-=item B<reference_name>
-
-The name of the reference used. This should be the same name used by 'rsem-prepare-reference'.
-
-=item B<input_sam>
-
-The SAM file (*.sam) generated by user's aligner. If the aligner produces a BAM file, please use samtools to convert it to a SAM file (with header information). 
-
-=back
-
-=head1 OPTIONS
-
-=over
-
-=item B<-o> <file>
-
-Output the converted SAM file into <file>. (Default: STDOUT)
-
-=item B<-T/--temporary-directory> <directory>
-
-'convert_sam_for_rsem' will call 'sort' command and this is the '-T/--temporary-directory' option of 'sort' command. The following is the description from 'sort' : "use DIR for temporaries, not $TMPDIR or /tmp; multiple options specify multiple directories". 
-
-=item B<-h/--help>
-
-Show help information.
-
-=back
-
-=head1 DESCRIPTION
-
-This program converts the SAM file generated by user's aligner into a SAM file which RSEM can process. However, users should make sure their aligners use 'reference_name.idx.fa' generated by 'rsem-prepare-reference' as their references. In addition, their aligners should output header information and make two mates of the same alignment adjacent to each other for paired-end data. This program will output the converted file into standard output by default for the purpose of piping. By setting '-o' option, users can make the converted file written into disk.
-
-Note: You do not need to run this script if Bowtie (not Bowtie 2) is used, or the order of @SQ lines is the same as 'reference_name.idx.fa' and the alignment lines of a same read group together and the mates of the same alignment are adjacent each other for paired-end reads.
-
-Note: This program can only recognize SAM files. See ARGUMENTS section.
-
-=head1 EXAMPLES
-
-Suppose reference_name and input_sam are set to '/ref/mouse_125' and 'input.sam'. 
-
-1) Output to standard output and gzip the output to 'input_for_rsem.sam.gz':
-
- convert_sam_for_rsem /ref/mouse_125 input.sam | gzip > input_for_rsem.sam.gz
-
-2) Output to 'input_for_rsem.sam' directly:
-
- convert_sam_for_rsem /ref/mouse_125 input.sam -o input_for_rsem.sam
-
-=cut
diff --git a/what_is_new b/what_is_new

new file mode 100644 (file)

index 0000000..7168193
--- /dev/null
+++ b/what_is_new
@@ -0,0 +1,10 @@
+In this version of RSEM, several things are updated:
+
+- Added --chunkmbs option to rsem-calculate-expression (patch contributed by earonesty)
+- Added --sampling-for-bam option to rsem-calculate-expression, in the bam file, instead of providing expected weights, for each read RSEM samples one alignment based on the expected weights
+- RSEM can generate BAM and Wiggle files in both genomic-coordinate and transcript-coordinate
+- Added rsem-plot-transcript-wiggles. This script can generate transcript-coordinate wiggle plots in pdf format. One unique feature is, a stacked plot can be generated, with unique read contribution shown as black and multi-read contribution shown as red
+- Added convert_sam_for_rsem script for users do not use bowtie aligner
+- Modified RSEM's GTF file parser. Now RSEM does not require "transcript_id" and "gene_id" be the first two attributes shown
+- Improved descriptions for thread related errors. 
+
author	Bo Li <bli@cs.wisc.edu>
	Wed, 18 Jan 2012 05:36:57 +0000 (23:36 -0600)
committer	Bo Li <bli@cs.wisc.edu>
	Wed, 18 Jan 2012 05:36:57 +0000 (23:36 -0600)
GTFItem.h		patch \| blob \| history
README.md		patch \| blob \| history
convert-sam-for-rsem	[new file with mode: 0755]	patch \| blob
convert_sam_for_rsem	[deleted file]	patch \| blob \| history
what_is_new	[new file with mode: 0644]	patch \| blob