From 1b2999c4407ef8419fb89b66b843b7141caff4da Mon Sep 17 00:00:00 2001 From: Bo Li Date: Mon, 2 Jul 2012 12:18:03 -0500 Subject: [PATCH] rsem v1.1.21 --- BamWriter.h | 4 +++- README.md | 9 +++++---- WHAT_IS_NEW | 10 +++++++++- bam2readdepth.cpp | 19 +++++++++++++------ bam2wig.cpp | 16 ++++++++++++---- makefile | 4 ++-- my_assert.h | 12 ++++++------ rsem-plot-transcript-wiggles | 8 ++++++-- utils.h | 6 +++--- wiggle.cpp | 29 +++++++++++++++++++---------- wiggle.h | 7 +++++++ 11 files changed, 85 insertions(+), 39 deletions(-) diff --git a/BamWriter.h b/BamWriter.h index 782f5bd..f664710 100644 --- a/BamWriter.h +++ b/BamWriter.h @@ -144,6 +144,8 @@ void BamWriter::work(HitWrapper wrapper) { b->core.mpos = b2->core.pos; b2->core.mpos = b->core.pos; } + + /* else { // if only one mate can be aligned, mask it as unaligned and put an additional tag Z0:A:! char exclamation = '!'; @@ -156,7 +158,7 @@ void BamWriter::work(HitWrapper wrapper) { bam_aux_append(b2, "Z0", 'A', bam_aux_type2size('A'), (uint8_t*)&exclamation); } } - + */ samwrite(out, b); samwrite(out, b2); diff --git a/README.md b/README.md index ed5acc0..8d3c15f 100644 --- a/README.md +++ b/README.md @@ -158,11 +158,12 @@ plot, run the 'rsem-bam2wig' program on the Usage: - rsem-bam2wig sorted_bam_input wig_output wiggle_name + rsem-bam2wig sorted_bam_input wig_output wiggle_name [--no-fractional-weight] -sorted_bam_input: sorted bam file -wig_output: output file name, e.g. output.wig -wiggle_name: the name the user wants to use for this wiggle plot +sorted_bam_input : Input BAM format file, must be sorted +wig_output : Output wiggle file's name, e.g. output.wig +wiggle_name : the name of this wiggle plot +--no-fractional-weight : If this is set, RSEM will not look for "ZW" tag and each alignment appeared in the BAM file has weight 1. Set this if your BAM file is not generated by RSEM. Please note that this option must be at the end of the command line. #### b) Loading a BAM and/or Wiggle file into the UCSC Genome Browser or Integrative Genomics Viewer(IGV) diff --git a/WHAT_IS_NEW b/WHAT_IS_NEW index 3c7f654..244066c 100644 --- a/WHAT_IS_NEW +++ b/WHAT_IS_NEW @@ -1,7 +1,15 @@ +RSEM v1.1.21 + +- Removed optional field "Z0:A:!" in the BAM outputs +- Added --no-fractional-weight option to rsem-bam2wig, if the BAM file is not generated by RSEM, this option is recommended to be set +- Fixed a bug for generating transcript level wiggle files using 'rsem-plot-transcript-wiggles' + +-------------------------------------------------------------------------------------------- + RSEM v1.1.20 - Added an option to set the temporary folder name -- Removed sample_name.sam.gz. Instead, RSEM uses samtools to convert bowtie outputed SAM file into a BAM file under the temporary folder +- Removed sample_name.sam.gz. Instead, RSEM uses samtools to convert bowtie outputted SAM file into a BAM file under the temporary folder - RSEM generated BAM files now contains all alignment lines produced by bowtie or user-specified aligners, including unalignable reads. Please note that for paired-end reads, if one mate has alignments but the other does not, RSEM will mark the alignable mate as "unmappable" (flag bit 0x4) and append an optional field "Z0:A:!" -------------------------------------------------------------------------------------------- diff --git a/bam2readdepth.cpp b/bam2readdepth.cpp index 87fb178..d252e2e 100644 --- a/bam2readdepth.cpp +++ b/bam2readdepth.cpp @@ -1,20 +1,27 @@ #include #include #include -#include +#include +#include "my_assert.h" #include "wiggle.h" using namespace std; int main(int argc, char* argv[]) { - if (argc != 2) { - printf("Usage: rsem-bam2readdepth sorted_bam_input\n"); + if (argc != 3) { + printf("Usage: rsem-bam2readdepth sorted_bam_input readdepth_output\n"); exit(-1); } - ReadDepthWriter depth_writer(std::cout); - build_wiggles(argv[1], depth_writer); + ofstream fout(argv[2]); + general_assert(fout.is_open(), "Cannot write to " + cstrtos(argv[2]) + "!"); - return 0; + ReadDepthWriter depth_writer(fout); + + build_wiggles(argv[1], depth_writer); + + fout.close(); + + return 0; } diff --git a/bam2wig.cpp b/bam2wig.cpp index 24a53c6..d205e8f 100644 --- a/bam2wig.cpp +++ b/bam2wig.cpp @@ -6,12 +6,20 @@ using namespace std; +void printUsage() { + printf("Usage: rsem-bam2wig sorted_bam_input wig_output wiggle_name [--no-fractional-weight]\n"); + printf("sorted_bam_input\t: Input BAM format file, must be sorted\n"); + printf("wig_output\t\t: Output wiggle file's name, e.g. output.wig\n"); + printf("wiggle_name\t\t: the name of this wiggle plot\n"); + printf("--no-fractional-weight\t: If this is set, RSEM will not look for \"ZW\" tag and each alignment appeared in the BAM file has weight 1. Set this if your BAM file is not generated by RSEM. Please note that this option must be at the end of the command line.\n"); + exit(-1); +} + int main(int argc, char* argv[]) { - if (argc != 4) { - printf("Usage: rsem-bam2wig sorted_bam_input wig_output wiggle_name\n"); - exit(-1); - } + if (argc < 4 || argc > 5) { printf("Number of arguments is not correct!\n"); printUsage(); } + if (argc == 5 && strcmp(argv[4], "--no-fractional-weight")) { printf("Cannot recognize option %s!\n", argv[4]); printUsage(); } + no_fractional_weight = (argc == 5 && !strcmp(argv[4], "--no-fractional-weight")); UCSCWiggleTrackWriter track_writer(argv[2], argv[3]); build_wiggles(argv[1], track_writer); diff --git a/makefile b/makefile index c9f37a8..e1a47f2 100644 --- a/makefile +++ b/makefile @@ -96,10 +96,10 @@ BamConverter.h : utils.h my_assert.h sam/sam.h sam/bam.h sam_rsem_aux.h sam_rsem rsem-tbam2gbam : utils.h Transcripts.h Transcript.h bc_aux.h BamConverter.h sam/sam.h sam/bam.h sam/libbam.a sam_rsem_aux.h sam_rsem_cvt.h tbam2gbam.cpp sam/libbam.a $(CC) -O3 -Wall tbam2gbam.cpp sam/libbam.a -lz -o $@ -rsem-bam2wig : wiggle.h wiggle.o sam/libbam.a bam2wig.cpp +rsem-bam2wig : utils.h my_assert.h wiggle.h wiggle.o sam/libbam.a bam2wig.cpp $(CC) -O3 -Wall bam2wig.cpp wiggle.o sam/libbam.a -lz -o $@ -rsem-bam2readdepth : wiggle.h wiggle.o sam/libbam.a bam2readdepth.cpp +rsem-bam2readdepth : utils.h my_assert.h wiggle.h wiggle.o sam/libbam.a bam2readdepth.cpp $(CC) -O3 -Wall bam2readdepth.cpp wiggle.o sam/libbam.a -lz -o $@ wiggle.o: sam/bam.h sam/sam.h wiggle.cpp wiggle.h diff --git a/my_assert.h b/my_assert.h index 02844e1..d63c3a2 100644 --- a/my_assert.h +++ b/my_assert.h @@ -9,29 +9,29 @@ #include #include -std::string itos(int i) { +inline std::string itos(int i) { std::ostringstream strout; strout< "Invalid number of arguments!", -exitval => 2, -verbose => 2) my ($fn, $dir, $suf) = fileparse($0); my $command = ""; +unless (-e "$ARGV[0].transcript.sorted.bam") { + $command = $dir."sam/samtools sort $ARGV[0].transcript.bam $ARGV[0].transcript.sorted"; + &runCommand($command); +} unless (-e "$ARGV[0].transcript.readdepth") { - $command = $dir."rsem-bam2readdepth $ARGV[0].transcript.sorted.bam > $ARGV[0].transcript.readdepth"; + $command = $dir."rsem-bam2readdepth $ARGV[0].transcript.sorted.bam $ARGV[0].transcript.readdepth"; &runCommand($command); } @@ -34,7 +38,7 @@ if ($show_unique) { &runCommand($command); } unless (-e "$ARGV[0].uniq.transcript.readdepth") { - $command = $dir."rsem-bam2readdepth $ARGV[0].uniq.transcript.sorted.bam > $ARGV[0].uniq.transcript.readdepth"; + $command = $dir."rsem-bam2readdepth $ARGV[0].uniq.transcript.sorted.bam $ARGV[0].uniq.transcript.readdepth"; &runCommand($command); } } diff --git a/utils.h b/utils.h index b15f154..137440e 100644 --- a/utils.h +++ b/utils.h @@ -23,7 +23,7 @@ const int RANGE = 201; const int OLEN = 25; // overlap length, number of bases must not be in poly(A) tails const int NBITS = 32; // use unsigned int, 32 bits per variable -bool verbose = true; // show detail intermediate outputs +static bool verbose = true; // show detail intermediate outputs inline bool isZero(double a) { return fabs(a) < 1e-8; } inline bool isLongZero(double a) { return fabs(a) < 1e-30; } @@ -124,7 +124,7 @@ inline std::string cleanStr(const std::string& str) { return (fr <= to ? str.substr(fr, to - fr + 1) : ""); } -void genReadFileNames(const char* readFN, int tagType, int read_type, int& s, char readFs[][STRLEN]){ +inline void genReadFileNames(const char* readFN, int tagType, int read_type, int& s, char readFs[][STRLEN]){ const char tags[3][STRLEN] = {"un", "alignable", "max"}; char suffix[STRLEN]; @@ -146,7 +146,7 @@ void genReadFileNames(const char* readFN, int tagType, int read_type, int& s, ch } } -void printTimeUsed(const time_t& a, const time_t& b, const char* program_name) { +inline void printTimeUsed(const time_t& a, const time_t& b, const char* program_name) { int hh = (b - a) / 3600; int mm = (b - a) % 3600 / 60; int ss = (b - a) % 60; diff --git a/wiggle.cpp b/wiggle.cpp index b0cb7d3..4e68b44 100644 --- a/wiggle.cpp +++ b/wiggle.cpp @@ -10,9 +10,18 @@ #include "utils.h" #include "wiggle.h" +bool no_fractional_weight = false; + void add_bam_record_to_wiggle(const bam1_t *b, Wiggle& wiggle) { - uint8_t *p_tag = bam_aux_get(b, "ZW"); - float w = (p_tag != NULL ? bam_aux2f(p_tag) : 1.0); + float w; + + if (no_fractional_weight) w = 1.0; + else { + uint8_t *p_tag = bam_aux_get(b, "ZW"); + if (p_tag == NULL) return; + w = bam_aux2f(p_tag); + } + int pos = b->core.pos; uint32_t *p = bam1_cigar(b); @@ -43,21 +52,21 @@ void build_wiggles(const std::string& bam_filename, bool *used = new bool[header->n_targets]; memset(used, 0, sizeof(bool) * header->n_targets); - int cur_tid = -1; //current tid; - HIT_INT_TYPE cnt = 0; - bam1_t *b = bam_init1(); - Wiggle wiggle; + int cur_tid = -1; //current tid; + HIT_INT_TYPE cnt = 0; + bam1_t *b = bam_init1(); + Wiggle wiggle; while (samread(bam_in, b) >= 0) { if (b->core.flag & 0x0004) continue; if (b->core.tid != cur_tid) { if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); } cur_tid = b->core.tid; - wiggle.name = header->target_name[cur_tid]; - wiggle.length = header->target_len[cur_tid]; - wiggle.read_depth.assign(wiggle.length, 0.0); + wiggle.name = header->target_name[cur_tid]; + wiggle.length = header->target_len[cur_tid]; + wiggle.read_depth.assign(wiggle.length, 0.0); } - add_bam_record_to_wiggle(b, wiggle); + add_bam_record_to_wiggle(b, wiggle); ++cnt; if (cnt % 1000000 == 0) std::cout<< cnt<< std::endl; } diff --git a/wiggle.h b/wiggle.h index 1f37592..ecce0fc 100644 --- a/wiggle.h +++ b/wiggle.h @@ -1,8 +1,13 @@ +#ifndef WIGGLE_H_ +#define WIGGLE_H_ + #include #include #include #include +extern bool no_fractional_weight; // if no_frac_weight == true, each alignment counts as weight 1 + struct Wiggle { std::string name; std::vector read_depth; @@ -40,3 +45,5 @@ private: void build_wiggles(const std::string& bam_filename, WiggleProcessor& processor); + +#endif -- 2.39.2