b->core.mpos = b2->core.pos;
b2->core.mpos = b->core.pos;
}
+
+ /*
else {
// if only one mate can be aligned, mask it as unaligned and put an additional tag Z0:A:!
char exclamation = '!';
bam_aux_append(b2, "Z0", 'A', bam_aux_type2size('A'), (uint8_t*)&exclamation);
}
}
-
+ */
samwrite(out, b);
samwrite(out, b2);
Usage:
- rsem-bam2wig sorted_bam_input wig_output wiggle_name
+ rsem-bam2wig sorted_bam_input wig_output wiggle_name [--no-fractional-weight]
-sorted_bam_input: sorted bam file
-wig_output: output file name, e.g. output.wig
-wiggle_name: the name the user wants to use for this wiggle plot
+sorted_bam_input : Input BAM format file, must be sorted
+wig_output : Output wiggle file's name, e.g. output.wig
+wiggle_name : the name of this wiggle plot
+--no-fractional-weight : If this is set, RSEM will not look for "ZW" tag and each alignment appeared in the BAM file has weight 1. Set this if your BAM file is not generated by RSEM. Please note that this option must be at the end of the command line.
#### b) Loading a BAM and/or Wiggle file into the UCSC Genome Browser or Integrative Genomics Viewer(IGV)
+RSEM v1.1.21
+
+- Removed optional field "Z0:A:!" in the BAM outputs
+- Added --no-fractional-weight option to rsem-bam2wig, if the BAM file is not generated by RSEM, this option is recommended to be set
+- Fixed a bug for generating transcript level wiggle files using 'rsem-plot-transcript-wiggles'
+
+--------------------------------------------------------------------------------------------
+
RSEM v1.1.20
- Added an option to set the temporary folder name
-- Removed sample_name.sam.gz. Instead, RSEM uses samtools to convert bowtie outputed SAM file into a BAM file under the temporary folder
+- Removed sample_name.sam.gz. Instead, RSEM uses samtools to convert bowtie outputted SAM file into a BAM file under the temporary folder
- RSEM generated BAM files now contains all alignment lines produced by bowtie or user-specified aligners, including unalignable reads. Please note that for paired-end reads, if one mate has alignments but the other does not, RSEM will mark the alignable mate as "unmappable" (flag bit 0x4) and append an optional field "Z0:A:!"
--------------------------------------------------------------------------------------------
#include <cstdio>
#include <cstring>
#include <cstdlib>
-#include <iostream>
+#include <fstream>
+#include "my_assert.h"
#include "wiggle.h"
using namespace std;
int main(int argc, char* argv[]) {
- if (argc != 2) {
- printf("Usage: rsem-bam2readdepth sorted_bam_input\n");
+ if (argc != 3) {
+ printf("Usage: rsem-bam2readdepth sorted_bam_input readdepth_output\n");
exit(-1);
}
- ReadDepthWriter depth_writer(std::cout);
- build_wiggles(argv[1], depth_writer);
+ ofstream fout(argv[2]);
+ general_assert(fout.is_open(), "Cannot write to " + cstrtos(argv[2]) + "!");
- return 0;
+ ReadDepthWriter depth_writer(fout);
+
+ build_wiggles(argv[1], depth_writer);
+
+ fout.close();
+
+ return 0;
}
using namespace std;
+void printUsage() {
+ printf("Usage: rsem-bam2wig sorted_bam_input wig_output wiggle_name [--no-fractional-weight]\n");
+ printf("sorted_bam_input\t: Input BAM format file, must be sorted\n");
+ printf("wig_output\t\t: Output wiggle file's name, e.g. output.wig\n");
+ printf("wiggle_name\t\t: the name of this wiggle plot\n");
+ printf("--no-fractional-weight\t: If this is set, RSEM will not look for \"ZW\" tag and each alignment appeared in the BAM file has weight 1. Set this if your BAM file is not generated by RSEM. Please note that this option must be at the end of the command line.\n");
+ exit(-1);
+}
+
int main(int argc, char* argv[]) {
- if (argc != 4) {
- printf("Usage: rsem-bam2wig sorted_bam_input wig_output wiggle_name\n");
- exit(-1);
- }
+ if (argc < 4 || argc > 5) { printf("Number of arguments is not correct!\n"); printUsage(); }
+ if (argc == 5 && strcmp(argv[4], "--no-fractional-weight")) { printf("Cannot recognize option %s!\n", argv[4]); printUsage(); }
+ no_fractional_weight = (argc == 5 && !strcmp(argv[4], "--no-fractional-weight"));
UCSCWiggleTrackWriter track_writer(argv[2], argv[3]);
build_wiggles(argv[1], track_writer);
rsem-tbam2gbam : utils.h Transcripts.h Transcript.h bc_aux.h BamConverter.h sam/sam.h sam/bam.h sam/libbam.a sam_rsem_aux.h sam_rsem_cvt.h tbam2gbam.cpp sam/libbam.a
$(CC) -O3 -Wall tbam2gbam.cpp sam/libbam.a -lz -o $@
-rsem-bam2wig : wiggle.h wiggle.o sam/libbam.a bam2wig.cpp
+rsem-bam2wig : utils.h my_assert.h wiggle.h wiggle.o sam/libbam.a bam2wig.cpp
$(CC) -O3 -Wall bam2wig.cpp wiggle.o sam/libbam.a -lz -o $@
-rsem-bam2readdepth : wiggle.h wiggle.o sam/libbam.a bam2readdepth.cpp
+rsem-bam2readdepth : utils.h my_assert.h wiggle.h wiggle.o sam/libbam.a bam2readdepth.cpp
$(CC) -O3 -Wall bam2readdepth.cpp wiggle.o sam/libbam.a -lz -o $@
wiggle.o: sam/bam.h sam/sam.h wiggle.cpp wiggle.h
#include<sstream>
#include<iomanip>
-std::string itos(int i) {
+inline std::string itos(int i) {
std::ostringstream strout;
strout<<i;
return strout.str();
}
// n : number of significant digits
-std::string ftos(double f, int n) {
+inline std::string ftos(double f, int n) {
std::ostringstream strout;
strout<<std::setprecision(n)<<f;
return strout.str();
}
-std::string ctos(char c) {
+inline std::string ctos(char c) {
return std::string(1, c);
}
-std::string cstrtos(const char* s) {
+inline std::string cstrtos(const char* s) {
return std::string(s);
}
-void general_assert(int expr, const std::string& errmsg, bool putEnter = false) {
+inline void general_assert(int expr, const std::string& errmsg, bool putEnter = false) {
if (expr) return;
if (putEnter) printf("\n");
exit(-1);
}
-void pthread_assert(int rc, const std::string& func_name, const std::string& errmsg) {
+inline void pthread_assert(int rc, const std::string& func_name, const std::string& errmsg) {
if (rc == 0) return;
fprintf(stderr, "%s\n", errmsg.c_str());
my ($fn, $dir, $suf) = fileparse($0);
my $command = "";
+unless (-e "$ARGV[0].transcript.sorted.bam") {
+ $command = $dir."sam/samtools sort $ARGV[0].transcript.bam $ARGV[0].transcript.sorted";
+ &runCommand($command);
+}
unless (-e "$ARGV[0].transcript.readdepth") {
- $command = $dir."rsem-bam2readdepth $ARGV[0].transcript.sorted.bam > $ARGV[0].transcript.readdepth";
+ $command = $dir."rsem-bam2readdepth $ARGV[0].transcript.sorted.bam $ARGV[0].transcript.readdepth";
&runCommand($command);
}
&runCommand($command);
}
unless (-e "$ARGV[0].uniq.transcript.readdepth") {
- $command = $dir."rsem-bam2readdepth $ARGV[0].uniq.transcript.sorted.bam > $ARGV[0].uniq.transcript.readdepth";
+ $command = $dir."rsem-bam2readdepth $ARGV[0].uniq.transcript.sorted.bam $ARGV[0].uniq.transcript.readdepth";
&runCommand($command);
}
}
const int OLEN = 25; // overlap length, number of bases must not be in poly(A) tails
const int NBITS = 32; // use unsigned int, 32 bits per variable
-bool verbose = true; // show detail intermediate outputs
+static bool verbose = true; // show detail intermediate outputs
inline bool isZero(double a) { return fabs(a) < 1e-8; }
inline bool isLongZero(double a) { return fabs(a) < 1e-30; }
return (fr <= to ? str.substr(fr, to - fr + 1) : "");
}
-void genReadFileNames(const char* readFN, int tagType, int read_type, int& s, char readFs[][STRLEN]){
+inline void genReadFileNames(const char* readFN, int tagType, int read_type, int& s, char readFs[][STRLEN]){
const char tags[3][STRLEN] = {"un", "alignable", "max"};
char suffix[STRLEN];
}
}
-void printTimeUsed(const time_t& a, const time_t& b, const char* program_name) {
+inline void printTimeUsed(const time_t& a, const time_t& b, const char* program_name) {
int hh = (b - a) / 3600;
int mm = (b - a) % 3600 / 60;
int ss = (b - a) % 60;
#include "utils.h"
#include "wiggle.h"
+bool no_fractional_weight = false;
+
void add_bam_record_to_wiggle(const bam1_t *b, Wiggle& wiggle) {
- uint8_t *p_tag = bam_aux_get(b, "ZW");
- float w = (p_tag != NULL ? bam_aux2f(p_tag) : 1.0);
+ float w;
+
+ if (no_fractional_weight) w = 1.0;
+ else {
+ uint8_t *p_tag = bam_aux_get(b, "ZW");
+ if (p_tag == NULL) return;
+ w = bam_aux2f(p_tag);
+ }
+
int pos = b->core.pos;
uint32_t *p = bam1_cigar(b);
bool *used = new bool[header->n_targets];
memset(used, 0, sizeof(bool) * header->n_targets);
- int cur_tid = -1; //current tid;
- HIT_INT_TYPE cnt = 0;
- bam1_t *b = bam_init1();
- Wiggle wiggle;
+ int cur_tid = -1; //current tid;
+ HIT_INT_TYPE cnt = 0;
+ bam1_t *b = bam_init1();
+ Wiggle wiggle;
while (samread(bam_in, b) >= 0) {
if (b->core.flag & 0x0004) continue;
if (b->core.tid != cur_tid) {
if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }
cur_tid = b->core.tid;
- wiggle.name = header->target_name[cur_tid];
- wiggle.length = header->target_len[cur_tid];
- wiggle.read_depth.assign(wiggle.length, 0.0);
+ wiggle.name = header->target_name[cur_tid];
+ wiggle.length = header->target_len[cur_tid];
+ wiggle.read_depth.assign(wiggle.length, 0.0);
}
- add_bam_record_to_wiggle(b, wiggle);
+ add_bam_record_to_wiggle(b, wiggle);
++cnt;
if (cnt % 1000000 == 0) std::cout<< cnt<< std::endl;
}
+#ifndef WIGGLE_H_
+#define WIGGLE_H_
+
#include <cstdio>
#include <string>
#include <vector>
#include <ostream>
+extern bool no_fractional_weight; // if no_frac_weight == true, each alignment counts as weight 1
+
struct Wiggle {
std::string name;
std::vector<float> read_depth;
void build_wiggles(const std::string& bam_filename,
WiggleProcessor& processor);
+
+#endif