From 5fd66ada6c610981dcecd50e5e41436e0458b110 Mon Sep 17 00:00:00 2001
From: Bo Li <bli@cs.wisc.edu>
Date: Wed, 30 Nov 2011 21:14:06 -0600
Subject: [PATCH] add new files

---
 BamConverter.h               | 230 +++++++++++++++++++++++++++++++++++
 BamWriter.h                  |   2 +-
 EM.cpp                       |   2 +-
 bc_aux.h                     | 120 ++++++++++++++++++
 getUnique.cpp                |  72 +++++++++++
 makefile                     |   8 +-
 rsem-calculate-expression    |  26 ++--
 rsem-gen-transcript-plots    | 124 +++++++++++++++++++
 rsem-plot-transcript-wiggles | 119 ++++++++++++++++++
 rsem-prepare-reference       |   2 +-
 sam_rsem_aux.h               |  48 ++++++++
 sam_rsem_cvt.h               |  92 ++++++++++++++
 tbam2gbam.cpp                |  31 +++++
 wiggle.cpp                   |  35 ++++--
 wiggle.h                     |   1 +
 15 files changed, 886 insertions(+), 26 deletions(-)
 create mode 100644 BamConverter.h
 create mode 100644 bc_aux.h
 create mode 100644 getUnique.cpp
 create mode 100755 rsem-gen-transcript-plots
 create mode 100755 rsem-plot-transcript-wiggles
 create mode 100644 sam_rsem_aux.h
 create mode 100644 sam_rsem_cvt.h
 create mode 100644 tbam2gbam.cpp
diff --git a/BamConverter.h b/BamConverter.h
new file mode 100644
index 0000000..9b54308
--- /dev/null
+++ b/BamConverter.h
@@ -0,0 +1,230 @@
+#ifndef BAMCONVERTER_H_
+#define BAMCONVERTER_H_
+
+#include<cstdio>
+#include<cstring>
+#include<cassert>
+#include<string>
+#include<map>
+
+#include <stdint.h>
+#include "sam/bam.h"
+#include "sam/sam.h"
+#include "sam_rsem_aux.h"
+#include "sam_rsem_cvt.h"
+
+#include "utils.h"
+#include "bc_aux.h"
+#include "Transcript.h"
+#include "Transcripts.h"
+
+class BamConverter {
+public:
+	BamConverter(const char*, const char*, const char*, Transcripts&);
+	~BamConverter();
+
+	void process();
+private:
+	samfile_t *in, *out;
+	Transcripts& transcripts;
+
+	std::map<std::string, int> refmap;
+	std::map<std::string, int>::iterator iter;
+
+	CollapseMap collapseMap;
+
+	void convert(bam1_t*, const Transcript&);
+
+	void writeCollapsedLines();
+	void flipSeq(uint8_t*, int);
+	void flipQual(uint8_t*, int);
+	void addXSTag(bam1_t*, const Transcript&);
+};
+
+BamConverter::BamConverter(const char* inpF, const char* outF, const char* chr_list, Transcripts& transcripts)
+	: transcripts(transcripts)
+{
+	if (transcripts.getType() != 0)
+		exitWithError("Genome information is not provided! RSEM cannot convert the transcript bam file!");
+
+	in = samopen(inpF, "rb", NULL);
+	assert(in != 0);
+
+	bam_header_t *out_header = sam_header_read2(chr_list);
+	refmap.clear();
+	for (int i = 0; i < out_header->n_targets; i++) {
+		refmap[out_header->target_name[i]] = i;
+	}
+
+	append_header_text(out_header, in->header->text, in->header->l_text);
+
+	out = samopen(outF, "wb", out_header);
+	assert(out != 0);
+
+	bam_header_destroy(out_header);
+}
+
+BamConverter::~BamConverter() {
+	samclose(in);
+	samclose(out);
+}
+
+void BamConverter::process() {
+	bam1_t *b, *b2;
+	std::string cqname;
+	bool isPaired = false;
+
+	int cnt = 0;
+
+	cqname = "";
+	b = bam_init1(); b2 = bam_init1();
+
+	while (samread(in, b) >= 0) {
+		++cnt;
+		isPaired = (b->core.flag & 0x0001) > 0;
+		if (isPaired) {
+			assert(samread(in, b2) >= 0 && (b2->core.flag & 0x0001) && b->core.tid == b2->core.tid);
+			assert((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)); // for collapsing
+			++cnt;
+		}
+
+		if (cnt % 1000000 == 0) { printf("."); fflush(stdout); }
+
+		// at least one segment is not properly mapped
+		if ((b->core.flag & 0x0004) || isPaired && (b2->core.flag & 0x0004)) continue;
+
+		const Transcript& transcript = transcripts.getTranscriptAt(b->core.tid + 1);
+
+		convert(b, transcript);
+		if (isPaired) {
+			convert(b2, transcript);
+			b->core.mpos = b2->core.pos;
+			b2->core.mpos = b->core.pos;
+		}
+
+		if (cqname != bam1_qname(b)) {
+			writeCollapsedLines();
+			cqname = bam1_qname(b);
+			collapseMap.init(isPaired);
+		}
+
+		collapseMap.insert(b, b2, bam_aux2f(bam_aux_get(b, "ZW")));
+	}
+
+	writeCollapsedLines();
+
+	bam_destroy1(b);
+	bam_destroy1(b2);
+
+	if (cnt >= 1000000) printf("\n");
+}
+
+void BamConverter::convert(bam1_t* b, const Transcript& transcript) {
+	int pos = b->core.pos;
+	int readlen = b->core.l_qseq;
+
+	if (readlen == 0) exitWithError("One alignment line has SEQ field as *. RSEM does not support this currently!");
+
+	iter = refmap.find(transcript.getSeqName());
+	assert(iter != refmap.end());
+	b->core.tid = iter->second;
+	if (b->core.flag & 0x0001) { b->core.mtid = b->core.tid; }
+	b->core.qual = 255; // set to not available temporarily
+
+	if (transcript.getStrand() == '-') {
+		b->core.flag ^= 0x0010;
+		if (b->core.flag & 0x0001) {
+			b->core.flag ^= 0x0020;
+			b->core.isize = -b->core.isize;
+		}
+		flipSeq(bam1_seq(b), readlen);
+		flipQual(bam1_qual(b), readlen);
+	}
+
+	std::vector<uint32_t> data;
+	data.clear();
+
+	int core_pos, core_n_cigar;
+	tr2chr(transcript, pos + 1, pos + readlen, core_pos, core_n_cigar, data);
+	assert(core_pos >= 0);
+
+	int rest_len = b->data_len - b->core.l_qname - b->core.n_cigar * 4;
+	b->data_len = b->core.l_qname + core_n_cigar * 4 + rest_len;
+	expand_data_size(b);
+	uint8_t* pt = b->data + b->core.l_qname;
+	memmove(pt + core_n_cigar * 4, pt + b->core.n_cigar * 4, rest_len);
+	for (int i = 0; i < core_n_cigar; i++) { memmove(pt, &data[i], 4); pt += 4; }
+
+	b->core.pos = core_pos;
+	b->core.n_cigar = core_n_cigar;
+	b->core.bin = bam_reg2bin(b->core.pos, bam_calend(&(b->core), bam1_cigar(b)));
+
+	addXSTag(b, transcript); // check if need to add XS tag, if need, add it
+}
+
+inline void BamConverter::writeCollapsedLines() {
+	bam1_t *tmp_b = NULL,*tmp_b2 = NULL;
+	float prb;
+	bool isPaired;
+
+	if (!collapseMap.empty(isPaired)) {
+		while (collapseMap.next(tmp_b, tmp_b2, prb)) {
+			memcpy(bam_aux_get(tmp_b, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f'));
+			tmp_b->core.qual = getMAPQ(prb);
+			if (tmp_b->core.qual > 0) {
+				samwrite(out, tmp_b);
+				if (isPaired) {
+					memcpy(bam_aux_get(tmp_b2, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f'));
+					tmp_b2->core.qual = tmp_b->core.qual;
+					samwrite(out, tmp_b2);
+				}
+			}
+			bam_destroy1(tmp_b);
+			if (isPaired) bam_destroy1(tmp_b2);
+
+		}
+	}
+}
+
+inline void BamConverter::flipSeq(uint8_t* s, int readlen) {
+	uint8_t code, base;
+	std::vector<uint8_t> seq;
+
+	code = 0; base = 0;
+	seq.clear();
+	for (int i = 0; i < readlen; i++) {
+		switch (bam1_seqi(s, readlen - i - 1)) {
+		case 1: base = 8; break;
+		case 2: base = 4; break;
+		case 4: base = 2; break;
+		case 8: base = 1; break;
+		case 15: base = 15; break;
+		default: assert(false);
+		}
+		code |=  base << (4 * (1 - i % 2));
+		if (i % 2 == 1) { seq.push_back(code); code = 0; }
+	}
+	if (readlen % 2 == 1) { seq.push_back(code); }
+
+	for (int i = 0; i < (int)seq.size(); i++) s[i] = seq[i];
+}
+
+inline void BamConverter::flipQual(uint8_t* q, int readlen) {
+	int32_t mid = readlen / 2;
+	uint8_t tmp;
+	for (int i = 0; i < mid; i++) {
+		tmp = q[i]; q[i] = q[readlen - i - 1]; q[readlen -i -1] = tmp;
+	}
+}
+
+inline void BamConverter::addXSTag(bam1_t* b, const Transcript& transcript) {
+	uint32_t* p = bam1_cigar(b);
+	bool hasN = false;
+	for (int i = 0; i < (int)b->core.n_cigar; i++)
+		if ((*(p + i) & BAM_CIGAR_MASK) == BAM_CREF_SKIP) { hasN = true; break; }
+	if (!hasN) return;
+	char strand = transcript.getStrand();
+	bam_aux_append(b, "XS", 'A', 1, (uint8_t*)&strand);
+}
+
+#endif /* BAMCONVERTER_H_ */
diff --git a/BamWriter.h b/BamWriter.h
index b0aeee0..ff11397 100644
--- a/BamWriter.h
+++ b/BamWriter.h
@@ -123,7 +123,7 @@ void BamWriter::work(HitWrapper<PairedEndHit> wrapper) {
 		cnt += 2;
 		if (verbose && cnt % 1000000 == 0) { printf("%d alignment lines are loaded!\n", cnt); }
 
-		if (!((b->core.flag & 0x0002) && (b2->core.flag & 0x0002))) continue;
+		if ((b->core.flag & 0x0004) || (b2->core.flag & 0x0004)) continue;
 
 		//swap if b is mate 2
 		if (b->core.flag & 0x0080) {
diff --git a/EM.cpp b/EM.cpp
index 2921ad3..8362f7d 100644
--- a/EM.cpp
+++ b/EM.cpp
@@ -340,7 +340,7 @@ void writeResults(ModelType& model, double* counts) {
 		fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
 	for (int i = 1; i <= M; i++) {
 		const Transcript& transcript = transcripts.getTranscriptAt(i);
-		fprintf(fo, "%s%c", transcript.getLeft().c_str(), (i < M ? '\t' : '\n'));
+		fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
 	}
 	fclose(fo);
 
diff --git a/bc_aux.h b/bc_aux.h
new file mode 100644
index 0000000..0527e7e
--- /dev/null
+++ b/bc_aux.h
@@ -0,0 +1,120 @@
+#ifndef BC_AUX_H_
+#define BC_AUX_H_
+
+#include<map>
+
+#include <stdint.h>
+#include "sam/bam.h"
+
+struct SingleEndT {
+	bam1_t *b;
+
+	SingleEndT(bam1_t *b) {
+		this->b = b;
+	}
+
+	int getSign(bool value) const { return value ? -1 : 1; }
+
+	int compare(const SingleEndT& o) const {
+		int strand1, strand2;
+		uint32_t *p1, *p2;
+
+		if (b->core.tid != o.b->core.tid) return getSign(b->core.tid < o.b->core.tid);
+		if (b->core.pos != o.b->core.pos) return getSign(b->core.pos < o.b->core.pos);
+		strand1 = b->core.flag & 0x0010; strand2 = o.b->core.flag & 0x0010;
+		if (strand1 != strand2) return getSign(strand1 < strand2);
+		if (b->core.n_cigar != o.b->core.n_cigar) return getSign(b->core.n_cigar < o.b->core.n_cigar);
+		p1 = bam1_cigar(b); p2 = bam1_cigar(o.b);
+		for (int i = 0; i < (int)b->core.n_cigar; i++) {
+			if (*p1 != *p2) return getSign(*p1 < *p2);
+			++p1; ++p2;
+		}
+
+		return 0;
+	}
+
+	bool operator< (const SingleEndT& o) const {
+		return compare(o) < 0;
+	}
+};
+
+struct PairedEndT {
+	SingleEndT mate1, mate2;
+
+	PairedEndT(const SingleEndT& mate1, const SingleEndT& mate2) : mate1(mate1), mate2(mate2) {
+	}
+
+	bool operator< (const PairedEndT& o) const {
+		int value = mate1.compare(o.mate1);
+		return value < 0 || value == 0 && mate2 < o.mate2;
+	}
+};
+
+class CollapseMap {
+public:
+	CollapseMap() { isPaired = false; smap.clear(); pmap.clear(); }
+
+	void init(bool isPaired) {
+		this->isPaired = isPaired;
+		isPaired ? pmap.clear() : smap.clear();
+	}
+
+	void insert(bam1_t *b, bam1_t *b2, float prb) {
+		if (!isPaired) {
+			smapIter = smap.find(SingleEndT(b));
+			if (smapIter == smap.end()) { smap[SingleEndT(bam_dup1(b))] = prb; }
+			else smapIter->second += prb;
+		}
+		else {
+			pmapIter = pmap.find(PairedEndT(SingleEndT(b), SingleEndT(b2)));
+			if (pmapIter == pmap.end()) { pmap[PairedEndT(SingleEndT(bam_dup1(b)), SingleEndT(bam_dup1(b2)))] = prb; }
+			else pmapIter->second += prb;
+		}
+	}
+
+	//once this function is called, "insert" cannot be called anymore
+	bool empty(bool& par) {
+		bool value;
+
+		par = isPaired;
+		if (!isPaired) { value = smap.empty(); smapIter = smap.begin(); }
+		else { value = pmap.empty(); pmapIter = pmap.begin(); }
+
+		return value;
+	}
+
+	bool next(bam1_t*& b, bam1_t*& b2, float& prb) {
+		bool value;
+
+		if (!isPaired) {
+			value = smapIter != smap.end();
+			if (value) {
+				b = smapIter->first.b;
+				prb = smapIter->second;
+				smapIter++;
+			}
+		}
+		else {
+			value = pmapIter != pmap.end();
+			if (value) {
+				b = pmapIter->first.mate1.b;
+				b2 = pmapIter->first.mate2.b;
+				prb = pmapIter->second;
+				pmapIter++;
+			}
+		}
+
+		return value;
+	}
+
+private:
+	bool isPaired;
+
+	std::map<SingleEndT, float> smap;
+	std::map<SingleEndT, float>::iterator smapIter;
+
+	std::map<PairedEndT, float> pmap;
+	std::map<PairedEndT, float>::iterator pmapIter;
+};
+
+#endif /* BC_AUX_H_ */
diff --git a/getUnique.cpp b/getUnique.cpp
new file mode 100644
index 0000000..8e2ba4e
--- /dev/null
+++ b/getUnique.cpp
@@ -0,0 +1,72 @@
+#include<cstdio>
+#include<cstring>
+#include<cstdlib>
+#include<cassert>
+#include<string>
+#include<vector>
+
+#include <stdint.h>
+#include "sam/bam.h"
+#include "sam/sam.h"
+
+using namespace std;
+
+string cqname;
+samfile_t *in, *out;
+bam1_t *b;
+vector<bam1_t*> arr;
+bool unaligned;
+
+void output() {
+	if (unaligned || arr.size() == 0) return;
+	bool isPaired = (arr[0]->core.flag & 0x0001);
+	if (isPaired && arr.size() != 2 || !isPaired && arr.size() != 1) return;
+	for (int i = 0; i < (int)arr.size(); i++) samwrite(out, arr[i]);
+}
+
+int main(int argc, char* argv[]) {
+	if (argc != 3) {
+		printf("Usage: rsem-get-unique unsorted_transcript_bam_input bam_output\n");
+		exit(-1);
+	}
+
+	in = samopen(argv[1], "rb", NULL);
+	assert(in != 0);
+	out = samopen(argv[2], "wb", in->header);
+	assert(out != 0);
+
+	int cnt = 0;
+
+	cqname = "";
+	arr.clear();
+	b = bam_init1();
+	unaligned = false;
+
+	while (samread(in, b) >= 0) {
+		if (cqname != bam1_qname(b)) {
+			output();
+			cqname = bam1_qname(b);
+			for (int i = 0; i < (int)arr.size(); i++) bam_destroy1(arr[i]);
+			arr.clear();
+			unaligned = false;
+		}
+
+		unaligned = unaligned || (b->core.flag & 0x0004);
+		arr.push_back(bam_dup1(b));
+
+		++cnt;
+		if (cnt % 1000000 == 0) { printf("."); fflush(stdout); }
+	}
+
+	if (cnt >= 1000000) printf("\n");
+
+	output();
+
+	bam_destroy1(b);
+	samclose(in);
+	samclose(out);
+
+	printf("done!\n");
+
+	return 0;
+}
diff --git a/makefile b/makefile
index e8b1318..6b11536 100644
--- a/makefile
+++ b/makefile
@@ -1,8 +1,7 @@
 CC = g++
-#LFLAGS = -Wall -O3 -ffast-math
 CFLAGS = -Wall -c -I.
 COFLAGS = -Wall -O3 -ffast-math -c -I.
-PROGRAMS = rsem-tbam2gbam rsem-bam2wig rsem-bam2readdepth rsem-build-read-index rsem-run-em rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-parse-alignments rsem-preref rsem-simulate-reads rsem-run-gibbs rsem-calculate-credibility-intervals
+PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth  
 
 
 all : build-sam $(PROGRAMS)
@@ -94,7 +93,7 @@ bc_aux.h : sam/bam.h
 
 BamConverter.h : utils.h sam/sam.h sam/bam.h sam_rsem_aux.h sam_rsem_cvt.h bc_aux.h Transcript.h Transcripts.h
 
-rsem-tbam2gbam : utils.h Transcripts.h Transcript.h bc_aux.h BamConverter.h sam/sam.h sam/bam.h sam/libbam.a sam_rsem_aux.h sam_rsem_cvt.h tbam2gbam.cpp
+rsem-tbam2gbam : utils.h Transcripts.h Transcript.h bc_aux.h BamConverter.h sam/sam.h sam/bam.h sam/libbam.a sam_rsem_aux.h sam_rsem_cvt.h tbam2gbam.cpp sam/libbam.a
 	$(CC) -O3 -Wall tbam2gbam.cpp sam/libbam.a -lz -o $@
 
 rsem-bam2wig : wiggle.h wiggle.o sam/libbam.a bam2wig.cpp
@@ -126,6 +125,9 @@ rsem-calculate-credibility-intervals : calcCI.o
 calcCI.o : utils.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h RefSeq.h RefSeqPolicy.h PolyARules.h Refs.h GroupInfo.h calcCI.cpp boost/random.hpp
 	$(CC) $(COFLAGS) calcCI.cpp
 
+rsem-get-unique : sam/bam.h sam/sam.h getUnique.cpp sam/libbam.a
+	$(CC) -O3 -Wall getUnique.cpp sam/libbam.a -lz -o $@
+	
 clean:
 	rm -f *.o *~ $(PROGRAMS)
 	cd sam ; ${MAKE} clean
diff --git a/rsem-calculate-expression b/rsem-calculate-expression
index 6f9706e..774065a 100755
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -334,7 +334,7 @@ sub runCommand {
     if ($status != 0) { 
 	my $errmsg;
 	if (scalar(@_) > 1) { $errmsg = $_[1]; }
-	else { $errmsg = "$command failed! Plase check if you provide correct parameters/options for the pipeline!"; }
+	else { $errmsg = "\"$command\" failed! Plase check if you provide correct parameters/options for the pipeline!"; }
 	print $errmsg."\n";
 	exit(-1);
     }
@@ -345,7 +345,7 @@ sub runCommand {
 sub collectResults {
     my $local_status;
     my ($inpF, $outF);
-    my (@results, @comment) = ();
+    my (@results, @ids) = ();
     my $line;
     my $cnt;
 
@@ -362,11 +362,11 @@ sub collectResults {
 	++$cnt;
 	chomp($line);
 	my @local_arr = split(/\t/, $line);
-	if ($cnt == 4) { @comment = @local_arr; }
+	if ($cnt == 4) { @ids = @local_arr; }
 	else { push(@results, \@local_arr); }
     }
     
-    push(@results, \@comment);
+    push(@results, \@ids);
     close(INPUT);
 
     $local_status = open(OUTPUT, ">$outF");
@@ -589,20 +589,20 @@ estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
 means the lower bound of the credibility intervals, ci_upper_bound(u)
 means the upper bound of the credibility intervals. So the credibility
 interval is [l, u]. 'transcript_id_list' is a space-separated list of
-transcript_ids belonging to the gene.
+transcript_ids belonging to the gene. If no gene information is
+provided, this file has the same content as
+'sample_name.isoforms.results'.
 
 =item B<sample_name.isoforms.results> 
 
 File containing isoform level expression values. The format of each
 line in this file is:
 
-transcript_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] other_attributes
+transcript_id expected_counts tau_value [pmc_value tau_pme_value tau_ci_lower_bound tau_ci_upper_bound] gene_id
 
-Fields are separated by the tab character. 'other_attributes' are all
-other attributes after attribute 'transcript_id' field in the GTF
-file. If no other attributes are given or no GTF file is provided in
-'rsem-prepare-reference', there will be no tab after the
-tau_value field.
+Fields are separated by the tab character. 'gene_id' is the gene_id of
+the gene which this transcript belongs to. If no gene information is
+provided, 'gene_id' and 'transcript_id' are the same.
 
 =item B<sample_name.transcript.bam, sample_name.transcript.sorted.bam and sample_name.transcript.sorted.bam.bai>
 
@@ -630,7 +630,9 @@ of each alignment is set to min(100, floor(-10 * log10(1.0 - w) +
 0.5)), where w is the posterior probability of that alignment being
 the true mapping of a read.  In addition, RSEM pads a new tag
 ZW:f:value, where value is a single precision floating number
-representing the posterior probability.
+representing the posterior probability. If an alignment is spliced, a
+XS:A:value tag is also added, where value is either '+' or '-'
+indicating the strand of the transcript it aligns to.
 
 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai' are the
 sorted BAM file and indices generated by samtools (included in RSEM package).
diff --git a/rsem-gen-transcript-plots b/rsem-gen-transcript-plots
new file mode 100755
index 0000000..6f7ae10
--- /dev/null
+++ b/rsem-gen-transcript-plots
@@ -0,0 +1,124 @@
+#!/usr/bin/env Rscript
+
+nrow_per_page <- 3 # if input_list is composed of transcript ids
+ncol_per_page <- 2 # if input_list is composed of transcript ids
+num_plots_per_page <- nrow_per_page * ncol_per_page # if input_list is composed of transcript ids
+
+
+exit_with_error <- function(errmsg) {
+  cat(errmsg, "\n", sep = "", file = stderr())
+  quit(save = "no", status = 1)
+}
+
+
+args <- commandArgs(TRUE)
+if (length(args) != 5) 
+  exit_with_error("Usage: rsem-gen-transcript-plots sample_name input_list is_gene show_uniq output_plot_file")
+
+sample_name <- args[1]
+input_list <- args[2]
+is_gene <- args[3]
+show_uniq <- args[4]
+output_plot_file <- args[5]
+
+
+
+load_readdepth_file <- function(filename) {
+  data <- read.table(file = filename, sep = "\t", stringsAsFactors = FALSE)
+  nrow <- dim(data)[1]
+  readdepth <- list()
+  for (i in 1:nrow) {
+    readdepth[[data[i, 1]]] <- data[i, c(2, 3)]
+  }
+  readdepth
+}
+
+build_t2gmap <- function(filename) {
+  data <- read.table(file = filename, sep = "\t", stringsAsFactors = FALSE)
+  t2gmap <- list()
+
+  nrow <- dim(data)[1]
+  ncol <- dim(data)[2]
+
+  gene_id <- ""
+  tids <- c()
+  for (i in 1:nrow) {
+    if (gene_id != data[i, ncol]) {
+      if (gene_id != "") { 
+        t2gmap[[gene_id]] <- tids
+      }
+      gene_id <- data[i, ncol]
+      tids <- c()
+    }
+    tids <- c(tids, data[i, 1])
+  }
+  if (gene_id != "") t2gmap[[gene_id]] <- tids
+  
+  t2gmap
+}
+
+generate_a_page <- function(tids, gene_id = NULL) {
+  n <- length(tids)
+  ncol <- ifelse(is_gene, floor(sqrt(n)), ncol_per_page)
+  nrow <- ifelse(is_gene, ceiling(n / ncol), nrow_per_page)
+
+  par(mfrow = c(nrow, ncol), mar = c(1, 1, 2, 1))
+  if (is_gene) par(oma = c(0, 0, 3, 0)) 
+
+  for (i in 1:n) {
+    vec <- readdepth[[tids[i]]]
+    if (is.null(vec)) exit_with_error(paste("Cannot find transcript", tids[i], sep = ""))
+    wiggle <- ifelse(vec[2] == "NA", rep(0, vec[1]), as.numeric(unlist(strsplit(vec[2], split = " "))))
+    len <- length(wiggle)
+    if (!show_uniq) {
+      plot(wiggle, type = 'h')
+    }
+    else {
+      vec <- readdepth_uniq[[tids[i]]]
+      stopifnot(!is.null(vec))
+      wiggle_uniq <- ifelse(vec[2] == "NA", rep(0, vec[1]), as.numeric(unlist(strsplit(vec[2], split = " "))))
+      stopifnot(len == length(wiggle_uniq), len == sum(wiggle >= wiggle_uniq))
+      heights <- rbind(wiggle_uniq, wiggle - wiggle_uniq)	
+      barplot(heights, space = 0, border = NA, names.arg = 1:len) 
+    }
+    title(main = tids[i], xlab = "Position in transcript", ylab = "Read depth")
+  }
+
+  if (is_gene) mtext(gene_id, outer = TRUE, line = 1)
+}
+
+readdepth <- load_readdepth_file(paste(sample_name, ".transcript.readdepth", sep = ""))
+
+if (show_uniq) {
+  readdepth_uniq <- load_readdepth_file(paste(sample_name, ".uniq.transcript.readdepth", sep = ""))
+}
+
+ids <- scan(file = input_list, what = "", sep = "\n")
+
+if (is_gene) {
+  t2gmap <- build_t2gmap(paste(sample_name, ".isoforms.results", sep = ""))
+}
+
+pdf(output_plot_file)
+
+if (!is_gene) {	
+  n <- length(ids)
+  ub <- (n - 1) %/% num_plots_per_page + 1
+  for (i in 1:ub) {
+    fr <- (i - 1) * num_plots_per_page + 1
+    to <- min(i * num_plots_per_page, n)
+    generate_a_page(ids[fr:to])
+  }
+}
+else {
+  for (gene_id in ids) {
+    if (is.null(t2gmap[[gene_id]])) exit_with_error(paste("Cannot find gene", gene_id, sep = ""))
+    generate_a_page(t2gmap[[gene_id]], gene_id)
+  }
+}
+
+dev.off.output <- dev.off()
+
+
+
+
diff --git a/rsem-plot-transcript-wiggles b/rsem-plot-transcript-wiggles
new file mode 100755
index 0000000..9180cb7
--- /dev/null
+++ b/rsem-plot-transcript-wiggles
@@ -0,0 +1,119 @@
+#!/usr/bin/perl
+
+use Getopt::Long;
+use Pod::Usage;
+use File::Basename;
+use strict;
+
+my $gene_list = 0; # default is 0, means input is a transcript list; 1 means input is a gene list
+my $show_unique = 0; # 0, default value, means do not show unique transcript wiggles; 1 means show unique transcript wiggles
+my $help = 0;
+
+GetOptions("gene-list" => \$gene_list,
+	   "show-unique" => \$show_unique,
+	   "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
+
+pod2usage(-verbose => 2) if ($help == 1);
+pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 3);
+
+my ($fn, $dir, $suf) = fileparse($0);
+my $command = "";
+
+unless (-e "$ARGV[0].transcript.readdepth") {
+    $command = $dir."rsem-bam2readdepth $ARGV[0].transcript.sorted.bam > $ARGV[0].transcript.readdepth";
+    &runCommand($command);
+}
+
+if ($show_unique) {
+    unless (-e "$ARGV[0].uniq.transcript.bam") {
+	$command = $dir."rsem-get-unique $ARGV[0].transcript.bam $ARGV[0].uniq.transcript.bam";
+	&runCommand($command);
+    }
+    unless (-e "$ARGV[0].uniq.transcript.sorted.bam") {
+	$command = $dir."sam/samtools sort $ARGV[0].uniq.transcript.bam $ARGV[0].uniq.transcript.sorted";
+	&runCommand($command);
+    }
+    unless (-e "$ARGV[0].uniq.transcript.readdepth") {
+	$command = $dir."rsem-bam2readdepth $ARGV[0].uniq.transcript.sorted.bam > $ARGV[0].uniq.transcript.readdepth";
+	&runCommand($command);
+    }
+}
+
+$command = $dir."rsem-gen-transcript-plots $ARGV[0] $ARGV[1] $gene_list $show_unique $ARGV[2]";
+&runCommand($command);
+
+# command, {err_msg}
+sub runCommand {
+    print $_[0]."\n";
+    my $status = system($_[0]);
+    if ($status != 0) { 
+	my $errmsg;
+	if (scalar(@_) > 1) { $errmsg = $_[1]; }
+	else { $errmsg = "\"$command\" failed! Plase check if you provide correct parameters/options for the pipeline!"; }
+	print $errmsg."\n";
+	exit(-1);
+    }
+    print "\n";
+}
+
+__END__
+
+=head1 NAME
+
+rsem-plot-transcript-wiggles
+
+=head1 SYNOPSIS
+
+=over
+
+ rsem-plot-transcript-wiggles [options] sample_name input_list output_plot_file
+
+=back
+
+=head1 ARGUMENTS
+
+=over
+
+=item B<sample_name>
+
+blablabla
+
+=item B<input_list>
+
+blablabla
+
+=item B<output_plot_file>
+
+blablabla
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item B<--gene-list>
+
+blablabla
+
+=item B<--show-unique>
+
+blablabla
+
+=item B<-h/--help>
+
+Show help information.
+
+=back
+
+=head1 DESCRIPTION
+
+blablabla
+
+=head1 OUTPUT
+
+blablabla
+
+=head1 EXAMPLES
+
+=cut
diff --git a/rsem-prepare-reference b/rsem-prepare-reference
index 6eb6b60..c4527be 100755
--- a/rsem-prepare-reference
+++ b/rsem-prepare-reference
@@ -100,7 +100,7 @@ sub runCommand {
     if ($status != 0) { 
 	my $errmsg;
 	if (scalar(@_) > 1) { $errmsg = $_[1]; }
-	else { $errmsg = "$command failed! Plase check if you provide correct parameters/options for the pipeline!"; }
+	else { $errmsg = "\"$command\" failed! Plase check if you provide correct parameters/options for the pipeline!"; }
 	print $errmsg."\n";
 	exit(-1);
     }
diff --git a/sam_rsem_aux.h b/sam_rsem_aux.h
new file mode 100644
index 0000000..5b4f9cd
--- /dev/null
+++ b/sam_rsem_aux.h
@@ -0,0 +1,48 @@
+#ifndef SAM_RSEM_AUX_H_
+#define SAM_RSEM_AUX_H_
+
+#include<cstdio>
+#include<cstring>
+#include<stdint.h>
+
+#include "sam/bam.h"
+
+// dwt: duplicate without text
+bam_header_t *bam_header_dwt(const bam_header_t *ori_h)
+{
+	bam_header_t *h;
+
+	h = bam_header_init();
+	h->n_targets = ori_h->n_targets;
+	h->target_len = (uint32_t*)calloc(h->n_targets, 4);
+	h->target_name = (char**)calloc(h->n_targets, sizeof(char*));
+	for (int i = 0; i < h->n_targets; i++) {
+		h->target_len[i] = ori_h->target_len[i];
+		h->target_name[i] = strdup(ori_h->target_name[i]);
+	}
+
+	return h;
+}
+
+void append_header_text(bam_header_t *header, const char* text, int len)
+{
+	int x = header->l_text + 1;
+	int y = header->l_text + len + 1; // 1 byte null
+	if (text == 0) return;
+	kroundup32(x);
+	kroundup32(y);
+	if (x < y) header->text = (char*)realloc(header->text, y);
+	strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here.
+	header->l_text += len;
+	header->text[header->l_text] = 0;
+}
+
+void expand_data_size(bam1_t *b) {
+	if (b->m_data < b->data_len) {
+		b->m_data = b->data_len;
+		kroundup32(b->m_data);
+		b->data = (uint8_t*)realloc(b->data, b->m_data);
+	}
+}
+
+#endif /* SAM_RSEM_AUX_H_ */
diff --git a/sam_rsem_cvt.h b/sam_rsem_cvt.h
new file mode 100644
index 0000000..4347ef8
--- /dev/null
+++ b/sam_rsem_cvt.h
@@ -0,0 +1,92 @@
+#ifndef SAM_RSEM_CVT_H_
+#define SAM_RSEM_CVT_H_
+
+#include<vector>
+
+#include "stdint.h"
+#include "sam/bam.h"
+
+#include "Transcript.h"
+#include "Transcripts.h"
+
+uint8_t getMAPQ(double val) {
+	double err = 1.0 - val;
+	if (err <= 1e-10) return 100;
+	return (uint8_t)(-10 * log10(err) + .5); // round it
+}
+
+//convert transcript coordinate to chromosome coordinate and generate CIGAR string
+void tr2chr(const Transcript& transcript, int sp, int ep, int& pos, int& n_cigar, std::vector<uint32_t>& data) {
+	int length = transcript.getLength();
+	char strand = transcript.getStrand();
+	const std::vector<Interval>& structure = transcript.getStructure();
+
+	int s, i;
+	int oldlen, curlen;
+
+	uint32_t operation;
+
+	n_cigar = 0;
+	s = structure.size();
+
+	if (strand == '-') {
+		int tmp = sp;
+		sp = length - ep + 1;
+		ep = length - tmp + 1;
+	}
+
+	if (ep < 1 || sp > length) { // a read which align to polyA tails totally!
+	  pos = (sp > length ? structure[s - 1].end : structure[0].start - 1); // 0 based
+
+	  n_cigar = 1;
+	  operation = (ep - sp + 1) << BAM_CIGAR_SHIFT | BAM_CINS; //BAM_CSOFT_CLIP;
+	  data.push_back(operation);
+
+	  return;
+	}
+
+	if (sp < 1) {
+		n_cigar++;
+		operation = (1 - sp) << BAM_CIGAR_SHIFT | BAM_CINS; //BAM_CSOFT_CLIP;
+		data.push_back(operation);
+		sp = 1;
+	}
+
+	oldlen = curlen = 0;
+
+	for (i = 0; i < s; i++) {
+		oldlen = curlen;
+		curlen += structure[i].end - structure[i].start + 1;
+		if (curlen >= sp) break;
+	}
+	assert(i < s);
+	pos = structure[i].start + (sp - oldlen - 1) - 1; // 0 based
+
+	while (curlen < ep && i < s) {
+		n_cigar++;
+		operation = (curlen - sp + 1) << BAM_CIGAR_SHIFT | BAM_CMATCH;
+		data.push_back(operation);
+		++i;
+		if (i >= s) continue;
+		n_cigar++;
+		operation = (structure[i].start - structure[i - 1].end - 1) << BAM_CIGAR_SHIFT | BAM_CREF_SKIP;
+		data.push_back(operation);
+
+		oldlen = curlen;
+		sp = oldlen + 1;
+		curlen += structure[i].end - structure[i].start + 1;
+	}
+
+	if (i >= s) {
+		n_cigar++;
+		operation = (ep - length) << BAM_CIGAR_SHIFT | BAM_CINS; //BAM_CSOFT_CLIP;
+		data.push_back(operation);
+	}
+	else {
+		n_cigar++;
+		operation = (ep - sp + 1) << BAM_CIGAR_SHIFT | BAM_CMATCH;
+		data.push_back(operation);
+	}
+}
+
+#endif /* SAM_RSEM_CVT_H_ */
diff --git a/tbam2gbam.cpp b/tbam2gbam.cpp
new file mode 100644
index 0000000..18688d0
--- /dev/null
+++ b/tbam2gbam.cpp
@@ -0,0 +1,31 @@
+#include<cstdio>
+#include<cstring>
+#include<cstdlib>
+
+#include "utils.h"
+#include "Transcripts.h"
+#include "BamConverter.h"
+
+using namespace std;
+
+char tiF[STRLEN], chr_list[STRLEN];
+Transcripts transcripts;
+
+int main(int argc, char* argv[]) {
+	if (argc != 4) {
+		printf("Usage: rsem-tbam2gbam reference_name unsorted_transcript_bam_input genome_bam_output\n");
+		exit(-1);
+	}
+
+	sprintf(tiF, "%s.ti", argv[1]);
+	sprintf(chr_list, "%s.chrlist", argv[1]);
+	transcripts.readFrom(tiF);
+
+	printf("Start converting:\n");
+	BamConverter bc(argv[2], argv[3], chr_list, transcripts);
+	bc.process();
+	printf("Genome bam file is generated!\n");
+
+	return 0;
+}
+
diff --git a/wiggle.cpp b/wiggle.cpp
index 19f52b4..65a3d4a 100644
--- a/wiggle.cpp
+++ b/wiggle.cpp
@@ -36,7 +36,10 @@ void build_wiggles(const std::string& bam_filename,
                    WiggleProcessor& processor) {
     samfile_t *bam_in = samopen(bam_filename.c_str(), "rb", NULL);
 	if (bam_in == 0) { fprintf(stderr, "Cannot open %s!\n", bam_filename.c_str()); exit(-1); }
-	//assert(bam_in != 0);
+
+	bam_header_t *header = bam_in->header;
+	bool *used = new bool[header->n_targets];
+	memset(used, 0, sizeof(bool) * header->n_targets);
 
     int cur_tid = -1; //current tid;
 	int cnt = 0;
@@ -46,19 +49,29 @@ void build_wiggles(const std::string& bam_filename,
 		if (b->core.flag & 0x0004) continue;
 
 		if (b->core.tid != cur_tid) {
-			if (cur_tid >= 0) processor.process(wiggle);
+			if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }
 			cur_tid = b->core.tid;
-            wiggle.name = bam_in->header->target_name[cur_tid];
-            wiggle.read_depth.assign(bam_in->header->target_len[cur_tid], 0.0);
+            wiggle.name = header->target_name[cur_tid];
+            wiggle.length = header->target_len[cur_tid];
+            wiggle.read_depth.assign(wiggle.length, 0.0);
 		}
         add_bam_record_to_wiggle(b, wiggle);
 		++cnt;
 		if (cnt % 1000000 == 0) fprintf(stderr, "%d FIN\n", cnt);
 	}
-	if (cur_tid >= 0) processor.process(wiggle);
+	if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }
+
+	for (int32_t i = 0; i < header->n_targets; i++)
+		if (!used[i]) {
+			wiggle.name = header->target_name[i];
+			wiggle.length = header->target_len[i];
+			wiggle.read_depth.clear();
+			processor.process(wiggle);
+		}
 
 	samclose(bam_in);
 	bam_destroy1(b);
+	delete[] used;
 }
 
 UCSCWiggleTrackWriter::UCSCWiggleTrackWriter(const std::string& output_filename,
@@ -75,9 +88,11 @@ UCSCWiggleTrackWriter::~UCSCWiggleTrackWriter() {
 
 void UCSCWiggleTrackWriter::process(const Wiggle& wiggle) {
     int sp, ep;
+
+    if (wiggle.read_depth.empty()) return;
     
     sp = ep = -1;
-    for (size_t i = 0; i < wiggle.read_depth.size(); i++) {
+    for (size_t i = 0; i < wiggle.length; i++) {
         if (wiggle.read_depth[i] > 0) {
             ep = i;
         }
@@ -102,9 +117,13 @@ ReadDepthWriter::ReadDepthWriter(std::ostream& stream)
 }
 
 void ReadDepthWriter::process(const Wiggle& wiggle) {
+
     stream_ << wiggle.name << '\t'
-            << wiggle.read_depth.size() << '\t';
-    for (size_t i = 0; i < wiggle.read_depth.size(); ++i) {
+            << wiggle.length << '\t';
+
+    if (wiggle.read_depth.empty()) { stream_ << "NA\n"; return; }
+
+    for (size_t i = 0; i < wiggle.length; ++i) {
         if (i > 0) stream_ << ' ';
         stream_ << wiggle.read_depth[i];
     }
diff --git a/wiggle.h b/wiggle.h
index 09cc4f9..1f37592 100644
--- a/wiggle.h
+++ b/wiggle.h
@@ -6,6 +6,7 @@
 struct Wiggle {
     std::string name;
     std::vector<float> read_depth;
+    size_t length;
 };
 
 class WiggleProcessor {
-- 
2.39.2