From 4f7502168c3816ba3283385f093e599527e2b144 Mon Sep 17 00:00:00 2001
From: Bo Li <bli@cs.wisc.edu>
Date: Fri, 21 Mar 2014 11:27:31 -0500
Subject: [PATCH] Added support for allele-specific expression estimation

---
 EM.cpp                       | 192 +----------
 Gibbs.cpp                    | 173 ++--------
 README.md                    |   2 +-
 Transcript.h                 |   6 +-
 Transcripts.h                |  12 +-
 WHAT_IS_NEW                  |   7 +
 WriteResults.h               | 624 +++++++++++++++++++++++++++++++++++
 calcCI.cpp                   | 189 +++++++----
 makefile                     |  12 +-
 rsem-calculate-expression    | 121 +++++--
 rsem-gen-transcript-plots    |  84 ++---
 rsem-plot-model              |   2 +-
 rsem-plot-transcript-wiggles |  32 +-
 rsem-prepare-reference       |  19 ++
 rsem_perl_utils.pm           |   6 +-
 simulation.cpp               | 150 +--------
 synthesisRef.cpp             | 134 +++++---
 17 files changed, 1100 insertions(+), 665 deletions(-)
 create mode 100644 WriteResults.h
diff --git a/EM.cpp b/EM.cpp
index 59783f3..bf15c2b 100644
--- a/EM.cpp
+++ b/EM.cpp
@@ -44,6 +44,8 @@
 #include "HitWrapper.h"
 #include "BamWriter.h"
 
+#include "WriteResults.h"
+
 using namespace std;
 
 const double STOP_CRITERIA = 0.001;
@@ -68,7 +70,7 @@ bool genGibbsOut; // generate file for Gibbs sampler
 
 char refName[STRLEN], outName[STRLEN];
 char imdName[STRLEN], statName[STRLEN];
-char refF[STRLEN], groupF[STRLEN], cntF[STRLEN], tiF[STRLEN];
+char refF[STRLEN], cntF[STRLEN], tiF[STRLEN];
 char mparamsF[STRLEN];
 char modelF[STRLEN], thetaF[STRLEN];
 
@@ -83,7 +85,6 @@ vector<double> theta, eel; // eel : expected effective length
 double *probv, **countvs;
 
 Refs refs;
-GroupInfo gi;
 Transcripts transcripts;
 
 ModelParams mparams;
@@ -271,185 +272,11 @@ void* calcConProbs(void* arg) {
 	return NULL;
 }
 
-template<class ModelType>
-void calcExpectedEffectiveLengths(ModelType& model) {
-	int lb, ub, span;
-	double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
-  
-	model.getGLD().copyTo(pdf, cdf, lb, ub, span);
-	clen = new double[span + 1];
-	clen[0] = 0.0;
-	for (int i = 1; i <= span; i++) {
-		clen[i] = clen[i - 1] + pdf[i] * (lb + i);
-	}
-
-	eel.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++) {
-		int totLen = refs.getRef(i).getTotLen();
-		int fullLen = refs.getRef(i).getFullLen();
-		int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
-		int pos2 = max(min(totLen, ub) - lb, 0);
-
-		if (pos2 == 0) { eel[i] = 0.0; continue; }
-    
-		eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
-		assert(eel[i] >= 0);
-		if (eel[i] < MINEEL) { eel[i] = 0.0; }
-	}
-  
-	delete[] pdf;
-	delete[] cdf;
-	delete[] clen;
-}
-
-void polishTheta(vector<double>& theta, const vector<double>& eel, const double* mw) {
-	double sum = 0.0;
-
-	/* The reason that for noise gene, mw value is 1 is :
-	 * currently, all masked positions are for poly(A) sites, which in theory should be filtered out.
-	 * So the theta0 does not containing reads from any masked position
-	 */
-
-	for (int i = 0; i <= M; i++) {
-		// i == 0, mw[i] == 1
-		if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
-			theta[i] = 0.0;
-			continue;
-		}
-		theta[i] = theta[i] / mw[i];
-		sum += theta[i];
-	}
-	// currently is OK, since no transcript should be masked totally, only the poly(A) tail related part will be masked
-	general_assert(sum >= EPSILON, "No effective length is no less than" + ftos(MINEEL, 6) + " !");
-	for (int i = 0; i <= M; i++) theta[i] /= sum;
-}
-
-void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
-	double denom;
-	vector<double> frac;
-
-	//calculate fraction of count over all mappabile reads
-	denom = 0.0;
-	frac.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++) 
-	  if (eel[i] >= EPSILON) {
-	    frac[i] = theta[i];
-	    denom += frac[i];
-	  }
-	general_assert(denom >= EPSILON, "No alignable reads?!");
-	for (int i = 1; i <= M; i++) frac[i] /= denom;
-  
-	//calculate FPKM
-	fpkm.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++)
-		if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
-
-	//calculate TPM
-	tpm.assign(M + 1, 0.0);
-	denom = 0.0;
-	for (int i = 1; i <= M; i++) denom += fpkm[i];
-	for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;  
-}
-
 template<class ModelType>
 void writeResults(ModelType& model, double* counts) {
-	char outF[STRLEN];
-	FILE *fo;
-
-	sprintf(modelF, "%s.model", statName);
-	model.write(modelF);
-
-	vector<int> tlens;
-	vector<double> fpkm, tpm, isopct;
-	vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
-
-	calcExpressionValues(theta, eel, tpm, fpkm);
-
-	//calculate IsoPct, etc.
-	isopct.assign(M + 1, 0.0);
-	tlens.assign(M + 1, 0);
-
-	glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
-	gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
-
-	for (int i = 0; i < m; i++) {
-		int b = gi.spAt(i), e = gi.spAt(i + 1);
-		for (int j = b; j < e; j++) {
-			const Transcript& transcript = transcripts.getTranscriptAt(j);
-			tlens[j] = transcript.getLength();
-
-			gene_counts[i] += counts[j];
-			gene_tpm[i] += tpm[j];
-			gene_fpkm[i] += fpkm[j];
-		}
-
-		if (gene_tpm[i] < EPSILON) {
-			double frac = 1.0 / (e - b);
-			for (int j = b; j < e; j++) {
-				glens[i] += tlens[j] * frac;
-				gene_eels[i] += eel[j] * frac;
-			}
-		}
-		else {
-			for (int j = b; j < e; j++) {
-				isopct[j] = tpm[j] / gene_tpm[i];
-				glens[i] += tlens[j] * isopct[j];
-				gene_eels[i] += eel[j] * isopct[j];
-			}
-		}
-	}
-
-	//isoform level results
-	sprintf(outF, "%s.iso_res", imdName);
-	fo = fopen(outF, "w");
-	for (int i = 1; i <= M; i++) {
-		const Transcript& transcript = transcripts.getTranscriptAt(i);
-		fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < M ? '\t' : '\n'));
-	}
-	for (int i = 1; i <= M; i++) {
-		const Transcript& transcript = transcripts.getTranscriptAt(i);
-		fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
-	}
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%d%c", tlens[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", eel[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", tpm[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", fpkm[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
-	fclose(fo);
-
-	//gene level results
-	sprintf(outF, "%s.gene_res", imdName);
-	fo = fopen(outF, "w");
-	for (int i = 0; i < m; i++) {
-		const Transcript& transcript = transcripts.getTranscriptAt(gi.spAt(i));
-		fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < m - 1 ? '\t' : '\n'));
-	}
-	for (int i = 0; i < m; i++) {
-		int b = gi.spAt(i), e = gi.spAt(i + 1);
-		for (int j = b; j < e; j++) {
-			fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : (i < m - 1 ? '\t' :'\n')));
-		}
-	}
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", glens[i], (i < m - 1 ? '\t' : '\n'));
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", gene_eels[i], (i < m - 1 ? '\t' : '\n'));
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
-	fclose(fo);
-
-	if (verbose) { printf("Expression Results are written!\n"); }
+  sprintf(modelF, "%s.model", statName);
+  model.write(modelF);
+  writeResultsEM(M, refName, imdName, transcripts, theta, eel, countvs[0]);
 }
 
 template<class ReadType, class HitType, class ModelType>
@@ -658,8 +485,8 @@ void EM() {
 	fprintf(fo, "%.15g\n", theta[M]);
 	
 	//calculate expected effective lengths for each isoform
-	calcExpectedEffectiveLengths<ModelType>(model);
-	polishTheta(theta, eel, model.getMW());
+	calcExpectedEffectiveLengths<ModelType>(M, refs, model, eel);
+	polishTheta(M, theta, eel, model.getMW());
 
 	// output theta
 	for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
@@ -762,9 +589,6 @@ int main(int argc, char* argv[]) {
 	sprintf(refF, "%s.seq", refName);
 	refs.loadRefs(refF);
 	M = refs.getM();
-	sprintf(groupF, "%s.grp", refName);
-	gi.load(groupF);
-	m = gi.getm();
 
 	sprintf(tiF, "%s.ti", refName);
 	transcripts.readFrom(tiF);
diff --git a/Gibbs.cpp b/Gibbs.cpp
index b0287a3..e7a1182 100644
--- a/Gibbs.cpp
+++ b/Gibbs.cpp
@@ -18,7 +18,9 @@
 #include "PairedEndQModel.h"
 
 #include "Refs.h"
+
 #include "GroupInfo.h"
+#include "WriteResults.h"
 
 using namespace std;
 
@@ -44,17 +46,16 @@ struct Item {
 int nThreads;
 
 int model_type;
-int m, M;
+int M;
 READ_INT_TYPE N0, N1;
 HIT_INT_TYPE nHits;
 double totc;
 int BURNIN, NSAMPLES, GAP;
-char imdName[STRLEN], statName[STRLEN];
-char thetaF[STRLEN], ofgF[STRLEN], groupF[STRLEN], refF[STRLEN], modelF[STRLEN];
+char refName[STRLEN], imdName[STRLEN], statName[STRLEN];
+char thetaF[STRLEN], ofgF[STRLEN], refF[STRLEN], modelF[STRLEN];
 char cvsF[STRLEN];
 
 Refs refs;
-GroupInfo gi;
 
 vector<HIT_INT_TYPE> s;
 vector<Item> hits;
@@ -73,21 +74,16 @@ pthread_t *threads;
 pthread_attr_t attr;
 int rc;
 
-void load_data(char* reference_name, char* statName, char* imdName) {
+void load_data(char* refName, char* statName, char* imdName) {
 	ifstream fin;
 	string line;
 	int tmpVal;
 
 	//load reference file
-	sprintf(refF, "%s.seq", reference_name);
+	sprintf(refF, "%s.seq", refName);
 	refs.loadRefs(refF, 1);
 	M = refs.getM();
 
-	//load groupF
-	sprintf(groupF, "%s.grp", reference_name);
-	gi.load(groupF);
-	m = gi.getm();
-
 	//load ofgF;
 	sprintf(ofgF, "%s.ofg", imdName);
 	fin.open(ofgF);
@@ -118,43 +114,12 @@ void load_data(char* reference_name, char* statName, char* imdName) {
 	if (verbose) { printf("Loading Data is finished!\n"); }
 }
 
-template<class ModelType>
-void calcExpectedEffectiveLengths(ModelType& model) {
-	int lb, ub, span;
-	double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
-  
-	model.getGLD().copyTo(pdf, cdf, lb, ub, span);
-	clen = new double[span + 1];
-	clen[0] = 0.0;
-	for (int i = 1; i <= span; i++) {
-		clen[i] = clen[i - 1] + pdf[i] * (lb + i);
-	}
-
-	eel.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++) {
-		int totLen = refs.getRef(i).getTotLen();
-		int fullLen = refs.getRef(i).getFullLen();
-		int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
-		int pos2 = max(min(totLen, ub) - lb, 0);
-
-		if (pos2 == 0) { eel[i] = 0.0; continue; }
-    
-		eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
-		assert(eel[i] >= 0);
-		if (eel[i] < MINEEL) { eel[i] = 0.0; }
-	}
-  
-	delete[] pdf;
-	delete[] cdf;
-	delete[] clen;
-}
-
 template<class ModelType>
 void init_model_related(char* modelF) {
 	ModelType model;
 	model.read(modelF);
 
-	calcExpectedEffectiveLengths<ModelType>(model);
+	calcExpectedEffectiveLengths<ModelType>(M, refs, model, eel);
 	memcpy(mw, model.getMW(), sizeof(double) * (M + 1)); // otherwise, after exiting this procedure, mw becomes undefined
 }
 
@@ -220,55 +185,6 @@ void writeCountVector(FILE* fo, vector<int>& counts) {
 	fprintf(fo, "%d\n", counts[M]);
 }
 
-void polishTheta(vector<double>& theta, const vector<double>& eel, const double* mw) {
-	double sum = 0.0;
-
-	/* The reason that for noise gene, mw value is 1 is :
-	 * currently, all masked positions are for poly(A) sites, which in theory should be filtered out.
-	 * So the theta0 does not containing reads from any masked position
-	 */
-
-	for (int i = 0; i <= M; i++) {
-		// i == 0, mw[i] == 1
-		if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
-			theta[i] = 0.0;
-			continue;
-		}
-		theta[i] = theta[i] / mw[i];
-		sum += theta[i];
-	}
-	// currently is OK, since no transcript should be masked totally, only the poly(A) tail related part will be masked
-	general_assert(sum >= EPSILON, "No effective length is no less than" + ftos(MINEEL, 6) + " !");
-	for (int i = 0; i <= M; i++) theta[i] /= sum;
-}
-
-void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
-	double denom;
-	vector<double> frac;
-
-	//calculate fraction of count over all mappabile reads
-	denom = 0.0;
-	frac.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++) 
-	  if (eel[i] >= EPSILON) {
-	    frac[i] = theta[i];
-	    denom += frac[i];
-	  }
-	general_assert(denom >= EPSILON, "No alignable reads?!");
-	for (int i = 1; i <= M; i++) frac[i] /= denom;
-  
-	//calculate FPKM
-	fpkm.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++)
-		if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
-
-	//calculate TPM
-	tpm.assign(M + 1, 0.0);
-	denom = 0.0;
-	for (int i = 1; i <= M; i++) denom += fpkm[i];
-	for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;  
-}
-
 void* Gibbs(void* arg) {
 	int CHAINLEN;
 	HIT_INT_TYPE len, fr, to;
@@ -320,8 +236,8 @@ void* Gibbs(void* arg) {
 			if ((ROUND - BURNIN - 1) % GAP == 0) {
 				writeCountVector(params->fo, counts);
 				for (int i = 0; i <= M; i++) theta[i] = counts[i] / totc;
-				polishTheta(theta, eel, mw);
-				calcExpressionValues(theta, eel, tpm, fpkm);
+				polishTheta(M, theta, eel, mw);
+				calcExpressionValues(M, theta, eel, tpm, fpkm);
 				for (int i = 0; i <= M; i++) {
 					params->pme_c[i] += counts[i] - 1;
 					params->pve_c[i] += (counts[i] - 1) * (counts[i] - 1);
@@ -374,66 +290,13 @@ void release() {
 	}
 }
 
-void writeResults(char* imdName) {
-	char outF[STRLEN];
-	FILE *fo;
-
-	vector<double> isopct;
-	vector<double> gene_counts, gene_tpm, gene_fpkm;
-
-	//calculate IsoPct, etc.
-	isopct.assign(M + 1, 0.0);
-	gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
-
-	for (int i = 0; i < m; i++) {
-		int b = gi.spAt(i), e = gi.spAt(i + 1);
-		for (int j = b; j < e; j++) {
-			gene_counts[i] += pme_c[j];
-			gene_tpm[i] += pme_tpm[j];
-			gene_fpkm[i] += pme_fpkm[j];
-		}
-		if (gene_tpm[i] < EPSILON) continue;
-		for (int j = b; j < e; j++)
-			isopct[j] = pme_tpm[j] / gene_tpm[i];
-	}
-
-	//isoform level results
-	sprintf(outF, "%s.iso_res", imdName);
-	fo = fopen(outF, "a");
-	general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
-
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", pme_c[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", pme_tpm[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", pme_fpkm[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
-	fclose(fo);
-
-	//gene level results
-	sprintf(outF, "%s.gene_res", imdName);
-	fo = fopen(outF, "a");
-	general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
-
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
-	for (int i = 0; i < m; i++)
-		fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
-	fclose(fo);
-
-	if (verbose) { printf("Gibbs based expression values are written!\n"); }
-}
-
 int main(int argc, char* argv[]) {
 	if (argc < 7) {
 		printf("Usage: rsem-run-gibbs reference_name imdName statName BURNIN NSAMPLES GAP [-p #Threads] [--var] [-q]\n");
 		exit(-1);
 	}
 
+	strcpy(refName, argv[1]);
 	strcpy(imdName, argv[2]);
 	strcpy(statName, argv[3]);
 
@@ -459,7 +322,7 @@ int main(int argc, char* argv[]) {
 		printf("Warning: Number of samples is less than number of threads! Change the number of threads to %d!\n", nThreads);
 	}
 
-	load_data(argv[1], statName, imdName);
+	load_data(refName, statName, imdName);
 
 	sprintf(modelF, "%s.model", statName);
 	FILE *fi = fopen(modelF, "r");
@@ -491,11 +354,19 @@ int main(int argc, char* argv[]) {
 
 	if (verbose) printf("Gibbs finished!\n");
 	
-	writeResults(imdName);
+	writeResultsGibbs(M, refName, imdName, pme_c, pme_fpkm, pme_tpm);
 
 	if (var_opt) {
 		char varF[STRLEN];
 
+		// Load group info
+		int m;
+		GroupInfo gi;
+		char groupF[STRLEN];
+		sprintf(groupF, "%s.grp", refName);
+		gi.load(groupF);
+		m = gi.getm();
+		
 		sprintf(varF, "%s.var", statName);
 		FILE *fo = fopen(varF, "w");
 		general_assert(fo != NULL, "Cannot open " + cstrtos(varF) + "!");
@@ -507,7 +378,7 @@ int main(int argc, char* argv[]) {
 		}
 		fclose(fo);
 	}
-
+	
 	delete mw; // delete the copy
 
 	return 0;
diff --git a/README.md b/README.md
index b93678d..f6b5ec8 100644
--- a/README.md
+++ b/README.md
@@ -296,7 +296,7 @@ __reference_name:__ The name of RSEM references, which should be already generat
 
 __estimated_model_file:__ This file describes how the RNA-Seq reads will be sequenced given the expression levels. It determines what kind of reads will be simulated (single-end/paired-end, w/o quality score) and includes parameters for fragment length distribution, read start position distribution, sequencing error models, etc. Normally, this file should be learned from real data using 'rsem-calculate-expression'. The file can be found under the 'sample_name.stat' folder with the name of 'sample_name.model'. 'model_file_description.txt' provides the format and meanings of this file.    
 
-__estimated_isoform_results:__ This file contains expression levels for all isoforms recorded in the reference. It can be learned using 'rsem-calculate-expression' from real data. The corresponding file users want to use is 'sample_name.isoforms.results'. If simulating from user-designed expression profile is desired, start from a learned 'sample_name.isoforms.results' file and only modify the 'TPM' column. The simulator only reads the TPM column. But keeping the file format the same is required.   
+__estimated_isoform_results:__ This file contains expression levels for all isoforms recorded in the reference. It can be learned using 'rsem-calculate-expression' from real data. The corresponding file users want to use is 'sample_name.isoforms.results'. If simulating from user-designed expression profile is desired, start from a learned 'sample_name.isoforms.results' file and only modify the 'TPM' column. The simulator only reads the TPM column. But keeping the file format the same is required. If the RSEM references built are aware of allele-specific transcripts, 'sample_name.alleles.results' should be used instead.   
 
 __theta0:__ This parameter determines the fraction of reads that are coming from background "noise" (instead of from a transcript). It can also be estimated using 'rsem-calculate-expression' from real data. Users can find it as the first value of the third line of the file 'sample_name.stat/sample_name.theta'.   
 
diff --git a/Transcript.h b/Transcript.h
index 90f6e67..f7afa2b 100644
--- a/Transcript.h
+++ b/Transcript.h
@@ -10,6 +10,10 @@
 
 #include "utils.h"
 
+/**
+   If no genome is provided, seqname field is used to store the allele name.
+ */
+
 struct Interval {
 	int start, end;
 
@@ -49,7 +53,7 @@ public:
 	}
 
 	bool operator< (const Transcript& o) const {
-	  return gene_id < o.gene_id || (gene_id == o.gene_id && transcript_id < o.transcript_id);
+	  return gene_id < o.gene_id || (gene_id == o.gene_id && transcript_id < o.transcript_id) || (gene_id == o.gene_id && transcript_id == o.transcript_id && seqname < o.seqname);
 	}
 
 	const std::string& getTranscriptID() const { return transcript_id; }
diff --git a/Transcripts.h b/Transcripts.h
index 19b025e..5c17f95 100644
--- a/Transcripts.h
+++ b/Transcripts.h
@@ -28,7 +28,11 @@ public:
 	}
 
 	int getM() { return M; }
+
 	int getType() { return type; }
+	void setType(int type) { this->type = type; }
+
+	bool isAlleleSpecific() { return type == 2; }
 
 	const Transcript& getTranscriptAt(int pos) {
 		assert(pos > 0 && pos <= M);
@@ -60,7 +64,7 @@ public:
 	void buildMappings(int, char**);
 
 private:
-	int M, type; // type 0 from genome , 1 standalone transcriptome
+	int M, type; // type 0 from genome, 1 standalone transcriptome, 2 allele-specific 
 	std::vector<Transcript> transcripts;
 
 	std::vector<int> e2i, i2e; // external sid to internal sid, internal sid to external sid
@@ -94,11 +98,11 @@ void Transcripts::buildMappings(int n_targets, char** target_name) {
 	std::map<std::string, int> dict;
 	std::map<std::string, int>::iterator iter;
 
-	general_assert(n_targets == M, "Number of transcripts does not match! Please check if the reads are aligned to a transcript set (instead of a genome)!");
+	general_assert(n_targets == M, "Number of reference sequences does not match! Please check if the reads are aligned to a transcript set (instead of a genome)!");
 
 	dict.clear();
 	for (int i = 1; i <= M; i++) {
-		const std::string& tid = transcripts[i].getTranscriptID();
+		const std::string& tid = isAlleleSpecific() ? transcripts[i].getSeqName() : transcripts[i].getTranscriptID();
 		iter = dict.find(tid);
 		general_assert(iter == dict.end(), tid + " appears more than once!");
 		dict[tid] = i;
@@ -108,7 +112,7 @@ void Transcripts::buildMappings(int n_targets, char** target_name) {
 	i2e.assign(M + 1, 0);
 	for (int i = 0; i < n_targets; i++) {
 		iter = dict.find(std::string(target_name[i]));
-		general_assert(iter != dict.end(), "RSEM can not recognize transcript " + cstrtos(target_name[i]) + "!");
+		general_assert(iter != dict.end(), "RSEM can not recognize reference sequence name " + cstrtos(target_name[i]) + "!");
 		general_assert(iter->second > 0, "Reference sequence name " + cstrtos(target_name[i]) + " is duplicated!");
 		e2i[i + 1] = iter->second;
 		i2e[iter->second] = i + 1;
diff --git a/WHAT_IS_NEW b/WHAT_IS_NEW
index 24ae065..b942a63 100644
--- a/WHAT_IS_NEW
+++ b/WHAT_IS_NEW
@@ -1,3 +1,10 @@
+RSEM v1.2.12
+
+- Enabled allele-specific expression estimation
+- Added option '--calc-pme' option to calculate posterior mean estimates only (no credibility intervals)
+
+--------------------------------------------------------------------------------------------
+
 RSEM v1.2.11
 
 - Enabled RSEM to use Bowtie 2 aligner (indel, local and discordant alignments are not supported yet)
diff --git a/WriteResults.h b/WriteResults.h
new file mode 100644
index 0000000..95aa7b0
--- /dev/null
+++ b/WriteResults.h
@@ -0,0 +1,624 @@
+#ifndef WRITERESULTS_H_
+#define WRITERESULTS_H_
+
+#include<cstdio>
+#include<vector>
+#include<string>
+#include<fstream>
+#include<algorithm>
+
+#include "utils.h"
+#include "my_assert.h"
+#include "GroupInfo.h"
+#include "Transcript.h"
+#include "Transcripts.h"
+#include "Refs.h"
+
+#include "Model.h"
+#include "SingleModel.h"
+#include "SingleQModel.h"
+#include "PairedEndModel.h"
+#include "PairedEndQModel.h"
+
+template<class ModelType>
+void calcExpectedEffectiveLengths(int M, Refs& refs, ModelType& model, std::vector<double>& eel) {
+	int lb, ub, span;
+	double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
+  
+	model.getGLD().copyTo(pdf, cdf, lb, ub, span);
+	clen = new double[span + 1];
+	clen[0] = 0.0;
+	for (int i = 1; i <= span; i++) {
+		clen[i] = clen[i - 1] + pdf[i] * (lb + i);
+	}
+
+	eel.assign(M + 1, 0.0);
+	for (int i = 1; i <= M; i++) {
+		int totLen = refs.getRef(i).getTotLen();
+		int fullLen = refs.getRef(i).getFullLen();
+		int pos1 = std::max(std::min(totLen - fullLen + 1, ub) - lb, 0);
+		int pos2 = std::max(std::min(totLen, ub) - lb, 0);
+
+		if (pos2 == 0) { eel[i] = 0.0; continue; }
+    
+		eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
+		assert(eel[i] >= 0);
+		if (eel[i] < MINEEL) { eel[i] = 0.0; }
+	}
+  
+	delete[] pdf;
+	delete[] cdf;
+	delete[] clen;
+}
+
+void polishTheta(int M, std::vector<double>& theta, const std::vector<double>& eel, const double* mw) {
+	double sum = 0.0;
+
+	/* The reason that for noise gene, mw value is 1 is :
+	 * currently, all masked positions are for poly(A) sites, which in theory should be filtered out.
+	 * So the theta0 does not containing reads from any masked position
+	 */
+
+	for (int i = 0; i <= M; i++) {
+		// i == 0, mw[i] == 1
+		if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
+			theta[i] = 0.0;
+			continue;
+		}
+		theta[i] = theta[i] / mw[i];
+		sum += theta[i];
+	}
+	// currently is OK, since no transcript should be masked totally, only the poly(A) tail related part will be masked
+	general_assert(sum >= EPSILON, "No effective length is no less than" + ftos(MINEEL, 6) + " !");
+	for (int i = 0; i <= M; i++) theta[i] /= sum;
+}
+
+void calcExpressionValues(int M, const std::vector<double>& theta, const std::vector<double>& eel, std::vector<double>& tpm, std::vector<double>& fpkm) {
+	double denom;
+	std::vector<double> frac;
+
+	//calculate fraction of count over all mappabile reads
+	denom = 0.0;
+	frac.assign(M + 1, 0.0);
+	for (int i = 1; i <= M; i++) 
+	  if (eel[i] >= EPSILON) {
+	    frac[i] = theta[i];
+	    denom += frac[i];
+	  }
+	general_assert(denom >= EPSILON, "No alignable reads?!");
+	for (int i = 1; i <= M; i++) frac[i] /= denom;
+  
+	//calculate FPKM
+	fpkm.assign(M + 1, 0.0);
+	for (int i = 1; i <= M; i++)
+		if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+	//calculate TPM
+	tpm.assign(M + 1, 0.0);
+	denom = 0.0;
+	for (int i = 1; i <= M; i++) denom += fpkm[i];
+	for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;  
+}
+
+inline bool isAlleleSpecific(const char* refName, GroupInfo* gt = NULL, GroupInfo* ta = NULL) {
+  bool alleleS;
+  char gtF[STRLEN], taF[STRLEN];
+
+  sprintf(gtF, "%s.gt", refName);
+  sprintf(taF, "%s.ta", refName);
+  std::ifstream gtIF(gtF), taIF(taF);
+  alleleS = gtIF.is_open() && taIF.is_open();
+  if (gtIF.is_open()) gtIF.close();
+  if (taIF.is_open()) taIF.close();
+
+  if (alleleS) { 
+    if (gt != NULL) gt->load(gtF); 
+    if (ta != NULL) ta->load(taF); 
+  }
+
+  return alleleS;
+}
+
+void writeResultsEM(int M, const char* refName, const char* imdName, Transcripts& transcripts, std::vector<double>& theta, std::vector<double>& eel, double* counts) {
+	char outF[STRLEN];
+	FILE *fo;
+
+	int m;
+	GroupInfo gi;
+	char groupF[STRLEN];
+
+	std::vector<int> tlens;
+	std::vector<double> fpkm, tpm, isopct;
+	std::vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
+
+	// Load group info
+	sprintf(groupF, "%s.grp", refName);
+	gi.load(groupF);
+	m = gi.getm();
+
+	// For allele-specific expression
+	int m_trans = 0;
+	GroupInfo gt, ta;
+	std::vector<double> trans_lens, trans_eels, trans_counts, trans_tpm, trans_fpkm, ta_pct, gt_pct;
+
+	bool alleleS = isAlleleSpecific(refName, &gt, &ta); // if allele-specific
+
+	calcExpressionValues(M, theta, eel, tpm, fpkm);
+
+	//calculate IsoPct, etc.
+	isopct.assign(M + 1, 0.0);
+	tlens.assign(M + 1, 0);
+
+	glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
+	gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
+
+	for (int i = 0; i < m; i++) {
+		int b = gi.spAt(i), e = gi.spAt(i + 1);
+		for (int j = b; j < e; j++) {
+			const Transcript& transcript = transcripts.getTranscriptAt(j);
+			tlens[j] = transcript.getLength();
+
+			gene_counts[i] += counts[j];
+			gene_tpm[i] += tpm[j];
+			gene_fpkm[i] += fpkm[j];
+		}
+
+		if (gene_tpm[i] < EPSILON) {
+			double frac = 1.0 / (e - b);
+			for (int j = b; j < e; j++) {
+				glens[i] += tlens[j] * frac;
+				gene_eels[i] += eel[j] * frac;
+			}
+		}
+		else {
+			for (int j = b; j < e; j++) {
+				isopct[j] = tpm[j] / gene_tpm[i];
+				glens[i] += tlens[j] * isopct[j];
+				gene_eels[i] += eel[j] * isopct[j];
+			}
+		}
+	}
+
+	if (alleleS) {
+	  m_trans = ta.getm();
+	  ta_pct.assign(M + 1, 0.0);
+	  trans_lens.assign(m_trans, 0.0); trans_eels.assign(m_trans, 0.0);
+	  trans_counts.assign(m_trans, 0.0); trans_tpm.assign(m_trans, 0.0); trans_fpkm.assign(m_trans, 0.0);
+	  
+	  for (int i = 0; i < m_trans; i++) {
+		int b = ta.spAt(i), e = ta.spAt(i + 1);
+		for (int j = b; j < e; j++) {
+			trans_counts[i] += counts[j];
+			trans_tpm[i] += tpm[j];
+			trans_fpkm[i] += fpkm[j];
+		}
+
+		if (trans_tpm[i] < EPSILON) {
+			double frac = 1.0 / (e - b);
+			for (int j = b; j < e; j++) {
+				trans_lens[i] += tlens[j] * frac;
+				trans_eels[i] += eel[j] * frac;
+			}
+		}
+		else {
+			for (int j = b; j < e; j++) {
+				ta_pct[j] = tpm[j] / trans_tpm[i];
+				trans_lens[i] += tlens[j] * ta_pct[j];
+				trans_eels[i] += eel[j] * ta_pct[j];
+			}
+		}
+	  } 
+	  
+	  gt_pct.assign(m_trans, 0.0);
+	  for (int i = 0; i < m; i++) 
+	    if (gene_tpm[i] >= EPSILON) {
+	      int b = gt.spAt(i), e = gt.spAt(i + 1);
+	      for (int j = b; j < e; j++) gt_pct[j] = trans_tpm[j] / gene_tpm[i];
+	    }
+	}
+
+	if (!alleleS) {
+	  //isoform level results
+	  sprintf(outF, "%s.iso_res", imdName);
+	  fo = fopen(outF, "w");
+	  for (int i = 1; i <= M; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(i);
+	    fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < M ? '\t' : '\n'));
+	  }
+	  for (int i = 1; i <= M; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(i);
+	    fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
+	  }
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%d%c", tlens[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", eel[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", tpm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", fpkm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
+	  fclose(fo);
+	}
+	else {
+	  // allele level results
+	  sprintf(outF, "%s.allele_res", imdName);
+	  fo = fopen(outF, "w");
+	  for (int i = 1; i <= M; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(i);
+	    fprintf(fo, "%s%c", transcript.getSeqName().c_str(), (i < M ? '\t' : '\n'));
+	  }
+	  for (int i = 1; i <= M; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(i);
+	    fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < M ? '\t' : '\n'));
+	  }
+	  for (int i = 1; i <= M; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(i);
+	    fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
+	  }
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%d%c", tlens[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", eel[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", tpm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", fpkm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", ta_pct[i] * 1e2, (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
+	  fclose(fo);
+
+	  // isoform level results
+	  sprintf(outF, "%s.iso_res", imdName);
+	  fo = fopen(outF, "w");
+	  for (int i = 0; i < m_trans; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(ta.spAt(i));
+	    fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < m_trans - 1 ? '\t' : '\n'));
+	  }
+	  for (int i = 0; i < m_trans; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(ta.spAt(i));
+	    fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < m_trans - 1 ? '\t' : '\n'));
+	  }
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_lens[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_eels[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_counts[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_tpm[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_fpkm[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", gt_pct[i] * 1e2, (i < m_trans - 1 ? '\t' : '\n'));
+	  fclose(fo);
+	}
+
+	//gene level results
+	sprintf(outF, "%s.gene_res", imdName);
+	fo = fopen(outF, "w");
+	for (int i = 0; i < m; i++) {
+		const Transcript& transcript = transcripts.getTranscriptAt(gi.spAt(i));
+		fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < m - 1 ? '\t' : '\n'));
+	}
+	for (int i = 0; i < m; i++) {
+		int b = gi.spAt(i), e = gi.spAt(i + 1);
+		std::string curtid = "", tid;
+		for (int j = b; j < e; j++) {
+			tid = transcripts.getTranscriptAt(j).getTranscriptID();
+			if (curtid != tid) {
+			  if (curtid != "") fprintf(fo, ",");
+			  fprintf(fo, "%s", tid.c_str());
+			  curtid = tid;
+			}
+		}
+		fprintf(fo, "%c", (i < m - 1 ? '\t' : '\n'));
+	}
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", glens[i], (i < m - 1 ? '\t' : '\n'));
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", gene_eels[i], (i < m - 1 ? '\t' : '\n'));
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
+	fclose(fo);
+
+	if (verbose) { printf("Expression Results are written!\n"); }
+}
+
+void writeResultsGibbs(int M, char* refName, char* imdName, std::vector<double>& pme_c, std::vector<double>& pme_fpkm, std::vector<double>& pme_tpm) {
+	char outF[STRLEN];
+	FILE *fo;
+
+	int m;
+	GroupInfo gi;
+	char groupF[STRLEN];
+	std::vector<double> isopct;
+	std::vector<double> gene_counts, gene_tpm, gene_fpkm;
+
+	// Load group info
+	sprintf(groupF, "%s.grp", refName);
+	gi.load(groupF);
+	m = gi.getm();
+
+	// For allele-specific expression
+	int m_trans = 0;
+	GroupInfo gt, ta;
+	std::vector<double> trans_counts, trans_tpm, trans_fpkm, ta_pct, gt_pct;
+
+	bool alleleS = isAlleleSpecific(refName, &gt, &ta); // if allele-specific
+
+	//calculate IsoPct, etc.
+	isopct.assign(M + 1, 0.0);
+	gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
+
+	for (int i = 0; i < m; i++) {
+		int b = gi.spAt(i), e = gi.spAt(i + 1);
+		for (int j = b; j < e; j++) {
+			gene_counts[i] += pme_c[j];
+			gene_tpm[i] += pme_tpm[j];
+			gene_fpkm[i] += pme_fpkm[j];
+		}
+		if (gene_tpm[i] < EPSILON) continue;
+		for (int j = b; j < e; j++)
+			isopct[j] = pme_tpm[j] / gene_tpm[i];
+	}
+
+	if (alleleS) {
+	  m_trans = ta.getm();
+	  ta_pct.assign(M + 1, 0.0);
+	  trans_counts.assign(m_trans, 0.0); trans_tpm.assign(m_trans, 0.0); trans_fpkm.assign(m_trans, 0.0);
+	  
+	  for (int i = 0; i < m_trans; i++) {
+		int b = ta.spAt(i), e = ta.spAt(i + 1);
+		for (int j = b; j < e; j++) {
+			trans_counts[i] += pme_c[j];
+			trans_tpm[i] += pme_tpm[j];
+			trans_fpkm[i] += pme_fpkm[j];
+		}
+		if (trans_tpm[i] < EPSILON) continue;
+		for (int j = b; j < e; j++) 
+		  ta_pct[j] = pme_tpm[j] / trans_tpm[i];	
+	  }
+
+	  gt_pct.assign(m_trans, 0.0);
+	  for (int i = 0; i < m; i++) 
+	    if (gene_tpm[i] >= EPSILON) {
+	      int b = gt.spAt(i), e = gt.spAt(i + 1);
+	      for (int j = b; j < e; j++) gt_pct[j] = trans_tpm[j] / gene_tpm[i];
+	    }
+	}
+
+	if (!alleleS) {
+	  //isoform level results
+	  sprintf(outF, "%s.iso_res", imdName);
+	  fo = fopen(outF, "a");
+	  general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
+	  
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", pme_c[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", pme_tpm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", pme_fpkm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
+	  fclose(fo);	 
+	}
+	else {
+	  //allele level results
+	  sprintf(outF, "%s.allele_res", imdName);
+	  fo = fopen(outF, "a");
+	  general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
+	  
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", pme_c[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", pme_tpm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", pme_fpkm[i], (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", ta_pct[i] * 1e2, (i < M ? '\t' : '\n'));
+	  for (int i = 1; i <= M; i++)
+	    fprintf(fo, "%.2f%c", isopct[i] * 1e2, (i < M ? '\t' : '\n'));
+	  fclose(fo);
+
+	  //isoform level results
+	  sprintf(outF, "%s.iso_res", imdName);
+	  fo = fopen(outF, "a");
+	  general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
+	  
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_counts[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_tpm[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", trans_fpkm[i], (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.2f%c", gt_pct[i] * 1e2, (i < m_trans - 1 ? '\t' : '\n'));
+	  fclose(fo);
+	}
+ 
+	//gene level results
+	sprintf(outF, "%s.gene_res", imdName);
+	fo = fopen(outF, "a");
+	general_assert(fo != NULL, "Cannot open " + cstrtos(outF) + "!");
+
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", gene_counts[i], (i < m - 1 ? '\t' : '\n'));
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", gene_tpm[i], (i < m - 1 ? '\t' : '\n'));
+	for (int i = 0; i < m; i++)
+		fprintf(fo, "%.2f%c", gene_fpkm[i], (i < m - 1 ? '\t' : '\n'));
+	fclose(fo);
+
+	if (verbose) { printf("Gibbs based expression values are written!\n"); }
+}
+
+void writeResultsSimulation(int M, char* refName, char* outFN, Transcripts& transcripts, std::vector<double>& eel, std::vector<double>& counts) {
+	char outF[STRLEN];
+	FILE *fo;
+
+	int m;
+	GroupInfo gi;
+	char groupF[STRLEN];
+
+        // Load group info
+        sprintf(groupF, "%s.grp", refName);
+        gi.load(groupF);
+	m = gi.getm();
+
+	std::vector<int> tlens;
+	std::vector<double> tpm, fpkm, isopct;
+	std::vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
+	
+        // For allele-specific expression
+	int m_trans = 0;
+        GroupInfo gt, ta;
+	std::vector<double> trans_lens, trans_eels, trans_counts, trans_tpm, trans_fpkm, ta_pct, gt_pct;
+
+	bool alleleS = isAlleleSpecific(refName, &gt, &ta); // if allele-specific
+	
+	for (int i = 1; i <= M; i++)
+		general_assert(eel[i] > EPSILON || counts[i] <= EPSILON, "An isoform whose effecitve length < " + ftos(MINEEL, 6) + " got sampled!");
+
+	calcExpressionValues(M, counts, eel, tpm, fpkm);
+
+	//calculate IsoPct, etc.
+	isopct.assign(M + 1, 0.0);
+	tlens.assign(M + 1, 0);
+
+	glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
+	gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
+
+        for (int i = 0; i < m; i++) {
+	  int b = gi.spAt(i), e = gi.spAt(i + 1);
+	  for (int j = b; j < e; j++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(j);
+	    tlens[j] = transcript.getLength();
+	    
+	    gene_counts[i] += counts[j];
+	    gene_tpm[i] += tpm[j];
+	    gene_fpkm[i] += fpkm[j];
+	  }
+
+	  if (gene_tpm[i] < EPSILON) {
+	    double frac = 1.0 / (e - b);
+	    for (int j = b; j < e; j++) {
+	      glens[i] += tlens[j] * frac;
+	      gene_eels[i] += eel[j] * frac;
+	    }
+	  }
+	  else {
+	    for (int j = b; j < e; j++) {
+	      isopct[j] = tpm[j] / gene_tpm[i];
+	      glens[i] += tlens[j] * isopct[j];
+	      gene_eels[i] += eel[j] * isopct[j];
+	    }
+	  }
+        }
+
+        if (alleleS) {
+          m_trans = ta.getm();
+          ta_pct.assign(M + 1, 0.0);
+          trans_lens.assign(m_trans, 0.0); trans_eels.assign(m_trans, 0.0);
+          trans_counts.assign(m_trans, 0.0); trans_tpm.assign(m_trans, 0.0); trans_fpkm.assign(m_trans, 0.0);
+
+          for (int i = 0; i < m_trans; i++) {
+	    int b = ta.spAt(i), e = ta.spAt(i + 1);
+	    for (int j = b; j < e; j++) {
+	      trans_counts[i] += counts[j];
+	      trans_tpm[i] += tpm[j];
+	      trans_fpkm[i] += fpkm[j];
+	    }
+
+	    if (trans_tpm[i] < EPSILON) {
+	      double frac = 1.0 / (e - b);
+	      for (int j = b; j < e; j++) {
+		trans_lens[i] += tlens[j] * frac;
+		trans_eels[i] += eel[j] * frac;
+	      }
+	    }
+	    else {
+	      for (int j = b; j < e; j++) {
+		ta_pct[j] = tpm[j] / trans_tpm[i];
+		trans_lens[i] += tlens[j] * ta_pct[j];
+		trans_eels[i] += eel[j] * ta_pct[j];
+	      }
+	    }
+          }
+
+          gt_pct.assign(m_trans, 0.0);
+          for (int i = 0; i < m; i++)
+            if (gene_tpm[i] >= EPSILON) {
+              int b = gt.spAt(i), e = gt.spAt(i + 1);
+              for (int j = b; j < e; j++) gt_pct[j] = trans_tpm[j] / gene_tpm[i];
+            }
+        }
+
+	//allele level
+	if (alleleS) {
+	  sprintf(outF, "%s.sim.alleles.results", outFN);
+	  fo = fopen(outF, "w");
+	  fprintf(fo, "allele_id\ttranscript_id\tgene_id\tlength\teffective_length\tcount\tTPM\tFPKM\tAlleleIsoPct\tAlleleGenePct\n");
+	  for (int i = 1; i <= M; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(i);
+	    fprintf(fo, "%s\t%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", transcript.getSeqName().c_str(), transcript.getTranscriptID().c_str(), transcript.getGeneID().c_str(), tlens[i],
+		    eel[i], counts[i], tpm[i], fpkm[i], ta_pct[i] * 1e2, isopct[i] * 1e2);
+	  }
+	  fclose(fo);
+	}
+
+	//isoform level
+	sprintf(outF, "%s.sim.isoforms.results", outFN);
+	fo = fopen(outF, "w");
+	fprintf(fo, "transcript_id\tgene_id\tlength\teffective_length\tcount\tTPM\tFPKM\tIsoPct\n");
+	if (!alleleS) {
+	  for (int i = 1; i <= M; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(i);
+	    fprintf(fo, "%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", transcript.getTranscriptID().c_str(), transcript.getGeneID().c_str(), tlens[i],
+		    eel[i], counts[i], tpm[i], fpkm[i], isopct[i] * 1e2);
+	  }
+	}
+	else {
+	  for (int i = 0; i < m_trans; i++) {
+	    const Transcript& transcript = transcripts.getTranscriptAt(ta.spAt(i));
+	    fprintf(fo, "%s\t%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", transcript.getTranscriptID().c_str(), transcript.getGeneID().c_str(), trans_lens[i],
+                    trans_eels[i], trans_counts[i], trans_tpm[i], trans_fpkm[i], gt_pct[i] * 1e2);
+	  }
+	}
+	fclose(fo);
+
+	//gene level
+	sprintf(outF, "%s.sim.genes.results", outFN);
+	fo = fopen(outF, "w");
+	fprintf(fo, "gene_id\ttranscript_id(s)\tlength\teffective_length\tcount\tTPM\tFPKM\n");
+	for (int i = 0; i < m; i++) {
+		int b = gi.spAt(i), e = gi.spAt(i + 1);
+		const std::string& gene_id = transcripts.getTranscriptAt(b).getGeneID();
+		fprintf(fo, "%s\t", gene_id.c_str());
+		std::string curtid = "", tid;
+		for (int j = b; j < e; j++) {
+			tid = transcripts.getTranscriptAt(j).getTranscriptID();
+			if (curtid != tid) {
+			  if (curtid != "") fprintf(fo, ",");
+			  fprintf(fo, "%s", tid.c_str());
+			  curtid = tid;
+			}
+		}
+		fprintf(fo, "\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", glens[i], gene_eels[i], gene_counts[i], gene_tpm[i], gene_fpkm[i]);
+	}
+	fclose(fo);
+}
+
+#endif
diff --git a/calcCI.cpp b/calcCI.cpp
index 45c7683..97eba64 100644
--- a/calcCI.cpp
+++ b/calcCI.cpp
@@ -20,6 +20,7 @@
 
 #include "Refs.h"
 #include "GroupInfo.h"
+#include "WriteResults.h"
 
 #include "Buffer.h"
 
@@ -32,17 +33,17 @@ struct Params {
 	const double *mw;
 };
 
+struct CIParams {
+	int no;
+	int start_gene_id, end_gene_id;
+};
+
 struct CIType {
 	float lb, ub; // the interval is [lb, ub]
 
 	CIType() { lb = ub = 0.0; }
 };
 
-struct CIParams {
-	int no;
-	int start_gene_id, end_gene_id;
-};
-
 int model_type;
 
 int nMB;
@@ -54,14 +55,20 @@ float *l_bars;
 
 char cvsF[STRLEN], tmpF[STRLEN], command[STRLEN];
 
-CIType *iso_tpm, *gene_tpm, *iso_fpkm, *gene_fpkm;
+CIType *tpm, *fpkm;
+CIType *iso_tpm = NULL, *iso_fpkm = NULL;
+CIType *gene_tpm, *gene_fpkm;
 
 int M, m;
 Refs refs;
 GroupInfo gi;
-char imdName[STRLEN], statName[STRLEN];
+char refName[STRLEN], imdName[STRLEN], statName[STRLEN];
 char modelF[STRLEN], groupF[STRLEN], refF[STRLEN];
 
+bool alleleS;
+int m_trans;
+GroupInfo ta;
+
 vector<double> eel; //expected effective lengths
 
 Buffer *buffer;
@@ -75,37 +82,6 @@ int rc;
 
 CIParams *ciParamsArray;
 
-template<class ModelType>
-void calcExpectedEffectiveLengths(ModelType& model) {
-	int lb, ub, span;
-	double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = \sigma_{j=1}^{i}pdf[i]*(lb+i)
-  
-	model.getGLD().copyTo(pdf, cdf, lb, ub, span);
-	clen = new double[span + 1];
-	clen[0] = 0.0;
-	for (int i = 1; i <= span; i++) {
-		clen[i] = clen[i - 1] + pdf[i] * (lb + i);
-	}
-
-	eel.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++) {
-		int totLen = refs.getRef(i).getTotLen();
-		int fullLen = refs.getRef(i).getFullLen();
-		int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
-		int pos2 = max(min(totLen, ub) - lb, 0);
-
-		if (pos2 == 0) { eel[i] = 0.0; continue; }
-    
-		eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
-		assert(eel[i] >= 0);
-		if (eel[i] < MINEEL) { eel[i] = 0.0; }
-	}
-  
-	delete[] pdf;
-	delete[] cdf;
-	delete[] clen;
-}
-
 void* sample_theta_from_c(void* arg) {
 	int *cvec;
 	double *theta;
@@ -179,7 +155,7 @@ template<class ModelType>
 void sample_theta_vectors_from_count_vectors() {
 	ModelType model;
 	model.read(modelF);
-	calcExpectedEffectiveLengths<ModelType>(model);
+	calcExpectedEffectiveLengths<ModelType>(M, refs, model, eel);
 
 	int num_threads = min(nThreads, nCV);
 
@@ -269,13 +245,19 @@ void calcCI(int nSamples, float *samples, float &lb, float &ub) {
 }
 
 void* calcCI_batch(void* arg) {
-	float *itsamples, *gtsamples, *ifsamples, *gfsamples;
+	float *tsamples, *fsamples;
+	float *itsamples = NULL, *ifsamples = NULL, *gtsamples, *gfsamples;
 	ifstream fin;
 	CIParams *ciParams = (CIParams*)arg;
+	int curtid, curaid, tid;
 
-	itsamples = new float[nSamples];
+	tsamples = new float[nSamples];
+	fsamples = new float[nSamples];
+	if (alleleS) { 
+	  itsamples = new float[nSamples];
+	  ifsamples = new float[nSamples];
+	}
 	gtsamples = new float[nSamples];
-	ifsamples = new float[nSamples];
 	gfsamples = new float[nSamples];
 
 	fin.open(tmpF, ios::binary);
@@ -284,19 +266,46 @@ void* calcCI_batch(void* arg) {
 	fin.seekg(pos, ios::beg);
 
 	int cnt = 0;
+	if (alleleS) {
+	  curtid = curaid = -1;
+	  memset(itsamples, 0, FLOATSIZE * nSamples);
+	  memset(ifsamples, 0, FLOATSIZE * nSamples);
+	}
 	for (int i = ciParams->start_gene_id; i < ciParams->end_gene_id; i++) {
 		int b = gi.spAt(i), e = gi.spAt(i + 1);
 		memset(gtsamples, 0, FLOATSIZE * nSamples);
 		memset(gfsamples, 0, FLOATSIZE * nSamples);
 		for (int j = b; j < e; j++) {
+			if (alleleS) {
+			  tid = ta.gidAt(j);
+			  if (curtid != tid) {
+			    if (curtid >= 0) {
+			      if (j - curaid > 1) {
+				calcCI(nSamples, itsamples, iso_tpm[curtid].lb, iso_tpm[curtid].ub);
+				calcCI(nSamples, ifsamples, iso_fpkm[curtid].lb, iso_fpkm[curtid].ub);
+			      }
+			      else {
+				iso_tpm[curtid] = tpm[curaid];
+				iso_fpkm[curtid] = fpkm[curaid];
+			      }
+			    }
+			    curtid = tid;
+			    curaid = j;
+			  }
+			}
+
 			for (int k = 0; k < nSamples; k++) {
-				fin.read((char*)(&itsamples[k]), FLOATSIZE);
-				gtsamples[k] += itsamples[k];
-				ifsamples[k] = 1e3 / l_bars[k] * itsamples[k];
-				gfsamples[k] += ifsamples[k];
+				fin.read((char*)(&tsamples[k]), FLOATSIZE);
+				fsamples[k] = 1e3 / l_bars[k] * tsamples[k];
+				if (alleleS) {
+				  itsamples[k] += tsamples[k];
+				  ifsamples[k] += fsamples[k];
+				}
+				gtsamples[k] += tsamples[k];
+				gfsamples[k] += fsamples[k];
 			}
-			calcCI(nSamples, itsamples, iso_tpm[j].lb, iso_tpm[j].ub);
-			calcCI(nSamples, ifsamples, iso_fpkm[j].lb, iso_fpkm[j].ub);
+			calcCI(nSamples, tsamples, tpm[j].lb, tpm[j].ub);
+			calcCI(nSamples, fsamples, fpkm[j].lb, fpkm[j].ub);
 		}
 
 		if (e - b > 1) {
@@ -304,18 +313,34 @@ void* calcCI_batch(void* arg) {
 			calcCI(nSamples, gfsamples, gene_fpkm[i].lb, gene_fpkm[i].ub);
 		}
 		else {
-			gene_tpm[i].lb = iso_tpm[b].lb; gene_tpm[i].ub = iso_tpm[b].ub;
-			gene_fpkm[i].lb = iso_fpkm[b].lb; gene_fpkm[i].ub = iso_fpkm[b].ub;
+			gene_tpm[i] = tpm[b];
+			gene_fpkm[i] = fpkm[b];
 		}
 
 		++cnt;
 		if (verbose && cnt % 1000 == 0) { printf("In thread %d, %d genes are processed for CI calculation!\n", ciParams->no, cnt); }
 	}
-
 	fin.close();
 
-	delete[] itsamples;
+	if (alleleS && (curtid >= 0)) {
+	  if (gi.spAt(ciParams->end_gene_id) - curaid > 1) {
+	    calcCI(nSamples, itsamples, iso_tpm[curtid].lb, iso_tpm[curtid].ub);
+	    calcCI(nSamples, ifsamples, iso_fpkm[curtid].lb, iso_fpkm[curtid].ub);
+	  }
+	  else {
+	    iso_tpm[curtid] = tpm[curaid];
+	    iso_fpkm[curtid] = fpkm[curaid];
+	  }
+	}
+	
+	delete[] tsamples;
+	delete[] fsamples;
+	if (alleleS) {
+	  delete[] itsamples;
+	  delete[] ifsamples;
+	}
 	delete[] gtsamples;
+	delete[] gfsamples;
 
 	return NULL;
 }
@@ -325,9 +350,13 @@ void calculate_credibility_intervals(char* imdName) {
 	char outF[STRLEN];
 	int num_threads = nThreads;
 
-	iso_tpm = new CIType[M + 1];
+	tpm = new CIType[M + 1];
+	fpkm = new CIType[M + 1];
+	if (alleleS) {
+	  iso_tpm = new CIType[m_trans];
+	  iso_fpkm = new CIType[m_trans];
+	}
 	gene_tpm = new CIType[m];
-	iso_fpkm = new CIType[M + 1];
 	gene_fpkm = new CIType[m];
 
 	assert(M > 0);
@@ -375,19 +404,33 @@ void calculate_credibility_intervals(char* imdName) {
 
 	delete[] ciParamsArray;
 
-	//isoform level results
-	sprintf(outF, "%s.iso_res", imdName);
+	alleleS ? sprintf(outF, "%s.allele_res", imdName) : sprintf(outF, "%s.iso_res", imdName);
 	fo = fopen(outF, "a");
 	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.6g%c", iso_tpm[i].lb, (i < M ? '\t' : '\n'));
+	  fprintf(fo, "%.6g%c", tpm[i].lb, (i < M ? '\t' : '\n'));
 	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.6g%c", iso_tpm[i].ub, (i < M ? '\t' : '\n'));
+	  fprintf(fo, "%.6g%c", tpm[i].ub, (i < M ? '\t' : '\n'));
 	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.6g%c", iso_fpkm[i].lb, (i < M ? '\t' : '\n'));
+	  fprintf(fo, "%.6g%c", fpkm[i].lb, (i < M ? '\t' : '\n'));
 	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.6g%c", iso_fpkm[i].ub, (i < M ? '\t' : '\n'));
+	  fprintf(fo, "%.6g%c", fpkm[i].ub, (i < M ? '\t' : '\n'));
 	fclose(fo);
 
+	if (alleleS) {
+	  //isoform level results
+	  sprintf(outF, "%s.iso_res", imdName);
+	  fo = fopen(outF, "a");
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.6g%c", iso_tpm[i].lb, (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.6g%c", iso_tpm[i].ub, (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.6g%c", iso_fpkm[i].lb, (i < m_trans - 1 ? '\t' : '\n'));
+	  for (int i = 0; i < m_trans; i++)
+	    fprintf(fo, "%.6g%c", iso_fpkm[i].ub, (i < m_trans - 1 ? '\t' : '\n'));
+	  fclose(fo);
+	}
+
 	//gene level results
 	sprintf(outF, "%s.gene_res", imdName);
 	fo = fopen(outF, "a");
@@ -401,9 +444,13 @@ void calculate_credibility_intervals(char* imdName) {
 		fprintf(fo, "%.6g%c", gene_fpkm[i].ub, (i < m - 1 ? '\t' : '\n'));
 	fclose(fo);
 
-	delete[] iso_tpm;
+	delete[] tpm;
+	delete[] fpkm;
+	if (alleleS) { 
+	  delete[] iso_tpm; 
+	  delete[] iso_fpkm; 
+	}
 	delete[] gene_tpm;
-	delete[] iso_fpkm;
 	delete[] gene_fpkm;
 
 	if (verbose) { printf("All credibility intervals are calculated!\n"); }
@@ -415,6 +462,7 @@ int main(int argc, char* argv[]) {
 		exit(-1);
 	}
 
+	strcpy(refName, argv[1]);
 	strcpy(imdName, argv[2]);
 	strcpy(statName, argv[3]);
 
@@ -431,13 +479,18 @@ int main(int argc, char* argv[]) {
 	}
 	verbose = !quiet;
 
-	sprintf(refF, "%s.seq", argv[1]);
+	sprintf(refF, "%s.seq", refName);
 	refs.loadRefs(refF, 1);
 	M = refs.getM();
-	sprintf(groupF, "%s.grp", argv[1]);
+
+	sprintf(groupF, "%s.grp", refName);
 	gi.load(groupF);
 	m = gi.getm();
 
+	// allele-specific 
+	alleleS = isAlleleSpecific(refName, NULL, &ta);
+	if (alleleS) m_trans = ta.getm();
+
 	nSamples = nCV * nSpC;
 	assert(nSamples > 0 && M > 0); // for Buffter.h: (bufsize_type)nSamples
 	l_bars = new float[nSamples];
@@ -463,14 +516,6 @@ int main(int argc, char* argv[]) {
 	calculate_credibility_intervals(imdName);
 
 	delete l_bars;
-	/*
-	sprintf(command, "rm -f %s", tmpF);
-	int status = system(command);
-	if (status != 0) {
-		fprintf(stderr, "Cannot delete %s!\n", tmpF);
-		exit(-1);
-	}
-	*/
 
 	return 0;
 }
diff --git a/makefile b/makefile
index c95fd14..aec4b2d 100644
--- a/makefile
+++ b/makefile
@@ -15,7 +15,7 @@ Transcripts.h : my_assert.h Transcript.h
 rsem-extract-reference-transcripts : utils.h GTFItem.h Transcript.h Transcripts.h extractRef.cpp
 	$(CC) -Wall -O3 extractRef.cpp -o rsem-extract-reference-transcripts
 
-rsem-synthesis-reference-transcripts : utils.h Transcript.h Transcripts.h synthesisRef.cpp
+rsem-synthesis-reference-transcripts : utils.h my_assert.h Transcript.h Transcripts.h synthesisRef.cpp
 	$(CC) -Wall -O3 synthesisRef.cpp -o rsem-synthesis-reference-transcripts
 
 BowtieRefSeqPolicy.h : RefSeqPolicy.h
@@ -82,10 +82,12 @@ BamWriter.h : sam/sam.h sam/bam.h sam_rsem_aux.h sam_rsem_cvt.h SingleHit.h Pair
 
 sampling.h : boost/random.hpp
 
+WriteResults.h : utils.h my_assert.h GroupInfo.h Transcript.h Transcripts.h RefSeq.h Refs.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h
+
 rsem-run-em : EM.o sam/libbam.a
 	$(CC) -o rsem-run-em EM.o sam/libbam.a -lz -lpthread
 
-EM.o : utils.h my_assert.h Read.h SingleRead.h SingleReadQ.h PairedEndRead.h PairedEndReadQ.h SingleHit.h PairedEndHit.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h Refs.h GroupInfo.h HitContainer.h ReadIndex.h ReadReader.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h ModelParams.h RefSeq.h RefSeqPolicy.h PolyARules.h Profile.h NoiseProfile.h Transcript.h Transcripts.h HitWrapper.h BamWriter.h sam/bam.h sam/sam.h simul.h sam_rsem_aux.h sampling.h boost/random.hpp EM.cpp
+EM.o : utils.h my_assert.h Read.h SingleRead.h SingleReadQ.h PairedEndRead.h PairedEndReadQ.h SingleHit.h PairedEndHit.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h Refs.h GroupInfo.h HitContainer.h ReadIndex.h ReadReader.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h ModelParams.h RefSeq.h RefSeqPolicy.h PolyARules.h Profile.h NoiseProfile.h Transcript.h Transcripts.h HitWrapper.h BamWriter.h sam/bam.h sam/sam.h simul.h sam_rsem_aux.h sampling.h boost/random.hpp WriteResults.h EM.cpp
 	$(CC) $(COFLAGS) EM.cpp
 
 bc_aux.h : sam/bam.h
@@ -107,14 +109,14 @@ wiggle.o: sam/bam.h sam/sam.h wiggle.cpp wiggle.h
 rsem-simulate-reads : simulation.o
 	$(CC) -o rsem-simulate-reads simulation.o
 
-simulation.o : utils.h Read.h SingleRead.h SingleReadQ.h PairedEndRead.h PairedEndReadQ.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h Refs.h RefSeq.h GroupInfo.h Transcript.h Transcripts.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h Profile.h NoiseProfile.h simul.h boost/random.hpp simulation.cpp
+simulation.o : utils.h Read.h SingleRead.h SingleReadQ.h PairedEndRead.h PairedEndReadQ.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h Refs.h RefSeq.h GroupInfo.h Transcript.h Transcripts.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h Profile.h NoiseProfile.h simul.h boost/random.hpp WriteResults.h simulation.cpp
 	$(CC) $(COFLAGS) simulation.cpp
 
 rsem-run-gibbs : Gibbs.o
 	$(CC) -o rsem-run-gibbs Gibbs.o -lpthread
 
 #some header files are omitted
-Gibbs.o : utils.h my_assert.h boost/random.hpp sampling.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h RefSeq.h RefSeqPolicy.h PolyARules.h Refs.h GroupInfo.h Gibbs.cpp 
+Gibbs.o : utils.h my_assert.h boost/random.hpp sampling.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h RefSeq.h RefSeqPolicy.h PolyARules.h Refs.h GroupInfo.h WriteResults.h Gibbs.cpp 
 	$(CC) $(COFLAGS) Gibbs.cpp
 
 Buffer.h : my_assert.h
@@ -123,7 +125,7 @@ rsem-calculate-credibility-intervals : calcCI.o
 	$(CC) -o rsem-calculate-credibility-intervals calcCI.o -lpthread
 
 #some header files are omitted
-calcCI.o : utils.h my_assert.h boost/random.hpp sampling.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h RefSeq.h RefSeqPolicy.h PolyARules.h Refs.h GroupInfo.h Buffer.h calcCI.cpp
+calcCI.o : utils.h my_assert.h boost/random.hpp sampling.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h RefSeq.h RefSeqPolicy.h PolyARules.h Refs.h GroupInfo.h WriteResults.h Buffer.h calcCI.cpp
 	$(CC) $(COFLAGS) calcCI.cpp
 
 rsem-get-unique : sam/bam.h sam/sam.h getUnique.cpp sam/libbam.a
diff --git a/rsem-calculate-expression b/rsem-calculate-expression
index b523edd..8a7ca0e 100755
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -51,6 +51,7 @@ my $nThreads = 1;
 my $genBamF = 1;  # default is generating transcript bam file
 my $genGenomeBamF = 0;
 my $sampling = 0;
+my $calcPME = 0;
 my $calcCI = 0;
 my $var_opt = 0; # temporarily, only for internal use
 my $quiet = 0;
@@ -80,6 +81,8 @@ my $inpF = "";
 my ($refName, $sampleName, $sampleToken, $temp_dir, $stat_dir, $imdName, $statName) = ();
 my $gap = 32;
 
+my $alleleS = 0;
+
 GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
 	   "temporary-folder=s" => \$temp_dir,
 	   "no-qualities" => \$no_qual,
@@ -114,6 +117,7 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files,
 	   "no-bam-output" => sub { $genBamF = 0; },
 	   "output-genome-bam" => \$genGenomeBamF,
 	   "sampling-for-bam" => \$sampling,
+	   "calc-pme" => \$calcPME,
 	   "var" => \$var_opt,
 	   "calc-ci" => \$calcCI,
 	   "ci-memory=i" => \$NMB,
@@ -181,6 +185,13 @@ else {
     $sampleName = $ARGV[3];
 }
 
+if (((-e "$refName.ta") && !(-e "$refName.gt")) || (!(-e "$refName.ta") && (-e "$refName.gt"))) {
+    print "Allele-specific expression related reference files are corrupted!\n";
+    exit(-1);
+}
+
+$alleleS = (-e "$refName.ta") && (-e "$refName.gt");
+
 if ($genGenomeBamF) {
     open(INPUT, "$refName.ti");
     my $line = <INPUT>; chomp($line);
@@ -329,13 +340,20 @@ if ($genBamF) {
     else { $command .= " 0"; }
     if ($sampling) { $command .= " --sampling"; }
 }
-if ($calcCI || $var_opt) { $command .= " --gibbs-out"; }
+if ($calcPME || $var_opt || $calcCI) { $command .= " --gibbs-out"; }
 if ($quiet) { $command .= " -q"; }
 
 &runCommand($command);
 
-&collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
-&collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+if ($alleleS) {
+    &collectResults("allele", "$imdName.allele_res", "$sampleName.alleles.results"); # allele level
+    &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+    &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+} 
+else {
+    &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+    &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+}
 
 if ($genBamF) {
     $command = $dir."sam/samtools sort -@ $nThreads -m $SortMem $sampleName.transcript.bam $sampleName.transcript.sorted";
@@ -357,7 +375,7 @@ if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; }
 
 if ($mTime) { $time_start = time(); }
 
-if ($calcCI || $var_opt) {
+if ($calcPME || $var_opt || $calcCI ) {
     $command = $dir."rsem-run-gibbs $refName $imdName $statName $BURNIN $NCV $SAMPLEGAP";
     $command .= " -p $nThreads";
     if ($var_opt) { $command .= " --var"; }
@@ -365,21 +383,43 @@ if ($calcCI || $var_opt) {
     &runCommand($command);
 }
 
-if ($calcCI) {
-    system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
-    system("mv $sampleName.genes.results $imdName.genes.results.bak1");
-    &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
-    &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+if ($calcPME || $calcCI) {
+    if ($alleleS) {
+	system("mv $sampleName.alleles.results $imdName.alleles.results.bak1");
+	system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
+	system("mv $sampleName.genes.results $imdName.genes.results.bak1");
+	&collectResults("allele", "$imdName.allele_res", "$sampleName.alleles.results"); # allele level
+	&collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+	&collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+    }
+    else {
+	system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1");
+	system("mv $sampleName.genes.results $imdName.genes.results.bak1");
+	&collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+	&collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+    }
+}
 
+if ($calcCI) {
     $command = $dir."rsem-calculate-credibility-intervals $refName $imdName $statName $CONFIDENCE $NCV $NSPC $NMB";
     $command .= " -p $nThreads";
     if ($quiet) { $command .= " -q"; }
     &runCommand($command);
 
-    system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2");
-    system("mv $sampleName.genes.results $imdName.genes.results.bak2");
-    &collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
-    &collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+    if ($alleleS) {
+	system("mv $sampleName.alleles.results $imdName.alleles.results.bak2");
+	system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2");
+	system("mv $sampleName.genes.results $imdName.genes.results.bak2");
+	&collectResults("allele", "$imdName.allele_res", "$sampleName.alleles.results"); # allele level
+	&collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+	&collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+    }
+    else {
+	system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2");
+	system("mv $sampleName.genes.results $imdName.genes.results.bak2");
+	&collectResults("isoform", "$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level
+	&collectResults("gene", "$imdName.gene_res", "$sampleName.genes.results"); # gene level
+    }
 }
 
 if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; }
@@ -397,7 +437,7 @@ if ($mTime) {
     print OUTPUT "Aligning reads: $time_alignment s.\n";
     print OUTPUT "Estimating expression levels: $time_rsem s.\n";
     print OUTPUT "Calculating credibility intervals: $time_ci s.\n";
-    my $time_del = $time_end - $time_start;
+#    my $time_del = $time_end - $time_start;
 #    print OUTPUT "Delete: $time_del s.\n";
     close(OUTPUT);
 }
@@ -484,9 +524,13 @@ Generate a BAM file, 'sample_name.genome.bam', with alignments mapped to genomic
 
 When RSEM generates a BAM file, instead of outputing all alignments a read has with their posterior probabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure includes the alignment to the "noise" transcript, which does not appear in the BAM file. Only the sampled alignment has a weight of 1. All other alignments have weight 0. If the "noise" transcript is sampled, all alignments appeared in the BAM file should have weight 0. (Default: off)
 
+=item B<--calc-pme>
+
+Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates. (Default: off) 
+
 =item B<--calc-ci>
 
-Calculate 95% credibility intervals and posterior mean estimates.  (Default: off)
+Calculate 95% credibility intervals and posterior mean estimates. (Default: off)
 
 =item B<--seed-length> <int>
 
@@ -644,8 +688,9 @@ each line in the rest of this file is:
 
 transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct [pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound]
 
-Fields are separated by the tab character. Fields within "[]" are only
-presented if '--calc-ci' is set.
+Fields are separated by the tab character. Fields within "[]" are
+optional. They will not be presented if neither '--calc-pme' nor
+'--calc-ci' is set.
 
 'transcript_id' is the transcript name of this transcript. 'gene_id'
 is the gene name of the gene which this transcript belongs to (denote
@@ -704,8 +749,9 @@ each line in the rest of this file is:
 
 gene_id transcript_id(s) length effective_length expected_count TPM FPKM [pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound]
 
-Fields are separated by the tab character. Fields within "[]" are only
-presented if '--calc-ci' is set. 
+Fields are separated by the tab character. Fields within "[]" are
+optional. They will not be presented if neither '--calc-pme' nor
+'--calc-ci' is set.
 
 'transcript_id(s)' is a comma-separated list of transcript_ids
 belonging to this gene. If no gene information is provided, 'gene_id'
@@ -716,6 +762,43 @@ defined as the weighted average of its transcripts' lengths and
 effective lengths (weighted by 'IsoPct'). A gene's abundance estimates
 are just the sum of its transcripts' abundance estimates.
 
+=item B<sample_name.alleles.results>
+
+Only generated when the RSEM references are built with allele-specific
+transcripts.
+
+This file contains allele level expression estimates for
+allele-specific expression calculation. The first line
+contains column names separated by the tab character. The format of
+each line in the rest of this file is:
+
+allele_id transcript_id gene_id length effective_length expected_count TPM FPKM AlleleIsoPct AlleleGenePct [pme_expected_count pme_TPM pme_FPKM AlleleIsoPct_from_pme_TPM AlleleGenePct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound]
+
+Fields are separated by the tab character. Fields within "[]" are
+optional. They will not be presented if neither '--calc-pme' nor
+'--calc-ci' is set.
+
+'allele_id' is the allele-specific name of this allele-specific transcript.
+
+'AlleleIsoPct' stands for allele-specific percentage on isoform
+level. It is the percentage of this allele-specific transcript's
+abundance over its parent transcript's abundance. If its parent
+transcript has only one allele variant form, this field will be set to
+100.
+
+'AlleleGenePct' stands for allele-specific percentage on gene
+level. It is the percentage of this allele-specific transcript's
+abundance over its parent gene's abundance.
+
+'AlleleIsoPct_from_pme_TPM' and 'AlleleGenePct_from_pme_TPM' have
+similar meanings. They are calculated based on posterior mean
+estimates.
+
+Please note that if this file is present, the fields 'length' and
+'effective_length' in 'sample_name.isoforms.results' should be
+interpreted similarly as the corresponding definitions in
+'sample_name.genes.results'.
+
 =item B<sample_name.transcript.bam, sample_name.transcript.sorted.bam and sample_name.transcript.sorted.bam.bai>
 
 Only generated when --no-bam-output is not specified.
diff --git a/rsem-gen-transcript-plots b/rsem-gen-transcript-plots
index 4ea1d87..5b8cc98 100755
--- a/rsem-gen-transcript-plots
+++ b/rsem-gen-transcript-plots
@@ -2,83 +2,89 @@
 
 nrow_per_page <- 3 # if input_list is composed of transcript ids
 ncol_per_page <- 2 # if input_list is composed of transcript ids
-num_plots_per_page <- nrow_per_page * ncol_per_page # if input_list is composed of transcript ids
-
+num_plots_per_page <- nrow_per_page * ncol_per_page # if input_list is composed of transcript/allele ids
 
 exit_with_error <- function(errmsg) {
   cat(errmsg, "\n", sep = "", file = stderr())
   quit(save = "no", status = 1)
 }
 
-
 args <- commandArgs(TRUE)
-if (length(args) != 5) 
-  exit_with_error("Usage: rsem-gen-transcript-plots sample_name input_list is_gene show_uniq output_plot_file")
+if (length(args) != 6) 
+  exit_with_error("Usage: rsem-gen-transcript-plots sample_name input_list is_allele_specific id_type<0,allele;1,isoform;2,gene> show_uniq output_plot_file")
 
 sample_name <- args[1]
 input_list <- args[2]
-is_gene <- as.numeric(args[3])
-show_uniq <- as.numeric(args[4])
-output_plot_file <- args[5]
+alleleS <- as.numeric(args[3])
+id_type <- as.numeric(args[4])
+show_uniq <- as.numeric(args[5])
+output_plot_file <- args[6]
 
+is_composite <- (!alleleS && (id_type == 2)) || (alleleS && (id_type > 0))
+ 
 load_readdepth_file <- function(filename) {
   data <- read.table(file = filename, sep = "\t", stringsAsFactors = FALSE)
   readdepth <- split(data[, 2:3], data[, 1])
 }
 
-build_t2gmap <- function(filename) {
-  tpos <- 1   # the position of transcript_id
-  gpos <- 2   # the position of gene_id
+build_map <- function(filename) {
+  value_pos <- 1
+  if (!alleleS || (alleleS && id_type == 1)) {
+    key_pos <- 2
+  } else {
+    key_pos <- 3
+  }
 
   data <- read.delim(file = filename, sep = "\t", stringsAsFactors = FALSE)
-  tmp <- aggregate(data[tpos], data[gpos], function(x) x)
-  t2gmap <- tmp[,2]
-  names(t2gmap) <- tmp[,1]
+  tmp <- aggregate(data[value_pos], data[key_pos], function(x) x)
+  map <- tmp[,2]
+  names(map) <- tmp[,1]
 
-  t2gmap
+  map
 }
 
-make_a_plot <- function(tid) {
-  vec <- readdepth[[tid]]	
-  if (is.null(vec)) exit_with_error(paste("Unknown transcript detected,", tid, "is not included in RSEM's indices."))
+make_a_plot <- function(id) {
+  vec <- readdepth[[id]]	
+  if (is.null(vec)) exit_with_error(sprintf("Unknown %s detected, %s is not included in RSEM's indices.", ifelse(alleleS, "allele-specific transcript", "transcript"), id))
   if (is.na(vec[[2]])) wiggle <- rep(0, vec[[1]]) else wiggle <- as.numeric(unlist(strsplit(vec[[2]], split = " ")))
   len <- length(wiggle)
   if (!show_uniq) {
     plot(wiggle, type = "h")
   } else {
-    vec <- readdepth_uniq[[tid]]
+    vec <- readdepth_uniq[[id]]
     stopifnot(!is.null(vec))
     if (is.na(vec[[2]])) wiggle_uniq <- rep(0, vec[[1]]) else wiggle_uniq <- as.numeric(unlist(strsplit(vec[[2]], split = " ")))
     stopifnot(len == length(wiggle_uniq))
     if (len != sum(wiggle >= wiggle_uniq)) {
-      cat("Warning: transcript ", tid, " has position(s) that read covarege with multireads is smaller than read covarge without multireads.\n", "         The 1-based position(s) is(are) : ", which(wiggle < wiggle_uniq), ".\n", "         This may be due to floating point arithmetics.\n", sep = "") 
+      cat("Warning: ", ifelse(alleleS, "allele-specific transcript", "transcript"), " ", id, " has position(s) that read covarege with multireads is smaller than read covarge without multireads.\n", "         The 1-based position(s) is(are) : ", which(wiggle < wiggle_uniq), ".\n", "         This may be due to floating point arithmetics.\n", sep = "") 
     }
     heights <- rbind(wiggle_uniq, wiggle - wiggle_uniq)	
     barplot(heights, space = 0, border = NA, names.arg = 1:len, col = c("black", "red")) 
   }
-  title(main = tid) #, xlab = "Position in transcript", ylab = "Read depth")
+  title(main = id)
 }
 
-generate_a_page <- function(tids, gene_id = NULL) {
-  n <- length(tids)
-  ncol <- ifelse(is_gene, floor(sqrt(n)), ncol_per_page)
-  nrow <- ifelse(is_gene, ceiling(n / ncol), nrow_per_page)
+generate_a_page <- function(ids, title = NULL) {
+  n <- length(ids)
+  ncol <- ifelse(is_composite, floor(sqrt(n)), ncol_per_page)
+  nrow <- ifelse(is_composite, ceiling(n / ncol), nrow_per_page)
 
   par(mfrow = c(nrow, ncol), mar = c(2, 2, 2, 2))
-  if (is_gene) par(oma = c(0, 0, 3, 0)) 
-  sapply(tids, make_a_plot)
-  if (is_gene) mtext(gene_id, outer = TRUE, line = 1)
+  if (is_composite) par(oma = c(0, 0, 3, 0)) 
+  sapply(ids, make_a_plot)
+  if (is_composite) mtext(title, outer = TRUE, line = 1)
 }
 
-plot_a_transcript <- function(i) {
+plot_individual <- function(i) {
   fr <- (i - 1) * num_plots_per_page + 1
   to <- min(i * num_plots_per_page, n)
   generate_a_page(ids[fr:to])
 }
 
-plot_a_gene <- function(gene_id) {
-  if (is.null(t2gmap[[gene_id]])) exit_with_error(paste("Unknown gene detected,", gene_id, "is not included in RSEM's in indices."))
-  generate_a_page(t2gmap[[gene_id]], gene_id)
+# cid, composite id, can be either a gene id or transcript id (for allele-specific expression only)
+plot_composite <- function(cid) {
+  if (is.null(map[[cid]])) exit_with_error(sprintf("Unknown %s detected, %s is not included in RSEM's indices.", ifelse(alleleS && id_type == 1, "transcript", "gene"), cid))
+  generate_a_page(map[[cid]], cid)
 }
 
 readdepth <- load_readdepth_file(paste(sample_name, ".transcript.readdepth", sep = ""))
@@ -91,24 +97,22 @@ ids <- scan(file = input_list, what = "", sep = "\n")
 
 cat("Loading files is done!\n")
 
-if (is_gene) {
-  t2gmap <- build_t2gmap(paste(sample_name, ".isoforms.results", sep = ""))
+if (is_composite) {
+  file_name <- sprintf("%s.%s.results", sample_name, ifelse(alleleS, "alleles", "isoforms"))
+  map <- build_map(file_name)
   cat("Building transcript to gene map is done!\n")
 }
 
 pdf(output_plot_file)
 
-if (!is_gene) {	
+if (!is_composite) {	
   n <- length(ids)
   ub <- (n - 1) %/% num_plots_per_page + 1
-  tmp <- sapply(1:ub, plot_a_transcript)
+  dumbvar <- sapply(1:ub, plot_individual)
 } else {
-  tmp <- sapply(ids, plot_a_gene)
+  dumbvar <- sapply(ids, plot_composite)
 }
 
 cat("Plots are generated!\n")
 
 dev.off.output <- dev.off()
-
-
-
diff --git a/rsem-plot-model b/rsem-plot-model
index 225d08f..e87226d 100755
--- a/rsem-plot-model
+++ b/rsem-plot-model
@@ -146,4 +146,4 @@ barplot(pair[,2], names.arg = pair[,1],
         ylab = "Number of fragments",
         main = "Histogram of Alignments per Fragment")
 
-dev.off()
+dev.off.output <- dev.off()
diff --git a/rsem-plot-transcript-wiggles b/rsem-plot-transcript-wiggles
index efe00ba..74076ec 100755
--- a/rsem-plot-transcript-wiggles
+++ b/rsem-plot-transcript-wiggles
@@ -8,17 +8,25 @@ use strict;
 
 use rsem_perl_utils;
 
-my $gene_list = 0; # default is 0, means input is a transcript list; 1 means input is a gene list
+my $gene_list = 0; # default is 0, means input is not a gene list
+my $transcript_list = 0; # default is 0, this option can only be turned on if allele-specific expression is calculated
 my $show_unique = 0; # 0, default value, means do not show unique transcript wiggles; 1 means show unique transcript wiggles
 my $help = 0;
 
 GetOptions("gene-list" => \$gene_list,
+	   "transcript-list" => \$transcript_list,
 	   "show-unique" => \$show_unique,
 	   "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
 
 pod2usage(-verbose => 2) if ($help == 1);
 pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 3);
 
+my $alleleS = 0;
+if (-e "$ARGV[0].alleles.results") { $alleleS = 1; }
+
+pod2usage(-msg => "--transcript-list cannot be set if allele-specific reference is not built!", -exitval => 2, -verbose => 2) if (!$alleleS && $transcript_list);
+pod2usage(-msg => "--gene-list and --transcript-list cannot be set at the same time!", -exitval => 2, -verbose => 2) if ($gene_list && $transcript_list);
+
 my $dir = "$FindBin::Bin/";
 my $command = "";
 
@@ -46,7 +54,19 @@ if ($show_unique) {
     }
 }
 
-$command = $dir."rsem-gen-transcript-plots $ARGV[0] $ARGV[1] $gene_list $show_unique $ARGV[2]";
+my $id_type;
+
+if ($alleleS) {
+    $id_type = 0;
+    if ($transcript_list) { $id_type = 1; }
+    if ($gene_list) { $id_type = 2; }
+} 
+else {
+    $id_type = 1;
+    if ($gene_list) { $id_type = 2; }
+}
+
+$command = $dir."rsem-gen-transcript-plots $ARGV[0] $ARGV[1] $alleleS $id_type $show_unique $ARGV[2]";
 &runCommand($command);
 
 __END__
@@ -85,6 +105,10 @@ The file name of the pdf file which contains all plots.
 
 The input-list is a list of gene ids. (Default: off)
 
+=item B<--transcript-list>
+
+The input-list is a list of transcript ids. This option can only be turned on if allele-specific expression is calculated. (Default: off)
+
 =item B<--show-unique>
 
 Show the wiggle plots as stacked bar plots. See description section for details. (Default: off)
@@ -97,7 +121,7 @@ Show help information.
 
 =head1 DESCRIPTION
 
-This program generates transcript wiggle plots and outputs them in a pdf file. This program can accept either a list of transcript ids or gene ids (if transcript to gene mapping information is provided) and has two modes of showing wiggle plots. If '--show-unique' is not specified, the wiggle plot for each transcript is a histogram where each position has the expected read depth at this position as its height. If '--show-unique' is specified, for each transcript a stacked bar plot is generated. For each position, the read depth of unique reads, which have only one alignment, is showed in black. The read depth of multi-reads, which align to more than one places, is showed in red on top of the read depth of unique reads.This program will use some files RSEM generated previouslly. So please do not delete/move any file 'rsem-calculate-expression' generated.
+This program generates transcript wiggle plots and outputs them in a pdf file. This program can accept either a list of transcript ids or gene ids (if transcript to gene mapping information is provided) and has two modes of showing wiggle plots. If '--show-unique' is not specified, the wiggle plot for each transcript is a histogram where each position has the expected read depth at this position as its height. If '--show-unique' is specified, for each transcript a stacked bar plot is generated. For each position, the read depth of unique reads, which have only one alignment, is showed in black. The read depth of multi-reads, which align to more than one places, is showed in red on top of the read depth of unique reads.This program will use some files RSEM generated previouslly. So please do not delete/move any file 'rsem-calculate-expression' generated. If allele-specific expression is calculated, the basic unit for plotting is an allele-specific transcript and plots can be grouped by either transcript ids (--transcript-list) or gene ids (--gene-list). 
 
 =head1 OUTPUT
 
@@ -105,7 +129,7 @@ This program generates transcript wiggle plots and outputs them in a pdf file. T
 
 =item B<output_plot_file>
 
-This is a pdf file containing all plots generated. If a list of transcript ids is provided, each page display at most 6 plots in 3 rows and 2 columns. If gene ids are provided, each page display a gene. The gene's id is showed at the top and all its transcripts' wiggle plots are showed in this page. The arrangment of plots is determined automatically. For each transcript wiggle plot, the transcript id is displayed as title. x-axis is position in the transcript and y-axis is read depth. 
+This is a pdf file containing all plots generated. If a list of transcript ids is provided, each page display at most 6 plots in 3 rows and 2 columns. If gene ids are provided, each page display a gene. The gene's id is showed at the top and all its transcripts' wiggle plots are showed in this page. The arrangment of plots is determined automatically. For each transcript wiggle plot, the transcript id is displayed as title. x-axis is position in the transcript and y-axis is read depth. If allele-specific expression is calculated, the basin unit becomes an allele-specific transcript and transcript ids and gene ids can be used to group allele-specific transcripts.
 
 =item B<sample_name.transcript.sorted.bam and sample_name.transcript.readdepth>
 
diff --git a/rsem-prepare-reference b/rsem-prepare-reference
index 81fd13e..de4f061 100755
--- a/rsem-prepare-reference
+++ b/rsem-prepare-reference
@@ -24,8 +24,11 @@ my $bowtie2_path = "";
 my $quiet = 0;
 my $help = 0;
 
+my $alleleMappingF = "";
+
 GetOptions("gtf=s" => \$gtfF,
 	   "transcript-to-gene-map=s" => \$mappingF,
+	   "allele-to-gene-map=s" => \$alleleMappingF,
 	   "no-polyA" => \$no_polyA, 
 	   "no-polyA-subset=s" => \$subsetFile,
 	   "polyA-length=i" => \$polyALen,
@@ -39,6 +42,8 @@ GetOptions("gtf=s" => \$gtfF,
 
 pod2usage(-verbose => 2) if ($help == 1);
 pod2usage(-msg => "Set --no-polyA & --no-polyA-subset at the same time!", -exitval => 2, -verbose => 2) if ($no_polyA == 1 && $subsetFile ne '');
+pod2usage(-msg => "--transcript-to-gene-map and --allele-to-gene-map are mutually exclusive!", -exitval => 2, -verbose => 2) if (($mappingF ne "") && ($alleleMappingF ne ""));
+pod2usage(-msg => "--gtf and --allele-to-gene-map are mutually exclusive!", -exitval => 2, -verbose => 2) if (($gtfF ne "") && ($alleleMappingF ne ""));
 pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 2);
 
 if ($bowtie2) { $no_bowtie = 1; $no_ntog = 1; }
@@ -85,6 +90,7 @@ else {
     $"=" ";
     $command = $dir."rsem-synthesis-reference-transcripts $ARGV[1] $quiet";
     if ($mappingF ne "") { $command .= " 1 $mappingF"; }
+    elsif ($alleleMappingF ne "") { $command .= " 2 $alleleMappingF"; }
     else { $command .= " 0"; } 
     $command .= " @list";
     &runCommand($command);
@@ -164,6 +170,19 @@ If this option is off, then the mapping of isoforms to genes depends on whether
 
 (Default: off)
 
+=item B<--allele-to-gene-map> <file>
+
+Use information from <file> to provide gene_id and transcript_id information for each allele-specific transcript.
+Each line of <file> should be of the form:
+
+gene_id transcript_id allele_id
+
+with the fields separated by a tab character.
+
+This option is designed for quantifying allele-specific expression. It is only valid if '--gtf' option is not specified. allele_id should be the sequence names presented in the FASTA-formatted files.  
+
+(Default: off) 
+
 =item B<--no-polyA>
 
 Do not add poly(A) tails to the end of reference isoforms. (Default: adding poly(A) tails to all transcripts)
diff --git a/rsem_perl_utils.pm b/rsem_perl_utils.pm
index b2a040f..4321a45 100644
--- a/rsem_perl_utils.pm
+++ b/rsem_perl_utils.pm
@@ -31,12 +31,13 @@ sub runCommand {
     print "\n";
 }
 
+my @allele_title = ("allele_id", "transcript_id", "gene_id", "length", "effective_length", "expected_count", "TPM", "FPKM", "AlleleIsoPct", "AlleleGenePct", "pme_expected_count", "pme_TPM", "pme_FPKM", "AlleleIsoPct_from_pme_TPM", "AlleleGenePct_from_pme_TPM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
 
 my @transcript_title = ("transcript_id", "gene_id", "length", "effective_length", "expected_count", "TPM", "FPKM", "IsoPct", "pme_expected_count", "pme_TPM", "pme_FPKM", "IsoPct_from_pme_TPM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
 
 my @gene_title = ("gene_id", "transcript_id(s)", "length", "effective_length", "expected_count", "TPM", "FPKM", "pme_expected_count", "pme_TPM", "pme_FPKM", "TPM_ci_lower_bound", "TPM_ci_upper_bound", "FPKM_ci_lower_bound", "FPKM_ci_upper_bound");
 
-# inpF, outF
+# type, inpF, outF
 sub collectResults {
     my $local_status;
     my ($inpF, $outF);
@@ -69,7 +70,8 @@ sub collectResults {
 
     my @out_arr = ();
     for (my $i = 0; $i < $n; $i++) {
-	if ($_[0] eq "isoform") { push(@out_arr, $transcript_title[$i]); }
+	if ($_[0] eq "allele") { push(@out_arr, $allele_title[$i]); }
+	elsif ($_[0] eq "isoform") { push(@out_arr, $transcript_title[$i]); }
 	elsif ($_[0] eq "gene") { push(@out_arr, $gene_title[$i]); }
 	else { print "A bug on 'collectResults' is detected!\n"; exit(-1); }
     }
diff --git a/simulation.cpp b/simulation.cpp
index 0073c3e..3eb4242 100644
--- a/simulation.cpp
+++ b/simulation.cpp
@@ -23,21 +23,22 @@
 #include "PairedEndQModel.h"
 
 #include "Refs.h"
-#include "GroupInfo.h"
 #include "Transcript.h"
 #include "Transcripts.h"
 
+#include "WriteResults.h"
+
 #include "simul.h"
 
 using namespace std;
 
-const int OFFSITE = 5;
+bool alleleS;
+int OFFSITE;
 
 READ_INT_TYPE N;
-int model_type, M, m;
+int model_type, M;
 
 Refs refs;
-GroupInfo gi;
 Transcripts transcripts;
 
 vector<double> eel;
@@ -47,8 +48,8 @@ int n_os;
 ostream *os[2];
 char outReadF[2][STRLEN];
 
-char refF[STRLEN], groupF[STRLEN], tiF[STRLEN];
-char geneResF[STRLEN], isoResF[STRLEN];
+char refName[STRLEN];
+char refF[STRLEN], tiF[STRLEN];
 
 simul sampler;
 
@@ -78,37 +79,6 @@ void genOutReadStreams(int type, char *outFN) {
 		os[i] = new ofstream(outReadF[i]);
 }
 
-template<class ModelType>
-void calcExpectedEffectiveLengths(ModelType& model) {
-	int lb, ub, span;
-	double *pdf = NULL, *cdf = NULL, *clen = NULL; // clen[i] = sigma_{j=1}^{i}pdf[i]*(lb+i)
-  
-	model.getGLD().copyTo(pdf, cdf, lb, ub, span);
-	clen = new double[span + 1];
-	clen[0] = 0.0;
-	for (int i = 1; i <= span; i++) {
-		clen[i] = clen[i - 1] + pdf[i] * (lb + i);
-	}
-
-	eel.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++) {
-		int totLen = refs.getRef(i).getTotLen();
-		int fullLen = refs.getRef(i).getFullLen();
-		int pos1 = max(min(totLen - fullLen + 1, ub) - lb, 0);
-		int pos2 = max(min(totLen, ub) - lb, 0);
-
-		if (pos2 == 0) { eel[i] = 0.0; continue; }
-    
-		eel[i] = fullLen * cdf[pos1] + ((cdf[pos2] - cdf[pos1]) * (totLen + 1) - (clen[pos2] - clen[pos1]));
-		assert(eel[i] >= 0);
-		if (eel[i] < MINEEL) { eel[i] = 0.0; }
-	}
-  
-	delete[] pdf;
-	delete[] cdf;
-	delete[] clen;
-}
-
 template<class ReadType, class ModelType>
 void simulate(char* modelF, char* resultsF) {
 	ModelType model(&refs);
@@ -118,7 +88,7 @@ void simulate(char* modelF, char* resultsF) {
 	model.read(modelF);
 	
 	//calculate eel
-	calcExpectedEffectiveLengths<ModelType>(model);
+	calcExpectedEffectiveLengths<ModelType>(M, refs, model, eel);
 
 	//generate theta vector
 	ifstream fin(resultsF);
@@ -155,99 +125,6 @@ void simulate(char* modelF, char* resultsF) {
 	cout<< "Total number of resimulation is "<< resimulation_count<< endl;
 }
 
-void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
-	double denom;
-	vector<double> frac;
-
-	//calculate fraction of count over all mappabile reads
-	denom = 0.0;
-	frac.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++)
-	  if (eel[i] >= EPSILON) {
-	    frac[i] = theta[i];
-	    denom += frac[i];
-	  }
-	general_assert(denom > 0, "No alignable reads?!");
-	for (int i = 1; i <= M; i++) frac[i] /= denom;
-
-	//calculate FPKM
-	fpkm.assign(M + 1, 0.0);
-	for (int i = 1; i <= M; i++)
-		if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
-
-	//calculate TPM
-	tpm.assign(M + 1, 0.0);
-	denom = 0.0;
-	for (int i = 1; i <= M; i++) denom += fpkm[i];
-	for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;
-}
-
-void writeResFiles(char* outFN) {
-	FILE *fo;
-	vector<int> tlens;
-	vector<double> fpkm, tpm, isopct;
-	vector<double> glens, gene_eels, gene_counts, gene_tpm, gene_fpkm;
-
-	for (int i = 1; i <= M; i++)
-		general_assert(eel[i] > EPSILON || counts[i] <= EPSILON, "An isoform whose effecitve length < " + ftos(MINEEL, 6) + " got sampled!");
-
-	calcExpressionValues(counts, eel, tpm, fpkm);
-
-	//calculate IsoPct, etc.
-	isopct.assign(M + 1, 0.0);
-	tlens.assign(M + 1, 0);
-
-	glens.assign(m, 0.0); gene_eels.assign(m, 0.0);
-	gene_counts.assign(m, 0.0); gene_tpm.assign(m, 0.0); gene_fpkm.assign(m, 0.0);
-
-	for (int i = 0; i < m; i++) {
-		int b = gi.spAt(i), e = gi.spAt(i + 1);
-		for (int j = b; j < e; j++) {
-			const Transcript& transcript = transcripts.getTranscriptAt(j);
-			tlens[j] = transcript.getLength();
-
-			glens[i] += tlens[j] * tpm[j];
-			gene_eels[i] += eel[j] * tpm[j];
-			gene_counts[i] += counts[j];
-			gene_tpm[i] += tpm[j];
-			gene_fpkm[i] += fpkm[j];
-		}
-
-		if (gene_tpm[i] < EPSILON) continue;
-
-		for (int j = b; j < e; j++)
-			isopct[j] = tpm[j] / gene_tpm[i];
-		glens[i] /= gene_tpm[i];
-		gene_eels[i] /= gene_tpm[i];
-	}
-
-	//isoform level
-	sprintf(isoResF, "%s.sim.isoforms.results", outFN);
-	fo = fopen(isoResF, "w");
-	fprintf(fo, "transcript_id\tgene_id\tlength\teffective_length\tcount\tTPM\tFPKM\tIsoPct\n");
-	for (int i = 1; i <= M; i++) {
-		const Transcript& transcript = transcripts.getTranscriptAt(i);
-		fprintf(fo, "%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", transcript.getTranscriptID().c_str(), transcript.getGeneID().c_str(), tlens[i],
-				eel[i], counts[i], tpm[i], fpkm[i], isopct[i] * 1e2);
-	}
-	fclose(fo);
-
-	//gene level
-	sprintf(geneResF, "%s.sim.genes.results", outFN);
-	fo = fopen(geneResF, "w");
-	fprintf(fo, "gene_id\ttranscript_id(s)\tlength\teffective_length\tcount\tTPM\tFPKM\n");
-	for (int i = 0; i < m; i++) {
-		int b = gi.spAt(i), e = gi.spAt(i + 1);
-		const string& gene_id = transcripts.getTranscriptAt(b).getGeneID();
-		fprintf(fo, "%s\t", gene_id.c_str());
-		for (int j = b; j < e; j++) {
-			fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : '\t'));
-		}
-		fprintf(fo, "%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", glens[i], gene_eels[i], gene_counts[i], gene_tpm[i], gene_fpkm[i]);
-	}
-	fclose(fo);
-}
-
 void releaseOutReadStreams() {
 	for (int i = 0; i < n_os; i++) {
 		((ofstream*)os[i])->close();
@@ -264,7 +141,7 @@ int main(int argc, char* argv[]) {
 		printf("Parameters:\n\n");
 		printf("reference_name: The name of RSEM references, which should be already generated by 'rsem-prepare-reference'\n");
 		printf("estimated_model_file: This file describes how the RNA-Seq reads will be sequenced given the expression levels. It determines what kind of reads will be simulated (single-end/paired-end, w/o quality score) and includes parameters for fragment length distribution, read start position distribution, sequencing error models, etc. Normally, this file should be learned from real data using 'rsem-calculate-expression'. The file can be found under the 'sample_name.stat' folder with the name of 'sample_name.model'\n");
-		printf("estimated_isoform_results: This file contains expression levels for all isoforms recorded in the reference. It can be learned using 'rsem-calculate-expression' from real data. The corresponding file users want to use is 'sample_name.isoforms.results'. If simulating from user-designed expression profile is desired, start from a learned 'sample_name.isoforms.results' file and only modify the 'TPM' column. The simulator only reads the TPM column. But keeping the file format the same is required.\n");
+		printf("estimated_isoform_results: This file contains expression levels for all isoforms recorded in the reference. It can be learned using 'rsem-calculate-expression' from real data. The corresponding file users want to use is 'sample_name.isoforms.results'. If simulating from user-designed expression profile is desired, start from a learned 'sample_name.isoforms.results' file and only modify the 'TPM' column. The simulator only reads the TPM column. But keeping the file format the same is required. If the RSEM references built are aware of allele-specific transcripts, 'sample_name.alleles.results' should be used instead.\n");
 		printf("theta0: This parameter determines the fraction of reads that are coming from background \"noise\" (instead of from a transcript). It can also be estimated using 'rsem-calculate-expression' from real data. Users can find it as the first value of the third line of the file 'sample_name.stat/sample_name.theta'.\n");
 		printf("N: The total number of reads to be simulated. If 'rsem-calculate-expression' is executed on a real data set, the total number of reads can be found as the 4th number of the first line of the file 'sample_name.stat/sample_name.cnt'.\n");
 		printf("output_name: Prefix for all output files.\n");
@@ -289,13 +166,14 @@ int main(int argc, char* argv[]) {
 	if (argc == 8 && !strcmp(argv[7], "-q")) quiet = true;
 	verbose = !quiet;
 
+	strcpy(refName, argv[1]);
+	alleleS = isAlleleSpecific(refName);
+	OFFSITE = (alleleS ? 6: 5);
+
 	//load basic files
 	sprintf(refF, "%s.seq", argv[1]);
 	refs.loadRefs(refF);
 	M = refs.getM();
-	sprintf(groupF, "%s.grp", argv[1]);
-	gi.load(groupF);
-	m = gi.getm();
 	sprintf(tiF, "%s.ti", argv[1]);
 	transcripts.readFrom(tiF);
 
@@ -320,7 +198,7 @@ int main(int argc, char* argv[]) {
 	case 3: simulate<PairedEndReadQ, PairedEndQModel>(argv[2], argv[3]); break;
 	}
 
-	writeResFiles(argv[6]);
+	writeResultsSimulation(M, refName, argv[6], transcripts, eel, counts);
 	releaseOutReadStreams();
 
 	return 0;
diff --git a/synthesisRef.cpp b/synthesisRef.cpp
index aa0d473..bde2d5b 100644
--- a/synthesisRef.cpp
+++ b/synthesisRef.cpp
@@ -5,8 +5,10 @@
 #include<fstream>
 #include<sstream>
 #include<map>
+#include<vector>
 
 #include "utils.h"
+#include "my_assert.h"
 #include "Transcript.h"
 #include "Transcripts.h"
 
@@ -19,29 +21,47 @@ map<string, string>::iterator iter;
 
 Transcripts transcripts(1); // no genome, just transcript set
 char groupF[STRLEN], tiF[STRLEN], refFastaF[STRLEN], chromListF[STRLEN];
+char gtF[STRLEN], taF[STRLEN]; // group info between gene and transcript, transcript and allele
 
-bool hasMappingFile;
+int hasMappingFile;
 char mappingFile[STRLEN];
 
 map<string, string> mi_table; // mapping info table
 map<string, string>::iterator mi_iter; //mapping info table's iterator
 
-void loadMappingInfo(char* mappingF) {
-  ifstream fin(mappingF);
-  string line, key, value;
-
-  if (!fin.is_open()) {
-	  fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF);
-	  exit(-1);
-  }
+map<string, string> mi_table2; // allele_id to transcript_id
+map<string, string>::iterator mi_iter2; // corresponding iterator
 
-  mi_table.clear();
-  while (getline(fin, line)) {
-    line = cleanStr(line);
-    if (line[0] == '#') continue;
-    istringstream strin(line);
-    strin>>value>>key;
-    mi_table[key] = value;
+void loadMappingInfo(int file_type, char* mappingF) {
+  ifstream fin(mappingF);
+  string line, key, value, value2;
+
+  general_assert(fin.is_open(), "Cannot open " + cstrtos(mappingF) + "! It may not exist.");
+
+  switch(file_type) {
+  case 1: 
+    mi_table.clear();
+    while (getline(fin, line)) {
+      line = cleanStr(line);
+      if (line[0] == '#') continue;
+      istringstream strin(line);
+      strin>>value>>key;
+      mi_table[key] = value;
+    }
+    break;
+  case 2: 
+    mi_table.clear();
+    mi_table2.clear();
+    while (getline(fin, line)) {
+      line = cleanStr(line);
+      if (line[0] == '#') continue;
+      istringstream strin(line);
+      strin>> value>> value2>> key;
+      mi_table[key] = value;
+      mi_table2[key] = value2;
+    }
+    break;
+  default: assert(false);
   }
 
   fin.close();
@@ -55,40 +75,58 @@ char check(char c) {
 	return c;
 }
 
-void writeResults(char* refName) {
+void writeResults(int option, char* refName) {
 	ofstream fout, fout2;
-	string cur_gene_id, name;
+	string cur_gene_id, cur_transcript_id, name;
+	vector<int> gi, gt, ta;
 
-	sprintf(groupF, "%s.grp", refName);
 	sprintf(tiF, "%s.ti", refName);
-	sprintf(refFastaF, "%s.transcripts.fa", refName);
-	sprintf(chromListF, "%s.chrlist", refName);
-
 	transcripts.writeTo(tiF);
 	if (verbose) { printf("Transcript Information File is generated!\n"); }
 
-	fout.open(groupF);
-	cur_gene_id = "";
+	cur_gene_id = ""; gi.clear(); 
+	if (option == 2) { cur_transcript_id = ""; gt.clear(); ta.clear(); }
 	for (int i = 1; i <= M; i++) {
 		const Transcript& transcript = transcripts.getTranscriptAt(i);
 		if (cur_gene_id != transcript.getGeneID()) {
-			fout<<i<<endl;
-			cur_gene_id = transcript.getGeneID();
+		  gi.push_back(i);
+		  if (option == 2) gt.push_back((int)ta.size());
+		  cur_gene_id = transcript.getGeneID();
+		}
+		if ((option == 2) && (cur_transcript_id != transcript.getTranscriptID())) {
+		    ta.push_back(i);
+		    cur_transcript_id = transcript.getTranscriptID();
 		}
 	}
-	fout<<M + 1<<endl;
+	gi.push_back(M + 1);
+	if (option == 2) { gt.push_back((int)ta.size()); ta.push_back(M + 1); }
+
+	sprintf(groupF, "%s.grp", refName);
+	fout.open(groupF);
+	for (int i = 0; i < (int)gi.size(); i++) fout<< gi[i]<< endl;
 	fout.close();
 	if (verbose) { printf("Group File is generated!\n"); }
 
+	if (option == 2) {
+	  sprintf(gtF, "%s.gt", refName);
+	  fout.open(gtF);
+	  for (int i = 0; i < (int)gt.size(); i++) fout<< gt[i]<< endl;
+	  fout.close();
+	  sprintf(taF, "%s.ta", refName);
+	  fout.open(taF);
+	  for (int i = 0; i < (int)ta.size(); i++) fout<< ta[i]<< endl;
+	  fout.close();
+	  if (verbose) { printf("Allele-specific group files are generated!\n"); }
+	}
+
+	sprintf(refFastaF, "%s.transcripts.fa", refName);
+	sprintf(chromListF, "%s.chrlist", refName);
 	fout2.open(chromListF);
 	fout.open(refFastaF);
 	for (int i = 1; i <= M; i++) {
-		name = transcripts.getTranscriptAt(i).getTranscriptID();
+		name = transcripts.getTranscriptAt(i).getSeqName();
 		iter = name2seq.find(name);
-		if (iter == name2seq.end()) {
-			fprintf(stderr, "Cannot recognize transcript ID %s!\n", name.c_str());
-			exit(-1);
-		}
+		general_assert(iter != name2seq.end(), "Cannot recognize sequence ID" + name + "!");
 		fout<<">"<<name<<endl;
 		fout<<iter->second<<endl;
 
@@ -105,19 +143,22 @@ void writeResults(char* refName) {
 
 int main(int argc, char* argv[]) {
   if (argc < 5 || ((hasMappingFile = atoi(argv[3])) && argc < 6)) {
-		printf("Usage: synthesisRef refName quiet hasMappingFile [mappingFile] reference_file_1 [reference_file_2 ...]\n");
+		printf("Usage: synthesisRef refName quiet hasMappingFile<0,no;1,yes;2,allele-specific> [mappingFile] reference_file_1 [reference_file_2 ...]\n");
 		exit(-1);
 	}
 
 	verbose = !atoi(argv[2]);
 
-	if (hasMappingFile) { loadMappingInfo(argv[4]); }
+	if (hasMappingFile) { loadMappingInfo(hasMappingFile, argv[4]); }
+
+	// allele-specific
+	if (hasMappingFile == 2) { transcripts.setType(2); }
 
 	int start = hasMappingFile ? 5 : 4;
 
 	ifstream fin;
 	string line, gseq;
-	string seqname, gene_id;
+	string seqname, gene_id, transcript_id;
 
 	vector<Interval> vec;
 
@@ -125,7 +166,7 @@ int main(int argc, char* argv[]) {
 	name2seq.clear();
 	for (int i = start; i < argc; i++) {
 		fin.open(argv[i]);
-		if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", argv[i]); exit(-1); }
+		general_assert(fin.is_open(), "Cannot open " + cstrtos(argv[i]) + "! It may not exist."); 
 		getline(fin, line);
 		while ((fin) && (line[0] == '>')) {
 			istringstream strin(line.substr(1));
@@ -142,20 +183,23 @@ int main(int argc, char* argv[]) {
 
 			name2seq[seqname] = gseq;
 
+			transcript_id = seqname;
+			gene_id = seqname;
+
 			if (hasMappingFile) {
 			      mi_iter = mi_table.find(seqname);
-			      if (mi_iter == mi_table.end()) {
-			    	  fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", seqname.c_str());
-			    	  exit(-1);
-			      }
-			      //assert(iter != table.end());
+			      general_assert(mi_iter != mi_table.end(), "Mapping Info is not correct, cannot find " + seqname + "'s gene_id!");
 			      gene_id = mi_iter->second;
+			      if (hasMappingFile == 2) {
+				mi_iter2 = mi_table2.find(seqname);
+				general_assert(mi_iter2 != mi_table2.end(), "Mapping Info is not correct, cannot find allele " + seqname + "'s transcript_id!");
+				transcript_id = mi_iter2->second;
+			      }
 			}
-			else gene_id = seqname;
-
+			
 			vec.clear();
 			vec.push_back(Interval(1, len));
-			transcripts.add(Transcript(seqname, gene_id, seqname, '+', vec, ""));
+			transcripts.add(Transcript(transcript_id, gene_id, seqname, '+', vec, ""));
 			++M;
 
 			if (verbose && M % 1000000 == 0) { printf("%d sequences are processed!\n", M); }
@@ -171,7 +215,7 @@ int main(int argc, char* argv[]) {
 	assert(M == transcripts.getM());
 	transcripts.sort();
 
-	writeResults(argv[1]);
+	writeResults(hasMappingFile, argv[1]);
 
 	return 0;
 }
-- 
2.39.2