*.o
*.a
+*~
rsem-bam2readdepth
rsem-bam2wig
rsem-build-read-index
rsem-simulate-reads
rsem-synthesis-reference-transcripts
rsem-tbam2gbam
+rsem-sam-validator
sam/samtools
+.project
+.cproject
cnt += 2;
if (verbose && cnt % 1000000 == 0) { printf("%d alignment lines are loaded!\n", cnt); }
+ //mate info is not complete, skip
+ if (!(((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)))) continue;
+ //unalignable reads, skip
if ((b->core.flag & 0x0004) || (b2->core.flag & 0x0004)) continue;
//swap if b is mate 2
if (verbose) printf("ROUND = %d, SUM = %.15g, bChange = %f, totNum = %d\n", ROUND, sum, bChange, totNum);
} while (ROUND < MIN_ROUND || (totNum > 0 && ROUND < MAX_ROUND));
- //while (ROUND < MAX_ROUND);
+// } while (ROUND < 1);
if (totNum > 0) fprintf(stderr, "Warning: RSEM reaches %d iterations before meeting the convergence criteria.\n", MAX_ROUND);
bam1_t *mp1 = NULL, *mp2 = NULL;
- if ((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) {
+ // If lose mate info, discard. is it necessary?
+ if (!((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) && !((b->core.flag & 0x0080) && (b2->core.flag & 0x0040))) return 4;
+ // If only one mate is mapped, discard
+ if (((b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) || (!(b->core.flag & 0x0004) && (b2->core.flag & 0x0004))) return 4;
+
+ if (b->core.flag & 0x0040) {
mp1 = b; mp2 = b2;
}
- else if ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)) {
+ else {
mp1 = b2; mp2 = b;
}
- else return 4; // If lose mate info, discard. is it necessary?
int readType = getReadType(mp1, mp2);
std::string name = getName(mp1);
bam1_t *mp1 = NULL, *mp2 = NULL;
- if ((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) {
+ // If lose mate info, discard. is it necessary?
+ if (!((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) && !((b->core.flag & 0x0080) && (b2->core.flag & 0x0040))) return 4;
+ // If only one mate is mapped, discard
+ if (((b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) || (!(b->core.flag & 0x0004) && (b2->core.flag & 0x0004))) return 4;
+
+ if (b->core.flag & 0x0040) {
mp1 = b; mp2 = b2;
}
- else if ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)) {
+ else {
mp1 = b2; mp2 = b;
}
- else return 4;
int readType = getReadType(mp1, mp2);
std::string name = getName(mp1);
CC = g++
CFLAGS = -Wall -c -I.
COFLAGS = -Wall -O3 -ffast-math -c -I.
-PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth
+PROGRAMS = rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-preref rsem-parse-alignments rsem-build-read-index rsem-run-em rsem-tbam2gbam rsem-run-gibbs rsem-calculate-credibility-intervals rsem-simulate-reads rsem-bam2wig rsem-get-unique rsem-bam2readdepth rsem-sam-validator
all : $(PROGRAMS)
ReadReader.h : SingleRead.h SingleReadQ.h PairedEndRead.h PairedEndReadQ.h ReadIndex.h
-SingleModel.h : utils.h Orientation.h LenDist.h RSPD.h Profile.h NoiseProfile.h ModelParams.h RefSeq.h Refs.h SingleRead.h SingleHit.h ReadReader.h simul.h
+SingleModel.h : utils.h my_assert.h Orientation.h LenDist.h RSPD.h Profile.h NoiseProfile.h ModelParams.h RefSeq.h Refs.h SingleRead.h SingleHit.h ReadReader.h simul.h
-SingleQModel.h : utils.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h ModelParams.h RefSeq.h Refs.h SingleReadQ.h SingleHit.h ReadReader.h simul.h
+SingleQModel.h : utils.h my_assert.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h ModelParams.h RefSeq.h Refs.h SingleReadQ.h SingleHit.h ReadReader.h simul.h
-PairedEndModel.h : utils.h Orientation.h LenDist.h RSPD.h Profile.h NoiseProfile.h ModelParams.h RefSeq.h Refs.h SingleRead.h PairedEndRead.h PairedEndHit.h ReadReader.h simul.h
+PairedEndModel.h : utils.h my_assert.h Orientation.h LenDist.h RSPD.h Profile.h NoiseProfile.h ModelParams.h RefSeq.h Refs.h SingleRead.h PairedEndRead.h PairedEndHit.h ReadReader.h simul.h
-PairedEndQModel.h : utils.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h ModelParams.h RefSeq.h Refs.h SingleReadQ.h PairedEndReadQ.h PairedEndHit.h ReadReader.h simul.h
+PairedEndQModel.h : utils.h my_assert.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h ModelParams.h RefSeq.h Refs.h SingleReadQ.h PairedEndReadQ.h PairedEndHit.h ReadReader.h simul.h
HitWrapper.h : HitContainer.h
rsem-get-unique : sam/bam.h sam/sam.h getUnique.cpp sam/libbam.a
$(CC) -O3 -Wall getUnique.cpp sam/libbam.a -lz -o $@
+rsem-sam-validator : sam/bam.h sam/sam.h my_assert.h samValidator.cpp sam/libbam.a
+ $(CC) -O3 -Wall samValidator.cpp sam/libbam.a -lz -o $@
+
clean:
rm -f *.o *~ $(PROGRAMS)
cd sam ; ${MAKE} clean
--- /dev/null
+#include<cstdio>
+#include<cstring>
+#include<cstdlib>
+#include<cassert>
+#include<string>
+#include<set>
+
+#include <stdint.h>
+#include "sam/bam.h"
+#include "sam/sam.h"
+
+#include "my_assert.h"
+
+using namespace std;
+
+samfile_t *in;
+bam1_t *b, *b2;
+
+set<string> used;
+
+bool isValid;
+
+int main(int argc, char* argv[]) {
+ if (argc != 2) {
+ printf("Usage: rsem-sam-validator <input.sam/input.bam>\n");
+ exit(-1);
+ }
+
+ string input_file(argv[1]);
+ size_t pos = input_file.find_last_of('.');
+ general_assert(pos != string::npos, "Input file does not have a suffix!");
+ ++pos;
+
+ string suffix = input_file.substr(pos);
+ size_t len = suffix.length();
+ for (size_t i = 0; i < len; i++) suffix[i] = tolower(suffix[i]);
+
+ general_assert(suffix == "sam" || suffix == "bam", "Cannot recognize input file's file type! The file suffix is neither sam or bam.");
+
+ in = (suffix == "sam" ? samopen(argv[1], "r", NULL) : samopen(argv[1], "rb", NULL));
+ general_assert(in != 0, "Cannot open input file!");
+
+ used.clear();
+ b = bam_init1(); b2 = bam_init1();
+
+ isValid = true;
+
+ long cnt = 0;
+ string cqname(""), qname;
+ uint64_t creadlen = 0, readlen;
+ bool cispaired = false, ispaired;
+
+ printf(".");
+ do {
+ if (samread(in, b) < 0) break;
+ assert(b->core.l_qseq > 0);
+
+ qname.assign(bam1_qname(b));
+
+ // if this is a paired-end read
+ ispaired = b->core.flag & 0x0001;
+ if (ispaired) {
+ isValid = (samread(in, b2) >= 0) && (qname == bam1_qname(b2)) && (b2->core.flag & 0x0001);
+ if (!isValid) { printf("\nOnly find one mate for paired-end read %s!\n", qname.c_str()); continue; }
+ assert(b2->core.l_qseq > 0);
+ isValid = !(((b->core.flag & 0x0040) && (b2->core.flag & 0x0040)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0080)));
+ if (!isValid) { printf("\nThe two mates of paired-end read %s are not adjacent!\n", qname.c_str()); continue; }
+ // both mates are mapped
+ if (!(b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) {
+ isValid = (b->core.tid == b2->core.tid) && (b->core.pos == b2->core.mpos) && (b2->core.pos == b->core.mpos);
+ if (!isValid) { printf("\nOne of paired-end read %s's alignment does not have two mates adjacent to each other!\n", qname.c_str()); continue; }
+ }
+
+ readlen = ((b->core.flag & 0x0040) ? (uint64_t(b->core.l_qseq) << 32) + b2->core.l_qseq : (uint64_t(b2->core.l_qseq) << 32) + b->core.l_qseq);
+ }
+ else readlen = b->core.l_qseq;
+
+ if (cqname != qname) {
+ isValid = used.find(qname) == used.end();
+ if (!isValid) { printf("\nThe alignments of read %s are not grouped together!", qname.c_str()); continue; }
+ used.insert(cqname);
+ cqname = qname;
+ creadlen = readlen;
+ cispaired = ispaired;
+ }
+ else {
+ assert(cqname != "");
+ isValid = (creadlen == readlen);
+ if (!isValid) { printf("\nRead %s have different read/mate lengths!\n", qname.c_str()); continue; }
+ isValid = (cispaired == ispaired);
+ if (!isValid) { printf("\nRead %s is detected as both single-end read and paired-end read!\n", qname.c_str()); continue; }
+ }
+
+ ++cnt;
+ if (cnt % 1000000 == 0) printf(".");
+
+ } while(isValid);
+
+ bam_destroy1(b); bam_destroy1(b2);
+ samclose(in);
+
+ if (isValid) printf("\nThe input file is valid!\n");
+ else printf("The input file is not valid!\n");
+
+ return 0;
+}