class SamParser {
public:
- SamParser(char, const char*, Transcripts&, const char* = 0);
+ SamParser(char, const char*, const char*, Transcripts&, const char*);
~SamParser();
/**
}
// Returns a std::string built from the name that bam1_qname() yields for record b.
// The updated line drops the explicit (char*) cast and passes the bam1_qname() result
// straight to the std::string constructor.
std::string getName(const bam1_t* b) {
- return std::string((char*)bam1_qname(b));
+ return std::string(bam1_qname(b));
}
std::string getReadSeq(const bam1_t*);
char SamParser::rtTag[STRLEN] = ""; // default : no tag, thus no Type 2 reads
// aux, if not 0, points to the file name of fn_list
-SamParser::SamParser(char inpType, const char* inpF, Transcripts& transcripts, const char* aux)
+SamParser::SamParser(char inpType, const char* inpF, const char* aux, Transcripts& transcripts, const char* imdName)
: transcripts(transcripts)
{
switch(inpType) {
header = sam_in->header;
general_assert(header != 0, "Fail to parse sam header!");
- transcripts.buildMappings(header->n_targets, header->target_name);
+ transcripts.buildMappings(header->n_targets, header->target_name, imdName);
b = bam_init1();
b2 = bam_init1();
bool canR = (samread(sam_in, b) >= 0);
if (!canR) return -1;
- if (b->core.flag & 0x0001) { fprintf(stderr, "Find a paired end read in the file!\n"); exit(-1); }
- //(b->core.flag & 0x0100) && && !(b->core.flag & 0x0004)
+ general_assert(!(b->core.flag & 0x0001), "Find a paired end read in the file!");
int readType = getReadType(b);
std::string name = getName(b);
bool canR = (samread(sam_in, b) >= 0);
if (!canR) return -1;
- if (b->core.flag & 0x0001) { fprintf(stderr, "Find a paired end read in the file!\n"); exit(-1); }
- //assert(!(b->core.flag & 0x0001)); //(b->core.flag & 0x0100) && && !(b->core.flag & 0x0004)
+ general_assert(!(b->core.flag & 0x0001), "Find a paired end read in the file!");
int readType = getReadType(b);
std::string name = getName(b);
bool canR = ((samread(sam_in, b) >= 0) && (samread(sam_in, b2) >= 0));
if (!canR) return -1;
- if (!((b->core.flag & 0x0001) && (b2->core.flag & 0x0001))) {
- fprintf(stderr, "One of the mate is not paired-end! (RSEM assumes the two mates of a paired-end read should be adjacent)\n");
- exit(-1);
- }
- //assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001));
+ general_assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001), \
+ "One of the mate is not paired-end! (RSEM assumes the two mates of a paired-end read should be adjacent)");
bam1_t *mp1 = NULL, *mp2 = NULL;
- if ((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) {
+ // If lose mate info, discard. is it necessary?
+ if (!((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) && !((b->core.flag & 0x0080) && (b2->core.flag & 0x0040))) return 4;
+ // If only one mate is mapped, discard
+ if (((b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) || (!(b->core.flag & 0x0004) && (b2->core.flag & 0x0004))) return 4;
+
+ if (b->core.flag & 0x0040) {
mp1 = b; mp2 = b2;
}
- else if ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)) {
+ else {
mp1 = b2; mp2 = b;
}
- else return 4; // If lose mate info, discard. is it necessary?
+
+ general_assert(!strcmp(bam1_qname(mp1), bam1_qname(mp2)), "Detected a read pair whose two mates have different names: " + getName(mp1) + " , " + getName(mp2) + " !");
int readType = getReadType(mp1, mp2);
std::string name = getName(mp1);
bool canR = ((samread(sam_in, b) >= 0) && (samread(sam_in, b2) >= 0));
if (!canR) return -1;
- if (!((b->core.flag & 0x0001) && (b2->core.flag & 0x0001))) {
- fprintf(stderr, "One of the mate is not paired-end! (RSEM assumes the two mates of a paired-end read should be adjacent)\n");
- exit(-1);
- }
- //assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001));
+ general_assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001), \
+ "One of the mate is not paired-end! (RSEM assumes the two mates of a paired-end read should be adjacent)");
bam1_t *mp1 = NULL, *mp2 = NULL;
- if ((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) {
+ // If lose mate info, discard. is it necessary?
+ if (!((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) && !((b->core.flag & 0x0080) && (b2->core.flag & 0x0040))) return 4;
+ // If only one mate is mapped, discard
+ if (((b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) || (!(b->core.flag & 0x0004) && (b2->core.flag & 0x0004))) return 4;
+
+ if (b->core.flag & 0x0040) {
mp1 = b; mp2 = b2;
}
- else if ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)) {
+ else {
mp1 = b2; mp2 = b;
}
- else return 4;
+
+ general_assert(!strcmp(bam1_qname(mp1), bam1_qname(mp2)), "Detected a read pair whose two mates have different names: " + getName(mp1) + " , " + getName(mp2) + " !");
int readType = getReadType(mp1, mp2);
std::string name = getName(mp1);
return (bam_aux2i(p) > 0 ? 2 : 0);
}
-
// Classifies a mate pair (b, b2) and returns its read type as an int:
//   1 - neither mate has the unmapped bit (0x0004) set in core.flag;
//   0 - otherwise, when rtTag is empty, or when neither mate carries a positive rtTag value;
//   2 - otherwise, when the rtTag aux field of either mate holds a value > 0.
// The removed line instead returned 1 when both mates had bit 0x0002 set.
// NOTE(review): this block previously carried the comment "For paired-end reads, do not
// print out type 2 reads", but the added lines do return 2 once rtTag is non-empty --
// confirm which behavior is intended.
inline int SamParser::getReadType(const bam1_t* b, const bam1_t* b2) {
- if ((b->core.flag & 0x0002) && (b2->core.flag & 0x0002)) return 1;
+ if (!(b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) return 1;
+
// An empty rtTag means no tag is configured, so no pair is reported as type 2.
+ if (!strcmp(rtTag, "")) return 0;
+
// Look up the rtTag aux field on the first mate; a missing field (NULL) is skipped.
+ uint8_t *p = bam_aux_get(b, rtTag);
+ if (p != NULL && bam_aux2i(p) > 0) return 2;
+
// Same check on the second mate.
+ p = bam_aux_get(b2, rtTag);
+ if (p != NULL && bam_aux2i(p) > 0) return 2;
return 0;
}