-/* ReadType here means if the read is unalignable, alignable or aligned too much. It is NOT single read or paired-end read */
+/* ReadType here means if the read is unalignable, alignable or aligned too much. It is NOT siheaderngle read or paired-end read */
#ifndef SAMPARSER_H_
#define SAMPARSER_H_
#include "sam/bam.h"
#include "sam/sam.h"
+
#include "utils.h"
+
+#include "RefSeq.h"
+#include "Refs.h"
+
#include "SingleRead.h"
#include "SingleReadQ.h"
#include "PairedEndRead.h"
class SamParser {
public:
- SamParser(char, const char*, const char* = 0);
+ SamParser(char, const char*, Refs&, const char* = 0);
~SamParser();
/**
bam_header_t *header;
bam1_t *b, *b2;
+
//tag used by aligner
static char rtTag[STRLEN];
//0 ~ N0 1 ~ N1 2 ~ N2
int getReadType(const bam1_t*);
int getReadType(const bam1_t*, const bam1_t*); // for paired-end reads
+
+ bool check(bam1_t *b) {
+ return (b->core.n_cigar == 1) && ((*bam1_cigar(b) & BAM_CIGAR_MASK) == BAM_CMATCH) && (b->core.l_qseq == (int32_t)(*bam1_cigar(b) >> BAM_CIGAR_SHIFT));
+ }
};
char SamParser::rtTag[STRLEN] = ""; // default : no tag, thus no Type 2 reads
// aux, if not 0, points to the file name of fn_list
-SamParser::SamParser(char inpType, const char* inpF, const char* aux) {
+SamParser::SamParser(char inpType, const char* inpF, Refs& refs, const char* aux) {
switch(inpType) {
case 'b': sam_in = samopen(inpF, "rb", aux); break;
case 's': sam_in = samopen(inpF, "r", aux); break;
header = sam_in->header;
if (header == 0) { fprintf(stderr, "Fail to parse sam header!\n"); exit(-1); }
+ // Check if the reference used for aligner is the transcript set RSEM generated
+ if (refs.getM() != header->n_targets) {
+ fprintf(stderr, "Number of transcripts does not match! Please align reads against the transcript set and use RSEM generated reference for your aligner!\n");
+ exit(-1);
+ }
+ for (int i = 0; i < header->n_targets; i++) {
+ const RefSeq& refseq = refs.getRef(i + 1);
+ // If update int to long, chance the (int) conversion
+ if (refseq.getName().compare(header->target_name[i]) != 0 || refseq.getTotLen() != (int)header->target_len[i]) {
+ fprintf(stderr, "Transcript information does not match! Please align reads against the transcript set and use RSEM generated reference for your aligner!\n");
+ exit(-1);
+ }
+ }
+
b = bam_init1();
b2 = bam_init1();
}
else val = 5;
if (readType == 1) {
+ if (!check(b)) { fprintf(stderr, "RSEM does not support gapped alignments, sorry!\n"); exit(-1); }
+
if (getDir(b) > 0) {
hit = SingleHit(b->core.tid + 1, b->core.pos);
}
else val = 5;
if (readType == 1) {
+ if (!check(b)) { fprintf(stderr, "RSEM does not support gapped alignments, sorry!\n"); exit(-1); }
+
if (getDir(b) > 0) {
hit = SingleHit(b->core.tid + 1, b->core.pos);
}
else val = 5;
if (readType == 1) {
+ if (!check(mp1) || !check(mp2)) { fprintf(stderr, "RSEM does not support gapped alignments, sorry!\n"); exit(-1); }
+
if (mp1->core.tid != mp2->core.tid) {
fprintf(stderr, "The two reads do not come from the same pair!");
exit(-1);
else val = 5;
if (readType == 1) {
+ if (!check(mp1) || !check(mp2)) { fprintf(stderr, "RSEM does not support gapped alignments, sorry!\n"); exit(-1); }
+
if (mp1->core.tid != mp2->core.tid) {
fprintf(stderr, "The two reads do not come from the same pair!");
exit(-1);