#include<cassert>
#include<string>
#include<sstream>
+#include<iostream>
#include <stdint.h>
#include "sam/bam.h"
#include "sam_rsem_aux.h"
#include "sam_rsem_cvt.h"
+#include "utils.h"
+
#include "SingleHit.h"
#include "PairedEndHit.h"
}
assert(in != 0);
+ //build mappings from external sid to internal sid
+ transcripts.buildMappings(in->header->n_targets, in->header->target_name);
+
//generate output's header
bam_header_t *out_header = bam_header_dwt(in->header);
-
- if (out_header->n_targets != transcripts.getM()) {
- fprintf(stderr, "Number of reference sequences recorded in the header is not correct! The header contains %d sequences while there should be %d sequences\n", out_header->n_targets, transcripts.getM());
- exit(-1);
- }
-
for (int i = 0; i < out_header->n_targets; i++) {
- const Transcript& transcript = transcripts.getTranscriptAt(i + 1);
- if (out_header->target_name[i] != transcript.getTranscriptID()) {
- fprintf(stderr, "Reference sequence %d's name recorded in the header is not correct! \n", i);
- fprintf(stderr, "Name in the header: %s\n", out_header->target_name[i]);
- fprintf(stderr, "Should be: %s\n", transcript.getTranscriptID().c_str());
- exit(-1);
- }
- out_header->target_len[i] = transcript.getLength(); // transcript length without poly(A) tail
+ out_header->target_len[i] = transcripts.getTranscriptViaEid(i + 1).getLength(); // transcript length without poly(A) tail
}
std::ostringstream strout;
bam1_t *b;
SingleHit *hit;
- int cnt = 0;
+ HIT_INT_TYPE cnt = 0;
b = bam_init1();
while (samread(in, b) >= 0) {
++cnt;
- if (verbose && cnt % 1000000 == 0) { printf("%d alignment lines are loaded!\n", cnt); }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< "alignment lines are loaded!"<< std::endl; }
if (b->core.flag & 0x0004) continue;
hit = wrapper.getNextHit();
assert(hit != NULL);
- assert(b->core.tid + 1 == hit->getSid());
+ assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
convert(b, hit->getConPrb());
if (b->core.qual > 0) samwrite(out, b); // output only when MAPQ > 0
}
assert(wrapper.getNextHit() == NULL);
bam_destroy1(b);
- if (verbose) { printf("Bam output file is generated!\n"); }
+ if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
}
void BamWriter::work(HitWrapper<PairedEndHit> wrapper) {
while (samread(in, b) >= 0 && samread(in, b2) >= 0) {
cnt += 2;
- if (verbose && cnt % 1000000 == 0) { printf("%d alignment lines are loaded!\n", cnt); }
-
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< "alignment lines are loaded!"<< std::endl; }
+ //mate info is not complete, skip
+ if (!(((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)))) continue;
+ //unalignable reads, skip
if ((b->core.flag & 0x0004) || (b2->core.flag & 0x0004)) continue;
//swap if b is mate 2
hit = wrapper.getNextHit();
assert(hit != NULL);
- assert(b->core.tid + 1 == hit->getSid());
- assert(b2->core.tid + 1 == hit->getSid());
+ assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
+ assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
convert(b, hit->getConPrb());
convert(b2, hit->getConPrb());
bam_destroy1(b);
bam_destroy1(b2);
- if (verbose) { printf("Bam output file is generated!\n"); }
+ if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
}
void BamWriter::convert(bam1_t *b, double prb) {
- int sid = b->core.tid + 1;
- const Transcript& transcript = transcripts.getTranscriptAt(sid);
+ const Transcript& transcript = transcripts.getTranscriptViaEid(b->core.tid + 1);
int pos = b->core.pos;
int readlen = b->core.l_qseq;