//generate output's header
bam_header_t *out_header = bam_header_dwt(in->header);
- for (int i = 0; i < out_header->n_targets; i++) {
- out_header->target_len[i] = transcripts.getTranscriptViaEid(i + 1).getLength(); // transcript length without poly(A) tail
- }
std::ostringstream strout;
strout<<"@HD\tVN:1.4\tSO:unknown\n@PG\tID:RSEM\n";
while (samread(in, b) >= 0) {
++cnt;
- if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< "alignment lines are loaded!"<< std::endl; }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " alignment lines are loaded!"<< std::endl; }
- if (b->core.flag & 0x0004) continue;
+ if (!(b->core.flag & 0x0004)) {
+ hit = wrapper.getNextHit();
+ assert(hit != NULL);
- hit = wrapper.getNextHit();
- assert(hit != NULL);
+ assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
+ convert(b, hit->getConPrb());
+ }
- assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
- convert(b, hit->getConPrb());
- if (b->core.qual > 0) samwrite(out, b); // output only when MAPQ > 0
+ //if (b->core.qual > 0) samwrite(out, b); // output only when MAPQ > 0
+ samwrite(out, b);
}
assert(wrapper.getNextHit() == NULL);
bam1_t *b, *b2;
PairedEndHit *hit;
- int cnt = 0;
+ HIT_INT_TYPE cnt = 0;
b = bam_init1();
b2 = bam_init1();
while (samread(in, b) >= 0 && samread(in, b2) >= 0) {
cnt += 2;
- if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< "alignment lines are loaded!"<< std::endl; }
- //mate info is not complete, skip
- if (!(((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)))) continue;
- //unalignable reads, skip
- if ((b->core.flag & 0x0004) || (b2->core.flag & 0x0004)) continue;
-
- //swap if b is mate 2
- if (b->core.flag & 0x0080) {
- assert(b2->core.flag & 0x0040);
- bam1_t *tmp = b;
- b = b2; b2 = tmp;
- }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " alignment lines are loaded!"<< std::endl; }
- hit = wrapper.getNextHit();
- assert(hit != NULL);
+ assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001));
+ assert(((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)));
- assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
- assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
+ //unalignable reads, skip
+ bool notgood = (b->core.flag & 0x0004) || (b2->core.flag & 0x0004);
- convert(b, hit->getConPrb());
- convert(b2, hit->getConPrb());
+ if (!notgood) {
+ //swap if b is mate 2
+ if (b->core.flag & 0x0080) {
+ assert(b2->core.flag & 0x0040);
+ bam1_t *tmp = b;
+ b = b2; b2 = tmp;
+ }
- b->core.mpos = b2->core.pos;
- b2->core.mpos = b->core.pos;
+ hit = wrapper.getNextHit();
+ assert(hit != NULL);
- if (b->core.qual > 0) {
- samwrite(out, b);
- samwrite(out, b2);
+ assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
+ assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
+
+ convert(b, hit->getConPrb());
+ convert(b2, hit->getConPrb());
+
+ // omit "b->core.mpos = b2->core.pos; b2->core.mpos = b->core.pos" for the reason that it is possible that only one mate is alignable
}
+
+ samwrite(out, b);
+ samwrite(out, b2);
}
assert(wrapper.getNextHit() == NULL);
}
void BamWriter::convert(bam1_t *b, double prb) {
- const Transcript& transcript = transcripts.getTranscriptViaEid(b->core.tid + 1);
-
- int pos = b->core.pos;
- int readlen = b->core.l_qseq;
-
- std::vector<uint32_t> data;
- data.clear();
-
- int core_pos, core_n_cigar;
- std::vector<Interval> vec;
- vec.assign(1, Interval(1, transcript.getLength()));
- // make an artificial chromosome coordinates for the transcript to get new CIGAR strings
- tr2chr(Transcript("", "", "", '+', vec, ""), pos + 1, pos + readlen, core_pos, core_n_cigar, data);
- assert(core_pos >= 0);
-
- int rest_len = b->data_len - b->core.l_qname - b->core.n_cigar * 4;
- b->data_len = b->core.l_qname + core_n_cigar * 4 + rest_len;
- expand_data_size(b);
- uint8_t* pt = b->data + b->core.l_qname;
- memmove(pt + core_n_cigar * 4, pt + b->core.n_cigar * 4, rest_len);
- for (int i = 0; i < core_n_cigar; i++) { memmove(pt, &data[i], 4); pt += 4; }
-
- b->core.pos = core_pos;
- b->core.n_cigar = core_n_cigar;
b->core.qual = getMAPQ(prb);
- b->core.bin = bam_reg2bin(b->core.pos, bam_calend(&(b->core), bam1_cigar(b)));
-
float val = (float)prb;
bam_aux_append(b, "ZW", 'f', bam_aux_type2size('f'), (uint8_t*)&val);
}