14 #include "my_assert.h"
// Global buffers holding duplicated alignment records for the read that is
// currently being processed. They are filled by add_to_appropriate_arr() and
// written out and emptied by main():
//   arr_both            - records with flag 0x0002 set and 0x0004 clear
//   arr_partial_1       - other records with flag 0x0040 set
//   arr_partial_2       - other records with flag 0x0080 set
//   arr_partial_unknown - other records with neither 0x0040 nor 0x0080 set
20 vector<bam1_t*> arr_both, arr_partial_1, arr_partial_2, arr_partial_unknown;
// Store a copy of alignment record b (made with bam_dup1) in one of the
// global buffers, chosen from b->core.flag:
//   - 0x0004 clear and 0x0002 set      -> arr_both
//   - otherwise, 0x0040 set            -> arr_partial_1
//   - otherwise, 0x0080 set            -> arr_partial_2
//   - otherwise                        -> arr_partial_unknown
// Only the copy is stored, never b itself; main() keeps reading into b and
// later releases each stored copy with bam_destroy1().
// NOTE(review): the masks presumably follow the SAM FLAG definitions
// (0x4 unmapped, 0x2 properly paired, 0x40 / 0x80 first / second mate) --
// confirm against the SAM specification.
22 inline void add_to_appropriate_arr(bam1_t *b) {
// Fully aligned pair: goes to arr_both and nothing else is checked.
23 if (!(b->core.flag & 0x0004) && (b->core.flag & 0x0002)) {
24 arr_both.push_back(bam_dup1(b)); return;
// Everything else is bucketed by the 0x0040 / 0x0080 bits.
27 if (b->core.flag & 0x0040) arr_partial_1.push_back(bam_dup1(b));
28 else if (b->core.flag & 0x0080) arr_partial_2.push_back(bam_dup1(b));
29 else arr_partial_unknown.push_back(bam_dup1(b));
// Ordering predicate handed to sort() for arr_both in main().
// Records are compared by core.tid first, then by the smaller of core.pos and
// core.mpos.
// NOTE(review): ap2 / bp2 (the larger of pos and mpos) are computed but not
// used in the lines visible here; presumably they act as the final tie-break
// -- confirm.
32 bool less_than(bam1_t *a, bam1_t *b) {
// Smaller and larger of the two coordinates stored on each record.
33 int32_t ap1 = min(a->core.pos, a->core.mpos);
34 int32_t ap2 = max(a->core.pos, a->core.mpos);
35 int32_t bp1 = min(b->core.pos, b->core.mpos);
36 int32_t bp2 = max(b->core.pos, b->core.mpos);
// Primary key: core.tid. Secondary key: the smaller coordinate.
38 if (a->core.tid != b->core.tid) return a->core.tid < b->core.tid;
39 if (ap1 != bp1) return ap1 < bp1;
// Entry point. Reads alignment records from argv[1], handles consecutive
// records that share a query name as one group, and writes records to argv[2].
// For groups whose first record has flag 0x0001 set, the records are bucketed
// with add_to_appropriate_arr() and written in this order: sorted arr_both
// records first, then the partial buckets two records at a time.
43 int main(int argc, char* argv[]) {
// Usage text: one input path and one output path are expected.
45 printf("Usage: rsem-scan-for-paired-end-reads input.sam output.bam\n");
// Open the input with mode "r" and the output with mode "wb", handing the
// input's header to the output. A null handle is reported via general_assert.
49 in = samopen(argv[1], "r", NULL);
50 general_assert(in != 0, "Cannot open " + cstrtos(argv[1]) + " !");
51 out = samopen(argv[2], "wb", in->header);
52 general_assert(out != 0, "Cannot open " + cstrtos(argv[2]) + " !");
// Allocate the record buffers; b is the one samread() fills below.
54 b = bam_init1(); b2 = bam_init1();
// Prime the loop with the first record; go_on is false once samread()
// returns a negative value.
57 bool go_on = (samread(in, b) >= 0);
// Initial progress mark.
61 printf("."); fflush(stdout);
// Remember the query name of the group's first record and whether its
// 0x0001 flag bit is set.
64 qname.assign(bam1_qname(b));
65 isPaired = (b->core.flag & 0x0001);
// Bucket the first record, then keep reading while the query name is
// unchanged. Every further record in the group must also carry 0x0001.
68 add_to_appropriate_arr(b);
69 while ((go_on = (samread(in, b) >= 0)) && (qname == bam1_qname(b))) {
70 general_assert(b->core.flag & 0x0001, "Read " + qname + " is detected as both single-end and paired-end read!", true);
71 add_to_appropriate_arr(b);
// Sanity checks: arr_both must hold an even number of records, and so must
// the three partial buckets taken together.
74 general_assert(arr_both.size() % 2 == 0, "Number of first and second mates in read " + qname + "'s full alignments (both mates are aligned) are not matched!", true);
75 general_assert((arr_partial_1.size() + arr_partial_2.size() + arr_partial_unknown.size()) % 2 == 0, "Number of first and second mates in read " + qname + "'s partial alignments (at most one mate is aligned) are not matched!", true);
// Write the arr_both records in less_than order, freeing each copy after it
// has been written.
77 if (!arr_both.empty()) {
78 sort(arr_both.begin(), arr_both.end(), less_than);
79 for (size_t i = 0; i < arr_both.size(); i++) { samwrite(out, arr_both[i]); bam_destroy1(arr_both[i]); }
// Drain arr_partial_1 and arr_partial_2 two records per iteration, always
// taking from the back of each vector.
83 while (!arr_partial_1.empty() || !arr_partial_2.empty()) {
// Both buckets non-empty: write one record from arr_partial_1, then one from
// arr_partial_2.
84 if (!arr_partial_1.empty() && !arr_partial_2.empty()) {
85 samwrite(out, arr_partial_1.back()); bam_destroy1(arr_partial_1.back()); arr_partial_1.pop_back();
86 samwrite(out, arr_partial_2.back()); bam_destroy1(arr_partial_2.back()); arr_partial_2.pop_back();
// Only arr_partial_1 has records left: follow it with a record taken from
// arr_partial_unknown.
// NOTE(review): arr_partial_unknown.back() is called without checking that
// the vector is non-empty. The parity assertion above does not guarantee
// that (e.g. two records in arr_partial_1 and none elsewhere) -- confirm
// whether an explicit check is needed.
88 else if (!arr_partial_1.empty()) {
89 samwrite(out, arr_partial_1.back()); bam_destroy1(arr_partial_1.back()); arr_partial_1.pop_back();
90 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
// Presumably the remaining case, where only arr_partial_2 has records left:
// follow it with a record taken from arr_partial_unknown.
// NOTE(review): the same unchecked arr_partial_unknown.back() concern
// applies here.
93 samwrite(out, arr_partial_2.back()); bam_destroy1(arr_partial_2.back()); arr_partial_2.pop_back();
94 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
// Write and free whatever is still left in arr_partial_unknown.
98 while (!arr_partial_unknown.empty()) {
99 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
// Same "read while the query name is unchanged" loop as above, with a
// different body.
104 while ((go_on = (samread(in, b) >= 0)) && (qname == bam1_qname(b))) {
// Progress mark each time cnt is a multiple of 1,000,000.
110 if (cnt % 1000000 == 0) { printf("."); fflush(stdout); }
113 printf("\nFinished!\n");
// Release the two record buffers allocated with bam_init1().
115 bam_destroy1(b); bam_destroy1(b2);