13 #include "my_assert.h"
19 vector<bam1_t*> arr_both, arr_partial_1, arr_partial_2, arr_partial_unknown;
21 inline void add_to_appropriate_arr(bam1_t *b) {
22 if (!(b->core.flag & 0x0004) && (b->core.flag & 0x0002)) {
23 arr_both.push_back(bam_dup1(b)); return;
26 if (b->core.flag & 0x0040) arr_partial_1.push_back(bam_dup1(b));
27 else if (b->core.flag & 0x0080) arr_partial_2.push_back(bam_dup1(b));
28 else arr_partial_unknown.push_back(bam_dup1(b));
31 bool less_than(bam1_t *a, bam1_t *b) {
32 int32_t ap1 = min(a->core.pos, a->core.mpos);
33 int32_t ap2 = max(a->core.pos, a->core.mpos);
34 int32_t bp1 = min(b->core.pos, b->core.mpos);
35 int32_t bp2 = max(b->core.pos, b->core.mpos);
37 if (a->core.tid != b->core.tid) return a->core.tid < b->core.tid;
38 if (ap1 != bp1) return ap1 < bp1;
42 int main(int argc, char* argv[]) {
44 printf("Usage: rsem-scan-for-paired-end-reads input.sam output.bam\n");
48 in = samopen(argv[1], "r", NULL);
49 general_assert(in != 0, "Cannot open " + cstrtos(argv[1]) + " !");
50 out = samopen(argv[2], "wb", in->header);
51 general_assert(out != 0, "Cannot open " + cstrtos(argv[2]) + " !");
53 b = bam_init1(); b2 = bam_init1();
56 bool go_on = (samread(in, b) >= 0);
60 printf("."); fflush(stdout);
63 qname.assign(bam1_qname(b));
64 isPaired = (b->core.flag & 0x0001);
67 add_to_appropriate_arr(b);
68 while ((go_on = (samread(in, b) >= 0)) && (qname == bam1_qname(b))) {
69 general_assert(b->core.flag & 0x0001, "Read " + qname + " is detected as both single-end and paired-end read!", true);
70 add_to_appropriate_arr(b);
73 general_assert(arr_both.size() % 2 == 0, "Number of first and second mates in read " + qname + "'s full alignments (both mates are aligned) are not matched!", true);
74 general_assert((arr_partial_1.size() + arr_partial_2.size() + arr_partial_unknown.size()) % 2 == 0, "Number of first and second mates in read " + qname + "'s partial alignments (at most one mate is aligned) are not matched!", true);
76 if (!arr_both.empty()) {
77 sort(arr_both.begin(), arr_both.end(), less_than);
78 for (size_t i = 0; i < arr_both.size(); i++) { samwrite(out, arr_both[i]); bam_destroy1(arr_both[i]); }
82 while (!arr_partial_1.empty() || !arr_partial_2.empty()) {
83 if (!arr_partial_1.empty() && !arr_partial_2.empty()) {
84 samwrite(out, arr_partial_1.back()); bam_destroy1(arr_partial_1.back()); arr_partial_1.pop_back();
85 samwrite(out, arr_partial_2.back()); bam_destroy1(arr_partial_2.back()); arr_partial_2.pop_back();
87 else if (!arr_partial_1.empty()) {
88 samwrite(out, arr_partial_1.back()); bam_destroy1(arr_partial_1.back()); arr_partial_1.pop_back();
89 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
92 samwrite(out, arr_partial_2.back()); bam_destroy1(arr_partial_2.back()); arr_partial_2.pop_back();
93 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
97 while (!arr_partial_unknown.empty()) {
98 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
103 while ((go_on = (samread(in, b) >= 0)) && (qname == bam1_qname(b))) {
109 if (cnt % 1000000 == 0) { printf("."); fflush(stdout); }
112 printf("\nFinished!\n");
114 bam_destroy1(b); bam_destroy1(b2);