14 #include "my_assert.h"
20 vector<bam1_t*> arr_both, arr_partial_1, arr_partial_2, arr_partial_unknown;
22 inline void add_to_appropriate_arr(bam1_t *b) {
23 if (!(b->core.flag & 0x0004) && (b->core.flag & 0x0002)) {
24 arr_both.push_back(bam_dup1(b)); return;
27 if (b->core.flag & 0x0040) arr_partial_1.push_back(bam_dup1(b));
28 else if (b->core.flag & 0x0080) arr_partial_2.push_back(bam_dup1(b));
29 else arr_partial_unknown.push_back(bam_dup1(b));
32 char get_pattern_code(uint32_t flag) {
33 if (flag & 0x0040) return (flag & 0x0010 ? 1 : 0);
34 else return (flag & 0x0010 ? 0 : 1);
37 bool less_than(bam1_t *a, bam1_t *b) {
38 int32_t ap1 = min(a->core.pos, a->core.mpos);
39 int32_t ap2 = max(a->core.pos, a->core.mpos);
40 int32_t bp1 = min(b->core.pos, b->core.mpos);
41 int32_t bp2 = max(b->core.pos, b->core.mpos);
42 char apat = get_pattern_code(a->core.flag); // apt: a's pattern of strand and mate information
43 char bpat = get_pattern_code(b->core.flag);
45 if (a->core.tid != b->core.tid) return a->core.tid < b->core.tid;
46 if (ap1 != bp1) return ap1 < bp1;
47 if (ap2 != bp2) return ap2 < bp2;
51 int main(int argc, char* argv[]) {
53 printf("UsaOAge: rsem-scan-for-paired-end-reads input.sam output.bam\n");
57 in = samopen(argv[1], "r", NULL);
58 general_assert(in != 0, "Cannot open " + cstrtos(argv[1]) + " !");
59 out = samopen(argv[2], "wb", in->header);
60 general_assert(out != 0, "Cannot open " + cstrtos(argv[2]) + " !");
62 b = bam_init1(); b2 = bam_init1();
65 bool go_on = (samread(in, b) >= 0);
69 printf("."); fflush(stdout);
72 qname.assign(bam1_qname(b));
73 isPaired = (b->core.flag & 0x0001);
76 add_to_appropriate_arr(b);
77 while ((go_on = (samread(in, b) >= 0)) && (qname == bam1_qname(b))) {
78 general_assert_1(b->core.flag & 0x0001, "Read " + qname + " is detected as both single-end and paired-end read!");
79 add_to_appropriate_arr(b);
82 general_assert_1(arr_both.size() % 2 == 0, "Number of first and second mates in read " + qname + "'s full alignments (both mates are aligned) are not matched!");
83 general_assert_1((arr_partial_1.size() + arr_partial_2.size() + arr_partial_unknown.size()) % 2 == 0, "Number of first and second mates in read " + qname + "'s partial alignments (at most one mate is aligned) are not matched!");
85 if (!arr_both.empty()) {
86 sort(arr_both.begin(), arr_both.end(), less_than);
87 for (size_t i = 0; i < arr_both.size(); i++) { samwrite(out, arr_both[i]); bam_destroy1(arr_both[i]); }
91 while (!arr_partial_1.empty() || !arr_partial_2.empty()) {
92 if (!arr_partial_1.empty() && !arr_partial_2.empty()) {
93 samwrite(out, arr_partial_1.back()); bam_destroy1(arr_partial_1.back()); arr_partial_1.pop_back();
94 samwrite(out, arr_partial_2.back()); bam_destroy1(arr_partial_2.back()); arr_partial_2.pop_back();
96 else if (!arr_partial_1.empty()) {
97 samwrite(out, arr_partial_1.back()); bam_destroy1(arr_partial_1.back()); arr_partial_1.pop_back();
98 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
101 samwrite(out, arr_partial_2.back()); bam_destroy1(arr_partial_2.back()); arr_partial_2.pop_back();
102 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
106 while (!arr_partial_unknown.empty()) {
107 samwrite(out, arr_partial_unknown.back()); bam_destroy1(arr_partial_unknown.back()); arr_partial_unknown.pop_back();
112 while ((go_on = (samread(in, b) >= 0)) && (qname == bam1_qname(b))) {
118 if (cnt % 1000000 == 0) { printf("."); fflush(stdout); }
121 printf("\nFinished!\n");
123 bam_destroy1(b); bam_destroy1(b2);