]> git.donarmstrong.com Git - samtools.git/blob - bam_mate.c
cfa2e758a2b550c24b887d992b6a9cbe2ce28b21
[samtools.git] / bam_mate.c
1 #include <stdlib.h>
2 #include <string.h>
3 #include "kstring.h"
4 #include "bam.h"
5
6 void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
7 {
8         bam1_t *swap;
9         int i, end;
10         uint32_t *cigar;
11         str->l = 0;
12         if (b1->core.tid != b2->core.tid || b1->core.tid < 0) return; // coordinateless or not on the same chr; skip
13         if (b1->core.pos > b2->core.pos) swap = b1, b1 = b2, b2 = swap; // make sure b1 has a smaller coordinate
14         kputc((b1->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
15         kputc((b1->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
16         for (i = 0, cigar = bam1_cigar(b1); i < b1->core.n_cigar; ++i) {
17                 kputw(bam_cigar_oplen(cigar[i]), str);
18                 kputc(bam_cigar_opchr(cigar[i]), str);
19         }
20         end = bam_calend(&b1->core, cigar);
21         kputw(b2->core.pos - end, str);
22         kputc('T', str);
23         kputc((b2->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
24         kputc((b2->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
25         for (i = 0, cigar = bam1_cigar(b2); i < b2->core.n_cigar; ++i) {
26                 kputw(bam_cigar_oplen(cigar[i]), str);
27                 kputc(bam_cigar_opchr(cigar[i]), str);
28         }
29         bam_aux_append(b1, "CT", 'Z', str->l+1, (uint8_t*)str->s); 
30 }
31
32 // currently, this function ONLY works if each read has one hit
33 void bam_mating_core(bamFile in, bamFile out, int remove_reads)
34 {
35         bam_header_t *header;
36         bam1_t *b[2];
37         int curr, has_prev, pre_end = 0, cur_end;
38         kstring_t str;
39
40         str.l = str.m = 0; str.s = 0;
41         header = bam_header_read(in);
42         bam_header_write(out, header);
43
44         b[0] = bam_init1();
45         b[1] = bam_init1();
46         curr = 0; has_prev = 0;
47         while (bam_read1(in, b[curr]) >= 0) {
48                 bam1_t *cur = b[curr], *pre = b[1-curr];
49                 if (cur->core.tid < 0) 
50         {
51             if ( !remove_reads ) bam_write1(out, cur);
52             continue;
53         }
54                 cur_end = bam_calend(&cur->core, bam1_cigar(cur));
55                 if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
56                 if (cur->core.flag & BAM_FSECONDARY) 
57         {
58             if ( !remove_reads ) bam_write1(out, cur);
59             continue; // skip secondary alignments
60         }
61                 if (has_prev) {
62                         if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
63                                 cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
64                                 pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
65                                 if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
66                                         && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // set TLEN/ISIZE
67                                 {
68                                         uint32_t cur5, pre5;
69                                         cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
70                                         pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
71                                         cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
72                                 } else cur->core.isize = pre->core.isize = 0;
73                                 if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
74                                 else cur->core.flag &= ~BAM_FMREVERSE;
75                                 if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
76                                 else pre->core.flag &= ~BAM_FMREVERSE;
77                                 if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
78                                 if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
79                                 bam_template_cigar(pre, cur, &str);
80                                 bam_write1(out, pre);
81                                 bam_write1(out, cur);
82                                 has_prev = 0;
83                         } else { // unpaired or singleton
84                                 pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
85                                 if (pre->core.flag & BAM_FPAIRED) {
86                                         pre->core.flag |= BAM_FMUNMAP;
87                                         pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
88                                 }
89                                 bam_write1(out, pre);
90                         }
91                 } else has_prev = 1;
92                 curr = 1 - curr;
93                 pre_end = cur_end;
94         }
95         if (has_prev) bam_write1(out, b[1-curr]);
96         bam_header_destroy(header);
97         bam_destroy1(b[0]);
98         bam_destroy1(b[1]);
99         free(str.s);
100 }
101
102 void usage()
103 {
104     fprintf(stderr,"Usage: samtools fixmate <in.nameSrt.bam> <out.nameSrt.bam>\n");
105     fprintf(stderr,"Options:\n");
106     fprintf(stderr,"       -r    remove unmapped reads and secondary alignments\n");
107     exit(1);
108 }
109
110 int bam_mating(int argc, char *argv[])
111 {
112         bamFile in, out;
113     int c, remove_reads=0;
114     while ((c = getopt(argc, argv, "r")) >= 0) {
115         switch (c) {
116             case 'r': remove_reads=1; break;
117         }
118     }
119     if (optind+1 >= argc) usage();
120         in = (strcmp(argv[optind], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[optind], "r");
121     out = (strcmp(argv[optind+1], "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(argv[optind+1], "w");
122         bam_mating_core(in, out, remove_reads);
123         bam_close(in); bam_close(out);
124         return 0;
125 }
126
127