]> git.donarmstrong.com Git - samtools.git/blob - bam_tview.c
165eadda02fdfef828733b97bd8edaabbc3a5f5c
[samtools.git] / bam_tview.c
1 #include <assert.h>
2 #include "bam_tview.h"
3
4 int base_tv_init(tview_t* tv,const char *fn, const char *fn_fa, const char *samples)
5         {
6         assert(tv!=NULL);
7         assert(fn!=NULL);
8         tv->mrow = 24; tv->mcol = 80;
9         tv->color_for = TV_COLOR_MAPQ;
10         tv->is_dot = 1;
11         fprintf(stderr,"Opening '%s'\n",fn);
12         tv->fp = bam_open(fn, "r");
13         if(tv->fp==0)
14                 {
15                 fprintf(stderr,"bam_open %s. %s\n", fn,fn_fa);
16                 exit(EXIT_FAILURE);
17                 }
18         bgzf_set_cache_size(tv->fp, 8 * 1024 *1024);
19         assert(tv->fp);
20         fprintf(stderr,"Opening header '%s'\n",fn);
21         tv->header = bam_header_read(tv->fp);
22         if(tv->header==0)
23                 {
24                 fprintf(stderr,"Cannot read '%s'.\n", fn);
25                 exit(EXIT_FAILURE);
26                 }
27         tv->idx = bam_index_load(fn);
28         if (tv->idx == 0) exit(1);
29         tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
30         if (fn_fa) tv->fai = fai_load(fn_fa);
31         tv->bca = bcf_call_init(0.83, 13);
32         tv->ins = 1;
33
34     if ( samples ) 
35     {
36         if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text);
37         void *iter = tv->header->dict;
38         const char *key, *val;
39         int n = 0;
40         tv->rg_hash = kh_init(kh_rg);
41         while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) )
42         {
43             if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) )
44             {
45                 khiter_t k = kh_get(kh_rg, tv->rg_hash, key);
46                 if ( k != kh_end(tv->rg_hash) ) continue;
47                 int ret;
48                 k = kh_put(kh_rg, tv->rg_hash, key, &ret);
49                 kh_value(tv->rg_hash, k) = val;
50                 n++;
51             }
52         }
53         if ( !n )
54         {
55             fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples);
56             exit(EXIT_FAILURE);
57         }
58     }
59
60         return 0;
61         }
62
63
64 void base_tv_destroy(tview_t* tv)
65         {
66         bam_lplbuf_destroy(tv->lplbuf);
67         bcf_call_destroy(tv->bca);
68         bam_index_destroy(tv->idx);
69         if (tv->fai) fai_destroy(tv->fai);
70         free(tv->ref);
71         bam_header_destroy(tv->header);
72         bam_close(tv->fp);
73         }
74
75
76 int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
77 {
78         extern unsigned char bam_nt16_table[256];
79         tview_t *tv = (tview_t*)data;
80         int i, j, c, rb, attr, max_ins = 0;
81         uint32_t call = 0;
82         if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen
83         // print referece
84         rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N';
85         for (i = tv->last_pos + 1; i < pos; ++i) {
86                 if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1);
87                 c = tv->ref? tv->ref[i - tv->left_pos] : 'N';
88                 tv->my_mvaddch(tv,1, tv->ccol++, c);
89         }
90         if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
91         { // call consensus
92                 bcf_callret1_t bcr;
93                 int qsum[4], a1, a2, tmp;
94                 double p[3], prior = 30;
95                 bcf_call_glfgen(n, pl, bam_nt16_table[rb], tv->bca, &bcr);
96                 for (i = 0; i < 4; ++i) qsum[i] = bcr.qsum[i]<<2 | i;
97                 for (i = 1; i < 4; ++i) // insertion sort
98                         for (j = i; j > 0 && qsum[j] > qsum[j-1]; --j)
99                                 tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp;
100                 a1 = qsum[0]&3; a2 = qsum[1]&3;
101                 p[0] = bcr.p[a1*5+a1]; p[1] = bcr.p[a1*5+a2] + prior; p[2] = bcr.p[a2*5+a2];
102                 if ("ACGT"[a1] != toupper(rb)) p[0] += prior + 3;
103                 if ("ACGT"[a2] != toupper(rb)) p[2] += prior + 3;
104                 if (p[0] < p[1] && p[0] < p[2]) call = (1<<a1)<<16 | (int)((p[1]<p[2]?p[1]:p[2]) - p[0] + .499);
105                 else if (p[2] < p[1] && p[2] < p[0]) call = (1<<a2)<<16 | (int)((p[0]<p[1]?p[0]:p[1]) - p[2] + .499);
106                 else call = (1<<a1|1<<a2)<<16 | (int)((p[0]<p[2]?p[0]:p[2]) - p[1] + .499);
107         }
108         attr = BAM_TVIEW_UNDERLINE;
109         c = ",ACMGRSVTWYHKDBN"[call>>16&0xf];
110         i = (call&0xffff)/10+1;
111         if (i > 4) i = 4;
112         attr |= tv->my_colorpair(tv,i);
113         if (c == toupper(rb)) c = '.';
114         tv->my_attron(tv,attr);
115         tv->my_mvaddch(tv,2, tv->ccol, c);
116         tv->my_attroff(tv,attr);
117         if(tv->ins) {
118                 // calculate maximum insert
119                 for (i = 0; i < n; ++i) {
120                         const bam_pileup1_t *p = pl + i;
121                         if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel;
122                 }
123         }
124         // core loop
125         for (j = 0; j <= max_ins; ++j) {
126                 for (i = 0; i < n; ++i) {
127                         const bam_pileup1_t *p = pl + i;
128                         int row = TV_MIN_ALNROW + p->level - tv->row_shift;
129                         if (j == 0) {
130                                 if (!p->is_del) {
131                                         if (tv->base_for == TV_BASE_COLOR_SPACE && 
132                                                         (c = bam_aux_getCSi(p->b, p->qpos))) {
133                                                 // assume that if we found one color, we will be able to get the color error
134                                                 if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? ',' : '.';
135                                         } else {
136                                                 if (tv->show_name) {
137                                                         char *name = bam1_qname(p->b);
138                                                         c = (p->qpos + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos];
139                                                 } else {
140                                                         c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
141                                                         if (tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
142                                                 }
143                                         }
144                                 } else c = p->is_refskip? (bam1_strand(p->b)? '<' : '>') : '*';
145                         } else { // padding
146                                 if (j > p->indel) c = '*';
147                                 else { // insertion
148                                         if (tv->base_for ==  TV_BASE_NUCL) {
149                                                 if (tv->show_name) {
150                                                         char *name = bam1_qname(p->b);
151                                                         c = (p->qpos + j + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos + j];
152                                                 } else {
153                                                         c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
154                                                         if (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
155                                                 }
156                                         } else {
157                                                 c = bam_aux_getCSi(p->b, p->qpos + j);
158                                                 if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam1_strand(p->b)? ',' : '.';
159                                         }
160                                 }
161                         }
162                         if (row > TV_MIN_ALNROW && row < tv->mrow) {
163                                 int x;
164                                 attr = 0;
165                                 if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR))
166                                                 || (p->b->core.flag & BAM_FSECONDARY)) attr |= BAM_TVIEW_UNDERLINE;
167                                 if (tv->color_for == TV_COLOR_BASEQ) {
168                                         x = bam1_qual(p->b)[p->qpos]/10 + 1;
169                                         if (x > 4) x = 4;
170                                         attr |= COLOR_PAIR(x);
171                                 } else if (tv->color_for == TV_COLOR_MAPQ) {
172                                         x = p->b->core.qual/10 + 1;
173                                         if (x > 4) x = 4;
174                                         attr |= COLOR_PAIR(x);
175                                 } else if (tv->color_for == TV_COLOR_NUCL) {
176                                         x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5;
177                                         attr |= COLOR_PAIR(x);
178                                 } else if(tv->color_for == TV_COLOR_COL) {
179                                         x = 0;
180                                         switch(bam_aux_getCSi(p->b, p->qpos)) {
181                                                 case '0': x = 0; break;
182                                                 case '1': x = 1; break;
183                                                 case '2': x = 2; break;
184                                                 case '3': x = 3; break;
185                                                 case '4': x = 4; break;
186                                                 default: x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; break;
187                                         }
188                                         x+=5;
189                                         attr |= COLOR_PAIR(x);
190                                 } else if(tv->color_for == TV_COLOR_COLQ) {
191                                         x = bam_aux_getCQi(p->b, p->qpos);
192                                         if(0 == x) x = bam1_qual(p->b)[p->qpos];
193                                         x = x/10 + 1;
194                                         if (x > 4) x = 4;
195                                         attr |= COLOR_PAIR(x);
196                                 }
197                                 tv->my_attron(tv,attr);
198                                 tv->my_mvaddch(tv,row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c));
199                                 tv->my_attroff(tv,attr);
200                         }
201                 }
202                 c = j? '*' : rb;
203                 if (c == '*') {
204                         attr = COLOR_PAIR(8);
205                         tv->my_attron(tv,attr);
206                         tv->my_mvaddch(tv,1, tv->ccol++, c);
207                         tv->my_attroff(tv,attr);
208                 } else tv->my_mvaddch(tv,1, tv->ccol++, c);
209         }
210         tv->last_pos = pos;
211         return 0;
212 }
213
214
215
216
217 int tv_fetch_func(const bam1_t *b, void *data)
218 {
219         tview_t *tv = (tview_t*)data;
220     if ( tv->rg_hash )
221     {
222         const uint8_t *rg = bam_aux_get(b, "RG");
223         if ( !rg ) return 0; 
224         khiter_t k = kh_get(kh_rg, tv->rg_hash, (const char*)(rg + 1));
225         if ( k == kh_end(tv->rg_hash) ) return 0;
226     }
227         if (tv->no_skip) {
228                 uint32_t *cigar = bam1_cigar(b); // this is cheating...
229                 int i;
230                 for (i = 0; i <b->core.n_cigar; ++i) {
231                         if ((cigar[i]&0xf) == BAM_CREF_SKIP)
232                                 cigar[i] = cigar[i]>>4<<4 | BAM_CDEL;
233                 }
234         }
235         bam_lplbuf_push(b, tv->lplbuf);
236         return 0;
237 }
238
239 int tv_draw_aln(tview_t *tv, int tid, int pos)
240         {
241         assert(tv!=NULL);
242         // reset
243         tv->my_clear(tv);
244         tv->curr_tid = tid; tv->left_pos = pos;
245         tv->last_pos = tv->left_pos - 1;
246         tv->ccol = 0;
247         // print ref and consensus
248         if (tv->fai) {
249                 char *str;
250                 if (tv->ref) free(tv->ref);
251                 assert(tv->curr_tid>=0);
252                 
253                 str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1);
254                 assert(str!=NULL);
255                 sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol);
256                 tv->ref = fai_fetch(tv->fai, str, &tv->l_ref);
257                 free(str);
258         }
259         // draw aln
260         bam_lplbuf_reset(tv->lplbuf);
261         bam_fetch(tv->fp, tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol, tv, tv_fetch_func);
262         bam_lplbuf_push(0, tv->lplbuf);
263
264         while (tv->ccol < tv->mcol) {
265                 int pos = tv->last_pos + 1;
266                 if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
267                 tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N');
268                 ++tv->last_pos;
269         }
270         return 0;
271 }
272