{
if (bca == 0) return;
errmod_destroy(bca->e);
- free(bca->bases); free(bca);
+ free(bca->bases); free(bca->inscns); free(bca);
}
/* ref_base is the 4-bit representation of the reference base. It is
* negative if we are looking at an indel. */
return 0;
}
-int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP)
+int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP,
+ const bcf_callaux_t *bca, const char *ref)
{
extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two);
kstring_t s;
- int i;
+ int i, j;
b->n_smpl = bc->n;
b->tid = tid; b->pos = pos; b->qual = 0;
s.s = b->str; s.m = b->m_str; s.l = 0;
kputc('\0', &s);
- if (bc->ori_ref < 0) {
- kputc('N', &s); kputc('\0', &s);
- kputs("<INDEL>", &s); kputc('\0', &s);
- } else {
+ if (bc->ori_ref < 0) { // an indel
+ // write REF
+ kputc(ref[pos], &s);
+ for (j = 0; j < bca->indelreg; ++j) kputc(ref[pos+1+j], &s);
+ kputc('\0', &s);
+ // write ALT
+ kputc(ref[pos], &s);
+ for (i = 1; i < 4; ++i) { // one iteration per ALT allele; stops at the first negative entry of bc->a[]
+ if (bc->a[i] < 0) break;
+ if (i > 1) { kputc(',', &s); kputc(ref[pos], &s); } // each further ALT allele needs its own leading anchor base
+ if (bca->indel_types[bc->a[i]] < 0) { // deletion
+ for (j = -bca->indel_types[bc->a[i]]; j < bca->indelreg; ++j) // copy the reference bases left after the deleted stretch; j (not i) is the cursor
+ kputc(ref[pos+1+j], &s);
+ } else { // insertion; cannot be a reference unless a bug
+ char *inscns = &bca->inscns[bc->a[i] * bca->maxins]; // consensus inserted sequence for this allele, stored as indices into "ACGTN"
+ for (j = 0; j < bca->indel_types[bc->a[i]]; ++j)
+ kputc("ACGTN"[(int)inscns[j]], &s);
+ for (j = 0; j < bca->indelreg; ++j) kputc(ref[pos+1+j], &s); // then the same reference stretch that REF carries
+ }
+ }
+ kputc('\0', &s);
+ } else { // a SNP
kputc("ACGTN"[bc->ori_ref], &s); kputc('\0', &s);
for (i = 1; i < 5; ++i) {
if (bc->a[i] < 0) break;
}
kputc('\0', &s);
// INFO
+ if (bc->ori_ref < 0) kputs("INDEL;", &s);
kputs("I16=", &s);
for (i = 0; i < 16; ++i) {
if (i) kputc(',', &s);
// for internal uses
int max_bases;
int indel_types[4];
+ int maxins, indelreg;
+ char *inscns;
uint16_t *bases;
errmod_t *e;
} bcf_callaux_t;
void bcf_call_destroy(bcf_callaux_t *bca);
int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r);
int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call);
- int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP);
+ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP,
+ const bcf_callaux_t *bca, const char *ref);
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref);
#ifdef __cplusplus
#include <assert.h>
+#include <ctype.h>
#include "bam.h"
#include "bam2bcf.h"
#include "ksort.h"
return q < qh? q : qh;
}
+static inline int est_indelreg(int pos, const char *ref, int l, char *ins4) // how far past pos the indel unit keeps repeating in ref
+{ // ins4: inserted sequence as indices into "ACGTN", or 0 to use the l reference bases following pos as the repeat unit
+ int i, j, max = 0, max_i = pos, score = 0;
+ l = abs(l); // only the magnitude of the indel length is used; NOTE(review): l must be non-zero because of j%l below - the caller visible here skips type 0
+ for (i = pos + 1, j = 0; ref[i]; ++i, ++j) { // walk ref from pos+1 until its NUL terminator
+ if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; // compare against the inserted unit, repeated cyclically: +1 match, -10 mismatch
+ else score += (toupper(ref[i]) != toupper(ref[pos+1+j%l]))? -10 : 1; // compare against the first l bases after pos, repeated cyclically
+ if (score < 0) break; // give up once the running score goes negative
+ if (max < score) max = score, max_i = i; // remember the position of the best running score
+ }
+ return max_i - pos; // offset from pos to the best-scoring position; 0 if the score never rose above 0
+}
+
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref)
{
extern void ks_introsort_uint32_t(int, uint32_t*);
ref2 = calloc(right - left + max_ins + 2, 1);
query = calloc(right - left + max_rd_len + max_ins + 2, 1);
score = calloc(N * n_types, sizeof(int));
+ bca->indelreg = 0;
for (t = 0; t < n_types; ++t) {
- int l;
+ int l, ir;
ka_param2_t ap = ka_param2_qual;
ap.band_width = abs(types[t]) + 3;
+ // compute indelreg
+ if (types[t] == 0) ir = 0;
+ else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
+ else ir = est_indelreg(pos, ref, -types[t], 0);
+ if (ir > bca->indelreg) bca->indelreg = ir;
+// fprintf(stderr, "%d, %d, %d\n", pos, types[t], ir);
// write ref2
for (k = 0, j = left; j <= pos; ++j)
ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]];
// fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d q=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ);
}
}
- // determine bca->indel_types[]
+ // determine bca->indel_types[] and bca->inscns
+ bca->maxins = max_ins;
+ bca->inscns = realloc(bca->inscns, bca->maxins * 4);
for (t = 0; t < n_types; ++t)
sumq[t] = sumq[t]<<6 | t;
for (t = 1; t < n_types; ++t) // insertion sort
sumq[0] = tmp;
}
for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL;
- for (t = 0; t < 4 && t < n_types; ++t)
+ for (t = 0; t < 4 && t < n_types; ++t) {
bca->indel_types[t] = types[sumq[t]&0x3f];
+ memcpy(&bca->inscns[t * bca->maxins], &inscns[(sumq[t]&0x3f) * max_ins], bca->maxins);
+ }
// update p->aux
for (s = K = 0; s < n; ++s) {
for (i = 0; i < n_plp[s]; ++i, ++K) {
}
}
}
- // FIXME: to set the inserted sequence
free(score);
// free
free(types); free(inscns);
for (i = 0; i < gplp.n; ++i)
bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
bcf_call_combine(gplp.n, bcr, ref16, &bc);
- bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0, (conf->flag&MPLP_FMT_SP));
+ bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
+ (conf->flag&MPLP_FMT_SP), 0, 0);
bcf_write(bp, bh, b);
bcf_destroy(b);
// call indels
bcf_call_combine(gplp.n, bcr, -1, &bc);
b = calloc(1, sizeof(bcf1_t));
bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
- (conf->flag&MPLP_FMT_SP));
+ (conf->flag&MPLP_FMT_SP), bca, ref);
bcf_write(bp, bh, b);
bcf_destroy(b);
}
#endif
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.9-2 (r787)"
+#define PACKAGE_VERSION "0.1.9-3 (r797)"
#endif
int bam_taf2baf(int argc, char *argv[]);