} else {
b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), mode);
}
- b->fp->owned_file = 1;
return b;
}
ks_tokaux_t aux;
// set ref, alt, flt, info, fmt
b->ref = b->alt = b->flt = b->info = b->fmt = 0;
- for (p = b->str, n = 0; p < b->str + b->l_str; ++p)
- if (*p == 0 && p+1 != b->str + b->l_str) tmp[n++] = p + 1;
+ for (p = b->str, n = 0; p < b->str + b->l_str; ++p) {
+ if (*p == 0 && p+1 != b->str + b->l_str) {
+ if (n == 5) {
+ ++n;
+ break;
+ } else tmp[n++] = p + 1;
+ }
+ }
if (n != 5) {
- fprintf(stderr, "[%s] incorrect number of fields (%d != 5). Corrupted file?\n", __func__, n);
+ fprintf(stderr, "[%s] incorrect number of fields (%d != 5) at %d:%d\n", __func__, n, b->tid, b->pos);
return -1;
}
b->ref = tmp[0]; b->alt = tmp[1]; b->flt = tmp[2]; b->info = tmp[3]; b->fmt = tmp[4];
for (i = 0; i < b->n_gi; ++i) {
if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2;
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2)) {
+ } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) {
b->gi[i].len = 2;
- } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)
- || b->gi[i].fmt == bcf_str2int("SP", 2))
- {
+ } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)) {
b->gi[i].len = 1;
+ } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
+ b->gi[i].len = 4;
} else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2 * 4;
}
}
bgzf_read(bp->fp, b->str, b->l_str);
l = 12 + b->l_str;
- bcf_sync(b);
+ if (bcf_sync(b) < 0) return -2;
for (i = 0; i < b->n_gi; ++i) {
bgzf_read(bp->fp, b->gi[i].data, b->gi[i].len * h->n_smpl);
l += b->gi[i].len * h->n_smpl;
}
x = b->n_alleles * (b->n_alleles + 1) / 2;
if (b->n_gi == 0) return;
+ int iPL = -1;
+ if ( b->n_alleles > 2 ) {
+ for (i=0; i<b->n_gi; i++) {
+ if ( b->gi[i].fmt == bcf_str2int("PL", 2) ) {
+ iPL = i;
+ break;
+ }
+ }
+ }
for (j = 0; j < h->n_smpl; ++j) {
+ int ploidy = b->ploidy ? b->ploidy[j] : 2;
kputc('\t', s);
for (i = 0; i < b->n_gi; ++i) {
if (i) kputc(':', s);
if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
uint8_t *d = (uint8_t*)b->gi[i].data + j * x;
int k;
- for (k = 0; k < x; ++k) {
- if (k > 0) kputc(',', s);
- kputw(d[k], s);
- }
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
+ if ( ploidy==1 )
+ for (k=0; k<b->n_alleles; k++)
+ {
+ if (k>0) kputc(',', s);
+ kputw(d[(k+1)*(k+2)/2-1], s);
+ }
+ else
+ for (k = 0; k < x; ++k) {
+ if (k > 0) kputc(',', s);
+ kputw(d[k], s);
+ }
+ } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) {
kputw(((uint16_t*)b->gi[i].data)[j], s);
- } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) {
+ } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
kputw(((uint8_t*)b->gi[i].data)[j], s);
+ } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
+ kputw(((int32_t*)b->gi[i].data)[j], s);
} else if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
- int y = ((uint8_t*)b->gi[i].data)[j];
- if (y>>7&1) {
- kputsn("./.", 3, s);
- } else {
- kputc('0' + (y>>3&7), s);
- kputc("/|"[y>>6&1], s);
- kputc('0' + (y&7), s);
- }
+ int y = ((uint8_t*)b->gi[i].data)[j];
+ if ( ploidy==1 )
+ {
+ if ( y>>7&1 )
+ kputc('.', s);
+ else
+ kputc('0' + (y>>3&7), s);
+ }
+ else
+ {
+ if ( y>>7&1 )
+ kputsn("./.", 3, s);
+ else {
+ kputc('0' + (y>>3&7), s);
+ kputc("/|"[y>>6&1], s);
+ kputc('0' + (y&7), s);
+ }
+ }
} else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
float *d = (float*)b->gi[i].data + j * x;
int k;
if (k > 0) kputc(',', s);
ksprintf(s, "%.2f", d[k]);
}
- }
+ } else kputc('.', s); // custom fields
}
}
}
return 0;
}
+int remove_tag(char *str, const char *tag, char delim)
+{
+ char *tmp = str, *p;
+ int len_diff = 0, ori_len = strlen(str);
+ while ( *tmp && (p = strstr(tmp,tag)) )
+ {
+ if ( p>str )
+ {
+ if ( *(p-1)!=delim ) { tmp=p+1; continue; } // shared substring
+ p--;
+ }
+ char *q=p+1;
+ while ( *q && *q!=delim ) q++;
+ if ( p==str && *q ) q++; // the tag is first, don't move the delim char
+ len_diff += q-p;
+ if ( ! *q ) { *p = 0; break; } // the tag was last, no delim follows
+ else
+ memmove(p,q,ori_len-(int)(p-str)-(int)(q-p)); // *q==delim
+ }
+ if ( len_diff==ori_len )
+ str[0]='.', str[1]=0, len_diff--;
+
+ return len_diff;
+}
+
+
+void rm_info(kstring_t *s, const char *key)
+{
+ char *p = s->s;
+ int n = 0;
+ while ( n<4 )
+ {
+ if ( !*p ) n++;
+ p++;
+ }
+ char *q = p+1;
+ while ( *q && q-s->s<s->l ) q++;
+
+ int nrm = remove_tag(p, key, ';');
+ if ( nrm )
+ memmove(q-nrm, q, s->s+s->l-q+1);
+ s->l -= nrm;
+}
+
int bcf_cpy(bcf1_t *r, const bcf1_t *b)
{
char *t1 = r->str;
memcpy(r->gi[i].data, b->gi[i].data, r->n_smpl * r->gi[i].len);
return 0;
}
+
+int bcf_is_indel(const bcf1_t *b)
+{
+ char *p;
+ if (strlen(b->ref) > 1) return 1;
+ for (p = b->alt; *p; ++p)
+ if (*p != ',' && p[1] != ',' && p[1] != '\0')
+ return 1;
+ return 0;
+}