git.donarmstrong.com Git - samtools.git/blobdiff - bcftools/vcf.c
fixed a bug in indexing
[samtools.git] / bcftools / vcf.c
index 07e25315f7e69a529dcd4bcd4f89e3f47696dd60..d441d7f0b278bf0d6d72eb05d0e641a0926b04ef 100644 (file)
@@ -93,9 +93,22 @@ int vcf_close(bcf_t *bp)
 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h)
 {
        vcf_t *v = (vcf_t*)bp->v;
-       int i;
+       int i, has_ref = 0, has_ver = 0;
        if (!bp->is_vcf) return bcf_hdr_write(bp, h);
-       if (h->l_txt > 0) fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
+       if (h->l_txt > 0) {
+               if (strstr(h->txt, "##fileformat=")) has_ver = 1;
+               if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.0\n");
+               fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
+               if (strstr(h->txt, "##SQ=")) has_ref = 1;
+       }
+       if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.0\n");
+       if (!has_ref) {
+               fprintf(v->fpout, "##SQ=");
+               for (i = 0; i < h->n_ref; ++i) {
+                       fprintf(v->fpout, "%s", h->ns[i]);
+                       fputc(i == h->n_ref - 1? '\n' : ',', v->fpout);
+               }
+       }
        fprintf(v->fpout, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        for (i = 0; i < h->n_smpl; ++i)
                fprintf(v->fpout, "\t%s", h->sns[i]);
@@ -137,19 +150,38 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
                        }
                        b->tid = tid;
                } else if (k == 1) { // pos
-                       b->pos = atoi(p);
+                       b->pos = atoi(p) - 1;
                } else if (k == 5) { // qual
-                       b->qual = (p[0] >= '0' && p[0] <= '9')? atoi(p) : 0;
+                       b->qual = (p[0] >= '0' && p[0] <= '9')? atof(p) : 0;
                } else if (k <= 8) { // variable length strings
                        kputs(p, &str); kputc('\0', &str);
                        b->l_str = str.l; b->m_str = str.m; b->str = str.s;
                        if (k == 8) bcf_sync(h->n_smpl, b);
-               } else {
+               } else { // k >= 9
+                       if (strncmp(p, "./.", 3) == 0) {
+                               for (i = 0; i < b->n_gi; ++i) {
+                                       if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
+                                               ((uint8_t*)b->gi[i].data)[k-9] = 1<<7;
+                                       } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
+                                               ((uint8_t*)b->gi[i].data)[k-9] = 0;
+                                       } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
+                                               ((uint16_t*)b->gi[i].data)[k-9] = 0;
+                                       } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
+                                               int y = b->n_alleles * (b->n_alleles + 1) / 2;
+                                               memset((uint8_t*)b->gi[i].data + (k - 9) * y, 0, y);
+                                       } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
+                                               int y = b->n_alleles * (b->n_alleles + 1) / 2;
+                                               memset((float*)b->gi[i].data + (k - 9) * y, 0, y * 4);
+                                       }
+                               }
+                               goto endblock;
+                       }
                        for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) {
                                if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
                                        ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 0 : 1) << 6;
                                } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
-                                       int x = strtol(q, &q, 10);
+                                       double _x = strtod(q, &q);
+                                       int x = (int)(_x + .499);
                                        if (x > 255) x = 255;
                                        ((uint8_t*)b->gi[i].data)[k-9] = x;
                                } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
@@ -157,16 +189,27 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
                                        if (x > 0xffff) x = 0xffff;
                                        ((uint16_t*)b->gi[i].data)[k-9] = x;
                                } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
-                                       int x, j;
+                                       int x, y, j;
                                        uint8_t *data = (uint8_t*)b->gi[i].data;
-                                       for (j = 0; j < b->gi[i].len; ++j) {
+                                       y = b->n_alleles * (b->n_alleles + 1) / 2;
+                                       for (j = 0; j < y; ++j) {
                                                x = strtol(q, &q, 10);
                                                if (x > 255) x = 255;
-                                               data[i * b->gi[i].len + j] = x;
+                                               data[(k-9) * y + j] = x;
+                                               ++q;
+                                       }
+                               } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
+                                       int j, y;
+                                       float x, *data = (float*)b->gi[i].data;
+                                       y = b->n_alleles * (b->n_alleles + 1) / 2;
+                                       for (j = 0; j < y; ++j) {
+                                               x = strtod(q, &q);
+                                               data[(k-9) * y + j] = x;
                                                ++q;
                                        }
                                }
                        }
+               endblock: i = i;
                }
        }
        h->l_nm = rn.l; h->name = rn.s;