git.donarmstrong.com Git - samtools.git/commitdiff
* write a little more VCF header
author Heng Li <lh3@live.co.uk>
Tue, 17 Aug 2010 16:12:20 +0000 (16:12 +0000)
committer Heng Li <lh3@live.co.uk>
Tue, 17 Aug 2010 16:12:20 +0000 (16:12 +0000)
 * concatenate BCFs

bcftools/main.c
bcftools/vcf.c

index e271efd6b3afb6b7f63b9d317da4a849959e4072..7ffc2a05f4557a1aa12a156a86dc2e15cf51ead3 100644 (file)
@@ -1,10 +1,47 @@
 #include <string.h>
 #include <stdlib.h>
+#include <sys/stat.h>
 #include "bcf.h"
 
 int bcfview(int argc, char *argv[]);
 int bcf_main_index(int argc, char *argv[]);
 
+#define BUF_SIZE 0x10000 // copy-buffer size in bytes (0x10000 = 65536)
+/* bcf_cat: write the header of fn[0] to the "-" output, then append the bytes that follow the header in each of the n inputs. */
+int bcf_cat(int n, char * const *fn) // n: number of input paths; fn: the paths; returns 0 on completion
+{
+       int i;
+       bcf_t *out;
+       uint8_t *buf;
+       buf = malloc(BUF_SIZE); // NOTE(review): result is not checked for NULL before it is used below
+       out = bcf_open("-", "w"); // "-" presumably selects stdout -- confirm; NOTE(review): result is not checked
+       for (i = 0; i < n; ++i) {
+               bcf_t *in;
+               bcf_hdr_t *h;
+               off_t end;
+               struct stat s;
+               in = bcf_open(fn[i], "r"); // NOTE(review): no check that the open succeeded
+               h = bcf_hdr_read(in); // the header is read from every input...
+               if (i == 0) bcf_hdr_write(out, h); // ...but only the first input's header is written out
+               bcf_hdr_destroy(h);
+#ifdef _USE_KNETFILE
+               fstat(knet_fileno(in->fp->x.fpr), &s); // size of the underlying file; NOTE(review): fstat result is not checked
+               end = s.st_size - 28; // stop 28 bytes before EOF -- presumably skips a trailing BGZF end-of-file marker block; TODO confirm
+               while (knet_tell(in->fp->x.fpr) < end) {
+                       int size = knet_tell(in->fp->x.fpr) + BUF_SIZE < end? BUF_SIZE : end - knet_tell(in->fp->x.fpr); // BUF_SIZE, or the remainder for the final chunk
+                       knet_read(in->fp->x.fpr, buf, size); // NOTE(review): return value ignored, so a short or failed read goes unnoticed
+                       fwrite(buf, 1, size, out->fp->x.fpw); // bytes are passed to the output unchanged; NOTE(review): fwrite result is not checked
+               }
+#else
+               abort(); // FIXME: not implemented
+#endif
+               bcf_close(in);
+       }
+       bcf_close(out);
+       free(buf);
+       return 0;
+}
+
 int main(int argc, char *argv[])
 {
        if (argc == 1) {
@@ -12,11 +49,13 @@ int main(int argc, char *argv[])
                fprintf(stderr, "Usage:   bcftools <command> <arguments>\n\n");
                fprintf(stderr, "Command: view      print, extract, convert and call SNPs from BCF\n");
                fprintf(stderr, "         index     index BCF\n");
+               fprintf(stderr, "         cat       concatenate BCFs\n");
                fprintf(stderr, "\n");
                return 1;
        }
        if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1);
-       if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1);
+       else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1);
+       else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2);
        else {
                fprintf(stderr, "[main] Unrecognized command.\n");
                return 1;
index 07e25315f7e69a529dcd4bcd4f89e3f47696dd60..81c7061c9cbc4462ade1d36825100b5b760c5975 100644 (file)
@@ -93,9 +93,22 @@ int vcf_close(bcf_t *bp)
 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h)
 {
        vcf_t *v = (vcf_t*)bp->v;
-       int i;
+       int i, has_ref = 0, has_ver = 0;
        if (!bp->is_vcf) return bcf_hdr_write(bp, h);
-       if (h->l_txt > 0) fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
+       if (h->l_txt > 0) {
+               if (strstr(h->txt, "##fileformat=")) has_ver = 1;
+               if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.0\n");
+               fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
+               if (strstr(h->txt, "##SQ=")) has_ref = 1;
+       }
+       if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.0\n");
+       if (!has_ref) {
+               fprintf(v->fpout, "##SQ=");
+               for (i = 0; i < h->n_ref; ++i) {
+                       fprintf(v->fpout, "%s", h->ns[i]);
+                       fputc(i == h->n_ref - 1? '\n' : ',', v->fpout);
+               }
+       }
        fprintf(v->fpout, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        for (i = 0; i < h->n_smpl; ++i)
                fprintf(v->fpout, "\t%s", h->sns[i]);