]> git.donarmstrong.com Git - samtools.git/commitdiff
* samtools-0.1.7-14 (r591)
authorHeng Li <lh3@live.co.uk>
Sat, 12 Jun 2010 18:09:22 +0000 (18:09 +0000)
committerHeng Li <lh3@live.co.uk>
Sat, 12 Jun 2010 18:09:22 +0000 (18:09 +0000)
 * elementary multi-way pileup. More testing and more functionality to be done.

bam.h
bam_pileup.c
bam_plcmd.c
bamtk.c

diff --git a/bam.h b/bam.h
index b418dcd796a171bcdecd71381db6dded02b5ab3a..f6d31a922b12a413528950c2288f4e28a7284215 100644 (file)
--- a/bam.h
+++ b/bam.h
@@ -482,7 +482,7 @@ extern "C" {
                uint32_t is_del:1, is_head:1, is_tail:1;
        } bam_pileup1_t;
 
-       typedef int (*bam_plp_auto_f)(bam1_t *b, void *data);
+       typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
 
        struct __bam_plp_t;
        typedef struct __bam_plp_t *bam_plp_t;
index bcc1e9872e12db407e8a436556cf1e91b9350aa9..8a75829cf6e92889ed72e2b23f08056abedb516d 100644 (file)
@@ -229,15 +229,15 @@ const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_
 {
        const bam_pileup1_t *plp;
        if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }
-       if ((plp = bam_plp_next(iter, _n_plp, _tid, _pos)) != 0) return plp;
+       if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
        else {
                *_n_plp = 0;
-               while (iter->func(iter->b, iter->data) >= 0) {
+               while (iter->func(iter->data, iter->b) >= 0) {
                        if (bam_plp_push(iter, iter->b) < 0) {
                                *_n_plp = -1;
                                return 0;
                        }
-                       if ((plp = bam_plp_next(iter, _n_plp, _tid, _pos)) != 0) return plp;
+                       if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
                }
                return 0;
        }
@@ -339,10 +339,13 @@ bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data)
        iter->pos = calloc(n, 8);
        iter->n_plp = calloc(n, sizeof(int));
        iter->plp = calloc(n, sizeof(void*));
+       iter->iter = calloc(n, sizeof(void*));
        iter->n = n;
        iter->min = (uint64_t)-1;
-       for (i = 0; i < n; ++i)
+       for (i = 0; i < n; ++i) {
                iter->iter[i] = bam_plp_init(func, data[i]);
+               iter->pos[i] = iter->min;
+       }
        return iter;
 }
 
index ccaa28dec1dffc5174d77c31dc81603f8214b38c..92023b87f92e4c293c7bb4409cac63373668c017 100644 (file)
@@ -158,11 +158,37 @@ static int glt3_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu,
        return 0;
 }
 
+static void pileup_seq(const bam_pileup1_t *p, int pos, int ref_len, const char *ref)
+{
+       if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33);
+       if (!p->is_del) {
+               int j, rb, c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
+               rb = (ref && (int)pos < ref_len)? ref[pos] : 'N';
+               if (c == '=' || toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
+               else c = bam1_strand(p->b)? tolower(c) : toupper(c);
+               putchar(c);
+               if (p->indel > 0) {
+                       putchar('+'); putw(p->indel, stdout);
+                       for (j = 1; j <= p->indel; ++j) {
+                               c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
+                               putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
+                       }
+               } else if (p->indel < 0) {
+                       putw(p->indel, stdout);
+                       for (j = 1; j <= -p->indel; ++j) {
+                               c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N';
+                               putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
+                       }
+               }
+       } else putchar('*');
+       if (p->is_tail) putchar('$');
+}
+
 static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data)
 {
        pu_data_t *d = (pu_data_t*)data;
        bam_maqindel_ret_t *r = 0;
-       int i, j, rb, rms_mapq = -1, *proposed_indels = 0;
+       int i, rb, rms_mapq = -1, *proposed_indels = 0;
        uint64_t rms_aux;
        uint32_t cns = 0;
 
@@ -243,27 +269,7 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p
                const bam_pileup1_t *p = pu + i;
                int tmp = p->b->core.qual < d->c->cap_mapQ? p->b->core.qual : d->c->cap_mapQ;
                rms_aux += tmp * tmp;
-               if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33);
-               if (!p->is_del) {
-                       int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
-                       if (c == '=' || toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
-                       else c = bam1_strand(p->b)? tolower(c) : toupper(c);
-                       putchar(c);
-                       if (p->indel > 0) {
-                               printf("+%d", p->indel);
-                               for (j = 1; j <= p->indel; ++j) {
-                                       c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
-                                       putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
-                               }
-                       } else if (p->indel < 0) {
-                               printf("%d", p->indel);
-                               for (j = 1; j <= -p->indel; ++j) {
-                                       c = (d->ref && (int)pos+j < d->len)? d->ref[pos+j] : 'N';
-                                       putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
-                               }
-                       }
-               } else putchar('*');
-               if (p->is_tail) putchar('$');
+               pileup_seq(p, pos, d->len, d->ref);
        }
        // finalize rms_mapq
        rms_aux = (uint64_t)(sqrt((double)rms_aux / n) + .499);
@@ -435,3 +441,65 @@ int bam_pileup(int argc, char *argv[])
        free(d->ido); free(d->ref); free(d);
        return 0;
 }
+
+/***********
+ * mpileup *
+ ***********/
+
+static int mpileup(int n, char **fn)
+{
+       bamFile *fp;
+       int i, tid, pos, *n_plp;
+       const bam_pileup1_t **plp;
+       bam_mplp_t iter;
+       bam_header_t *h = 0;
+       fp = calloc(n, sizeof(void*));
+       plp = calloc(n, sizeof(void*));
+       n_plp = calloc(n, sizeof(int*));
+       for (i = 0; i < n; ++i) {
+               bam_header_t *h_tmp;
+               fp[i] = bam_open(fn[i], "r");
+               h_tmp = bam_header_read(fp[i]);
+               if (i == 0) h = h_tmp;
+               else {
+                       // FIXME: to check consistency
+                       bam_header_destroy(h_tmp);
+               }
+       }
+       iter = bam_mplp_init(n, bam_read1, fp);
+       while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
+               printf("%s\t%d\tN", h->target_name[tid], pos + 1);
+               for (i = 0; i < n; ++i) {
+                       int j;
+                       printf("\t%d\t", n_plp[i]);
+                       if (n_plp[i] == 0) printf("*\t*");
+                       else {
+                               for (j = 0; j < n_plp[i]; ++j)
+                                       pileup_seq(plp[i] + j, pos, 0, 0);
+                               putchar('\t');
+                               for (j = 0; j < n_plp[i]; ++j) {
+                                       const bam_pileup1_t *p = plp[i] + j;
+                                       int c = bam1_qual(p->b)[p->qpos] + 33;
+                                       if (c > 126) c = 126;
+                                       putchar(c);
+                               }
+                       }
+               }
+               putchar('\n');
+       }
+       bam_mplp_destroy(iter);
+       bam_header_destroy(h);
+       for (i = 0; i < n; ++i) bam_close(fp[i]);
+       free(fp); free(plp);
+       return 0;
+}
+
+int bam_mpileup(int argc, char *argv[])
+{
+       if (argc == 1) {
+               fprintf(stderr, "Usage: samtools mpileup <in1.bam> [<in2.bam> [...]]\n");
+               return 1;
+       }
+       mpileup(argc - 1, argv + 1);
+       return 0;
+}
diff --git a/bamtk.c b/bamtk.c
index c1c4a49531e14f3c5f516f8aa9f4ce25faa75d21..0594b119ba13ea8960fec6601c88ebf7fcf5128e 100644 (file)
--- a/bamtk.c
+++ b/bamtk.c
@@ -9,11 +9,12 @@
 #endif
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.7-13 (r590)"
+#define PACKAGE_VERSION "0.1.7-14 (r591)"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);
 int bam_pileup(int argc, char *argv[]);
+int bam_mpileup(int argc, char *argv[]);
 int bam_merge(int argc, char *argv[]);
 int bam_index(int argc, char *argv[]);
 int bam_sort(int argc, char *argv[]);
@@ -79,6 +80,7 @@ static int usage()
        fprintf(stderr, "Command: view        SAM<->BAM conversion\n");
        fprintf(stderr, "         sort        sort alignment file\n");
        fprintf(stderr, "         pileup      generate pileup output\n");
+       fprintf(stderr, "         mpileup     multi-way pileup\n");
        fprintf(stderr, "         faidx       index/extract FASTA\n");
 #if _CURSES_LIB != 0
        fprintf(stderr, "         tview       text alignment viewer\n");
@@ -108,6 +110,7 @@ int main(int argc, char *argv[])
        if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1);
        else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1);
        else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1);
+       else if (strcmp(argv[1], "mpileup") == 0) return bam_mpileup(argc-1, argv+1);
        else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1);
        else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1);
        else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1);