From aee93c83c1c7ff30c2c959a14877cf8a4e2d8f92 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 12 Jun 2010 18:09:22 +0000 Subject: [PATCH] * samtools-0.1.7-14 (r591) * elementary multi-way pileup. More testing and more functionality to be done. --- bam.h | 2 +- bam_pileup.c | 11 +++-- bam_plcmd.c | 112 +++++++++++++++++++++++++++++++++++++++++---------- bamtk.c | 5 ++- 4 files changed, 102 insertions(+), 28 deletions(-) diff --git a/bam.h b/bam.h index b418dcd..f6d31a9 100644 --- a/bam.h +++ b/bam.h @@ -482,7 +482,7 @@ extern "C" { uint32_t is_del:1, is_head:1, is_tail:1; } bam_pileup1_t; - typedef int (*bam_plp_auto_f)(bam1_t *b, void *data); + typedef int (*bam_plp_auto_f)(void *data, bam1_t *b); struct __bam_plp_t; typedef struct __bam_plp_t *bam_plp_t; diff --git a/bam_pileup.c b/bam_pileup.c index bcc1e98..8a75829 100644 --- a/bam_pileup.c +++ b/bam_pileup.c @@ -229,15 +229,15 @@ const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_ { const bam_pileup1_t *plp; if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; } - if ((plp = bam_plp_next(iter, _n_plp, _tid, _pos)) != 0) return plp; + if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; else { *_n_plp = 0; - while (iter->func(iter->b, iter->data) >= 0) { + while (iter->func(iter->data, iter->b) >= 0) { if (bam_plp_push(iter, iter->b) < 0) { *_n_plp = -1; return 0; } - if ((plp = bam_plp_next(iter, _n_plp, _tid, _pos)) != 0) return plp; + if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; } return 0; } @@ -339,10 +339,13 @@ bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data) iter->pos = calloc(n, 8); iter->n_plp = calloc(n, sizeof(int)); iter->plp = calloc(n, sizeof(void*)); + iter->iter = calloc(n, sizeof(void*)); iter->n = n; iter->min = (uint64_t)-1; - for (i = 0; i < n; ++i) + for (i = 0; i < n; ++i) { iter->iter[i] = bam_plp_init(func, data[i]); + iter->pos[i] = iter->min; + } return iter; } diff --git a/bam_plcmd.c b/bam_plcmd.c index ccaa28d..92023b8 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -158,11 +158,37 @@ static int glt3_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, return 0; } +static void pileup_seq(const bam_pileup1_t *p, int pos, int ref_len, const char *ref) +{ + if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33); + if (!p->is_del) { + int j, rb, c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; + rb = (ref && (int)pos < ref_len)? ref[pos] : 'N'; + if (c == '=' || toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; + else c = bam1_strand(p->b)? tolower(c) : toupper(c); + putchar(c); + if (p->indel > 0) { + putchar('+'); putw(p->indel, stdout); + for (j = 1; j <= p->indel; ++j) { + c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; + putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); + } + } else if (p->indel < 0) { + putw(p->indel, stdout); + for (j = 1; j <= -p->indel; ++j) { + c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N'; + putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); + } + } + } else putchar('*'); + if (p->is_tail) putchar('$'); +} + static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data) { pu_data_t *d = (pu_data_t*)data; bam_maqindel_ret_t *r = 0; - int i, j, rb, rms_mapq = -1, *proposed_indels = 0; + int i, rb, rms_mapq = -1, *proposed_indels = 0; uint64_t rms_aux; uint32_t cns = 0; @@ -243,27 +269,7 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p const bam_pileup1_t *p = pu + i; int tmp = p->b->core.qual < d->c->cap_mapQ? p->b->core.qual : d->c->cap_mapQ; rms_aux += tmp * tmp; - if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33); - if (!p->is_del) { - int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; - if (c == '=' || toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; - else c = bam1_strand(p->b)? tolower(c) : toupper(c); - putchar(c); - if (p->indel > 0) { - printf("+%d", p->indel); - for (j = 1; j <= p->indel; ++j) { - c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; - putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); - } - } else if (p->indel < 0) { - printf("%d", p->indel); - for (j = 1; j <= -p->indel; ++j) { - c = (d->ref && (int)pos+j < d->len)? d->ref[pos+j] : 'N'; - putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); - } - } - } else putchar('*'); - if (p->is_tail) putchar('$'); + pileup_seq(p, pos, d->len, d->ref); } // finalize rms_mapq rms_aux = (uint64_t)(sqrt((double)rms_aux / n) + .499); @@ -435,3 +441,65 @@ int bam_pileup(int argc, char *argv[]) free(d->ido); free(d->ref); free(d); return 0; } + +/*********** + * mpileup * + ***********/ + +static int mpileup(int n, char **fn) +{ + bamFile *fp; + int i, tid, pos, *n_plp; + const bam_pileup1_t **plp; + bam_mplp_t iter; + bam_header_t *h = 0; + fp = calloc(n, sizeof(void*)); + plp = calloc(n, sizeof(void*)); + n_plp = calloc(n, sizeof(int*)); + for (i = 0; i < n; ++i) { + bam_header_t *h_tmp; + fp[i] = bam_open(fn[i], "r"); + h_tmp = bam_header_read(fp[i]); + if (i == 0) h = h_tmp; + else { + // FIXME: to check consistency + bam_header_destroy(h_tmp); + } + } + iter = bam_mplp_init(n, bam_read1, fp); + while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) { + printf("%s\t%d\tN", h->target_name[tid], pos + 1); + for (i = 0; i < n; ++i) { + int j; + printf("\t%d\t", n_plp[i]); + if (n_plp[i] == 0) printf("*\t*"); + else { + for (j = 0; j < n_plp[i]; ++j) + pileup_seq(plp[i] + j, pos, 0, 0); + putchar('\t'); + for (j = 0; j < n_plp[i]; ++j) { + const bam_pileup1_t *p = plp[i] + j; + int c = bam1_qual(p->b)[p->qpos] + 33; + if (c > 126) c = 126; + putchar(c); + } + } + } + putchar('\n'); + } + bam_mplp_destroy(iter); + bam_header_destroy(h); + for (i = 0; i < n; ++i) bam_close(fp[i]); + free(fp); free(plp); + return 0; +} + +int bam_mpileup(int argc, char *argv[]) +{ + if (argc == 1) { + fprintf(stderr, "Usage: samtools mpileup [ [...]]\n"); + return 1; + } + mpileup(argc - 1, argv + 1); + return 0; +} diff --git a/bamtk.c b/bamtk.c index c1c4a49..0594b11 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,11 +9,12 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.7-13 (r590)" +#define PACKAGE_VERSION "0.1.7-14 (r591)" #endif int bam_taf2baf(int argc, char *argv[]); int bam_pileup(int argc, char *argv[]); +int bam_mpileup(int argc, char *argv[]); int bam_merge(int argc, char *argv[]); int bam_index(int argc, char *argv[]); int bam_sort(int argc, char *argv[]); @@ -79,6 +80,7 @@ static int usage() fprintf(stderr, "Command: view SAM<->BAM conversion\n"); fprintf(stderr, " sort sort alignment file\n"); fprintf(stderr, " pileup generate pileup output\n"); + fprintf(stderr, " mpileup multi-way pileup\n"); fprintf(stderr, " faidx index/extract FASTA\n"); #if _CURSES_LIB != 0 fprintf(stderr, " tview text alignment viewer\n"); @@ -108,6 +110,7 @@ int main(int argc, char *argv[]) if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1); else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1); else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1); + else if (strcmp(argv[1], "mpileup") == 0) return bam_mpileup(argc-1, argv+1); else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1); else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1); else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1); -- 2.39.2