From c5d9a2277fc92480fb816fcc748f36213917250a Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 6 Jan 2011 15:52:15 +0000 Subject: [PATCH] * samtools-0.1.12-10 (r896) * allow to exclude read groups in mpileup --- bam_plcmd.c | 47 ++++++++++++++++++++++++++++++--------------- bamtk.c | 2 +- bcftools/bcf.h | 1 + bcftools/bcfutils.c | 10 ++++++++++ bcftools/call1.c | 2 ++ 5 files changed, 45 insertions(+), 17 deletions(-) diff --git a/bam_plcmd.c b/bam_plcmd.c index d03bab1..d6e8213 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -542,13 +542,15 @@ typedef struct { char *reg, *fn_pos, *pl_list; faidx_t *fai; kh_64_t *hash; + void *rghash; } mplp_conf_t; typedef struct { bamFile fp; bam_iter_t iter; - int min_mq, flag, ref_id, capQ_thres; + int ref_id; char *ref; + const mplp_conf_t *conf; } mplp_aux_t; typedef struct { @@ -568,16 +570,21 @@ static int mplp_func(void *data, bam1_t *b) int has_ref; ret = ma->iter? bam_iter_read(ma->fp, ma->iter, b) : bam_read1(ma->fp, b); if (ret < 0) break; + if (ma->conf->rghash) { // exclude read groups + uint8_t *rg = bam_aux_get(b, "RG"); + skip = (rg && bcf_str2id(ma->conf->rghash, (const char*)(rg+1)) >= 0); + if (skip) continue; + } has_ref = (ma->ref && ma->ref_id == b->core.tid)? 1 : 0; skip = 0; - if (has_ref && (ma->flag&MPLP_REALN)) bam_prob_realn_core(b, ma->ref, (ma->flag & MPLP_EXT_BAQ)? 3 : 1); - if (has_ref && ma->capQ_thres > 10) { - int q = bam_cap_mapQ(b, ma->ref, ma->capQ_thres); + if (has_ref && (ma->conf->flag&MPLP_REALN)) bam_prob_realn_core(b, ma->ref, (ma->conf->flag & MPLP_EXT_BAQ)? 3 : 1); + if (has_ref && ma->conf->capQ_thres > 10) { + int q = bam_cap_mapQ(b, ma->ref, ma->conf->capQ_thres); if (q < 0) skip = 1; else if (b->core.qual > q) b->core.qual = q; } else if (b->core.flag&BAM_FUNMAP) skip = 1; - else if (b->core.qual < ma->min_mq) skip = 1; - else if ((ma->flag&MPLP_NO_ORPHAN) && (b->core.flag&1) && !(b->core.flag&2)) skip = 1; + else if (b->core.qual < ma->conf->min_mq) skip = 1; + else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&1) && !(b->core.flag&2)) skip = 1; } while (skip); return ret; } @@ -640,10 +647,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) for (i = 0; i < n; ++i) { bam_header_t *h_tmp; data[i] = calloc(1, sizeof(mplp_aux_t)); - data[i]->min_mq = conf->min_mq; - data[i]->flag = conf->flag; - data[i]->capQ_thres = conf->capQ_thres; data[i]->fp = strcmp(fn[i], "-") == 0? bam_dopen(fileno(stdin), "r") : bam_open(fn[i], "r"); + data[i]->conf = conf; h_tmp = bam_header_read(data[i]->fp); bam_smpl_add(sm, fn[i], h_tmp->text); rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list); @@ -801,7 +806,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) } #define MAX_PATH_LEN 1024 -int read_file_list(const char *file_list,int *n,char **argv[]) +static int read_file_list(const char *file_list,int *n,char **argv[]) { char buf[MAX_PATH_LEN]; int len, nfiles; @@ -864,7 +869,7 @@ int bam_mpileup(int argc, char *argv[]) mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; - while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:b:P:o:e:h:Im:F:E")) >= 0) { + while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:b:P:o:e:h:Im:F:EG:")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); @@ -894,6 +899,17 @@ int bam_mpileup(int argc, char *argv[]) case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; + case 'G': { + FILE *fp_rg; + char buf[1024]; + mplp.rghash = bcf_str2id_init(); + if ((fp_rg = fopen(optarg, "r")) == 0) + fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); + while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... + bcf_str2id_add(mplp.rghash, strdup(buf)); + fclose(fp_rg); + } + break; } } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; @@ -914,6 +930,7 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -h INT coefficient for homopolyer errors [%d]\n", mplp.tandemQ); fprintf(stderr, " -m INT minimum gapped reads for indel candidates [%d]\n", mplp.min_support); fprintf(stderr, " -F FLOAT minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac); + fprintf(stderr, " -G FILE exclude read groups listed in FILE [null]\n"); fprintf(stderr, " -A use anomalous read pairs in SNP/INDEL calling\n"); fprintf(stderr, " -g generate BCF output\n"); fprintf(stderr, " -u do not compress BCF output\n"); @@ -925,15 +942,13 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, "Notes: Assuming diploid individuals.\n\n"); return 1; } - if ( file_list ) - { + if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; mpileup(&mplp,nfiles,fn); for (c=0; c= 0) { if (max == n) { -- 2.39.2