From d062f8f3ca3f897b7a95c81e76d05e94b1d39877 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 8 Dec 2010 22:27:05 +0000 Subject: [PATCH] * samtools-0.1.12-2 (r877) * allow fine-grained control over the selection of indel candidates. The current setting is okay for lowCov and highCov with ~100 samples, but it skips too many indels for highCov with >250 samples. --- bam2bcf.c | 2 ++ bam2bcf.h | 4 +++- bam2bcf_indel.c | 3 +-- bam_plcmd.c | 12 ++++++++++-- bamtk.c | 2 +- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/bam2bcf.c b/bam2bcf.c index 088635c..08120fc 100644 --- a/bam2bcf.c +++ b/bam2bcf.c @@ -23,6 +23,8 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ) bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 100; bca->min_baseQ = min_baseQ; bca->e = errmod_init(1. - theta); + bca->min_frac = 0.002; + bca->min_support = 1; return bca; } diff --git a/bam2bcf.h b/bam2bcf.h index 26b022c..9585672 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -9,7 +9,9 @@ typedef struct __bcf_callaux_t { int capQ, min_baseQ; - int openQ, extQ, tandemQ; + int openQ, extQ, tandemQ; // for indels + int min_support; // for collecting indel candidates + double min_frac; // for collecting indel candidates // for internal uses int max_bases; int indel_types[4]; diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c index 16241d0..239fb8d 100644 --- a/bam2bcf_indel.c +++ b/bam2bcf_indel.c @@ -11,7 +11,6 @@ KHASH_SET_INIT_STR(rg) #define MINUS_CONST 0x10000000 #define INDEL_WINDOW_SIZE 50 -#define MIN_SUPPORT_COEF 500 void *bcf_call_add_rg(void *_hash, const char *hdtext, const char *list) { @@ -165,7 +164,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla // squeeze out identical types for (i = 1, n_types = 1; i < m; ++i) if (aux[i] != aux[i-1]) ++n_types; - if (n_types == 1 || n_alt * MIN_SUPPORT_COEF < n_tot) { // no indels or too few supporting reads + if (n_types == 1 || (double)n_alt / n_tot < bca->min_frac || n_alt < bca->min_support) { // then skip 
free(aux); return -1; } types = (int*)calloc(n_types, sizeof(int)); diff --git a/bam_plcmd.c b/bam_plcmd.c index b562b94..1aa9361 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -536,7 +536,8 @@ int bam_pileup(int argc, char *argv[]) typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth; - int openQ, extQ, tandemQ; + int openQ, extQ, tandemQ, min_support; // for indels + double min_frac; // for indels char *reg, *fn_pos, *pl_list; faidx_t *fai; kh_64_t *hash; @@ -702,6 +703,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) bcr = calloc(sm->n, sizeof(bcf_callret1_t)); bca->rghash = rghash; bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ; + bca->min_frac = conf->min_frac; + bca->min_support = conf->min_support; } ref_tid = -1; ref = 0; iter = bam_mplp_init(n, mplp_func, (void**)data); @@ -858,8 +861,9 @@ int bam_mpileup(int argc, char *argv[]) mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; + mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; - while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:b:P:o:e:h:I")) >= 0) { + while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:b:P:o:e:h:Im:F:")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); @@ -886,6 +890,8 @@ int bam_mpileup(int argc, char *argv[]) case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; + case 'F': mplp.min_frac = atof(optarg); break; + case 'm': mplp.min_support = atoi(optarg); break; } } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; @@ -904,6 +910,8 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -o INT Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ); fprintf(stderr, " -e INT Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ); fprintf(stderr, " -h INT coefficient for homopolyer errors [%d]\n", mplp.tandemQ); + 
fprintf(stderr, " -m INT minimum gapped reads for indel candidates [%d]\n", mplp.min_support); + fprintf(stderr, " -F FLOAT minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac); fprintf(stderr, " -A use anomalous read pairs in SNP/INDEL calling\n"); fprintf(stderr, " -g generate BCF output\n"); fprintf(stderr, " -u do not compress BCF output\n"); diff --git a/bamtk.c b/bamtk.c index a5a0da3..9dbdfde 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.12-1 (r873)" +#define PACKAGE_VERSION "0.1.12-2 (r877)" #endif int bam_taf2baf(int argc, char *argv[]); -- 2.39.2