* Allow to set a maximum per-sample depth to reduce memory. However,
BAQ computation is still applied to every read. The speed is not
improved.
const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
void bam_plp_set_mask(bam_plp_t iter, int mask);
const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
void bam_plp_set_mask(bam_plp_t iter, int mask);
+ void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
void bam_plp_reset(bam_plp_t iter);
void bam_plp_destroy(bam_plp_t iter);
void bam_plp_reset(bam_plp_t iter);
void bam_plp_destroy(bam_plp_t iter);
bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
void bam_mplp_destroy(bam_mplp_t iter);
bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
void bam_mplp_destroy(bam_mplp_t iter);
+ void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
/*! @typedef
int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
/*! @typedef
{
uint8_t *data = bdst->data;
int m_data = bdst->m_data; // backup data and m_data
{
uint8_t *data = bdst->data;
int m_data = bdst->m_data; // backup data and m_data
- if (m_data < bsrc->m_data) { // double the capacity
- m_data = bsrc->m_data; kroundup32(m_data);
+ if (m_data < bsrc->data_len) { // double the capacity
+ m_data = bsrc->data_len; kroundup32(m_data);
data = (uint8_t*)realloc(data, m_data);
}
memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data
data = (uint8_t*)realloc(data, m_data);
}
memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data
mempool_t *mp;
lbnode_t *head, *tail, *dummy;
int32_t tid, pos, max_tid, max_pos;
mempool_t *mp;
lbnode_t *head, *tail, *dummy;
int32_t tid, pos, max_tid, max_pos;
- int is_eof, flag_mask, max_plp, error;
+ int is_eof, flag_mask, max_plp, error, maxcnt;
bam_pileup1_t *plp;
// for the "auto" interface only
bam1_t *b;
bam_pileup1_t *plp;
// for the "auto" interface only
bam1_t *b;
iter->dummy = mp_alloc(iter->mp);
iter->max_tid = iter->max_pos = -1;
iter->flag_mask = BAM_DEF_MASK;
iter->dummy = mp_alloc(iter->mp);
iter->max_tid = iter->max_pos = -1;
iter->flag_mask = BAM_DEF_MASK;
if (func) {
iter->func = func;
iter->data = data;
if (func) {
iter->func = func;
iter->data = data;
if (b) {
if (b->core.tid < 0) return 0;
if (b->core.flag & iter->flag_mask) return 0;
if (b) {
if (b->core.tid < 0) return 0;
if (b->core.flag & iter->flag_mask) return 0;
+ if (iter->tid == b->core.tid && iter->pos == b->core.pos && iter->mp->cnt > iter->maxcnt) return 0;
bam_copy1(&iter->tail->b, b);
iter->tail->beg = b->core.pos; iter->tail->end = bam_calend(&b->core, bam1_cigar(b));
iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t
bam_copy1(&iter->tail->b, b);
iter->tail->beg = b->core.pos; iter->tail->end = bam_calend(&b->core, bam1_cigar(b));
iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t
const bam_pileup1_t *plp;
if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }
if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
const bam_pileup1_t *plp;
if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }
if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
+ else { // no pileup line can be obtained; read alignments
*_n_plp = 0;
if (iter->is_eof) return 0;
while (iter->func(iter->data, iter->b) >= 0) {
*_n_plp = 0;
if (iter->is_eof) return 0;
while (iter->func(iter->data, iter->b) >= 0) {
return 0;
}
if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
return 0;
}
if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
+ // otherwise no pileup line can be returned; read the next alignment.
}
bam_plp_push(iter, 0);
if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
}
bam_plp_push(iter, 0);
if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
iter->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask);
}
iter->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask);
}
+void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt)
+{
+ iter->maxcnt = maxcnt;
+}
+
/*****************
* callback APIs *
*****************/
/*****************
* callback APIs *
*****************/
+void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt)
+{
+ int i;
+ for (i = 0; i < iter->n; ++i)
+ iter->iter[i]->maxcnt = maxcnt;
+}
+
void bam_mplp_destroy(bam_mplp_t iter)
{
int i;
void bam_mplp_destroy(bam_mplp_t iter)
{
int i;
#define MPLP_FMT_SP 0x200
typedef struct {
#define MPLP_FMT_SP 0x200
typedef struct {
- int max_mq, min_mq, flag, min_baseQ, capQ_thres;
+ int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth;
char *reg, *fn_pos;
faidx_t *fai;
kh_64_t *hash;
char *reg, *fn_pos;
faidx_t *fai;
kh_64_t *hash;
static int mpileup(mplp_conf_t *conf, int n, char **fn)
{
mplp_aux_t **data;
static int mpileup(mplp_conf_t *conf, int n, char **fn)
{
mplp_aux_t **data;
- int i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid;
+ int i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid, max_depth;
const bam_pileup1_t **plp;
bam_mplp_t iter;
bam_header_t *h = 0;
const bam_pileup1_t **plp;
bam_mplp_t iter;
bam_header_t *h = 0;
}
ref_tid = -1; ref = 0;
iter = bam_mplp_init(n, mplp_func, (void**)data);
}
ref_tid = -1; ref = 0;
iter = bam_mplp_init(n, mplp_func, (void**)data);
+ max_depth = conf->max_depth;
+ if (max_depth * sm->n > 1<<20)
+ fprintf(stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
+ if (max_depth * sm->n < 8000) {
+ max_depth = 8000 / sm->n;
+ fprintf(stderr, "<%s> Set max per-sample depth to %d\n", __func__, max_depth);
+ }
+ bam_mplp_set_maxcnt(iter, max_depth);
while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
if (hash) {
while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
if (hash) {
mplp.max_mq = 60;
mplp.min_baseQ = 13;
mplp.capQ_thres = 0;
mplp.max_mq = 60;
mplp.min_baseQ = 13;
mplp.capQ_thres = 0;
mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
- while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BDS")) >= 0) {
+ while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BDSd:")) >= 0) {
switch (c) {
case 'f':
mplp.fai = fai_load(optarg);
if (mplp.fai == 0) return 1;
break;
switch (c) {
case 'f':
mplp.fai = fai_load(optarg);
if (mplp.fai == 0) return 1;
break;
+ case 'd': mplp.max_depth = atoi(optarg); break;
case 'r': mplp.reg = strdup(optarg); break;
case 'l': mplp.fn_pos = strdup(optarg); break;
case 'g': mplp.flag |= MPLP_GLF; break;
case 'r': mplp.reg = strdup(optarg); break;
case 'l': mplp.fn_pos = strdup(optarg); break;
case 'g': mplp.flag |= MPLP_GLF; break;
fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq);
fprintf(stderr, " -Q INT min base quality [%d]\n", mplp.min_baseQ);
fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq);
fprintf(stderr, " -Q INT min base quality [%d]\n", mplp.min_baseQ);
fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
+ fprintf(stderr, " -d INT max per-sample depth [%d]\n", mplp.max_depth);
fprintf(stderr, " -g generate BCF output\n");
fprintf(stderr, " -u do not compress BCF output\n");
fprintf(stderr, " -B disable BAQ computation\n");
fprintf(stderr, " -g generate BCF output\n");
fprintf(stderr, " -u do not compress BCF output\n");
fprintf(stderr, " -B disable BAQ computation\n");
#endif
#ifndef PACKAGE_VERSION
#endif
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.9-1 (r786)"
+#define PACKAGE_VERSION "0.1.9-2 (r787)"
#endif
int bam_taf2baf(int argc, char *argv[]);
#endif
int bam_taf2baf(int argc, char *argv[]);