elem_t *p = kl_pushp(q, queue);
p->discarded = 0;
p->endpos = endpos; p->score = score;
- p->b = bam_dup1(b);
+ if (p->b == 0) p->b = bam_init1();
+ bam_copy1(p->b, b);
return p;
}
push_queue(queue, b, endpos, score);
} else {
const char *lib;
- const uint8_t *rg;
lib_aux_t *q;
besthash_t *h;
uint32_t key;
int ret;
- rg = bam_aux_get(b, "RG");
- lib = (rg == 0)? 0 : bam_strmap_get(in->header->rg2lib, (char*)(rg + 1));
+ lib = bam_get_library(in->header, b);
q = lib? get_aux(aux, lib) : get_aux(aux, "\t");
+ ++q->n_checked;
h = (c->flag&BAM_FREVERSE)? q->rght : q->left;
key = (c->flag&BAM_FREVERSE)? endpos : c->pos;
k = kh_put(best, h, key, &ret);
if (ret == 0) { // in the hash table
elem_t *p = kh_val(h, k);
+ ++q->n_removed;
if (p->score < score) {
if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue
p->discarded = 1;
for (k = kh_begin(aux); k != kh_end(aux); ++k) {
if (kh_exist(aux, k)) {
- kh_destroy(best, kh_val(aux, k).rght);
- kh_destroy(best, kh_val(aux, k).left);
+ lib_aux_t *q = &kh_val(aux, k);
+ fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
+ (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
+ kh_destroy(best, q->left); kh_destroy(best, q->rght);
free((char*)kh_key(aux, k));
}
}
bam_destroy1(b);
kl_destroy(q, queue);
}
-
-int bam_rmdupse(int argc, char *argv[])
-{
- samfile_t *in, *out;
- if (argc < 3) {
- fprintf(stderr, "Usage: samtools rmdupse <in.bam> <out.bam>\n");
- return 1;
- }
- in = samopen(argv[1], "rb", 0);
- out = samopen(argv[2], "wb", in->header);
- bam_rmdupse_core(in, out, 1);
- samclose(in); samclose(out);
- return 0;
-}