X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_rmdupse.c;h=e7dbdc77d05f05d9c9622b484557df3504d6f4c1;hb=9f118264ea012adc21a46d7c03eaad4f9ce7d4d4;hp=e23de1c4e17928f85c520549e8db56de9f40bb9d;hpb=d0845804f9bd1790e6bc8a2c91132642e20bc51e;p=samtools.git diff --git a/bam_rmdupse.c b/bam_rmdupse.c index e23de1c..e7dbdc7 100644 --- a/bam_rmdupse.c +++ b/bam_rmdupse.c @@ -53,7 +53,8 @@ static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, in elem_t *p = kl_pushp(q, queue); p->discarded = 0; p->endpos = endpos; p->score = score; - p->b = bam_dup1(b); + if (p->b == 0) p->b = bam_init1(); + bam_copy1(p->b, b); return p; } @@ -116,19 +117,19 @@ void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se) push_queue(queue, b, endpos, score); } else { const char *lib; - const uint8_t *rg; lib_aux_t *q; besthash_t *h; uint32_t key; int ret; - rg = bam_aux_get(b, "RG"); - lib = (rg == 0)? 0 : bam_strmap_get(in->header->rg2lib, (char*)(rg + 1)); + lib = bam_get_library(in->header, b); q = lib? get_aux(aux, lib) : get_aux(aux, "\t"); + ++q->n_checked; h = (c->flag&BAM_FREVERSE)? q->rght : q->left; key = (c->flag&BAM_FREVERSE)? endpos : c->pos; k = kh_put(best, h, key, &ret); if (ret == 0) { // in the hash table elem_t *p = kh_val(h, k); + ++q->n_removed; if (p->score < score) { if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue p->discarded = 1; @@ -145,8 +146,10 @@ void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se) for (k = kh_begin(aux); k != kh_end(aux); ++k) { if (kh_exist(aux, k)) { - kh_destroy(best, kh_val(aux, k).rght); - kh_destroy(best, kh_val(aux, k).left); + lib_aux_t *q = &kh_val(aux, k); + fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed, + (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k)); + kh_destroy(best, q->left); kh_destroy(best, q->rght); free((char*)kh_key(aux, k)); } } @@ -154,17 +157,3 @@ void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se) bam_destroy1(b); kl_destroy(q, queue); } - -int bam_rmdupse(int argc, char *argv[]) -{ - samfile_t *in, *out; - if (argc < 3) { - fprintf(stderr, "Usage: samtools rmdupse \n"); - return 1; - } - in = samopen(argv[1], "rb", 0); - out = samopen(argv[2], "wb", in->header); - bam_rmdupse_core(in, out, 1); - samclose(in); samclose(out); - return 0; -}