]> git.donarmstrong.com Git - samtools.git/blobdiff - bam_rmdupse.c
r579: after merging Peter's depad changes
[samtools.git] / bam_rmdupse.c
index e23de1c4e17928f85c520549e8db56de9f40bb9d..e7dbdc77d05f05d9c9622b484557df3504d6f4c1 100644 (file)
@@ -53,7 +53,8 @@ static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, in
        elem_t *p = kl_pushp(q, queue);
        p->discarded = 0;
        p->endpos = endpos; p->score = score;
-       p->b = bam_dup1(b);
+       if (p->b == 0) p->b = bam_init1();
+       bam_copy1(p->b, b);
        return p;
 }
 
@@ -116,19 +117,19 @@ void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se)
                        push_queue(queue, b, endpos, score);
                } else {
                        const char *lib;
-                       const uint8_t *rg;
                        lib_aux_t *q;
                        besthash_t *h;
                        uint32_t key;
                        int ret;
-                       rg = bam_aux_get(b, "RG");
-                       lib = (rg == 0)? 0 : bam_strmap_get(in->header->rg2lib, (char*)(rg + 1));
+                       lib = bam_get_library(in->header, b);
                        q = lib? get_aux(aux, lib) : get_aux(aux, "\t");
+                       ++q->n_checked;
                        h = (c->flag&BAM_FREVERSE)? q->rght : q->left;
                        key = (c->flag&BAM_FREVERSE)? endpos : c->pos;
                        k = kh_put(best, h, key, &ret);
                        if (ret == 0) { // in the hash table
                                elem_t *p = kh_val(h, k);
+                               ++q->n_removed;
                                if (p->score < score) {
                                        if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue
                                                p->discarded = 1;
@@ -145,8 +146,10 @@ void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se)
 
        for (k = kh_begin(aux); k != kh_end(aux); ++k) {
                if (kh_exist(aux, k)) {
-                       kh_destroy(best, kh_val(aux, k).rght);
-                       kh_destroy(best, kh_val(aux, k).left);
+                       lib_aux_t *q = &kh_val(aux, k);
+                       fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
+                                       (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
+                       kh_destroy(best, q->left); kh_destroy(best, q->rght);
                        free((char*)kh_key(aux, k));
                }
        }
@@ -154,17 +157,3 @@ void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se)
        bam_destroy1(b);
        kl_destroy(q, queue);
 }
-
-int bam_rmdupse(int argc, char *argv[])
-{
-       samfile_t *in, *out;
-       if (argc < 3) {
-               fprintf(stderr, "Usage: samtools rmdupse <in.bam> <out.bam>\n");
-               return 1;
-       }
-       in = samopen(argv[1], "rb", 0);
-       out = samopen(argv[2], "wb", in->header);
-       bam_rmdupse_core(in, out, 1);
-       samclose(in); samclose(out);
-       return 0;
-}