From 2c05c1d8da65572e6587702ba13ad8a26b2200c2 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 15 Jul 2011 20:04:59 +0000 Subject: [PATCH] allow setting the seed in view subsampling --- sam_view.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sam_view.c b/sam_view.c index ad74962..efda4e8 100644 --- a/sam_view.c +++ b/sam_view.c @@ -22,7 +22,7 @@ typedef khash_t(rg) *rghash_t; // FIXME: we'd better use no global variables... static rghash_t g_rghash = 0; static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0; -static float g_subsam = 2.; +static float g_subsam = -1; static char *g_library, *g_rg; static void *g_bed; @@ -36,9 +36,10 @@ static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b) return 1; if (g_bed && b->core.tid >= 0 && !bed_overlap(g_bed, h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b)))) return 1; - if (g_subsam > 0. && g_subsam < 1.) { - uint32_t k = __ac_X31_hash_string(bam1_qname(b)); - if (k%1024 / 1024.0 >= g_subsam) return 1; + if (g_subsam > 0.) { + int x = (int)(g_subsam + .499); + uint32_t k = __ac_X31_hash_string(bam1_qname(b)) + x; + if (k%1024 / 1024.0 >= g_subsam - x) return 1; } if (g_rg || g_rghash) { uint8_t *s = bam_aux_get(b, "RG"); @@ -286,7 +287,7 @@ static int usage(int is_long_help) fprintf(stderr, " -q INT minimum mapping quality [0]\n"); fprintf(stderr, " -l STR only output reads in library STR [null]\n"); fprintf(stderr, " -r STR only output reads in read group STR [null]\n"); - fprintf(stderr, " -s FLOAT fraction of templates (read paris) to subsample [1]\n"); + fprintf(stderr, " -s FLOAT fraction of templates to subsample; integer part as seed [-1]\n"); fprintf(stderr, " -? longer help\n"); fprintf(stderr, "\n"); if (is_long_help) -- 2.39.2