X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=misc%2Fbamcheck.c;h=d72e8008d9d1eb84810406a3a7bec15c1b7542a0;hb=72d6457f7f361c323f62bd2d3170980132ba2113;hp=532d105c820be6283368205f05b580cea4ebd4eb;hpb=bfcd9b39b85184a8815529b245280fd45f190253;p=samtools.git diff --git a/misc/bamcheck.c b/misc/bamcheck.c index 532d105..d72e800 100644 --- a/misc/bamcheck.c +++ b/misc/bamcheck.c @@ -515,9 +515,9 @@ void realloc_gcd_buffer(stats_t *stats, int seq_len) int n = 1 + stats->gcd_ref_size / (stats->gcd_bin_size - seq_len); if ( n <= stats->igcd ) - error("Uh: n=%d igcd=%d\n", n,stats->igcd ); + error("The --GC-depth bin size is too small or reference genome too big; please decrease the bin size or increase the reference length\n"); - if ( n >= stats->ngcd ) + if ( n > stats->ngcd ) { stats->gcd = realloc(stats->gcd, n*sizeof(gc_depth_t)); if ( !stats->gcd ) @@ -1050,12 +1050,14 @@ void output_stats(stats_t *stats) } printf("# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.\n"); - printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]); + if ( stats->cov[0] ) + printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]); int icov; for (icov=1; icov<stats->ncov-1; icov++) - printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]); - printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]); - + if ( stats->cov[icov] ) + printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]); + if ( stats->cov[stats->ncov-1] ) + printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, 
(long)stats->cov[stats->ncov-1]); // Calculate average GC content, then sort by GC and depth printf("# GC-depth. Use `grep ^GCD | cut -f 2-` to extract this part. The columns are: GC%%, unique sequence percentiles, 10th, 25th, 50th, 75th and 90th depth percentile\n"); @@ -1279,7 +1281,7 @@ void error(const char *format, ...) printf(" -d, --remove-dups Exlude from statistics reads marked as duplicates\n"); printf(" -f, --required-flag Required flag, 0 for unset [0]\n"); printf(" -F, --filtering-flag Filtering flag, 0 for unset [0]\n"); - printf(" --GC-depth Bin size for GC-depth graph and the maximum reference length [2e4,6e9]\n"); + printf(" --GC-depth Bin size for GC-depth graph and the maximum reference length [2e4,4.2e9]\n"); printf(" -h, --help This help message\n"); printf(" -i, --insert-size Maximum insert size [8000]\n"); printf(" -I, --id Include only listed read group or sample name\n"); @@ -1318,7 +1320,7 @@ int main(int argc, char *argv[]) stats->max_qual = 40; stats->isize_main_bulk = 0.99; // There are always outliers at the far end stats->gcd_bin_size = 20e3; - stats->gcd_ref_size = 3e9; + stats->gcd_ref_size = 4.2e9; stats->rseq_pos = -1; stats->tid = stats->gcd_pos = stats->igcd = -1; stats->is_sorted = 1;