int n = 1 + stats->gcd_ref_size / (stats->gcd_bin_size - seq_len);
if ( n <= stats->igcd )
- error("Uh: n=%d igcd=%d\n", n,stats->igcd );
+ error("The --GC-depth bin size is too small or reference genome too big; please decrease the bin size or increase the reference length\n");
- if ( n >= stats->ngcd )
+ if ( n > stats->ngcd )
{
stats->gcd = realloc(stats->gcd, n*sizeof(gc_depth_t));
if ( !stats->gcd )
}
printf("# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.\n");
- printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]);
+ if ( stats->cov[0] )
+ printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]);
int icov;
for (icov=1; icov<stats->ncov-1; icov++)
- printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]);
- printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]);
-
+ if ( stats->cov[icov] )
+ printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]);
+ if ( stats->cov[stats->ncov-1] )
+ printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]);
// Calculate average GC content, then sort by GC and depth
printf("# GC-depth. Use `grep ^GCD | cut -f 2-` to extract this part. The columns are: GC%%, unique sequence percentiles, 10th, 25th, 50th, 75th and 90th depth percentile\n");
printf(" -d, --remove-dups Exlude from statistics reads marked as duplicates\n");
printf(" -f, --required-flag <int> Required flag, 0 for unset [0]\n");
printf(" -F, --filtering-flag <int> Filtering flag, 0 for unset [0]\n");
- printf(" --GC-depth <float,float> Bin size for GC-depth graph and the maximum reference length [2e4,6e9]\n");
+ printf(" --GC-depth <float,float> Bin size for GC-depth graph and the maximum reference length [2e4,4.2e9]\n");
printf(" -h, --help This help message\n");
printf(" -i, --insert-size <int> Maximum insert size [8000]\n");
printf(" -I, --id <string> Include only listed read group or sample name\n");
stats->max_qual = 40;
stats->isize_main_bulk = 0.99; // There are always outliers at the far end
stats->gcd_bin_size = 20e3;
- stats->gcd_ref_size = 3e9;
+ stats->gcd_ref_size = 4.2e9;
stats->rseq_pos = -1;
stats->tid = stats->gcd_pos = stats->igcd = -1;
stats->is_sorted = 1;