if ( cig==1 )
{
- int idx = is_fwd ? icycle : read_len-icycle;
+ int idx = is_fwd ? icycle : read_len-icycle-ncig;
if ( idx<0 )
error("FIXME: read_len=%d vs icycle=%d\n", read_len,icycle);
if ( idx >= stats->nbases || idx<0 ) error("FIXME: %d vs %d, %s:%d %s\n", idx,stats->nbases, stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line));
int n = 1 + stats->gcd_ref_size / (stats->gcd_bin_size - seq_len);
if ( n <= stats->igcd )
- error("Uh: n=%d igcd=%d\n", n,stats->igcd );
+ error("The --GC-depth bin size is too small or reference genome too big; please decrease the bin size or increase the reference length\n");
- if ( n >= stats->ngcd )
+ if ( n > stats->ngcd )
{
stats->gcd = realloc(stats->gcd, n*sizeof(gc_depth_t));
if ( !stats->gcd )
printf("# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)\n");
for (ilen=0; ilen<=stats->nbases; ilen++)
{
+ // For deletions we print the index of the cycle before the deleted base (1-based) and for insertions
+ // the index of the cycle of the first inserted base (also 1-based)
if ( stats->ins_cycles_1st[ilen]>0 || stats->ins_cycles_2nd[ilen]>0 || stats->del_cycles_1st[ilen]>0 || stats->del_cycles_2nd[ilen]>0 )
printf("IC\t%d\t%ld\t%ld\t%ld\t%ld\n", ilen+1, (long)stats->ins_cycles_1st[ilen], (long)stats->ins_cycles_2nd[ilen], (long)stats->del_cycles_1st[ilen], (long)stats->del_cycles_2nd[ilen]);
}
printf(" -d, --remove-dups Exlude from statistics reads marked as duplicates\n");
printf(" -f, --required-flag <int> Required flag, 0 for unset [0]\n");
printf(" -F, --filtering-flag <int> Filtering flag, 0 for unset [0]\n");
- printf(" --GC-depth <float,float> Bin size for GC-depth graph and the maximum reference length [2e4,6e9]\n");
+ printf(" --GC-depth <float,float> Bin size for GC-depth graph and the maximum reference length [2e4,4.2e9]\n");
printf(" -h, --help This help message\n");
printf(" -i, --insert-size <int> Maximum insert size [8000]\n");
printf(" -I, --id <string> Include only listed read group or sample name\n");
stats->max_qual = 40;
stats->isize_main_bulk = 0.99; // There are always outliers at the far end
stats->gcd_bin_size = 20e3;
- stats->gcd_ref_size = 3e9;
+ stats->gcd_ref_size = 4.2e9;
stats->rseq_pos = -1;
stats->tid = stats->gcd_pos = stats->igcd = -1;
stats->is_sorted = 1;