X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=misc%2Fbamcheck.c;h=a438adac3453e73a86f65017bb72ca54ba3ac4c4;hb=4a1c610ee1da37d3b903733462085d40feba79e2;hp=66a6861716a6f3aa1be208acae460530090d2b96;hpb=2e9880c7bfc5b88c6a0bebefd310157a33c84a36;p=samtools.git diff --git a/misc/bamcheck.c b/misc/bamcheck.c index 66a6861..a438ada 100644 --- a/misc/bamcheck.c +++ b/misc/bamcheck.c @@ -293,7 +293,7 @@ void count_indels(stats_t *stats,bam1_t *bam_line) if ( cig==1 ) { - int idx = is_fwd ? icycle : read_len-icycle; + int idx = is_fwd ? icycle : read_len-icycle-ncig; if ( idx<0 ) error("FIXME: read_len=%d vs icycle=%d\n", read_len,icycle); if ( idx >= stats->nbases || idx<0 ) error("FIXME: %d vs %d, %s:%d %s\n", idx,stats->nbases, stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line)); @@ -515,9 +515,9 @@ void realloc_gcd_buffer(stats_t *stats, int seq_len) int n = 1 + stats->gcd_ref_size / (stats->gcd_bin_size - seq_len); if ( n <= stats->igcd ) - error("Uh: n=%d igcd=%d\n", n,stats->igcd ); + error("The --GC-depth bin size is too small or reference genome too big; please decrease the bin size or increase the reference length\n"); - if ( n >= stats->ngcd ) + if ( n > stats->ngcd ) { stats->gcd = realloc(stats->gcd, n*sizeof(gc_depth_t)); if ( !stats->gcd ) @@ -1043,6 +1043,8 @@ void output_stats(stats_t *stats) printf("# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)\n"); for (ilen=0; ilen<=stats->nbases; ilen++) { + // For deletions we print the index of the cycle before the deleted base (1-based) and for insertions + // the index of the cycle of the first inserted base (also 1-based) if ( stats->ins_cycles_1st[ilen]>0 || stats->ins_cycles_2nd[ilen]>0 || stats->del_cycles_1st[ilen]>0 || stats->del_cycles_2nd[ilen]>0 ) printf("IC\t%d\t%ld\t%ld\t%ld\t%ld\n", ilen+1, (long)stats->ins_cycles_1st[ilen], (long)stats->ins_cycles_2nd[ilen], (long)stats->del_cycles_1st[ilen], (long)stats->del_cycles_2nd[ilen]); } @@ -1277,7 +1279,7 @@ void error(const char *format, ...) printf(" -d, --remove-dups Exlude from statistics reads marked as duplicates\n"); printf(" -f, --required-flag Required flag, 0 for unset [0]\n"); printf(" -F, --filtering-flag Filtering flag, 0 for unset [0]\n"); - printf(" --GC-depth Bin size for GC-depth graph and the maximum reference length [2e4,6e9]\n"); + printf(" --GC-depth Bin size for GC-depth graph and the maximum reference length [2e4,4.2e9]\n"); printf(" -h, --help This help message\n"); printf(" -i, --insert-size Maximum insert size [8000]\n"); printf(" -I, --id Include only listed read group or sample name\n"); @@ -1316,7 +1318,7 @@ int main(int argc, char *argv[]) stats->max_qual = 40; stats->isize_main_bulk = 0.99; // There are always outliers at the far end stats->gcd_bin_size = 20e3; - stats->gcd_ref_size = 3e9; + stats->gcd_ref_size = 4.2e9; stats->rseq_pos = -1; stats->tid = stats->gcd_pos = stats->igcd = -1; stats->is_sorted = 1;