X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=misc%2Fbamcheck.c;h=efccf9a112703f7801e5e523598f06ea447f28f7;hb=b224b0da969d8997e573cc0005dc815b704c0398;hp=76a9f6ee04a8247ebbfd93b35a2e1b0c3e1ca8eb;hpb=0ab78ddb98818366103c11330526b8a2ea33a757;p=samtools.git diff --git a/misc/bamcheck.c b/misc/bamcheck.c index 76a9f6e..efccf9a 100644 --- a/misc/bamcheck.c +++ b/misc/bamcheck.c @@ -12,10 +12,9 @@ considered, even small overlap is good enough to include the read in the stats. */ -#define BAMCHECK_VERSION "2012-03-29" +#define BAMCHECK_VERSION "2012-04-04" #define _ISOC99_SOURCE -#define _GNU_SOURCE #include #include #include @@ -282,8 +281,9 @@ void count_indels(stats_t *stats,bam1_t *bam_line) if ( cig==1 ) { - int idx = is_fwd ? icycle : read_len-icycle-1; - if ( idx >= stats->nbases ) error("FIXME: %d vs %d\n", idx,stats->nbases); + int idx = is_fwd ? icycle : read_len-icycle; + if ( idx<0 ) error("FIXME: read_len=%d vs icycle=%d\n", read_len,icycle); + if ( idx >= stats->nbases || idx<0 ) error("FIXME: %d vs %d\n", idx,stats->nbases); stats->ins_cycles[idx]++; icycle += ncig; if ( ncig<=stats->nindels ) @@ -292,7 +292,8 @@ void count_indels(stats_t *stats,bam1_t *bam_line) } if ( cig==2 ) { - int idx = is_fwd ? icycle : read_len-icycle-1; + int idx = is_fwd ? icycle-1 : read_len-icycle-1; + if ( idx<0 ) continue; // discard meaningless deletions if ( idx >= stats->nbases ) error("FIXME: %d vs %d\n", idx,stats->nbases); stats->del_cycles[idx]++; if ( ncig<=stats->nindels ) @@ -515,15 +516,15 @@ void realloc_buffers(stats_t *stats, int seq_len) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*sizeof(uint64_t)); memset(stats->deletions + stats->nbases, 0, (n-stats->nbases)*sizeof(uint64_t)); - stats->ins_cycles = realloc(stats->ins_cycles, n*sizeof(uint64_t)); + stats->ins_cycles = realloc(stats->ins_cycles, (n+1)*sizeof(uint64_t)); if ( !stats->ins_cycles ) - error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*sizeof(uint64_t)); - memset(stats->ins_cycles + stats->nbases, 0, (n-stats->nbases)*sizeof(uint64_t)); + error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,(n+1)*sizeof(uint64_t)); + memset(stats->ins_cycles + stats->nbases + 1, 0, (n+1-stats->nbases)*sizeof(uint64_t)); - stats->del_cycles = realloc(stats->del_cycles, n*sizeof(uint64_t)); + stats->del_cycles = realloc(stats->del_cycles, (n+1)*sizeof(uint64_t)); if ( !stats->del_cycles ) - error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*sizeof(uint64_t)); - memset(stats->del_cycles + stats->nbases, 0, (n-stats->nbases)*sizeof(uint64_t)); + error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,(n+1)*sizeof(uint64_t)); + memset(stats->del_cycles + stats->nbases + 1, 0, (n+1-stats->nbases)*sizeof(uint64_t)); stats->nbases = n; @@ -932,7 +933,7 @@ void output_stats(stats_t *stats) } printf("# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions, number of deletions\n"); - for (ilen=0; ilennbases; ilen++) + for (ilen=0; ilen<=stats->nbases; ilen++) { if ( stats->ins_cycles[ilen]>0 || stats->del_cycles[ilen]>0 ) printf("IC\t%d\t%ld\t%ld\n", ilen+1, (long)stats->ins_cycles[ilen], (long)stats->del_cycles[ilen]); @@ -982,6 +983,40 @@ void output_stats(stats_t *stats) void bam_init_header_hash(bam_header_t *header); +size_t mygetline(char **line, size_t *n, FILE *fp) +{ + if (line == NULL || n == NULL || fp == NULL) + { + errno = EINVAL; + return -1; + } + if (*n==0 || !*line) + { + *line = NULL; + *n = 0; + } + + size_t nread=0; + int c; + while ((c=getc(fp))!= EOF && c!='\n') + { + if ( ++nread>=*n ) + { + *n += 255; + *line = realloc(*line, sizeof(char)*(*n)); + } + (*line)[nread-1] = c; + } + if ( nread>=*n ) + { + *n += 255; + *line = realloc(*line, sizeof(char)*(*n)); + } + (*line)[nread] = 0; + return nread>0 ? nread : -1; + +} + void init_regions(stats_t *stats, char *file) { khiter_t iter; @@ -998,13 +1033,13 @@ void init_regions(stats_t *stats, char *file) ssize_t nread; int warned = 0; int prev_tid=-1, prev_pos=-1; - while ((nread = getline(&line, &len, fp)) != -1) + while ((nread = mygetline(&line, &len, fp)) != -1) { if ( line[0] == '#' ) continue; int i = 0; while ( i=nread ) error("Could not parse the file: %s\n", file); + if ( i>=nread ) error("Could not parse the file: %s [%s]\n", file,line); line[i] = 0; iter = kh_get(str, header_hash, line); @@ -1210,8 +1245,8 @@ int main(int argc, char *argv[]) stats->read_lengths = calloc(stats->nbases,sizeof(uint64_t)); stats->insertions = calloc(stats->nbases,sizeof(uint64_t)); stats->deletions = calloc(stats->nbases,sizeof(uint64_t)); - stats->ins_cycles = calloc(stats->nbases,sizeof(uint64_t)); - stats->del_cycles = calloc(stats->nbases,sizeof(uint64_t)); + stats->ins_cycles = calloc(stats->nbases+1,sizeof(uint64_t)); + stats->del_cycles = calloc(stats->nbases+1,sizeof(uint64_t)); if ( targets ) init_regions(stats, targets);