X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_import.c;h=da2bf942ed90573cb3c964b4ced593ba7cbcf3d8;hb=9f118264ea012adc21a46d7c03eaad4f9ce7d4d4;hp=29516c92ff42593313caf2833c38213d62ebdc52;hpb=80658e4d152b53bd55927c432ceece7702ab49d7;p=samtools.git diff --git a/bam_import.c b/bam_import.c index 29516c9..da2bf94 100644 --- a/bam_import.c +++ b/bam_import.c @@ -14,7 +14,7 @@ #include "kseq.h" #include "khash.h" -KSTREAM_INIT(gzFile, gzread, 8192) +KSTREAM_INIT(gzFile, gzread, 16384) KHASH_MAP_INIT_STR(ref, uint64_t) void bam_init_header_hash(bam_header_t *header); @@ -183,7 +183,7 @@ static inline void append_text(bam_header_t *header, kstring_t *str) // Sanity check if ( header->l_text+str->l+1 >= header->n_text ) { - fprintf(stderr,"append_text FIXME: %ld>=%ld, x=%ld,y=%ld\n", header->l_text+str->l+1,header->n_text,x,y); + fprintf(stderr,"append_text FIXME: %ld>=%ld, x=%ld,y=%ld\n", header->l_text+str->l+1,(long)header->n_text,x,y); abort(); } strncpy(header->text + header->l_text, str->s, str->l+1); // we cannot use strcpy() here. @@ -291,27 +291,32 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -3; z += str->l + 1; if (str->s[0] != '*') { + uint32_t *cigar; for (s = str->s; *s; ++s) { - if (isalpha(*s)) ++c->n_cigar; + if ((isalpha(*s)) || (*s=='=')) ++c->n_cigar; else if (!isdigit(*s)) parse_error(fp->n_lines, "invalid CIGAR character"); } b->data = alloc_data(b, doff + c->n_cigar * 4); + cigar = bam1_cigar(b); for (i = 0, s = str->s; i != c->n_cigar; ++i) { x = strtol(s, &t, 10); op = toupper(*t); - if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH; + if (op == 'M') op = BAM_CMATCH; else if (op == 'I') op = BAM_CINS; else if (op == 'D') op = BAM_CDEL; else if (op == 'N') op = BAM_CREF_SKIP; else if (op == 'S') op = BAM_CSOFT_CLIP; else if (op == 'H') op = BAM_CHARD_CLIP; else if (op == 'P') op = BAM_CPAD; + else if (op == '=') op = BAM_CEQUAL; + else if (op == 'X') op = BAM_CDIFF; + else if (op == 'B') op = BAM_CBACK; else parse_error(fp->n_lines, "invalid CIGAR operation"); s = t + 1; - bam1_cigar(b)[i] = x << BAM_CIGAR_SHIFT | op; + cigar[i] = bam_cigar_gen(x, op); } if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation"); - c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b))); + c->bin = bam_reg2bin(c->pos, bam_calend(c, cigar)); doff += c->n_cigar * 4; } else { if (!(c->flag&BAM_FUNMAP)) { @@ -337,8 +342,11 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) z += str->l + 1; if (strcmp(str->s, "*")) { c->l_qseq = strlen(str->s); - if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) + if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) { + fprintf(stderr, "Line %ld, sequence length %i vs %i from CIGAR\n", + (long)fp->n_lines, c->l_qseq, (int32_t)bam_cigar2qlen(c, bam1_cigar(b))); parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); + } p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; memset(p, 0, (c->l_qseq+1)/2); for (i = 0; i < c->l_qseq; ++i) @@ -454,6 +462,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) } b->l_aux = doff - doff0; b->data_len = doff; + if (bam_no_B) bam_remove_B(b); return z; }