X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_import.c;fp=bam_import.c;h=19cd75f4352f2d144b1d0a36ed0925cab6a50cc8;hb=a8c861864486f4c30dc664e3378ffd6e021b6364;hp=4c9395b7dccf6cc7adb071a8e93e5845a107690e;hpb=c41114488f4daad5679b1617029b4fef4acad3d8;p=samtools.git diff --git a/bam_import.c b/bam_import.c index 4c9395b..19cd75f 100644 --- a/bam_import.c +++ b/bam_import.c @@ -359,7 +359,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) for (i = 0, s = str->s; i != c->n_cigar; ++i) { x = strtol(s, &t, 10); op = toupper(*t); - if (op == 'M') op = BAM_CMATCH; + if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH; else if (op == 'I') op = BAM_CINS; else if (op == 'D') op = BAM_CDEL; else if (op == 'N') op = BAM_CREF_SKIP; @@ -373,7 +373,10 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation"); c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b))); doff += c->n_cigar * 4; - } else c->bin = bam_reg2bin(c->pos, c->pos + 1); + } else { + if (!(c->flag&BAM_FUNMAP)) parse_error(fp->n_lines, "mapped sequence without CIGAR"); + c->bin = bam_reg2bin(c->pos, c->pos + 1); + } } { // mtid, mpos, isize ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; @@ -386,16 +389,18 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) } { // seq and qual int i; - uint8_t *p; + uint8_t *p = 0; if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -5; // seq z += str->l + 1; - c->l_qseq = strlen(str->s); - if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) - parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); - p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; - memset(p, 0, (c->l_qseq+1)/2); - for (i = 0; i < c->l_qseq; ++i) - p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2); + if (strcmp(str->s, "*")) { + c->l_qseq = strlen(str->s); + if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) + parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); + p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; + memset(p, 0, (c->l_qseq+1)/2); + for (i = 0; i < c->l_qseq; ++i) + p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2); + } else c->l_qseq = 0; if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual z += str->l + 1; if (strcmp(str->s, "*") && c->l_qseq != strlen(str->s))