X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_import.c;h=eaa9452080e2624bc38a131265171bd2b53d4d11;hb=d410ecbf60b5aed90ee20c6ba40637ca50241edd;hp=4c9395b7dccf6cc7adb071a8e93e5845a107690e;hpb=70e923225f586027ab0954d23cb03a8ea0bfdbc5;p=samtools.git diff --git a/bam_import.c b/bam_import.c index 4c9395b..eaa9452 100644 --- a/bam_import.c +++ b/bam_import.c @@ -211,7 +211,6 @@ int sam_header_parse_rg(bam_header_t *h) break; } if (r < p) r = p; - s = r + 3; } if (rgid->l && rglib->l) { bam_strmap_put(h->rg2lib, rgid->s, rglib->s); @@ -359,7 +358,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) for (i = 0, s = str->s; i != c->n_cigar; ++i) { x = strtol(s, &t, 10); op = toupper(*t); - if (op == 'M') op = BAM_CMATCH; + if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH; else if (op == 'I') op = BAM_CINS; else if (op == 'D') op = BAM_CDEL; else if (op == 'N') op = BAM_CREF_SKIP; @@ -373,7 +372,13 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation"); c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b))); doff += c->n_cigar * 4; - } else c->bin = bam_reg2bin(c->pos, c->pos + 1); + } else { + if (!(c->flag&BAM_FUNMAP)) { + fprintf(stderr, "Parse warning at line %lld: mapped sequence without CIGAR\n", (long long)fp->n_lines); + c->flag |= BAM_FUNMAP; + } + c->bin = bam_reg2bin(c->pos, c->pos + 1); + } } { // mtid, mpos, isize ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; @@ -386,16 +391,18 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) } { // seq and qual int i; - uint8_t *p; + uint8_t *p = 0; if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -5; // seq z += str->l + 1; - c->l_qseq = strlen(str->s); - if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) - parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); - p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; - memset(p, 0, (c->l_qseq+1)/2); - for (i = 0; i < c->l_qseq; ++i) - p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2); + if (strcmp(str->s, "*")) { + c->l_qseq = strlen(str->s); + if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) + parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); + p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; + memset(p, 0, (c->l_qseq+1)/2); + for (i = 0; i < c->l_qseq; ++i) + p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2); + } else c->l_qseq = 0; if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual z += str->l + 1; if (strcmp(str->s, "*") && c->l_qseq != strlen(str->s))