From: Heng Li Date: Fri, 9 Jul 2010 04:39:34 +0000 (+0000) Subject: make pileup work with CIGAR with I/D at the beginning or in the end X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=3d80bf0f14674656673a9374ea31d85b8e693100;p=samtools.git make pileup work with CIGAR with I/D at the beginning or in the end --- diff --git a/bam_pileup.c b/bam_pileup.c index d9a75da..3c41a16 100644 --- a/bam_pileup.c +++ b/bam_pileup.c @@ -73,18 +73,28 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos) p->qpos = y + (pos - x); if (x == pos && is_restart) p->is_head = 1; if (x + l - 1 == pos) { // come to the end of a match - if (k < c->n_cigar - 1) { // there are additional operation(s) + int has_next_match = 0; + unsigned i; + for (i = k + 1; i < c->n_cigar; ++i) { + uint32_t cigar = bam1_cigar(b)[i]; + int opi = cigar&BAM_CIGAR_MASK; + if (opi == BAM_CMATCH) { + has_next_match = 1; + break; + } else if (opi == BAM_CSOFT_CLIP || opi == BAM_CREF_SKIP || opi == BAM_CHARD_CLIP) break; + } + if (!has_next_match) p->is_tail = 1; + if (k < c->n_cigar - 1 && has_next_match) { // there are additional operation(s) uint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR int op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation if (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del else if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins - if (op_next == BAM_CDEL || op_next == BAM_CINS) { - if (k + 2 < c->n_cigar) op_next = bam1_cigar(b)[k+2]&BAM_CIGAR_MASK; - else p->is_tail = 1; + else if (op_next == BAM_CPAD && k + 2 < c->n_cigar) { // no working for adjacent padding + cigar = bam1_cigar(b)[k+2]; op_next = cigar&BAM_CIGAR_MASK; + if (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del + else if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins } - if (op_next == BAM_CSOFT_CLIP || op_next == BAM_CREF_SKIP || op_next == BAM_CHARD_CLIP) - p->is_tail = 1; // tail - } else p->is_tail = 1; // this is the last operation; set tail + } } } x += l; y += l; @@ -96,7 +106,8 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos) x += l; } else if (op == BAM_CREF_SKIP) x += l; else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; - is_restart = (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP); + if (is_restart) is_restart ^= (op == BAM_CMATCH); + else is_restart ^= (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP); if (x > pos) { if (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all break; diff --git a/examples/toy.fa b/examples/toy.fa new file mode 100644 index 0000000..38312c1 --- /dev/null +++ b/examples/toy.fa @@ -0,0 +1,2 @@ +>ref +AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT diff --git a/examples/toy.sam b/examples/toy.sam new file mode 100644 index 0000000..baf7388 --- /dev/null +++ b/examples/toy.sam @@ -0,0 +1,7 @@ +@SQ SN:ref LN:45 +r001 163 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG * +r002 0 ref 9 30 1S2I6M1P1I4M2I * 0 0 AAAAGATAAGGATAAA * +r003 0 ref 9 30 5H6M * 0 0 AGCTAA * +r004 0 ref 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * +r003 16 ref 29 30 6H5M * 0 0 TAGGC * +r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT * \ No newline at end of file