git.donarmstrong.com Git - samtools.git/blobdiff - padding.c
First attempt at updating MPOS in 'samtools depad'
[samtools.git] / padding.c
index b7a254ff69df23fbc99aa05c0c8ec162baa95251..ad8bf5c480d97642dd433ac740059b1d6e45e1e5 100644 (file)
--- a/padding.c
+++ b/padding.c
@@ -170,7 +170,7 @@ int bam_pad2unpad(samfile_t *in, samfile_t *out, faidx_t *fai)
                        r_tid = b->core.tid;
                        unpad_seq(b, &r);
                        if (h->target_len[r_tid] != r.l) {
-                               fprintf(stderr, "[depad] ERROR: (Padded) length of '%s' is %i in BAM header, but %id in embedded reference\n", bam1_qname(b), h->target_len[r_tid], r.l);
+                               fprintf(stderr, "[depad] ERROR: (Padded) length of '%s' is %d in BAM header, but %ld in embedded reference\n", bam1_qname(b), h->target_len[r_tid], r.l);
                                return -1;
                        }
                        if (fai) {
@@ -186,8 +186,8 @@ int bam_pad2unpad(samfile_t *in, samfile_t *out, faidx_t *fai)
                                                // Show gaps as ASCII 45
                                                fprintf(stderr, "[depad] ERROR: Embedded sequence and reference FASTA don't match for %s base %i, '%c' vs '%c'\n",
                                                        h->target_name[b->core.tid], i+1,
-                                                       r.s[i] ? bam_nt16_rev_table[r.s[i]] : 45,
-                                                       q.s[i] ? bam_nt16_rev_table[q.s[i]] : 45);
+                                                       r.s[i] ? bam_nt16_rev_table[(int)r.s[i]] : 45,
+                                                       q.s[i] ? bam_nt16_rev_table[(int)q.s[i]] : 45);
                                                return -1;
                                        }
                                }
@@ -271,6 +271,31 @@ int bam_pad2unpad(samfile_t *in, samfile_t *out, faidx_t *fai)
                        n2 = k;
                        replace_cigar(b, n2, cigar2);
                        b->core.pos = posmap[b->core.pos];
+                       if (b->core.mpos < 0) {
+                               /* Nice case: no mate to worry about */
+                       } else if (b->core.mtid == b->core.tid) {
+                               /* Nice case, same reference */
+                               b->core.mpos = posmap[b->core.mpos];
+                       } else {
+                               /* Nasty case: the mate is on a different reference, so that reference's posmap must be loaded */
+                               if (!fai) {
+                                       fprintf(stderr, "[depad] ERROR: Needed reference %s sequence for mate (and no FASTA file)\n", h->target_name[b->core.mtid]);
+                                       return -1;
+                               }
+                               /* Temporarily load the other reference sequence */
+                               if (load_unpadded_ref(fai, h->target_name[b->core.mtid], h->target_len[b->core.mtid], &r)) {
+                                       fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.mtid]);
+                                       return -1;
+                               }
+                               posmap = update_posmap(posmap, r);
+                               b->core.mpos = posmap[b->core.mpos];
+                               /* Restore the reference and posmap */
+                               if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) {
+                                       fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
+                                       return -1;
+                               }
+                               posmap = update_posmap(posmap, r);
+                       }
                }
                samwrite(out, b);
        }