]> git.donarmstrong.com Git - samtools.git/commitdiff
* samtools-0.1.5-27 (r443)
authorHeng Li <lh3@live.co.uk>
Tue, 11 Aug 2009 08:29:11 +0000 (08:29 +0000)
committerHeng Li <lh3@live.co.uk>
Tue, 11 Aug 2009 08:29:11 +0000 (08:29 +0000)
 * SEQ and QUAL can be "*"
 * parse CIGAR "=" and "X" as "M"

bam.c
bam_import.c
bamtk.c

diff --git a/bam.c b/bam.c
index 52d97a6959dfad2076afdb3c64f37fd76b87fb40..619b46a6718a773aa073e7ecede13ef5cd10dee8 100644 (file)
--- a/bam.c
+++ b/bam.c
@@ -271,10 +271,12 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
        else if (c->mtid == c->tid) kputs("=\t", &str);
        else ksprintf(&str, "%s\t", header->target_name[c->mtid]);
        ksprintf(&str, "%d\t%d\t", c->mpos + 1, c->isize);
-       for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
-       kputc('\t', &str);
-       if (t[0] == 0xff) kputc('*', &str);
-       else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
+       if (c->l_qseq) {
+               for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
+               kputc('\t', &str);
+               if (t[0] == 0xff) kputc('*', &str);
+               else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
+       } else ksprintf(&str, "*\t*");
        s = bam1_aux(b);
        while (s < b->data + b->data_len) {
                uint8_t type, key[2];
index 4c9395b7dccf6cc7adb071a8e93e5845a107690e..19cd75f4352f2d144b1d0a36ed0925cab6a50cc8 100644 (file)
@@ -359,7 +359,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b)
                        for (i = 0, s = str->s; i != c->n_cigar; ++i) {
                                x = strtol(s, &t, 10);
                                op = toupper(*t);
-                               if (op == 'M') op = BAM_CMATCH;
+                               if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH;
                                else if (op == 'I') op = BAM_CINS;
                                else if (op == 'D') op = BAM_CDEL;
                                else if (op == 'N') op = BAM_CREF_SKIP;
@@ -373,7 +373,10 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b)
                        if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation");
                        c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b)));
                        doff += c->n_cigar * 4;
-               } else c->bin = bam_reg2bin(c->pos, c->pos + 1);
+               } else {
+                       if (!(c->flag&BAM_FUNMAP)) parse_error(fp->n_lines, "mapped sequence without CIGAR");
+                       c->bin = bam_reg2bin(c->pos, c->pos + 1);
+               }
        }
        { // mtid, mpos, isize
                ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
@@ -386,16 +389,18 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b)
        }
        { // seq and qual
                int i;
-               uint8_t *p;
+               uint8_t *p = 0;
                if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -5; // seq
                z += str->l + 1;
-               c->l_qseq = strlen(str->s);
-               if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b)))
-                       parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent");
-               p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff;
-               memset(p, 0, (c->l_qseq+1)/2);
-               for (i = 0; i < c->l_qseq; ++i)
-                       p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2);
+               if (strcmp(str->s, "*")) {
+                       c->l_qseq = strlen(str->s);
+                       if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b)))
+                               parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent");
+                       p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff;
+                       memset(p, 0, (c->l_qseq+1)/2);
+                       for (i = 0; i < c->l_qseq; ++i)
+                               p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2);
+               } else c->l_qseq = 0;
                if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual
                z += str->l + 1;
                if (strcmp(str->s, "*") && c->l_qseq != strlen(str->s))
diff --git a/bamtk.c b/bamtk.c
index 56fa73a3acfa5e26c18370efe2960aab1b2a2858..2df90323812775db7102785effc8c9192948178f 100644 (file)
--- a/bamtk.c
+++ b/bamtk.c
@@ -9,7 +9,7 @@
 #endif
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.5-26 (r442)"
+#define PACKAGE_VERSION "0.1.5-27 (r443)"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);