X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_import.c;h=404da01eb82841c48dd62fb03ec9d7bedff51865;hb=17d530d96c290e6e3956c034117dce8f39480edd;hp=4b7bb2122e0e046bf65874bdda27e03828479b06;hpb=635998cfe030da5f3dbec42a6daa3ca82fa5c871;p=samtools.git diff --git a/bam_import.c b/bam_import.c index 4b7bb21..404da01 100644 --- a/bam_import.c +++ b/bam_import.c @@ -44,29 +44,27 @@ struct __tamFile_t { uint64_t n_lines; }; -char **bam_load_pos(const char *fn, int *_n) +char **__bam_get_lines(const char *fn, int *_n) // for bam_plcmd.c only { char **list = 0, *s; - int n = 0, dret, m = 0, c; + int n = 0, dret, m = 0; gzFile fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r"); kstream_t *ks; kstring_t *str; str = (kstring_t*)calloc(1, sizeof(kstring_t)); ks = ks_init(fp); - while (ks_getuntil(ks, 0, str, &dret) > 0) { + while (ks_getuntil(ks, '\n', str, &dret) > 0) { if (n == m) { m = m? m << 1 : 16; list = (char**)realloc(list, m * sizeof(char*)); } - s = list[n++] = (char*)calloc(str->l + 5, 1); + if (str->s[str->l-1] == '\r') + str->s[--str->l] = '\0'; + s = list[n++] = (char*)calloc(str->l + 1, 1); strcpy(s, str->s); - s += str->l + 1; - ks_getuntil(ks, 0, str, &dret); - *((uint32_t*)s) = atoi(str->s); - if (dret != '\n') - while ((c = ks_getc(fp)) >= 0 && c != '\n'); } ks_destroy(ks); + gzclose(fp); free(str->s); free(str); *_n = n; return list; @@ -153,7 +151,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) kstring_t *str = fp->str; kstream_t *ks = fp->ks; - while ((ret = ks_getuntil(fp->ks, 0, str, &dret)) >= 0 && str->s[0] == '@') { // skip header + while ((ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret)) >= 0 && str->s[0] == '@') { // skip header str->s[str->l] = dret; // note that str->s is NOT null terminated!! append_text(header, str); if (dret != '\n') { @@ -163,7 +161,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) } ++fp->n_lines; } - while (ret == 0) ret = ks_getuntil(fp->ks, 0, str, &dret); // special consideration for "\r\n" + while (ret == 0) ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret); // special consideration for "\r\n" if (ret < 0) return -1; ++fp->n_lines; doff = 0; @@ -174,10 +172,10 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) doff += c->l_qname; } { // flag, tid, pos, qual - ret = ks_getuntil(ks, 0, str, &dret); c->flag = atoi(str->s); - ret = ks_getuntil(ks, 0, str, &dret); c->tid = bam_get_tid(header, str->s); - ret = ks_getuntil(ks, 0, str, &dret); c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1; - ret = ks_getuntil(ks, 0, str, &dret); c->qual = isdigit(str->s[0])? atoi(str->s) : 0; + ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->flag = atoi(str->s); + ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->tid = bam_get_tid(header, str->s); + ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1; + ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->qual = isdigit(str->s[0])? atoi(str->s) : 0; if (ret < 0) return -2; } { // cigar @@ -185,7 +183,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) int i, op; long x; c->n_cigar = 0; - if (ks_getuntil(ks, 0, str, &dret) < 0) return -3; + if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -3; if (str->s[0] != '*') { for (s = str->s; *s; ++s) { if (isalpha(*s)) ++c->n_cigar; @@ -209,18 +207,18 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation"); c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b))); doff += c->n_cigar * 4; - } + } else c->bin = bam_reg2bin(c->pos, c->pos + 1); } { // mtid, mpos, isize - ret = ks_getuntil(ks, 0, str, &dret); c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid; - ret = ks_getuntil(ks, 0, str, &dret); c->mpos = isdigit(str->s[0])? atoi(str->s) - 1 : -1; - ret = ks_getuntil(ks, 0, str, &dret); c->isize = (str->s[0] == '-' || isdigit(str->s[0]))? atoi(str->s) : 0; + ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid; + ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->mpos = isdigit(str->s[0])? atoi(str->s) - 1 : -1; + ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->isize = (str->s[0] == '-' || isdigit(str->s[0]))? atoi(str->s) : 0; if (ret < 0) return -4; } { // seq and qual int i; uint8_t *p; - if (ks_getuntil(ks, 0, str, &dret) < 0) return -5; // seq + if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -5; // seq c->l_qseq = strlen(str->s); if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); @@ -228,7 +226,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) bzero(p, (c->l_qseq+1)/2); for (i = 0; i < c->l_qseq; ++i) p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2); - if (ks_getuntil(ks, 0, str, &dret) < 0) return -6; // qual + if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual if (c->l_qseq != strlen(str->s)) parse_error(fp->n_lines, "sequence and quality are inconsistent"); p += (c->l_qseq+1)/2; @@ -237,7 +235,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) } doff0 = doff; if (dret != '\n' && dret != '\r') { // aux - while (ks_getuntil(ks, 0, str, &dret) >= 0) { + while (ks_getuntil(ks, KS_SEP_TAB, str, &dret) >= 0) { uint8_t *s, type, key[2]; if (str->l < 6 || str->s[2] != ':' || str->s[4] != ':') parse_error(fp->n_lines, "missing colon in auxiliary data");