From: Heng Li Date: Mon, 7 Sep 2009 09:10:28 +0000 (+0000) Subject: * samtools-0.1.6-2 (r458) X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=5edf84137d9ca68041477d36f2be640c42f46651 * samtools-0.1.6-2 (r458) * added more interface to faidx (by Nils) * updated documentation --- diff --git a/bamtk.c b/bamtk.c index 8db8bcb..97f6560 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.6-2 (r457)" +#define PACKAGE_VERSION "0.1.6-3 (r458)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/faidx.c b/faidx.c index 055445f..77adbde 100644 --- a/faidx.c +++ b/faidx.c @@ -323,6 +323,40 @@ int faidx_main(int argc, char *argv[]) return 0; } +int faidx_fetch_nseq(const faidx_t *fai) +{ + return fai->n; +} + +char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len) +{ + int l; + char c; + khiter_t iter; + faidx1_t val; + char *seq=NULL; + + // Adjust position + iter = kh_get(s, fai->hash, c_name); + if(iter == kh_end(fai->hash)) return 0; + val = kh_value(fai->hash, iter); + if(p_end_i < p_beg_i) p_beg_i = p_end_i; + if(p_beg_i < 0) p_beg_i = 0; + else if(val.len <= p_beg_i) p_beg_i = val.len - 1; + if(p_end_i < 0) p_end_i = 0; + else if(val.len <= p_end_i) p_end_i = val.len - 1; + + // Now retrieve the sequence + l = 0; + seq = (char*)malloc(p_end_i - p_beg_i + 2); + razf_seek(fai->rz, val.offset + p_beg_i / val.line_blen * val.line_len + p_beg_i % val.line_blen, SEEK_SET); + while (razf_read(fai->rz, &c, 1) == 1 && l < p_end_i - p_beg_i + 1) + if (isgraph(c)) seq[l++] = c; + seq[l] = '\0'; + *len = l; + return seq; +} + #ifdef FAIDX_MAIN int main(int argc, char *argv[]) { return faidx_main(argc, argv); } #endif diff --git a/faidx.h b/faidx.h index 1a52fb7..1fb1b1f 100644 --- a/faidx.h +++ b/faidx.h @@ -75,6 +75,27 @@ extern "C" { */ char *fai_fetch(const faidx_t *fai, const char *reg, int *len); + /*! + @abstract Fetch the number of sequences. 
+ @param fai Pointer to the faidx_t struct + @return The number of sequences + */ + int faidx_fetch_nseq(const faidx_t *fai); + + /*! + @abstract Fetch the sequence in a region. + @param fai Pointer to the faidx_t struct + @param c_name Name of the sequence to look up in the index + @param p_beg_i Beginning position number (zero-based) + @param p_end_i End position number (zero-based, inclusive) + @param len Set to the length of the returned sequence + @return Pointer to the sequence; null on failure + + @discussion The returned sequence is allocated by malloc family + and should be destroyed by end users by calling free() on it. + */ + char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len); + #ifdef __cplusplus } #endif diff --git a/samtools.1 b/samtools.1 index d2c78f1..0108fdc 100644 --- a/samtools.1 +++ b/samtools.1 @@ -220,14 +220,16 @@ mapping quality. A symbol `$' marks the end of a read segment. If option .B -c -is applied, the consensus base, consensus quality, SNP quality and RMS -mapping quality of the reads covering the site will be inserted between -the `reference base' and the `read bases' columns. An indel occupies an -additional line. Each indel line consists of chromosome name, -coordinate, a star, the genotype, consensus quality, SNP quality, RMS -mapping quality, # covering reads, the first alllele, the second allele, -# reads supporting the first allele, # reads supporting the second -allele and # reads containing indels different from the top two alleles. +is applied, the consensus base, Phred-scaled consensus quality, SNP +quality (i.e. the Phred-scaled probability of the consensus being +identical to the reference) and root mean square (RMS) mapping quality +of the reads covering the site will be inserted between the `reference +base' and the `read bases' columns. An indel occupies an additional +line. 
Each indel line consists of chromosome name, coordinate, a star, +the genotype, consensus quality, SNP quality, RMS mapping quality, # +covering reads, the first allele, the second allele, # reads supporting +the first allele, # reads supporting the second allele and # reads +containing indels different from the top two alleles. .B OPTIONS: .RS