/* The MIT License
- Copyright (c) 2008 Genome Research Ltd (GRL).
+ Copyright (c) 2008-2010 Genome Research Ltd (GRL).
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
BAM library provides I/O and various operations on manipulating files
in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map)
- format. It now supports importing from or exporting to TAM, sorting,
+ format. It now supports importing from or exporting to SAM, sorting,
merging, generating pileup, and quickly retrieval of reads overlapped
with a specified region.
@copyright Genome Research Ltd.
*/
+#define BAM_VERSION "0.1.18 (r982:295)"
+
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
CIGAR operations.
*/
-/*! @abstract CIGAR: match */
+/*! @abstract CIGAR: M = match or mismatch*/
#define BAM_CMATCH 0
-/*! @abstract CIGAR: insertion to the reference */
+/*! @abstract CIGAR: I = insertion to the reference */
#define BAM_CINS 1
-/*! @abstract CIGAR: deletion from the reference */
+/*! @abstract CIGAR: D = deletion from the reference */
#define BAM_CDEL 2
-/*! @abstract CIGAR: skip on the reference (e.g. spliced alignment) */
+/*! @abstract CIGAR: N = skip on the reference (e.g. spliced alignment) */
#define BAM_CREF_SKIP 3
-/*! @abstract CIGAR: clip on the read with clipped sequence present in qseq */
+/*! @abstract CIGAR: S = clip on the read with clipped sequence
+ present in qseq */
#define BAM_CSOFT_CLIP 4
-/*! @abstract CIGAR: clip on the read with clipped sequence trimmed off */
+/*! @abstract CIGAR: H = clip on the read with clipped sequence trimmed off */
#define BAM_CHARD_CLIP 5
-/*! @abstract CIGAR: padding */
+/*! @abstract CIGAR: P = padding */
#define BAM_CPAD 6
+/*! @abstract CIGAR: equals = match */
+#define BAM_CEQUAL 7
+/*! @abstract CIGAR: X = mismatch */
+#define BAM_CDIFF 8
/*! @typedef
@abstract Structure for core alignment information.
@param b pointer to an alignment
@return pointer to quality string
*/
-#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + ((b)->core.l_qseq + 1)/2)
+#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
/*! @function
@abstract Get a base on read
*/
extern int bam_is_be;
+/*!
+ @abstract Verbose level between 0 and 3; 0 is supposed to disable all
+ debugging information, though this may not have been implemented.
+ */
+extern int bam_verbose;
+
/*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */
extern unsigned char bam_nt16_table[256];
char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of);
+ /*!
+ @abstract Check whether a BAM record is plausibly valid
+ @param header associated header structure, or NULL if unavailable
+ @param b alignment to validate
+ @return 0 if the alignment is invalid; non-zero otherwise
+
+ @discussion Simple consistency check of some of the fields of the
+ alignment record. If the header is provided, several additional checks
+ are made. Not all fields are checked, so a non-zero result is not a
+ guarantee that the record is valid. However it is usually good enough
+ to detect when bam_seek() has been called with a virtual file offset
+ that is not the offset of an alignment record.
+ */
+ int bam_validate1(const bam_header_t *header, const bam1_t *b);
+
const char *bam_get_library(bam_header_t *header, const bam1_t *b);
bam1_t *b;
int32_t qpos;
int indel, level;
- uint32_t is_del:1, is_head:1, is_tail:1;
+ uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
} bam_pileup1_t;
typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
void bam_plp_set_mask(bam_plp_t iter, int mask);
+ void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
void bam_plp_reset(bam_plp_t iter);
void bam_plp_destroy(bam_plp_t iter);
bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
void bam_mplp_destroy(bam_mplp_t iter);
+ void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
/*! @typedef
{
uint8_t *data = bdst->data;
int m_data = bdst->m_data; // backup data and m_data
- if (m_data < bsrc->m_data) { // double the capacity
- m_data = bsrc->m_data; kroundup32(m_data);
+ if (m_data < bsrc->data_len) { // double the capacity
+ m_data = bsrc->data_len; kroundup32(m_data);
data = (uint8_t*)realloc(data, m_data);
}
memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data
return b;
}
+static inline int bam_aux_type2size(int x)
+{
+ if (x == 'C' || x == 'c' || x == 'A') return 1;
+ else if (x == 'S' || x == 's') return 2;
+ else if (x == 'I' || x == 'i' || x == 'f') return 4;
+ else return 0;
+}
+
+
#endif