1 // ***************************************************************************
2 // BamWriter (c) 2009 Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // The BGZF routines were adapted from the bgzf.c code developed at the Broad
8 // ---------------------------------------------------------------------------
9 // Provides the basic functionality for producing BAM files
10 // ***************************************************************************
20 #include "BamAlignment.h"
29 #define OS_UNKNOWN 255 // NOTE(review): presumably the gzip header OS code for "unknown" - confirm against RFC 1952
34 #define GZIP_WINDOW_BITS -15 // NOTE(review): presumably a negative zlib windowBits (raw deflate) - confirm against the zlib manual
35 #define Z_DEFAULT_MEM_LEVEL 8
38 #define BLOCK_HEADER_LENGTH 18
39 #define BLOCK_FOOTER_LENGTH 8
40 #define MAX_BLOCK_SIZE 65536 // initial value of CompressedBlockSize in the BGZF structure
41 #define DEFAULT_BLOCK_SIZE 65536 // initial value of UncompressedBlockSize in the BGZF structure
44 #define BAM_CORE_SIZE 32
48 #define BAM_CREF_SKIP 3
49 #define BAM_CSOFT_CLIP 4
50 #define BAM_CHARD_CLIP 5
52 #define BAM_CIGAR_SHIFT 4
54 #define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1) // the low BAM_CIGAR_SHIFT bits set, i.e. 0xF
59 // define our BGZF structure: block sizes plus the compressed and uncompressed block buffers
63 unsigned int UncompressedBlockSize; // number of chars allocated for UncompressedBlock
64 unsigned int CompressedBlockSize; // number of chars allocated for CompressedBlock
65 unsigned int BlockLength;
66 unsigned int BlockOffset;
67 uint64_t BlockAddress;
70 char* UncompressedBlock; // buffer of UncompressedBlockSize chars, allocated with new[]
71 char* CompressedBlock; // buffer of CompressedBlockSize chars, allocated with new[]
75 : UncompressedBlockSize(DEFAULT_BLOCK_SIZE)
76 , CompressedBlockSize(MAX_BLOCK_SIZE)
82 , UncompressedBlock(NULL) // both buffer pointers start out NULL until the allocations below
83 , CompressedBlock(NULL)
86 CompressedBlock = new char[CompressedBlockSize];
87 UncompressedBlock = new char[UncompressedBlockSize];
89 printf("ERROR: Unable to allocate memory for our BGZF object.\n"); // NOTE(review): the condition guarding this message is not visible here - presumably an allocation-failure handler
96 if(CompressedBlock) delete [] CompressedBlock; // array delete matches the new char[] allocation
97 if(UncompressedBlock) delete [] UncompressedBlock;
108 // closes the alignment archive
110 // opens the alignment archive; takes the filename, the SAM header and the reference sequences
111 void Open(const string& filename, const string& samHeader, const RefVector& referenceSequences);
112 // saves the alignment to the alignment archive
113 void SaveAlignment(const BamAlignment& al);
115 // closes the BAM file
116 void BgzfClose(void);
117 // compresses the current block (NOTE(review): the meaning of the int return value is not visible here)
118 int BgzfDeflateBlock(void);
119 // flushes the data in the BGZF block
120 void BgzfFlushBlock(void);
121 // opens the BAM file for writing
122 void BgzfOpen(const string& filename);
123 // packs an unsigned integer into the specified buffer (writes buffer[0..3], least-significant byte first)
124 static inline void BgzfPackUnsignedInt(char* buffer, unsigned int value);
125 // packs an unsigned short into the specified buffer (writes buffer[0..1], least-significant byte first)
126 static inline void BgzfPackUnsignedShort(char* buffer, unsigned short value);
127 // writes the supplied data into the BGZF buffer
128 unsigned int BgzfWrite(const char* data, const unsigned int dataLen);
129 // calculates the minimum bin that contains a region [begin, end)
130 static inline unsigned int CalculateMinimumBin(unsigned int begin, unsigned int end);
131 // creates a packed cigar string from the supplied alignment; the result is returned through packedCigar
132 static void CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar);
133 // encodes the supplied query sequence into 4-bit notation; the result is returned through encodedQuery
134 static void EncodeQuerySequence(const string& query, string& encodedQuery);
135 // our BGZF output object
139 // packs an unsigned integer into the specified buffer: writes buffer[0] through buffer[3], least-significant byte first
140 inline void BamWriter::BgzfPackUnsignedInt(char* buffer, unsigned int value) {
141 buffer[0] = (char)value; // the cast keeps only the lowest byte of value
142 buffer[1] = (char)(value >> 8); // second byte
143 buffer[2] = (char)(value >> 16); // third byte
144 buffer[3] = (char)(value >> 24); // highest of the four bytes written
147 // packs an unsigned short into the specified buffer: writes buffer[0] and buffer[1], least-significant byte first
148 inline void BamWriter::BgzfPackUnsignedShort(char* buffer, unsigned short value) {
149 buffer[0] = (char)value; // the cast keeps only the lowest byte of value
150 buffer[1] = (char)(value >> 8); // next byte up