1 // ***************************************************************************
2 // BamWriter (c) 2009 Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Provides the basic functionality for producing BAM files
7 // ***************************************************************************
17 #include "BamAlignment.h"
// value used when the operating system is not specified
// NOTE(review): presumably the value stored in the gzip header's OS field - confirm at the point of use
26 #define OS_UNKNOWN 255
// window-bits and memory-level settings for the compressor
// NOTE(review): a negative window-bits value conventionally asks zlib for a raw deflate stream
// (no zlib/gzip wrapper) - confirm against the deflateInit2 call site
31 #define GZIP_WINDOW_BITS -15
32 #define Z_DEFAULT_MEM_LEVEL 8
// lengths of the fixed header and footer that surround each block
// NOTE(review): presumably counted in bytes - confirm where the block is assembled
35 #define BLOCK_HEADER_LENGTH 18
36 #define BLOCK_FOOTER_LENGTH 8
// element counts for the compressed (MAX_BLOCK_SIZE) and uncompressed (DEFAULT_BLOCK_SIZE)
// char buffers allocated by the BGZF structure below; both are currently 65536
37 #define MAX_BLOCK_SIZE 65536
38 #define DEFAULT_BLOCK_SIZE 65536
// NOTE(review): presumably the size of the fixed-length portion of an alignment record - confirm
41 #define BAM_CORE_SIZE 32
// numeric codes for CIGAR operations (the names suggest reference skip, soft clip and hard clip)
45 #define BAM_CREF_SKIP 3
46 #define BAM_CSOFT_CLIP 4
47 #define BAM_CHARD_CLIP 5
// bit shift used when packing a CIGAR element
// NOTE(review): presumably the operation code sits in the low bits and the length above them - confirm
49 #define BAM_CIGAR_SHIFT 4
// mask covering the low BAM_CIGAR_SHIFT bits: (1 << 4) - 1 == 0xF
51 #define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1)
56 // define our BGZF structure
// buffer sizes (in chars) followed by block bookkeeping fields
// NOTE(review): the exact meaning of BlockLength, BlockOffset and BlockAddress is not
// visible in this part of the file - confirm against the code that updates them
58 unsigned int UncompressedBlockSize;
59 unsigned int CompressedBlockSize;
60 unsigned int BlockLength;
61 unsigned int BlockOffset;
62 uint64_t BlockAddress;
// heap buffers allocated with new [] and released with delete [] below
65 char* UncompressedBlock;
66 char* CompressedBlock;
// member initializers: the buffer sizes come from the block-size macros and both
// buffer pointers start out as NULL
70 : UncompressedBlockSize(DEFAULT_BLOCK_SIZE)
71 , CompressedBlockSize(MAX_BLOCK_SIZE)
77 , UncompressedBlock(NULL)
78 , CompressedBlock(NULL)
// allocate the two block buffers using the sizes initialized above
81 CompressedBlock = new char[CompressedBlockSize];
82 UncompressedBlock = new char[UncompressedBlockSize];
// error message for a failed buffer allocation
84 printf("ERROR: Unable to allocate memory for our BGZF object.\n");
// release both block buffers; the null checks are redundant because delete [] on a
// null pointer is a no-op
91 if(CompressedBlock) delete [] CompressedBlock;
92 if(UncompressedBlock) delete [] UncompressedBlock;
102 // closes the alignment archive
104 // opens the alignment archive
// takes the output filename, the SAM header text and the reference sequences
// NOTE(review): presumably the header and references are written as part of opening - confirm in the definition
105 void Open(const string& filename, const string& samHeader, const RefVector& referenceSequences);
106 // saves the alignment to the alignment archive
107 void SaveAlignment(const BamAlignment& al);
109 // closes the BAM file
110 void BgzfClose(void);
111 // compresses the current block
// NOTE(review): the meaning of the int return value is not visible here - confirm in the definition
112 int BgzfDeflateBlock(void);
113 // flushes the data in the BGZF block
114 void BgzfFlushBlock(void);
115 // opens the BAM file for writing
116 void BgzfOpen(const string& filename);
117 // packs an unsigned integer into the specified buffer
// (writes buffer[0..3], least significant byte first; see the inline definition below)
118 static inline void BgzfPackUnsignedInt(char* buffer, unsigned int value);
119 // packs an unsigned short into the specified buffer
// (writes buffer[0..1], least significant byte first; see the inline definition below)
120 static inline void BgzfPackUnsignedShort(char* buffer, unsigned short value);
121 // writes the supplied data into the BGZF buffer
// dataLen is the length of data
// NOTE(review): the return value is presumably the number of bytes written - confirm in the definition
122 unsigned int BgzfWrite(const char* data, const unsigned int dataLen);
123 // calculates the minimum bin that contains a region [begin, end)
// (begin is inclusive, end is exclusive)
124 static inline unsigned int CalculateMinimumBin(unsigned int begin, unsigned int end);
125 // creates a packed cigar string from the supplied alignment
// packedCigar is a non-const reference, so it presumably receives the result - confirm in the definition
126 static void CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar);
127 // encodes the supplied query sequence into 4-bit notation
// encodedQuery is a non-const reference, so it presumably receives the result - confirm in the definition
128 static void EncodeQuerySequence(const string& query, string& encodedQuery);
129 // our BGZF output object
133 // packs an unsigned integer into the specified buffer
// writes the low 32 bits of value into buffer[0..3], least significant byte first
// (little-endian layout); the shifts operate on the value, so the result does not
// depend on the host byte order. buffer must have room for at least 4 chars.
134 inline void BamWriter::BgzfPackUnsignedInt(char* buffer, unsigned int value) {
// bits 0-7
135 buffer[0] = (char)value;
// bits 8-15
136 buffer[1] = (char)(value >> 8);
// bits 16-23
137 buffer[2] = (char)(value >> 16);
// bits 24-31
138 buffer[3] = (char)(value >> 24);
141 // packs an unsigned short into the specified buffer
142 inline void BamWriter::BgzfPackUnsignedShort(char* buffer, unsigned short value) {
143 buffer[0] = (char)value;
144 buffer[1] = (char)(value >> 8);