4 // Marth Lab, Boston College
\r
5 // Last modified: 6 April 2009
\r
7 #include "BamReader.h"
\r
21 int main(int argc, char* argv[]) {
\r
23 // check command line parameters
\r
26 cerr << "Usage: BamReaderTest <bamFile> <bamIndexFile>" << endl;
\r
31 // get filenames from command line
\r
32 const char* bamFilename = argv[1];
\r
33 const char* bamIndexFilename = argv[2];
\r
40 vector<string> refNames;
\r
42 BamAlignment bAlignment;
\r
43 BamAlignmentVector alignments;
\r
45 RefVector references;
\r
47 // ------------------------------------------------------------------------------------------------------ //
\r
48 // Declare BamReader & open BAM file - automatically loads up header and index file (.bai) information
\r
49 // ------------------------------------------------------------------------------------------------------ //
\r
51 BamReader bReader(bamFilename, bamIndexFilename);
\r
54 cerr << "Opening BamReader for BAM file: " << bamFilename << " ..... ";
\r
56 if ( bReader.Open() ) {
\r
57 cerr << "ok" << endl;
\r
60 cerr << "error" << endl;
\r
64 // ------------------------------------------------------------ //
\r
65 // Find out how many reference sequences are in BAM file
\r
66 // ------------------------------------------------------------ //
\r
68 references = bReader.GetReferenceData();
\r
71 cerr << "Total number of ref seqs: " << references.size() << endl;
\r
73 // ---------------------------------------------------------------------------- //
\r
74 // Get the names/lengths of all the reference sequences that have alignments
\r
75 // ---------------------------------------------------------------------------- //
\r
78 cerr << "All ref sequences with alignments:" << endl;
\r
82 if ( !references.empty() ) {
\r
84 RefVector::iterator refIter = references.begin();
\r
85 RefVector::iterator refEnd = references.end();
\r
88 // iterate over reference names, print to STDERR
\r
89 for( ; refIter != refEnd; ++refIter) {
\r
91 if ( (*refIter).RefHasAlignments ) {
\r
92 cerr << "ID: " << refID << endl;
\r
93 cerr << "Name: " << (*refIter).RefName << endl;
\r
94 cerr << "Length: " << (*refIter).RefLength << endl;
\r
102 // --------------------------------------------------------------------------------------------- //
\r
103 // Get the SAM-style header text, if available (not available in the example file shown here)
\r
104 // --------------------------------------------------------------------------------------------- //
\r
106 cerr << "------------------------------------" << endl;
\r
107 cerr << "SAM header text: " << endl;
\r
109 // get (SAM) header text
\r
110 string header = bReader.GetHeaderText();
\r
112 cerr << ( (header.empty()) ? "no header data" : header ) << endl;
\r
113 cerr << "------------------------------------" << endl;
\r
115 // --------------------------------------------------------------------------------------------- //
\r
116 // Here we start accessing alignments
\r
117 // The first method shows how to iterate over all reads in a BamFile
\r
118 // This method will work on any BAM file (sorted/non-sorted, with/without an index)
\r
119 // --------------------------------------------------------------------------------------------- //
\r
121 // Call Rewind() to make sure you're at the 1st alignment in the file
\r
122 // Please note, however, it's not necessary in this case, since file pointer initially set to 1st alignment
\r
123 // but this is probably a good habit to develop to ensure correctness and make your intent obvious in the code
\r
125 cerr << "Getting (up to) first 1000 alignments" << endl;
\r
127 // start at 1st alignment
\r
128 if ( bReader.Rewind() ) {
\r
130 alignmentCount = 0;
\r
131 while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {
\r
133 // disregard unmapped alignments
\r
134 if ( bAlignment.IsMapped() ) {
\r
138 cout << "----------------------------" << endl;
\r
139 cout << "Alignment " << alignmentCount << endl;
\r
140 cout << bAlignment.Name << endl;
\r
141 cout << bAlignment.AlignedBases << endl;
\r
142 cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;
\r
144 cout << "Cigar Data: " << endl;
\r
146 vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();
\r
147 vector<CigarOp>::const_iterator cigarEnd = bAlignment.CigarData.end();
\r
148 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
\r
149 cout << "Type: " << (*cigarIter).Type << "\tLength: " << (*cigarIter).Length << endl;
\r
152 if(!bAlignment.TagData.empty()) {
\r
153 cout << "Tag data is present." << endl;
\r
155 if(bAlignment.GetReadGroup(readGroup)) {
\r
156 cout << "- read group: " << readGroup << endl;
\r
162 cerr << "Found " << alignmentCount << " alignments." << endl;
\r
163 } else { cerr << "Could not rewind" << endl; }
\r
165 // ---------------------------------------------------------------------------------------------------------- //
\r
166 // You can iterate over each individual alignment that overlaps a specified region
\r
167 // Set the refID & left boundary parameters using Jump(refID, left)
\r
168 // Jump() actually positions the file pointer actually before the left boundary
\r
169 // This ensures that reads beginning before, but overlapping 'left' are included as well
\r
170 // Client is responsible for setting/checking right boundary - see below
\r
171 // ---------------------------------------------------------------------------------------------------------- //
\r
173 cerr << "Jumping to region" << endl;
\r
175 // get refID using a known refName
\r
177 refID = bReader.GetRefID(refName);
\r
178 if (refID == (int)references.size()) {
\r
179 cerr << "Reference " << refName << " not found." << endl;
\r
183 // set left boundary
\r
184 unsigned int leftBound = 500;
\r
186 // set right boundary - either user-specified number to set a discrete region
\r
187 // OR you can query the BamReader for the end of the reference
\r
188 unsigned int rightBound = references.at(refID).RefLength;
\r
191 cerr << "Iterating over alignments on reference: " << refName << " from " << leftBound << " to ref end (" << rightBound << ")" << endl;
\r
193 // set region - specific region on reference
\r
194 if ( bReader.Jump(refID, leftBound) ) {
\r
196 alignmentCount = 0;
\r
197 while ( bReader.GetNextAlignment(bAlignment) && (bAlignment.Position <= rightBound) ) {
\r
199 if ( bAlignment.IsMapped() ) {
\r
203 if ( (alignmentCount % 100000) == 0) { cerr << "Retrieved " << alignmentCount << " so far..." << endl; }
\r
205 cout << "----------------------------" << endl;
\r
206 cout << "Alignment " << alignmentCount << endl;
\r
207 cout << bAlignment.Name << endl;
\r
208 cout << bAlignment.AlignedBases << endl;
\r
209 cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;
\r
213 cerr << "Found " << alignmentCount << " alignments." << endl;
\r
214 } else { cerr << "Could not jump to region specified" << endl; }
\r
216 // ----------------------------------------------------------------------------------------------------- //
\r
217 // You can 'rewind' back to beginning of BAM file at any time
\r
218 // ----------------------------------------------------------------------------------------------------- //
\r
221 cerr << "Rewinding BAM file... then getting first 1000 alignments" << endl;
\r
223 alignmentCount = 0;
\r
224 if (bReader.Rewind() ) {
\r
225 while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {
\r
227 // disregard unmapped alignments
\r
228 if ( bAlignment.IsMapped() ) {
\r
232 cout << "----------------------------" << endl;
\r
233 cout << "Alignment " << alignmentCount << endl;
\r
234 cout << bAlignment.Name << endl;
\r
235 cout << bAlignment.AlignedBases << endl;
\r
236 cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;
\r
240 cerr << "Found " << alignmentCount << " alignments." << endl;
\r
241 } else { cerr << "Could not rewind" << endl; }
\r
243 // ---------------------------------------------------------------------- //
\r
244 // Close BamReader object (releases internal header/index data) and exit
\r
245 // ---------------------------------------------------------------------- //
\r
248 cerr << "Closing BAM file: " << bamFilename << endl;
\r
252 cerr << "Exiting..." << endl << endl;
\r