\r
// Derek Barnett\r
// Marth Lab, Boston College\r
-// Last modified: 6 April 2009\r
+// Last modified: 12 May 2009\r
\r
#include "BamReader.h"\r
\r
const char* bamFilename = argv[1];\r
const char* bamIndexFilename = argv[2];\r
\r
- string refName;\r
- int refID;\r
-\r
- int alignmentCount;\r
-\r
- vector<string> refNames;\r
-\r
+ int alignmentsRead = 0;\r
BamAlignment bAlignment;\r
- BamAlignmentVector alignments;\r
-\r
- RefVector references;\r
-\r
- // ------------------------------------------------------------------------------------------------------ //\r
- // Declare BamReader & open BAM file - automatically loads up header and index file (.bai) information\r
- // ------------------------------------------------------------------------------------------------------ //\r
\r
- BamReader bReader(bamFilename, bamIndexFilename);\r
+ BamReader bReader;\r
+ cerr << endl << "Opening BAM file (and index)" << endl << endl;\r
+ bReader.Open(bamFilename, bamIndexFilename);\r
\r
- cerr << endl;\r
- cerr << "Opening BamReader for BAM file: " << bamFilename << " ..... ";\r
- \r
- if ( bReader.Open() ) { \r
- cerr << "ok" << endl; \r
- }\r
- else {\r
- cerr << "error" << endl;\r
- exit(1);\r
+ string header = bReader.GetHeaderText();\r
+ cerr << "Printing header text..." << endl;\r
+ if ( header.empty() ) {\r
+ cerr << "No header provided." << endl << endl;\r
+ } else {\r
+ cerr << "----------------------" << endl;\r
+ cerr << "Header Text: " << endl;\r
+ cerr << "----------------------" << endl;\r
+ cerr << header << endl << endl;\r
}\r
- \r
- // ------------------------------------------------------------ //\r
- // Find out how many reference sequences are in BAM file\r
- // ------------------------------------------------------------ //\r
- \r
- references = bReader.GetReferenceData();\r
\r
- cerr << endl;\r
- cerr << "Total number of ref seqs: " << references.size() << endl;\r
- \r
- // ---------------------------------------------------------------------------- //\r
- // Get the names/lengths of all the reference sequences that have alignments\r
- // ---------------------------------------------------------------------------- //\r
- \r
- cerr << endl;\r
- cerr << "All ref sequences with alignments:" << endl;\r
+ RefVector references = bReader.GetReferenceData();\r
+ cerr << "Printing references..." << endl;\r
+ RefVector::const_iterator refIter = references.begin();\r
+ RefVector::const_iterator refEnd = references.end();\r
+ for ( ; refIter != refEnd; ++refIter) {\r
+ cerr << "Reference entry: " << endl;\r
+ cerr << (*refIter).RefName << endl;\r
+ cerr << (*refIter).RefLength << endl;\r
+ cerr << "Has alignments? : " << ( ((*refIter).RefHasAlignments) ? "yes" : "no" ) << endl;\r
+ }\r
cerr << endl;\r
\r
+ alignmentsRead = 0;\r
+ while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {\r
+ cerr << "Alignment " << alignmentsRead << endl;\r
+ cerr << bAlignment.Name << endl;\r
+ cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;\r
+ ++alignmentsRead;\r
+ }\r
\r
- if ( !references.empty() ) {\r
- \r
- RefVector::iterator refIter = references.begin();\r
- RefVector::iterator refEnd = references.end();\r
- \r
- refID = 0;\r
- // iterate over reference names, print to STDERR\r
- for( ; refIter != refEnd; ++refIter) {\r
-\r
- if ( (*refIter).RefHasAlignments ) {\r
- cerr << "ID: " << refID << endl;\r
- cerr << "Name: " << (*refIter).RefName << endl;\r
- cerr << "Length: " << (*refIter).RefLength << endl;\r
- cerr << endl;\r
+ cerr << "Jumping in BAM file" << endl;\r
+ if ( bReader.Jump(1, 5000) ) {\r
+ cerr << "Jumping seems ok - getting first 10 alignments that start at or after chr2:5000" << endl;\r
+\r
+ alignmentsRead = 0;\r
+ while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {\r
+ if ( bAlignment.Position >= 5000 ) { \r
+ cerr << "Alignment " << alignmentsRead << endl;\r
+ cerr << bAlignment.Name << endl;\r
+ cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;\r
+ ++alignmentsRead;\r
}\r
-\r
- ++refID;\r
}\r
}\r
\r
- // --------------------------------------------------------------------------------------------- //\r
- // Get the SAM-style header text, if available (not available in the example file shown here)\r
- // --------------------------------------------------------------------------------------------- // \r
- \r
- cerr << "------------------------------------" << endl;\r
- cerr << "SAM header text: " << endl;\r
- \r
- // get (SAM) header text\r
- string header = bReader.GetHeaderText();\r
- \r
- cerr << ( (header.empty()) ? "no header data" : header ) << endl;\r
- cerr << "------------------------------------" << endl;\r
-\r
- // --------------------------------------------------------------------------------------------- //\r
- // Here we start accessing alignments\r
- // The first method shows how to iterate over all reads in a BamFile\r
- // This method will work on any BAM file (sorted/non-sorted, with/without an index)\r
- // --------------------------------------------------------------------------------------------- //\r
-\r
- // Call Rewind() to make sure you're at the 1st alignment in the file\r
- // Please note, however, it's not necessary in this case, since file pointer initially set to 1st alignment\r
- // but this is probably a good habit to develop to ensure correctness and make your intent obvious in the code\r
-\r
- cerr << "Getting (up to) first 1000 alignments" << endl;\r
-\r
- // start at 1st alignment\r
+ cerr << "Rewinding BAM file" << endl;\r
if ( bReader.Rewind() ) {\r
+ cerr << "Rewind seems to be ok - getting first 10 alignments" << endl;\r
\r
- alignmentCount = 0;\r
- while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {\r
- \r
- // disregard unmapped alignments\r
- if ( bAlignment.IsMapped() ) {\r
-\r
- ++alignmentCount;\r
- \r
- cout << "----------------------------" << endl;\r
- cout << "Alignment " << alignmentCount << endl;\r
- cout << bAlignment.Name << endl;\r
- cout << bAlignment.AlignedBases << endl;\r
- cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
- \r
- cout << "Cigar Data: " << endl;\r
-\r
- vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();\r
- vector<CigarOp>::const_iterator cigarEnd = bAlignment.CigarData.end();\r
- for ( ; cigarIter != cigarEnd; ++cigarIter ) {\r
- cout << "Type: " << (*cigarIter).Type << "\tLength: " << (*cigarIter).Length << endl;\r
- }\r
-\r
- if(!bAlignment.TagData.empty()) {\r
- cout << "Tag data is present." << endl;\r
- string readGroup;\r
- if(bAlignment.GetReadGroup(readGroup)) {\r
- cout << "- read group: " << readGroup << endl;\r
- }\r
- }\r
- }\r
+ alignmentsRead = 0;\r
+ while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {\r
+ cerr << "Alignment " << alignmentsRead << endl;\r
+ cerr << bAlignment.Name << endl;\r
+ cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;\r
+ ++alignmentsRead;\r
}\r
-\r
- cerr << "Found " << alignmentCount << " alignments." << endl;\r
- } else { cerr << "Could not rewind" << endl; }\r
-\r
- // ---------------------------------------------------------------------------------------------------------- //\r
- // You can iterate over each individual alignment that overlaps a specified region\r
- // Set the refID & left boundary parameters using Jump(refID, left)\r
- // Jump() actually positions the file pointer actually before the left boundary\r
- // This ensures that reads beginning before, but overlapping 'left' are included as well \r
- // Client is responsible for setting/checking right boundary - see below\r
- // ---------------------------------------------------------------------------------------------------------- //\r
-/*\r
- cerr << "Jumping to region" << endl;\r
-\r
- // get refID using a known refName\r
- refName = "seq1";\r
- refID = bReader.GetRefID(refName);\r
- if (refID == (int)references.size()) { \r
- cerr << "Reference " << refName << " not found." << endl;\r
- exit(1);\r
}\r
\r
- // set left boundary\r
- unsigned int leftBound = 500;\r
-\r
- // set right boundary - either user-specified number to set a discrete region\r
- // OR you can query the BamReader for the end of the reference\r
- unsigned int rightBound = references.at(refID).RefLength;\r
-\r
- cerr << endl;\r
- cerr << "Iterating over alignments on reference: " << refName << " from " << leftBound << " to ref end (" << rightBound << ")" << endl;\r
-\r
- // set region - specific region on reference\r
- if ( bReader.Jump(refID, leftBound) ) { \r
-\r
- alignmentCount = 0;\r
- while ( bReader.GetNextAlignment(bAlignment) && (bAlignment.Position <= rightBound) ) {\r
- \r
- if ( bAlignment.IsMapped() ) {\r
-\r
- ++alignmentCount;\r
- \r
- if ( (alignmentCount % 100000) == 0) { cerr << "Retrieved " << alignmentCount << " so far..." << endl; }\r
- \r
- cout << "----------------------------" << endl;\r
- cout << "Alignment " << alignmentCount << endl;\r
- cout << bAlignment.Name << endl;\r
- cout << bAlignment.AlignedBases << endl;\r
- cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
- }\r
- }\r
-\r
- cerr << "Found " << alignmentCount << " alignments." << endl; \r
- } else { cerr << "Could not jump to region specified" << endl; }\r
-\r
- // ----------------------------------------------------------------------------------------------------- //\r
- // You can 'rewind' back to beginning of BAM file at any time \r
- // ----------------------------------------------------------------------------------------------------- //\r
-\r
- cerr << endl;\r
- cerr << "Rewinding BAM file... then getting first 1000 alignments" << endl;\r
-\r
- alignmentCount = 0;\r
- if (bReader.Rewind() ) {\r
- while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {\r
- \r
- // disregard unmapped alignments\r
- if ( bAlignment.IsMapped() ) {\r
-\r
- ++alignmentCount;\r
- \r
- cout << "----------------------------" << endl;\r
- cout << "Alignment " << alignmentCount << endl;\r
- cout << bAlignment.Name << endl;\r
- cout << bAlignment.AlignedBases << endl;\r
- cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
- }\r
- }\r
-\r
- cerr << "Found " << alignmentCount << " alignments." << endl;\r
- } else { cerr << "Could not rewind" << endl; }\r
-*/\r
- // ---------------------------------------------------------------------- //\r
- // Close BamReader object (releases internal header/index data) and exit\r
- // ---------------------------------------------------------------------- //\r
- \r
- cerr << endl;\r
- cerr << "Closing BAM file: " << bamFilename << endl;\r
- \r
+ cerr << "Closing BAM file..." << endl << endl;\r
bReader.Close();\r
\r
cerr << "Exiting..." << endl << endl;\r