]> git.donarmstrong.com Git - bamtools.git/blobdiff - BamReaderMain.cpp
Major overhaul of BamReader. No longer relying on bgzf.* API. Sped up random-access...
[bamtools.git] / BamReaderMain.cpp
index 5426763a76c20b9fb03ba95a36f135f35284a62a..d568fed2e86f3458647254f12907883283e596ad 100644 (file)
@@ -2,7 +2,7 @@
 \r
 // Derek Barnett\r
 // Marth Lab, Boston College\r
-// Last modified: 6 April 2009\r
+// Last modified: 12 May 2009\r
 \r
 #include "BamReader.h"\r
 \r
@@ -32,221 +32,73 @@ int main(int argc, char* argv[]) {
        const char* bamFilename = argv[1];\r
        const char* bamIndexFilename = argv[2];\r
 \r
-       string refName;\r
-       int refID;\r
-\r
-       int alignmentCount;\r
-\r
-       vector<string> refNames;\r
-\r
+       int alignmentsRead = 0;\r
        BamAlignment bAlignment;\r
-       BamAlignmentVector alignments;\r
-\r
-       RefVector references;\r
-\r
-       // ------------------------------------------------------------------------------------------------------ //\r
-       // Declare BamReader & open BAM file - automatically loads up header and index file (.bai) information\r
-       // ------------------------------------------------------------------------------------------------------ //\r
        \r
-       BamReader bReader(bamFilename, bamIndexFilename);\r
+       BamReader bReader;\r
+       cerr << endl << "Opening BAM file (and index)" << endl << endl;\r
+       bReader.Open(bamFilename, bamIndexFilename);\r
 \r
-       cerr << endl;\r
-       cerr << "Opening BamReader for BAM file: " << bamFilename << " ..... ";\r
-       \r
-       if ( bReader.Open() ) { \r
-               cerr << "ok" << endl; \r
-       }\r
-       else {\r
-               cerr << "error" << endl;\r
-               exit(1);\r
+       string header = bReader.GetHeaderText();\r
+       cerr << "Printing header text..." << endl;\r
+       if ( header.empty() ) {\r
+               cerr << "No header provided." << endl << endl;\r
+       } else {\r
+               cerr << "----------------------" << endl;\r
+               cerr << "Header Text: " << endl;\r
+               cerr << "----------------------" << endl;\r
+               cerr << header << endl << endl;\r
        }\r
-       \r
-       // ------------------------------------------------------------ //\r
-       // Find out how many reference sequences are in BAM file\r
-       // ------------------------------------------------------------ //\r
-       \r
-       references = bReader.GetReferenceData();\r
 \r
-       cerr << endl;\r
-       cerr << "Total number of ref seqs: " << references.size() << endl;\r
-       \r
-       // ---------------------------------------------------------------------------- //\r
-       // Get the names/lengths of all the reference sequences that have alignments\r
-       // ---------------------------------------------------------------------------- //\r
-       \r
-       cerr << endl;\r
-       cerr << "All ref sequences with alignments:" << endl;\r
+       RefVector references = bReader.GetReferenceData();\r
+       cerr << "Printing references..." << endl;\r
+       RefVector::const_iterator refIter = references.begin();\r
+       RefVector::const_iterator refEnd  = references.end();\r
+       for ( ; refIter != refEnd; ++refIter) {\r
+               cerr << "Reference entry: " << endl;\r
+               cerr << (*refIter).RefName << endl;\r
+               cerr << (*refIter).RefLength << endl;\r
+               cerr << "Has alignments? : " << ( ((*refIter).RefHasAlignments) ? "yes" : "no" ) << endl;\r
+       }\r
        cerr << endl;\r
 \r
+       alignmentsRead = 0;\r
+       while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {\r
+               cerr << "Alignment " << alignmentsRead << endl;\r
+               cerr << bAlignment.Name << endl;\r
+               cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;\r
+               ++alignmentsRead;\r
+       }\r
 \r
-       if ( !references.empty() ) {\r
-               \r
-               RefVector::iterator refIter = references.begin();\r
-               RefVector::iterator refEnd  = references.end();\r
-               \r
-               refID = 0;\r
-               // iterate over reference names, print to STDERR\r
-               for( ; refIter != refEnd; ++refIter) {\r
-\r
-                       if ( (*refIter).RefHasAlignments ) {\r
-                               cerr << "ID:     " << refID << endl;\r
-                               cerr << "Name:   " << (*refIter).RefName << endl;\r
-                               cerr << "Length: " << (*refIter).RefLength << endl;\r
-                               cerr << endl;\r
+       cerr << "Jumping in BAM file" << endl;\r
+       if ( bReader.Jump(1, 5000) ) {\r
+               cerr << "Jumping seems ok - getting first 10 alignments that start at or after chr2:5000" << endl;\r
+\r
+               alignmentsRead = 0;\r
+               while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {\r
+                       if ( bAlignment.Position >= 5000 ) { \r
+                               cerr << "Alignment " << alignmentsRead << endl;\r
+                               cerr << bAlignment.Name << endl;\r
+                               cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;\r
+                               ++alignmentsRead;\r
                        }\r
-\r
-                       ++refID;\r
                }\r
        }\r
 \r
-       // --------------------------------------------------------------------------------------------- //\r
-       // Get the SAM-style header text, if available (not available in the example file shown here)\r
-       // --------------------------------------------------------------------------------------------- //     \r
-       \r
-       cerr << "------------------------------------" << endl;\r
-       cerr << "SAM header text: " << endl;\r
-       \r
-       // get (SAM) header text\r
-       string header = bReader.GetHeaderText();\r
-       \r
-       cerr << ( (header.empty()) ? "no header data" : header ) << endl;\r
-       cerr << "------------------------------------" << endl;\r
-\r
-       // --------------------------------------------------------------------------------------------- //\r
-       // Here we start accessing alignments\r
-       // The first method shows how to iterate over all reads in a BamFile\r
-       // This method will work on any BAM file (sorted/non-sorted, with/without an index)\r
-       // --------------------------------------------------------------------------------------------- //\r
-\r
-       // Call Rewind() to make sure you're at the 1st alignment in the file\r
-       // Please note, however, it's not necessary in this case, since file pointer initially set to 1st alignment\r
-       // but this is probably a good habit to develop to ensure correctness and make your intent obvious in the code\r
-\r
-       cerr << "Getting (up to) first 1000 alignments" << endl;\r
-\r
-       // start at 1st alignment\r
+       cerr << "Rewinding BAM file" << endl;\r
        if ( bReader.Rewind() ) {\r
+               cerr << "Rewind seems to be ok - getting first 10 alignments" << endl;\r
                \r
-               alignmentCount = 0;\r
-               while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {\r
-               \r
-                       // disregard unmapped alignments\r
-                       if ( bAlignment.IsMapped() ) {\r
-\r
-                               ++alignmentCount;\r
-                               \r
-                               cout << "----------------------------" << endl;\r
-                               cout << "Alignment " << alignmentCount << endl;\r
-                               cout << bAlignment.Name << endl;\r
-                               cout << bAlignment.AlignedBases << endl;\r
-                               cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
-                               \r
-                               cout << "Cigar Data: " << endl;\r
-\r
-                               vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();\r
-                               vector<CigarOp>::const_iterator cigarEnd  = bAlignment.CigarData.end();\r
-                               for ( ; cigarIter != cigarEnd; ++cigarIter ) {\r
-                                       cout << "Type: " << (*cigarIter).Type << "\tLength: " << (*cigarIter).Length << endl;\r
-                               }\r
-\r
-                               if(!bAlignment.TagData.empty()) {\r
-                                 cout << "Tag data is present." << endl;\r
-                                 string readGroup;\r
-                                 if(bAlignment.GetReadGroup(readGroup)) {\r
-                                   cout << "- read group: " << readGroup << endl;\r
-                                 }\r
-                               }\r
-                       }\r
+               alignmentsRead = 0;\r
+               while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {\r
+                       cerr << "Alignment " << alignmentsRead << endl;\r
+                       cerr << bAlignment.Name << endl;\r
+                       cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;\r
+                       ++alignmentsRead;\r
                }\r
-\r
-               cerr << "Found " << alignmentCount << " alignments." << endl;\r
-       } else { cerr << "Could not rewind" << endl; }\r
-\r
-       // ---------------------------------------------------------------------------------------------------------- //\r
-       // You can iterate over each individual alignment that overlaps a specified region\r
-       // Set the refID & left boundary parameters using Jump(refID, left)\r
-       // Jump() actually positions the file pointer actually before the left boundary\r
-       // This ensures that reads beginning before, but overlapping 'left' are included as well \r
-       // Client is responsible for setting/checking right boundary - see below\r
-       // ---------------------------------------------------------------------------------------------------------- //\r
-/*\r
-       cerr << "Jumping to region" << endl;\r
-\r
-       // get refID using a known refName\r
-       refName = "seq1";\r
-       refID = bReader.GetRefID(refName);\r
-       if (refID == (int)references.size()) { \r
-               cerr << "Reference " << refName << " not found." << endl;\r
-               exit(1);\r
        }\r
 \r
-       // set left boundary\r
-       unsigned int leftBound = 500;\r
-\r
-       // set right boundary - either user-specified number to set a discrete region\r
-       //                      OR you can query the BamReader for the end of the reference\r
-       unsigned int rightBound = references.at(refID).RefLength;\r
-\r
-       cerr << endl;\r
-       cerr << "Iterating over alignments on reference: " << refName << " from " << leftBound << " to ref end (" << rightBound << ")" << endl;\r
-\r
-       // set region - specific region on reference\r
-       if ( bReader.Jump(refID, leftBound) ) { \r
-\r
-               alignmentCount = 0;\r
-               while ( bReader.GetNextAlignment(bAlignment) && (bAlignment.Position <= rightBound) ) {\r
-               \r
-                       if ( bAlignment.IsMapped() ) {\r
-\r
-                               ++alignmentCount;\r
-                       \r
-                               if ( (alignmentCount % 100000) == 0) { cerr << "Retrieved " << alignmentCount << " so far..." << endl; }\r
-                       \r
-                               cout << "----------------------------" << endl;\r
-                               cout << "Alignment " << alignmentCount << endl;\r
-                               cout << bAlignment.Name << endl;\r
-                               cout << bAlignment.AlignedBases << endl;\r
-                               cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
-                       }\r
-               }\r
-\r
-               cerr << "Found " << alignmentCount << " alignments." << endl; \r
-       } else { cerr << "Could not jump to region specified" << endl; }\r
-\r
-       // ----------------------------------------------------------------------------------------------------- //\r
-       // You can 'rewind' back to beginning of BAM file at any time \r
-       // ----------------------------------------------------------------------------------------------------- //\r
-\r
-       cerr << endl;\r
-       cerr << "Rewinding BAM file... then getting first 1000 alignments" << endl;\r
-\r
-       alignmentCount = 0;\r
-       if (bReader.Rewind() ) {\r
-               while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {\r
-               \r
-                       // disregard unmapped alignments\r
-                       if ( bAlignment.IsMapped() ) {\r
-\r
-                               ++alignmentCount;\r
-                       \r
-                               cout << "----------------------------" << endl;\r
-                               cout << "Alignment " << alignmentCount << endl;\r
-                               cout << bAlignment.Name << endl;\r
-                               cout << bAlignment.AlignedBases << endl;\r
-                               cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
-                       }\r
-               }\r
-\r
-               cerr << "Found " << alignmentCount << " alignments." << endl;\r
-       } else { cerr << "Could not rewind" << endl; }\r
-*/\r
-       // ---------------------------------------------------------------------- //\r
-       // Close BamReader object (releases internal header/index data) and exit\r
-       // ---------------------------------------------------------------------- //\r
-       \r
-       cerr << endl;\r
-       cerr << "Closing BAM file: " << bamFilename << endl;\r
-       \r
+       cerr << "Closing BAM file..." << endl << endl;\r
        bReader.Close();\r
 \r
        cerr << "Exiting..." << endl << endl;\r