// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 14 April 2010 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides the basic constants, data structures, etc. for using BAM files\r
// ***************************************************************************\r
// ----------------------------------------------------------------\r
// Auxiliary data structs & typedefs\r
\r
+struct BamAlignmentSupportData {\r
+ \r
+ // data members: unparsed character data plus the lengths needed to write the record back out\r
+ std::string AllCharData;\r
+ uint32_t BlockLength;\r
+ uint32_t NumCigarOperations;\r
+ uint32_t QueryNameLength;\r
+ uint32_t QuerySequenceLength;\r
+ \r
+ // constructor: zeroes every length field; AllCharData is left default-constructed\r
+ BamAlignmentSupportData(void)\r
+ : BlockLength(0)\r
+ , NumCigarOperations(0)\r
+ , QueryNameLength(0)\r
+ , QuerySequenceLength(0)\r
+ { }\r
+};\r
+\r
struct CigarOp {\r
+ \r
+ // data members: a single CIGAR operation\r
char Type; // Operation type, one of the characters M, I, D, N, S, H, P\r
uint32_t Length; // Operation length (number of bases)\r
+ \r
+ // constructor: both arguments are optional, defaulting to a '\0' type and a length of zero\r
+ CigarOp(const char type = '\0', \r
+ const uint32_t length = 0) \r
+ : Type(type)\r
+ , Length(length) \r
+ { }\r
};\r
\r
struct RefData {\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 14 April 2010 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines that were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
using namespace BamTools;\r
using namespace std;\r
\r
-namespace BamTools {\r
- struct BamAlignmentSupportData {\r
- string AllCharData;\r
- uint32_t BlockLength;\r
- uint32_t NumCigarOperations;\r
- uint32_t QueryNameLength;\r
- uint32_t QuerySequenceLength;\r
- };\r
-} // namespace BamTools\r
-\r
struct BamReader::BamReaderPrivate {\r
\r
// -------------------------------\r
\r
// access alignment data\r
bool GetNextAlignment(BamAlignment& bAlignment);\r
+ bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
\r
// access auxiliary data\r
int GetReferenceID(const string& refName) const;\r
\r
// access alignment data (public wrappers; each forwards its arguments to the private implementation through d)\r
bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }\r
+bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { return d->GetNextAlignmentCore(bAlignment, supportData); }\r
\r
// access auxiliary data\r
const string BamReader::GetHeaderText(void) const { return d->HeaderText; }\r
// load next alignment until region overlap is found\r
while ( !IsOverlap(bAlignment) ) {\r
// if no valid alignment available (likely EOF) return failure\r
- if ( !LoadNextAlignment(bAlignment, supportData) ) { return false; }\r
+ if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
}\r
\r
// return success (alignment found that overlaps region)\r
}\r
\r
// no valid alignment\r
- else { return false; }\r
+ else \r
+ return false;\r
+}\r
+\r
+// retrieves the core data of the next available alignment (returns true on success, false on failure)\r
+// ** DOES NOT parse any character data (bases, qualities, tag data);\r
+// that data can still be accessed, if necessary, through supportData\r
+// useful for operations requiring ONLY positional or other alignment-related information\r
+bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {\r
+\r
+ // try to load one alignment; continue only if that succeeds\r
+ if ( LoadNextAlignment(bAlignment, supportData) ) {\r
+\r
+ // no region specified: the alignment just loaded is the result\r
+ if ( !IsRegionSpecified ) return true;\r
+\r
+ // region specified: keep loading alignments until one passes the IsOverlap test\r
+ while ( !IsOverlap(bAlignment) ) {\r
+ // a load failed (likely EOF) before any overlapping alignment was found\r
+ if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
+ }\r
+\r
+ // success: bAlignment holds an alignment that overlaps the region\r
+ return true;\r
+ }\r
+\r
+ // the initial load failed: no valid alignment\r
+ else\r
+ return false;\r
}\r
\r
// calculate closest indexed file offset for region specified\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 30 March 2010 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines that were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
\r
// retrieves next available alignment (returns success/fail)\r
bool GetNextAlignment(BamAlignment& bAlignment);\r
+ // retrieves next available alignment core data (returns success/fail)\r
+ // ** DOES NOT parse any character data (bases, qualities, tag data)\r
+ // these can be accessed, if necessary, from the supportData \r
+ // useful for operations requiring ONLY positional or other alignment-related information\r
+ bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
\r
// ----------------------\r
// access auxiliary data\r
\r
// "public" interface\r
void Close(void);\r
- void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);\r
- void SaveAlignment(const BamTools::BamAlignment& al);\r
+ void Open(const string& filename, const string& samHeader, const RefVector& referenceSequences);\r
+ void SaveAlignment(const BamAlignment& al);\r
+ void SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData);\r
\r
// internal methods\r
- void CreatePackedCigar(const std::vector<CigarOp>& cigarOperations, std::string& packedCigar);\r
- void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);\r
+ void CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar);\r
+ void EncodeQuerySequence(const string& query, string& encodedQuery);\r
};\r
\r
// -----------------------------------------------------\r
}\r
\r
// closes the alignment archive (forwards to the private implementation through d)\r
-void BamWriter::Close(void) {\r
- d->Close();\r
+void BamWriter::Close(void) { \r
+ d->Close(); \r
}\r
\r
// opens the alignment archive\r
}\r
\r
// saves the alignment to the alignment archive (both overloads forward to the private implementation through d)\r
-void BamWriter::SaveAlignment(const BamAlignment& al) {\r
+void BamWriter::SaveAlignment(const BamAlignment& al) { \r
d->SaveAlignment(al);\r
}\r
\r
+void BamWriter::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) {\r
+ d->SaveAlignment(al, supportData);\r
+}\r
+\r
// -----------------------------------------------------\r
// BamWriterPrivate implementation\r
// -----------------------------------------------------\r
mBGZF.Write(al.TagData.data(), tagDataLength);\r
}\r
}\r
+\r
+void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) {\r
+ \r
+ // assign the BAM core data: eight 32-bit words; the name length, cigar-op count and sequence length come from supportData\r
+ uint32_t buffer[8];\r
+ buffer[0] = al.RefID;\r
+ buffer[1] = al.Position;\r
+ buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | supportData.QueryNameLength;\r
+ buffer[3] = (al.AlignmentFlag << 16) | supportData.NumCigarOperations;\r
+ buffer[4] = supportData.QuerySequenceLength;\r
+ buffer[5] = al.MateRefID;\r
+ buffer[6] = al.MatePosition;\r
+ buffer[7] = al.InsertSize;\r
+\r
+ // write the block size, taken as-is from supportData.BlockLength (byte-swapped first when IsBigEndian is set)\r
+ unsigned int blockSize = supportData.BlockLength;\r
+ if ( IsBigEndian ) { SwapEndian_32(blockSize); }\r
+ mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);\r
+\r
+ // write the BAM core (all eight words byte-swapped first when IsBigEndian is set)\r
+ if ( IsBigEndian ) { \r
+ for ( int i = 0; i < 8; ++i ) { \r
+ SwapEndian_32(buffer[i]); \r
+ } \r
+ }\r
+ mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);\r
+\r
+ // write the raw char data straight from AllCharData, unparsed; NOTE(review): assumes BlockLength >= BAM_CORE_SIZE, which is not checked here\r
+ mBGZF.Write((char*)supportData.AllCharData.data(), supportData.BlockLength-BAM_CORE_SIZE);\r
+}\r
+\r
// ***************************************************************************\r
-// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett\r
+// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);\r
// saves the alignment to the alignment archive\r
void SaveAlignment(const BamTools::BamAlignment& al);\r
+ // saves the (partial) alignment, using support data, to the alignment archive\r
+ void SaveAlignment(const BamTools::BamAlignment& al, const BamTools::BamAlignmentSupportData& supportData);\r
\r
// private implementation\r
private:\r