From: Derek Date: Wed, 9 Jun 2010 03:29:45 +0000 (-0400) Subject: Added GetNextAlignmentCore() to BamReader API as well as a corresponding SaveAlignmen... X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=c8518cde9e4def44b657b34eee23096bb7bdcb00;p=bamtools.git Added GetNextAlignmentCore() to BamReader API as well as a corresponding SaveAlignment() in BamWriter. Both utilize the BamAlignmentSupportData structure which contains the raw character data and lengths, and which has been moved to BamAux.h. Exposing these methods should allow for quicker reads/writes for tools that are only concerned with alignment/positional data, not the actual sequences. --- diff --git a/BamAux.h b/BamAux.h index 3d14a46..4659249 100644 --- a/BamAux.h +++ b/BamAux.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 14 April 2010 (DB) +// Last modified: 8 June 2010 (DB) // --------------------------------------------------------------------------- // Provides the basic constants, data structures, etc. 
for using BAM files // *************************************************************************** @@ -154,9 +154,36 @@ struct BamAlignment { // ---------------------------------------------------------------- // Auxiliary data structs & typedefs +struct BamAlignmentSupportData { + + // data members + std::string AllCharData; + uint32_t BlockLength; + uint32_t NumCigarOperations; + uint32_t QueryNameLength; + uint32_t QuerySequenceLength; + + // constructor + BamAlignmentSupportData(void) + : BlockLength(0) + , NumCigarOperations(0) + , QueryNameLength(0) + , QuerySequenceLength(0) + { } +}; + struct CigarOp { + + // data members char Type; // Operation type (MIDNSHP) uint32_t Length; // Operation length (number of bases) + + // constructor + CigarOp(const char type = '\0', + const uint32_t length = 0) + : Type(type) + , Length(length) + { } }; struct RefData { diff --git a/BamReader.cpp b/BamReader.cpp index 7213b23..53c32e9 100644 --- a/BamReader.cpp +++ b/BamReader.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 14 April 2010 (DB) +// Last modified: 8 June 2010 (DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. 
@@ -23,16 +23,6 @@ using namespace BamTools; using namespace std; -namespace BamTools { - struct BamAlignmentSupportData { - string AllCharData; - uint32_t BlockLength; - uint32_t NumCigarOperations; - uint32_t QueryNameLength; - uint32_t QuerySequenceLength; - }; -} // namespace BamTools - struct BamReader::BamReaderPrivate { // ------------------------------- @@ -79,6 +69,7 @@ struct BamReader::BamReaderPrivate { // access alignment data bool GetNextAlignment(BamAlignment& bAlignment); + bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData); // access auxiliary data int GetReferenceID(const string& refName) const; @@ -148,6 +139,7 @@ bool BamReader::Rewind(void) { return d->Rewind(); } // access alignment data bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); } +bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { return d->GetNextAlignmentCore(bAlignment, supportData); } // access auxiliary data const string BamReader::GetHeaderText(void) const { return d->HeaderText; } @@ -526,7 +518,7 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) { // load next alignment until region overlap is found while ( !IsOverlap(bAlignment) ) { // if no valid alignment available (likely EOF) return failure - if ( !LoadNextAlignment(bAlignment, supportData) ) { return false; } + if ( !LoadNextAlignment(bAlignment, supportData) ) return false; } // return success (alignment found that overlaps region) @@ -535,7 +527,35 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) { } // no valid alignment - else { return false; } + else + return false; +} + +// retrieves next available alignment core data (returns success/fail) +// ** DOES NOT parse any character data (bases, qualities, tag data) +// these can be accessed, if necessary, from the supportData +// useful for operations requiring ONLY 
positional or other alignment-related information +bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { + + // if valid alignment available + if ( LoadNextAlignment(bAlignment, supportData) ) { + + // if region not specified, return success + if ( !IsRegionSpecified ) return true; + + // load next alignment until region overlap is found + while ( !IsOverlap(bAlignment) ) { + // if no valid alignment available (likely EOF) return failure + if ( !LoadNextAlignment(bAlignment, supportData) ) return false; + } + + // return success (alignment found that overlaps region) + return true; + } + + // no valid alignment + else + return false; } // calculate closest indexed file offset for region specified diff --git a/BamReader.h b/BamReader.h index fe28abc..88cc74a 100644 --- a/BamReader.h +++ b/BamReader.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 30 March 2010 (DB) +// Last modified: 8 June 2010 (DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. 
@@ -51,6 +51,11 @@ class BamReader { // retrieves next available alignment (returns success/fail) bool GetNextAlignment(BamAlignment& bAlignment); + // retrieves next available alignment core data (returns success/fail) + // ** DOES NOT parse any character data (bases, qualities, tag data) + // these can be accessed, if necessary, from the supportData + // useful for operations requiring ONLY positional or other alignment-related information + bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData); // ---------------------- // access auxiliary data diff --git a/BamReader.o b/BamReader.o new file mode 100644 index 0000000..44fc7d9 Binary files /dev/null and b/BamReader.o differ diff --git a/BamWriter.cpp b/BamWriter.cpp index 2cd2742..9d18fae 100644 --- a/BamWriter.cpp +++ b/BamWriter.cpp @@ -35,12 +35,13 @@ struct BamWriter::BamWriterPrivate { // "public" interface void Close(void); - void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences); - void SaveAlignment(const BamTools::BamAlignment& al); + void Open(const string& filename, const string& samHeader, const RefVector& referenceSequences); + void SaveAlignment(const BamAlignment& al); + void SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData); // internal methods - void CreatePackedCigar(const std::vector& cigarOperations, std::string& packedCigar); - void EncodeQuerySequence(const std::string& query, std::string& encodedQuery); + void CreatePackedCigar(const vector& cigarOperations, string& packedCigar); + void EncodeQuerySequence(const string& query, string& encodedQuery); }; // ----------------------------------------------------- @@ -59,8 +60,8 @@ BamWriter::~BamWriter(void) { } // closes the alignment archive -void BamWriter::Close(void) { - d->Close(); +void BamWriter::Close(void) { + d->Close(); } // opens the alignment archive @@ -69,10 +70,14 @@ void BamWriter::Open(const string& 
filename, const string& samHeader, const RefV } // saves the alignment to the alignment archive -void BamWriter::SaveAlignment(const BamAlignment& al) { +void BamWriter::SaveAlignment(const BamAlignment& al) { d->SaveAlignment(al); } +void BamWriter::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) { + d->SaveAlignment(al, supportData); +} + // ----------------------------------------------------- // BamWriterPrivate implementation // ----------------------------------------------------- @@ -380,3 +385,34 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) { mBGZF.Write(al.TagData.data(), tagDataLength); } } + +void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) { + + // assign the BAM core data + uint32_t buffer[8]; + buffer[0] = al.RefID; + buffer[1] = al.Position; + buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | supportData.QueryNameLength; + buffer[3] = (al.AlignmentFlag << 16) | supportData.NumCigarOperations; + buffer[4] = supportData.QuerySequenceLength; + buffer[5] = al.MateRefID; + buffer[6] = al.MatePosition; + buffer[7] = al.InsertSize; + + // write the block size + unsigned int blockSize = supportData.BlockLength; + if ( IsBigEndian ) { SwapEndian_32(blockSize); } + mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT); + + // write the BAM core + if ( IsBigEndian ) { + for ( int i = 0; i < 8; ++i ) { + SwapEndian_32(buffer[i]); + } + } + mBGZF.Write((char*)&buffer, BAM_CORE_SIZE); + + // write the raw char data + mBGZF.Write((char*)supportData.AllCharData.data(), supportData.BlockLength-BAM_CORE_SIZE); +} + diff --git a/BamWriter.h b/BamWriter.h index 14de8b5..31c7d61 100644 --- a/BamWriter.h +++ b/BamWriter.h @@ -1,5 +1,5 @@ // *************************************************************************** -// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett +// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett // Marth Lab, 
Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- @@ -37,6 +37,8 @@ class BamWriter { void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences); // saves the alignment to the alignment archive void SaveAlignment(const BamTools::BamAlignment& al); + // saves the (partial) alignment, using support data, to the alignment archive + void SaveAlignment(const BamTools::BamAlignment& al, const BamTools::BamAlignmentSupportData& supportData); // private implementation private: diff --git a/BamWriter.o b/BamWriter.o new file mode 100644 index 0000000..2cefcea Binary files /dev/null and b/BamWriter.o differ