git.donarmstrong.com Git - bamtools.git/commitdiff
Added GetNextAlignmentCore() to BamReader API as well as a corresponding SaveAlignmen...
author Derek <derekwbarnett@gmail.com>
Wed, 9 Jun 2010 03:29:45 +0000 (23:29 -0400)
committer Derek <derekwbarnett@gmail.com>
Wed, 9 Jun 2010 03:29:45 +0000 (23:29 -0400)
BamAux.h
BamReader.cpp
BamReader.h
BamReader.o [new file with mode: 0644]
BamWriter.cpp
BamWriter.h
BamWriter.o [new file with mode: 0644]

index 3d14a46fd19549ca6fb4601f05fa99656f7a3d06..46592497888843f7e14295f62290ecad73fa620f 100644 (file)
--- a/BamAux.h
+++ b/BamAux.h
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College\r
 // All rights reserved.\r
 // ---------------------------------------------------------------------------\r
-// Last modified: 14 April 2010 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
 // ---------------------------------------------------------------------------\r
 // Provides the basic constants, data structures, etc. for using BAM files\r
 // ***************************************************************************\r
@@ -154,9 +154,36 @@ struct BamAlignment {
 // ----------------------------------------------------------------\r
 // Auxiliary data structs & typedefs\r
 \r
+// Raw, still-unparsed part of one alignment record together with the lengths\r
+// that describe it, so the record can be written back out without decoding\r
+// its character data.\r
+struct BamAlignmentSupportData {\r
+\r
+    std::string AllCharData;          // raw character data stored after the record's core\r
+    uint32_t    BlockLength;          // record block size (core plus AllCharData)\r
+    uint32_t    NumCigarOperations;   // CIGAR operation count, packed beside the alignment flag\r
+    uint32_t    QueryNameLength;      // query name length, packed beside bin and map quality\r
+    uint32_t    QuerySequenceLength;  // query sequence length, a core field of its own\r
+\r
+    // every length starts at zero; AllCharData default-constructs to an empty string\r
+    BamAlignmentSupportData(void)\r
+        : BlockLength(0), NumCigarOperations(0)\r
+        , QueryNameLength(0), QuerySequenceLength(0)\r
+    { }\r
+};\r
+\r
 struct CigarOp {\r
+  \r
+    // data members\r
     char     Type;   // Operation type (MIDNSHP)\r
     uint32_t Length; // Operation length (number of bases)\r
+    \r
+    // constructor\r
+    CigarOp(const char type = '\0', \r
+            const uint32_t length = 0) \r
+        : Type(type)\r
+        , Length(length) \r
+    { }\r
 };\r
 \r
 struct RefData {\r
index 7213b237d753f18399e5f1379800492e0f943b7d..53c32e9e1db6bf144a6c8ba6edec0a7ab2cc9d40 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College\r
 // All rights reserved.\r
 // ---------------------------------------------------------------------------\r
-// Last modified: 14 April 2010 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
 // ---------------------------------------------------------------------------\r
 // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
 // Institute.\r
 using namespace BamTools;\r
 using namespace std;\r
 \r
-namespace BamTools {\r
-  struct BamAlignmentSupportData {\r
-      string   AllCharData;\r
-      uint32_t BlockLength;\r
-      uint32_t NumCigarOperations;\r
-      uint32_t QueryNameLength;\r
-      uint32_t QuerySequenceLength;\r
-  };\r
-} // namespace BamTools\r
-\r
 struct BamReader::BamReaderPrivate {\r
 \r
     // -------------------------------\r
@@ -79,6 +69,7 @@ struct BamReader::BamReaderPrivate {
 \r
     // access alignment data\r
     bool GetNextAlignment(BamAlignment& bAlignment);\r
+    bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
 \r
     // access auxiliary data\r
     int GetReferenceID(const string& refName) const;\r
@@ -148,6 +139,7 @@ bool BamReader::Rewind(void) { return d->Rewind(); }
 \r
 // access alignment data\r
 bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }\r
+bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { return d->GetNextAlignmentCore(bAlignment, supportData); } // forwards to the private implementation and returns its success/fail result\r
 \r
 // access auxiliary data\r
 const string BamReader::GetHeaderText(void) const { return d->HeaderText; }\r
@@ -526,7 +518,7 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
         // load next alignment until region overlap is found\r
         while ( !IsOverlap(bAlignment) ) {\r
             // if no valid alignment available (likely EOF) return failure\r
-            if ( !LoadNextAlignment(bAlignment, supportData) ) { return false; }\r
+            if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
         }\r
 \r
         // return success (alignment found that overlaps region)\r
@@ -535,7 +527,35 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
     }\r
 \r
     // no valid alignment\r
-    else { return false; }\r
+    else \r
+        return false;\r
+}\r
+\r
+// loads the core data of the next available alignment (returns success/fail)\r
+// ** character data (bases, qualities, tag data) is NOT parsed here;\r
+//    callers that need it can reach it through supportData\r
+// intended for work that needs ONLY positional or other alignment-related information\r
+bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {\r
+\r
+    // nothing could be loaded (likely EOF): report failure\r
+    const bool gotFirst = LoadNextAlignment(bAlignment, supportData);\r
+    if ( !gotFirst ) return false;\r
+\r
+    // with no region set, the alignment just loaded is the answer\r
+    if ( !IsRegionSpecified ) return true;\r
+\r
+    // a region is set: keep loading until an alignment overlaps it\r
+    for ( ; ; ) {\r
+\r
+        // overlap found, stop here and report success\r
+        if ( IsOverlap(bAlignment) ) break;\r
+\r
+        // ran out of alignments before any overlap was seen\r
+        if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
+    }\r
+\r
+    // bAlignment now holds an alignment that overlaps the region\r
+    return true;\r
 }\r
 \r
 // calculate closest indexed file offset for region specified\r
index fe28abcc6e07c1799052fec014d00df126132295..88cc74a25d3cfd04ab90e7957dbfe808b313121e 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College\r
 // All rights reserved.\r
 // ---------------------------------------------------------------------------\r
-// Last modified: 30 March 2010 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
 // ---------------------------------------------------------------------------\r
 // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
 // Institute.\r
@@ -51,6 +51,11 @@ class BamReader {
 \r
         // retrieves next available alignment (returns success/fail)\r
         bool GetNextAlignment(BamAlignment& bAlignment);\r
+        // retrieves next available alignment core data (returns success/fail)\r
+        // ** DOES NOT parse any character data (bases, qualities, tag data)\r
+        //    these can be accessed, if necessary, from the supportData \r
+        // useful for operations requiring ONLY positional or other alignment-related information\r
+        bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
 \r
         // ----------------------\r
         // access auxiliary data\r
diff --git a/BamReader.o b/BamReader.o
new file mode 100644 (file)
index 0000000..44fc7d9
Binary files /dev/null and b/BamReader.o differ
index 2cd2742dce5c5acc9a679927cb06af5e88bdeb63..9d18fae6ba616b1d243913e7e07c4a55c70a88b4 100644 (file)
@@ -35,12 +35,13 @@ struct BamWriter::BamWriterPrivate {
 \r
     // "public" interface\r
     void Close(void);\r
-    void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);\r
-    void SaveAlignment(const BamTools::BamAlignment& al);\r
+    void Open(const string& filename, const string& samHeader, const RefVector& referenceSequences);\r
+    void SaveAlignment(const BamAlignment& al);\r
+    void SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData);\r
 \r
     // internal methods\r
-    void CreatePackedCigar(const std::vector<CigarOp>& cigarOperations, std::string& packedCigar);\r
-    void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);\r
+    void CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar);\r
+    void EncodeQuerySequence(const string& query, string& encodedQuery);\r
 };\r
 \r
 // -----------------------------------------------------\r
@@ -59,8 +60,8 @@ BamWriter::~BamWriter(void) {
 }\r
 \r
 // closes the alignment archive\r
-void BamWriter::Close(void) {\r
-    d->Close();\r
+void BamWriter::Close(void) { \r
+  d->Close(); \r
 }\r
 \r
 // opens the alignment archive\r
@@ -69,10 +70,14 @@ void BamWriter::Open(const string& filename, const string& samHeader, const RefV
 }\r
 \r
 // saves the alignment to the alignment archive\r
-void BamWriter::SaveAlignment(const BamAlignment& al) {\r
+void BamWriter::SaveAlignment(const BamAlignment& al) { \r
     d->SaveAlignment(al);\r
 }\r
 \r
+// saves the (partial) alignment, using support data, to the alignment archive\r
+void BamWriter::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData)\r
+{ d->SaveAlignment(al, supportData); }\r
+\r
 // -----------------------------------------------------\r
 // BamWriterPrivate implementation\r
 // -----------------------------------------------------\r
@@ -380,3 +385,34 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
         mBGZF.Write(al.TagData.data(), tagDataLength);\r
     }\r
 }\r
+\r
+// writes al's core fields followed by supportData's raw char data; writes nothing if supportData is inconsistent\r
+void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) {\r
+    // reject a block shorter than the core (the unsigned subtraction would wrap) or char data shorter than the block claims\r
+    if ( supportData.BlockLength < BAM_CORE_SIZE ) { return; }\r
+    const uint32_t charDataLength = supportData.BlockLength - BAM_CORE_SIZE;\r
+    if ( supportData.AllCharData.size() < charDataLength ) { return; }\r
+\r
+    // assign the BAM core data\r
+    uint32_t buffer[8];\r
+    buffer[0] = al.RefID;\r
+    buffer[1] = al.Position;\r
+    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | supportData.QueryNameLength;\r
+    buffer[3] = (al.AlignmentFlag << 16) | supportData.NumCigarOperations;\r
+    buffer[4] = supportData.QuerySequenceLength;\r
+    buffer[5] = al.MateRefID;\r
+    buffer[6] = al.MatePosition;\r
+    buffer[7] = al.InsertSize;\r
+\r
+    // byte-swap the block size and the core when IsBigEndian is set, then write both\r
+    unsigned int blockSize = supportData.BlockLength;\r
+    if ( IsBigEndian ) {\r
+        SwapEndian_32(blockSize);\r
+        for ( int i = 0; i < 8; ++i ) { SwapEndian_32(buffer[i]); }\r
+    }\r
+    mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);\r
+    mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);\r
+    // write the raw char data (length validated above)\r
+    mBGZF.Write(supportData.AllCharData.data(), charDataLength);\r
+}\r
+\r
index 14de8b557c130ea5c5da567cca9482093557df27..31c7d61b8a3fe259ed353d54014cc3dc068a9e1e 100644 (file)
@@ -1,5 +1,5 @@
 // ***************************************************************************\r
-// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett\r
+// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett\r
 // Marth Lab, Department of Biology, Boston College\r
 // All rights reserved.\r
 // ---------------------------------------------------------------------------\r
@@ -37,6 +37,8 @@ class BamWriter {
         void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);\r
         // saves the alignment to the alignment archive\r
         void SaveAlignment(const BamTools::BamAlignment& al);\r
+        // saves the (partial) alignment, using support data, to the alignment archive\r
+        void SaveAlignment(const BamTools::BamAlignment& al, const BamTools::BamAlignmentSupportData& supportData);\r
 \r
     // private implementation\r
     private:\r
diff --git a/BamWriter.o b/BamWriter.o
new file mode 100644 (file)
index 0000000..2cefcea
Binary files /dev/null and b/BamWriter.o differ