Removed STDERR pollution by API

author derek <derekwbarnett@gmail.com>

Fri, 7 Oct 2011 19:12:57 +0000 (15:12 -0400)

committer derek <derekwbarnett@gmail.com>

Fri, 7 Oct 2011 19:12:57 +0000 (15:12 -0400)
author derek <derekwbarnett@gmail.com>
Fri, 7 Oct 2011 19:12:57 +0000 (15:12 -0400)
committer derek <derekwbarnett@gmail.com>
Fri, 7 Oct 2011 19:12:57 +0000 (15:12 -0400)
diff --git a/src/api/BamAlignment.cpp b/src/api/BamAlignment.cpp

index 980303ae6050f116b4e9197de9d10d5e259af9ec..0eff5c72e46fba52c137c063989a66dd90e1d52c 100644 (file)
--- a/src/api/BamAlignment.cpp
+++ b/src/api/BamAlignment.cpp
@@ -2,7 +2,7 @@
  // BamAlignment.cpp (c) 2009 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 4 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the BamAlignment data structure
  // ***************************************************************************
@@ -10,15 +10,6 @@
  #include <api/BamAlignment.h>
  #include <api/BamConstants.h>
  using namespace BamTools;
-
-#include <cctype>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <exception>
-#include <iostream>
-#include <map>
-#include <utility>
  using namespace std;
  
  /*! \class BamTools::BamAlignment
@@ -162,7 +153,7 @@ bool BamAlignment::BuildCharData(void) {
  
      // calculate character lengths/offsets
      const unsigned int dataLength     = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
-    const unsigned int seqDataOffset  = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
+    const unsigned int seqDataOffset  = SupportData.QueryNameLength + (SupportData.NumCigarOperations*4);
      const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
      const unsigned int tagDataOffset  = qualDataOffset + SupportData.QuerySequenceLength;
      const unsigned int tagDataLength  = dataLength - tagDataOffset;
@@ -185,8 +176,8 @@ bool BamAlignment::BuildCharData(void) {
      QueryBases.clear();
      if ( hasSeqData ) {
          QueryBases.reserve(SupportData.QuerySequenceLength);
-        for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
-            char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+        for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
+            const char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
              QueryBases.append(1, singleBase);
          }
      }
@@ -195,8 +186,8 @@ bool BamAlignment::BuildCharData(void) {
      Qualities.clear();
      if ( hasQualData ) {
          Qualities.reserve(SupportData.QuerySequenceLength);
-        for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
-            char singleQuality = (char)(qualData[i]+33);
+        for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
+            const char singleQuality = static_cast<const char>(qualData[i]+33);
              Qualities.append(1, singleQuality);
          }
      }
@@ -218,7 +209,7 @@ bool BamAlignment::BuildCharData(void) {
          for ( ; cigarIter != cigarEnd; ++cigarIter ) {
              const CigarOp& op = (*cigarIter);
  
-            switch (op.Type) {
+            switch ( op.Type ) {
  
                  // for 'M', 'I', '=', 'X' - write bases
                  case (Constants::BAM_CIGAR_MATCH_CHAR)    :
@@ -253,11 +244,11 @@ bool BamAlignment::BuildCharData(void) {
                  case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
                      break;
  
-                // shouldn't get here
+                // invalid CIGAR op-code
                  default:
-                    cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
-                         << op.Type << endl;
-                    exit(1);
+                    const string message = string("invalid CIGAR operation type: ") + op.Type;
+                    SetErrorString("BamAlignment::BuildCharData", message);
+                    return false;
              }
          }
      }
@@ -266,8 +257,8 @@ bool BamAlignment::BuildCharData(void) {
      TagData.clear();
      if ( hasTagData ) {
          if ( IsBigEndian ) {
-            int i = 0;
-            while ( (unsigned int)i < tagDataLength ) {
+            size_t i = 0;
+            while ( i < tagDataLength ) {
  
                  i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
                  const char type = tagData[i];     // get tag type at position i
@@ -313,12 +304,12 @@ bool BamAlignment::BuildCharData(void) {
  
                          // swap endian-ness of number of elements in place, then retrieve for loop
                          BamTools::SwapEndian_32p(&tagData[i]);
-                        int32_t numElements;
+                        uint32_t numElements;
                          memcpy(&numElements, &tagData[i], sizeof(uint32_t));
                          i += sizeof(uint32_t);
  
                          // swap endian-ness of array elements
-                        for ( int j = 0; j < numElements; ++j ) {
+                        for ( size_t j = 0; j < numElements; ++j ) {
                              switch (arrayType) {
                                  case (Constants::BAM_TAG_TYPE_INT8)  :
                                  case (Constants::BAM_TAG_TYPE_UINT8) :
@@ -337,9 +328,8 @@ bool BamAlignment::BuildCharData(void) {
                                      i += sizeof(uint32_t);
                                      break;
                                  default:
-                                    // error case
-                                    cerr << "BamAlignment ERROR: unknown binary array type encountered: "
-                                         << arrayType << endl;
+                                    const string message = string("invalid binary array type: ") + arrayType;
+                                    SetErrorString("BamAlignment::BuildCharData", message);
                                      return false;
                              }
                          }
@@ -347,18 +337,18 @@ bool BamAlignment::BuildCharData(void) {
                          break;
                      }
  
-                    // shouldn't get here
+                    // invalid tag type-code
                      default :
-                        cerr << "BamAlignment ERROR: invalid tag value type: "
-                             << type << endl;
-                        exit(1);
+                        const string message = string("invalid tag type: ") + type;
+                        SetErrorString("BamAlignment::BuildCharData", message);
+                        return false;
                  }
              }
          }
  
          // store tagData in alignment
          TagData.resize(tagDataLength);
-        memcpy((char*)TagData.data(), tagData, tagDataLength);
+        memcpy((char*)(TagData.data()), tagData, tagDataLength);
      }
  
      // clear the core-only flag
@@ -414,7 +404,7 @@ bool BamAlignment::BuildCharData(void) {
  bool BamAlignment::FindTag(const std::string& tag,
                             char*& pTagData,
                             const unsigned int& tagDataLength,
-                           unsigned int& numBytesParsed)
+                           unsigned int& numBytesParsed) const
  {
  
      while ( numBytesParsed < tagDataLength ) {
@@ -468,6 +458,8 @@ bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
  */
  int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
  
+    // TODO: Come back to this for coordinate issues !!!
+
      // initialize alignment end to starting position
      int alignEnd = Position;
  
@@ -493,6 +485,18 @@ int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
      return alignEnd;
  }
  
+/*! \fn std::string BamAlignment::GetErrorString(void) const
+    \brief Returns a description of the last error that occurred
+
+    This method allows elimnation of STDERR pollution. Developers of client code
+    may choose how the messages are displayed to the user, if at all.
+
+    \return description of last error that occurred
+*/
+std::string BamAlignment::GetErrorString(void) const {
+    return ErrorString;
+}
+
  /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
      \brief Retrieves value of read group tag ("RG").
  
@@ -540,9 +544,17 @@ bool BamAlignment::GetReadGroup(std::string& readGroup) const {
  */
  bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
    
-    // make sure tag data exists
-    if ( SupportData.HasCoreOnly || TagData.empty() ) 
+    // skip if alignment is core-only
+    if ( SupportData.HasCoreOnly ) {
+        // TODO: set error string?
+        return false;
+    }
+
+    // skip if no tags present
+    if ( TagData.empty() ) {
+        // TODO: set error string?
          return false;
+    }
  
      // localize the tag data
      char* pTagData = (char*)TagData.data();
@@ -550,8 +562,10 @@ bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
      unsigned int numBytesParsed = 0;
      
      // if tag not found, return failure
-    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ){
+        // TODO: set error string?
          return false;
+    }
  
      // otherwise, retrieve & validate tag type code
      type = *(pTagData - 1);
@@ -571,8 +585,8 @@ bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
  
          // unknown tag type
          default:
-            cerr << "BamAlignment ERROR: unknown tag type encountered: "
-                 << type << endl;
+            const string message = string("invalid tag type: ") + type;
+            SetErrorString("BamAlignment::GetTagType", message);
              return false;
      }
  }
@@ -686,17 +700,15 @@ bool BamAlignment::IsSecondMate(void) const {
  
      \return \c true if both \a tag and \a type are correct sizes
  */
-bool BamAlignment::IsValidSize(const string& tag, const string& type) {
+bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const {
      return (tag.size()  == Constants::BAM_TAG_TAGSIZE) &&
             (type.size() == Constants::BAM_TAG_TYPESIZE);
  }
  
-/*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
+/*! \fn void BamAlignment::RemoveTag(const std::string& tag)
      \brief Removes field from BAM tags.
-
-    \return \c true if tag was removed successfully (or didn't exist before)
  */
-bool BamAlignment::RemoveTag(const std::string& tag) {
+void BamAlignment::RemoveTag(const std::string& tag) {
    
      // if char data not populated, do that first
      if ( SupportData.HasCoreOnly )
@@ -704,7 +716,7 @@ bool BamAlignment::RemoveTag(const std::string& tag) {
  
      // skip if no tags available
      if ( TagData.empty() )
-        return false;
+        return;
    
      // localize the tag data
      char* pOriginalTagData = (char*)TagData.data();
@@ -713,19 +725,19 @@ bool BamAlignment::RemoveTag(const std::string& tag) {
      unsigned int newTagDataLength = 0;
      unsigned int numBytesParsed = 0;
  
-    // if tag not found, simply return true
+    // skip if tag not found
      if  ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
-        return true;
+        return;
  
      // otherwise, remove it
-    char* newTagData = new char[originalTagDataLength];
+    RaiiBuffer newTagData(originalTagDataLength);
  
      // copy original tag data up til desired tag
      pTagData       -= 3;
      numBytesParsed -= 3;
      const unsigned int beginningTagDataLength = numBytesParsed;
      newTagDataLength += beginningTagDataLength;
-    memcpy(newTagData, pOriginalTagData, numBytesParsed);
+    memcpy(newTagData.Buffer, pOriginalTagData, numBytesParsed);
  
      // attemp to skip to next tag
      const char* pTagStorageType = pTagData + 2;
@@ -736,15 +748,21 @@ bool BamAlignment::RemoveTag(const std::string& tag) {
          // squeeze remaining tag data
          const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
          const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
-        memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
+        memcpy(newTagData.Buffer + beginningTagDataLength, pTagData, endTagDataLength );
  
          // save modified tag data in alignment
-        TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
+        TagData.assign(newTagData.Buffer, beginningTagDataLength + endTagDataLength);
      }
+}
  
-    // clean up & return success
-    delete[] newTagData;
-    return true;
+/*! \fn void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const
+    \internal
+
+    Sets a formatted error string for this alignment.
+*/
+void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const {
+    static const string SEPARATOR = ": ";
+    ErrorString = where + SEPARATOR + what;
  }
  
  /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
@@ -877,7 +895,7 @@ void BamAlignment::SetIsUnmapped(bool ok) {
  */
  bool BamAlignment::SkipToNextTag(const char storageType,
                                   char*& pTagData,
-                                 unsigned int& numBytesParsed)
+                                 unsigned int& numBytesParsed) const
  {
      switch (storageType) {
  
@@ -922,7 +940,7 @@ bool BamAlignment::SkipToNextTag(const char storageType,
  
              // read number of elements
              int32_t numElements;
-            memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
+            memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped, if needed
              numBytesParsed += sizeof(uint32_t);
              pTagData       += sizeof(uint32_t);
  
@@ -943,8 +961,8 @@ bool BamAlignment::SkipToNextTag(const char storageType,
                      bytesToSkip = numElements*sizeof(uint32_t);
                      break;
                  default:
-                    cerr << "BamAlignment ERROR: unknown binary array type encountered: "
-                         << arrayType << endl;
+                    const string message = string("invalid binary array type: ") + arrayType;
+                    SetErrorString("BamAlignment::SkipToNextTag", message);
                      return false;
              }
  
@@ -955,11 +973,11 @@ bool BamAlignment::SkipToNextTag(const char storageType,
          }
  
          default:
-            cerr << "BamAlignment ERROR: unknown tag type encountered"
-                 << storageType << endl;
+            const string message = string("invalid tag type: ") + storageType;
+            SetErrorString("BamAlignment::SkipToNextTag", message);
              return false;
      }
  
-    // return success
+    // if we get here, tag skipped OK - return success
      return true;
  }
diff --git a/src/api/BamAlignment.h b/src/api/BamAlignment.h

index 33175b5d10e5b5466da978ff11f0bfd714ea8ed5..b09649974dfd61c01c3d3704fdcf0dc2d111775f 100644 (file)
--- a/src/api/BamAlignment.h
+++ b/src/api/BamAlignment.h
@@ -2,7 +2,7 @@
  // BamAlignment.h (c) 2009 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 4 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the BamAlignment data structure
  // ***************************************************************************
@@ -13,11 +13,8 @@
  #include <api/api_global.h>
  #include <api/BamAux.h>
  #include <api/BamConstants.h>
-#include <cctype>
-#include <cstdio>
  #include <cstdlib>
  #include <cstring>
-#include <iostream>
  #include <string>
  #include <vector>
  
@@ -75,24 +72,16 @@ struct API_EXPORT BamAlignment {
      public:
  
          // add a new tag
-        template<typename T> bool AddTag(const std::string& tag,
-                                         const std::string& type,
-                                         const T& value);
-        template<typename T> bool AddTag(const std::string& tag,
-                                         const std::vector<T>& values);
+        template<typename T> bool AddTag(const std::string& tag, const std::string& type, const T& value);
+        template<typename T> bool AddTag(const std::string& tag, const std::vector<T>& values);
  
          // edit (or append) tag
-        template<typename T> bool EditTag(const std::string& tag,
-                                          const std::string& type,
-                                          const T& value);
-        template<typename T> bool EditTag(const std::string& tag,
-                                          const std::vector<T>& values);
+        template<typename T> bool EditTag(const std::string& tag, const std::string& type, const T& value);
+        template<typename T> bool EditTag(const std::string& tag, const std::vector<T>& values);
  
          // retrieves tag data
-        template<typename T> bool GetTag(const std::string& tag,
-                                         T& destination) const;
-        template<typename T> bool GetTag(const std::string& tag,
-                                         std::vector<T>& destination) const;
+        template<typename T> bool GetTag(const std::string& tag, T& destination) const;
+        template<typename T> bool GetTag(const std::string& tag, std::vector<T>& destination) const;
  
          // retrieves the BAM type-code for requested tag
          // (returns whether or not tag exists, and type-code is valid)
@@ -101,12 +90,12 @@ struct API_EXPORT BamAlignment {
          // legacy methods (consider deprecated, but still available)
          bool GetEditDistance(uint32_t& editDistance) const;         // retrieves value of "NM" tag
          bool GetReadGroup(std::string& readGroup) const;            // retrieves value of "RG" tag
-        
+
          // returns true if alignment has a record for this tag name
          bool HasTag(const std::string& tag) const;
  
          // removes a tag
-        bool RemoveTag(const std::string& tag);
+        void RemoveTag(const std::string& tag);
  
      // additional methods
      public:
@@ -116,6 +105,9 @@ struct API_EXPORT BamAlignment {
          // calculates alignment end position
          int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;  
  
+        // returns a description of the last error that occurred
+        std::string GetErrorString(void) const;
+
      // public data fields
      public:
          std::string Name;               // read name
@@ -138,15 +130,17 @@ struct API_EXPORT BamAlignment {
      //! \cond
      // internal utility methods
      private:
-        static bool FindTag(const std::string& tag,
-                            char*& pTagData,
-                            const unsigned int& tagDataLength,
-                            unsigned int& numBytesParsed);
-        static bool IsValidSize(const std::string& tag,
-                                const std::string& type);
-        static bool SkipToNextTag(const char storageType,
+        bool FindTag(const std::string& tag,
+                     char*& pTagData,
+                     const unsigned int& tagDataLength,
+                     unsigned int& numBytesParsed) const;
+        bool IsValidSize(const std::string& tag,
+                         const std::string& type) const;
+        void SetErrorString(const std::string& where,
+                            const std::string& what) const;
+        bool SkipToNextTag(const char storageType,
                             char*& pTagData,
-                           unsigned int& numBytesParsed);
+                           unsigned int& numBytesParsed) const;
  
      // internal data
      private:
@@ -173,6 +167,8 @@ struct API_EXPORT BamAlignment {
          BamAlignmentSupportData SupportData;
          friend class Internal::BamReaderPrivate;
          friend class Internal::BamWriterPrivate;
+
+        mutable std::string ErrorString; // mutable to allow updates even in logically const methods
      //! \endcond
  };
  
@@ -188,10 +184,17 @@ inline bool BamAlignment::AddTag(const std::string& tag,
      if ( SupportData.HasCoreOnly )
          BuildCharData();
  
-    // validate tag/type size & that storage type code is OK for T
-    if ( !IsValidSize(tag, type) ) return false;
-    if ( !TagTypeHelper<T>::CanConvertTo(type.at(0)) )
+    // check tag/type size
+    if ( !IsValidSize(tag, type) ) {
+        // TODO: set error string?
+        return false;
+    }
+
+    // check that storage type code is OK for T
+    if ( !TagTypeHelper<T>::CanConvertTo(type.at(0)) ) {
+        // TODO: set error string?
          return false;
+    }
  
      // localize the tag data
      char* pTagData = (char*)TagData.data();
@@ -200,8 +203,10 @@ inline bool BamAlignment::AddTag(const std::string& tag,
  
      // if tag already exists, return false
      // use EditTag explicitly instead
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        // TODO: set error string?
          return false;
+    }
  
      // otherwise, convert value to string
      union { T value; char valueBuffer[sizeof(T)]; } un;
@@ -209,20 +214,17 @@ inline bool BamAlignment::AddTag(const std::string& tag,
  
      // copy original tag data to temp buffer
      const std::string newTag = tag + type;
-    const int newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T
-    char* originalTagData = new char[newTagDataLength];
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
+    const size_t newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T
+    RaiiBuffer originalTagData(newTagDataLength);
+    memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
  
      // append newTag
-    strcat(originalTagData + tagDataLength, newTag.data());
-    memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T));
+    strcat(originalTagData.Buffer + tagDataLength, newTag.data());
+    memcpy(originalTagData.Buffer + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T));
  
      // store temp buffer back in TagData
-    const char* newTagData = (const char*)originalTagData;
+    const char* newTagData = (const char*)originalTagData.Buffer;
      TagData.assign(newTagData, newTagDataLength);
-
-    // clean up & return success
-    delete[] originalTagData;
      return true;
  }
  
@@ -235,10 +237,17 @@ inline bool BamAlignment::AddTag<std::string>(const std::string& tag,
      if ( SupportData.HasCoreOnly )
          BuildCharData();
  
-    // validate tag/type size & that storage type code is OK for string
-    if ( !IsValidSize(tag, type) ) return false;
-    if ( !TagTypeHelper<std::string>::CanConvertTo(type.at(0)) )
+    // check tag/type size
+    if ( !IsValidSize(tag, type) ) {
+        // TODO: set error string?
+        return false;
+    }
+
+    // check that storage type code is OK for string
+    if ( !TagTypeHelper<std::string>::CanConvertTo(type.at(0)) ) {
+        // TODO: set error string?
          return false;
+    }
  
      // localize the tag data
      char* pTagData = (char*)TagData.data();
@@ -247,24 +256,23 @@ inline bool BamAlignment::AddTag<std::string>(const std::string& tag,
  
      // if tag already exists, return false
      // use EditTag explicitly instead
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        // TODO: set error string?
          return false;
+    }
  
      // otherwise, copy tag data to temp buffer
      const std::string newTag = tag + type + value;
-    const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
-    char* originalTagData = new char[newTagDataLength];
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
+    const size_t newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
+    RaiiBuffer originalTagData(newTagDataLength);
+    memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
  
-    // append newTag
-    strcat(originalTagData + tagDataLength, newTag.data());  // removes original null-term, appends newTag + null-term
+    // append newTag (removes original null-term, then appends newTag + null-term)
+    strcat(originalTagData.Buffer + tagDataLength, newTag.data());
  
      // store temp buffer back in TagData
-    const char* newTagData = (const char*)originalTagData;
+    const char* newTagData = (const char*)originalTagData.Buffer;
      TagData.assign(newTagData, newTagDataLength);
-
-    // clean up & return success
-    delete[] originalTagData;
      return true;
  }
  
@@ -287,8 +295,10 @@ inline bool BamAlignment::AddTag(const std::string& tag,
  
      // if tag already exists, return false
      // use EditTag explicitly instead
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        // TODO: set error string?
          return false;
+    }
  
      // build new tag's base information
      char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
@@ -301,28 +311,25 @@ inline bool BamAlignment::AddTag(const std::string& tag,
      memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
  
      // copy current TagData string to temp buffer, leaving room for new tag's contents
-    const int newTagDataLength = tagDataLength +
-                                 Constants::BAM_TAG_ARRAYBASE_SIZE +
-                                 numElements*sizeof(T);
-    char* originalTagData = new char[newTagDataLength];
-    memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+    const size_t newTagDataLength = tagDataLength +
+                                    Constants::BAM_TAG_ARRAYBASE_SIZE +
+                                    numElements*sizeof(T);
+    RaiiBuffer originalTagData(newTagDataLength);
+    memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
  
      // write newTagBase (removes old null term)
-    strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+    strcat(originalTagData.Buffer + tagDataLength, (const char*)newTagBase);
  
      // add vector elements to tag
      int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
      for ( int i = 0 ; i < numElements; ++i ) {
          const T& value = values.at(i);
-        memcpy(originalTagData + elementsBeginOffset + i*sizeof(T), &value, sizeof(T));
+        memcpy(originalTagData.Buffer + elementsBeginOffset + i*sizeof(T), &value, sizeof(T));
      }
  
      // store temp buffer back in TagData
-    const char* newTagData = (const char*)originalTagData;
+    const char* newTagData = (const char*)originalTagData.Buffer;
      TagData.assign(newTagData, newTagDataLength);
-
-    // cleanup & return success
-    delete[] originalTagData;
      return true;
  }
  
@@ -359,9 +366,17 @@ template<typename T>
  inline bool BamAlignment::GetTag(const std::string& tag,
                                   T& destination) const
  {
-    // skip if core-only or no tags present
-    if ( SupportData.HasCoreOnly || TagData.empty() )
+    // skip if alignment is core-only
+    if ( SupportData.HasCoreOnly ) {
+        // TODO: set error string?
          return false;
+    }
+
+    // skip if no tags present
+    if ( TagData.empty() ) {
+        // TODO: set error string?
+        return false;
+    }
  
      // localize the tag data
      char* pTagData = (char*)TagData.data();
@@ -369,13 +384,19 @@ inline bool BamAlignment::GetTag(const std::string& tag,
      unsigned int numBytesParsed = 0;
  
      // return failure if tag not found
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        // TODO: set error string?
          return false;
+    }
  
-    // otherwise try to copy data into destination
+    // fetch data type
      const char type = *(pTagData - 1);
-    if ( !TagTypeHelper<T>::CanConvertFrom(type) )
+    if ( !TagTypeHelper<T>::CanConvertFrom(type) ) {
+        // TODO: set error string ?
          return false;
+    }
+
+    // determine data length
      int destinationLength = 0;
      switch ( type ) {
  
@@ -403,18 +424,18 @@ inline bool BamAlignment::GetTag(const std::string& tag,
          case (Constants::BAM_TAG_TYPE_STRING) :
          case (Constants::BAM_TAG_TYPE_HEX)    :
          case (Constants::BAM_TAG_TYPE_ARRAY)  :
-            std::cerr << "BamAlignment ERROR: cannot store tag of type " << type
-                 << " in integer destination" << std::endl;
+            SetErrorString("BamAlignment::GetTag",
+                           "cannot store variable length tag data into a numeric destination");
              return false;
  
          // unrecognized tag type
          default:
-            std::cerr << "BamAlignment ERROR: unknown tag type encountered: "
-                 << type << std::endl;
+            const std::string message = std::string("invalid tag type: ") + type;
+            SetErrorString("BamAlignment::GetTag", message);
              return false;
      }
  
-    // store in destination
+    // store data in destination
      destination = 0;
      memcpy(&destination, pTagData, destinationLength);
  
@@ -426,9 +447,17 @@ template<>
  inline bool BamAlignment::GetTag<std::string>(const std::string& tag,
                                                std::string& destination) const
  {
-    // skip if core-only or no tags present
-    if ( SupportData.HasCoreOnly || TagData.empty() )
+    // skip if alignment is core-only
+    if ( SupportData.HasCoreOnly ) {
+        // TODO: set error string?
          return false;
+    }
+
+    // skip if no tags present
+    if ( TagData.empty() ) {
+        // TODO: set error string?
+        return false;
+    }
  
      // localize the tag data
      char* pTagData = (char*)TagData.data();
@@ -436,8 +465,10 @@ inline bool BamAlignment::GetTag<std::string>(const std::string& tag,
      unsigned int numBytesParsed = 0;
  
      // return failure if tag not found
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        // TODO: set error string?
          return false;
+    }
  
      // otherwise copy data into destination
      const unsigned int dataLength = strlen(pTagData);
@@ -454,9 +485,17 @@ template<typename T>
  inline bool BamAlignment::GetTag(const std::string& tag,
                                   std::vector<T>& destination) const
  {
-    // skip if core-only or no tags present
-    if ( SupportData.HasCoreOnly || TagData.empty() )
+    // skip if alignment is core-only
+    if ( SupportData.HasCoreOnly ) {
+        // TODO: set error string?
          return false;
+    }
+
+    // skip if no tags present
+    if ( TagData.empty() ) {
+        // TODO: set error string?
+        return false;
+    }
  
      // localize the tag data
      char* pTagData = (char*)TagData.data();
@@ -464,22 +503,27 @@ inline bool BamAlignment::GetTag(const std::string& tag,
      unsigned int numBytesParsed = 0;
  
      // return false if tag not found
-    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        // TODO: set error string?
          return false;
+    }
  
      // check that tag is array type
      const char tagType = *(pTagData - 1);
      if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
-        std::cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
-             << tag << " in array destination" << std::endl;
+        SetErrorString("BamAlignment::GetTag", "cannot store a non-array tag in array destination");
          return false;
      }
  
-    // calculate length of each element in tag's array
+    // fetch element type
      const char elementType = *pTagData;
-    if ( !TagTypeHelper<T>::CanConvertFrom(elementType) )
+    if ( !TagTypeHelper<T>::CanConvertFrom(elementType) ) {
+        // TODO: set error string ?
          return false;
+    }
      ++pTagData;
+
+    // calculate length of each element in tag's array
      int elementLength = 0;
      switch ( elementType ) {
          case (Constants::BAM_TAG_TYPE_ASCII) :
@@ -503,14 +547,14 @@ inline bool BamAlignment::GetTag(const std::string& tag,
          case (Constants::BAM_TAG_TYPE_STRING) :
          case (Constants::BAM_TAG_TYPE_HEX)    :
          case (Constants::BAM_TAG_TYPE_ARRAY)  :
-            std::cerr << "BamAlignment ERROR: array element type: " << elementType
-                 << " cannot be stored in integer value" << std::endl;
+            SetErrorString("BamAlignment::GetTag",
+                           "invalid array data, variable-length elements are not allowed");
              return false;
  
          // unknown tag type
          default:
-            std::cerr << "BamAlignment ERROR: unknown element type encountered: "
-                 << elementType << std::endl;
+            const std::string message = std::string("invalid array element type: ") + elementType;
+            SetErrorString("BamAlignment::GetTag", message);
              return false;
      }
  
diff --git a/src/api/BamAux.h b/src/api/BamAux.h

index 4c5a99aca66eb76d3e0646f152ff25bf402cbdae..14e65476b4c1f69d02bf02e417ededdcc5e02985 100644 (file)
--- a/src/api/BamAux.h
+++ b/src/api/BamAux.h
@@ -2,7 +2,7 @@
  // BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg\r
  // Marth Lab, Department of Biology, Boston College\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 7 October 2011 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Provides data structures & utility methods that are used throughout the API.\r
  // ***************************************************************************\r
@@ -451,6 +451,19 @@ API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {
      return UnpackUnsignedShort( (const char*)buffer );\r
  }\r
  \r
+// ----------------------------------------------------------------\r
+// 'internal' helper structs\r
+\r
+struct RaiiBuffer {\r
+    RaiiBuffer(const unsigned int n)\r
+        : Buffer( new char[n]() )\r
+    { }\r
+    ~RaiiBuffer(void) {\r
+        delete[] Buffer;\r
+    }\r
+    char* Buffer;\r
+};\r
+\r
  } // namespace BamTools\r
  \r
  #endif // BAMAUX_H\r
diff --git a/src/api/BamConstants.h b/src/api/BamConstants.h

index 3e674ee5d3afe06fcf2b0d0c20f8a2775a6d78e2..4a35d8f5fa72f3be06f8680f1a8f82cf8258fac6 100644 (file)
--- a/src/api/BamConstants.h
+++ b/src/api/BamConstants.h
@@ -2,7 +2,7 @@
  // BamConstants.h (c) 2011 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 4 October 2011 (DB)
+// Last modified: 5 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides basic constants for handling BAM files.
  // ***************************************************************************
@@ -21,15 +21,15 @@
  namespace BamTools {
  namespace Constants {
  
-const int BAM_SIZEOF_INT = 4;
+const uint8_t BAM_SIZEOF_INT = 4;
  
  // header magic number
-const char* const  BAM_HEADER_MAGIC = "BAM\1";
-const unsigned int BAM_HEADER_MAGIC_LENGTH = 4;
+const char* const BAM_HEADER_MAGIC = "BAM\1";
+const uint8_t BAM_HEADER_MAGIC_LENGTH = 4;
  
  // BAM alignment core size
-const int BAM_CORE_SIZE = 32;
-const int BAM_CORE_BUFFER_SIZE = 8;
+const uint8_t BAM_CORE_SIZE        = 32;
+const uint8_t BAM_CORE_BUFFER_SIZE = 8;
  
  // BAM alignment flags
  const int BAM_ALIGNMENT_PAIRED              = 0x0001;
@@ -46,15 +46,15 @@ const int BAM_ALIGNMENT_DUPLICATE           = 0x0400;
  
  // CIGAR constants
  const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X";
-const int BAM_CIGAR_MATCH    = 0;
-const int BAM_CIGAR_INS      = 1;
-const int BAM_CIGAR_DEL      = 2;
-const int BAM_CIGAR_REFSKIP  = 3;
-const int BAM_CIGAR_SOFTCLIP = 4;
-const int BAM_CIGAR_HARDCLIP = 5;
-const int BAM_CIGAR_PAD      = 6;
-const int BAM_CIGAR_SEQMATCH = 7;
-const int BAM_CIGAR_MISMATCH = 8;
+const uint8_t BAM_CIGAR_MATCH    = 0;
+const uint8_t BAM_CIGAR_INS      = 1;
+const uint8_t BAM_CIGAR_DEL      = 2;
+const uint8_t BAM_CIGAR_REFSKIP  = 3;
+const uint8_t BAM_CIGAR_SOFTCLIP = 4;
+const uint8_t BAM_CIGAR_HARDCLIP = 5;
+const uint8_t BAM_CIGAR_PAD      = 6;
+const uint8_t BAM_CIGAR_SEQMATCH = 7;
+const uint8_t BAM_CIGAR_MISMATCH = 8;
  
  const char BAM_CIGAR_MATCH_CHAR    = 'M';
  const char BAM_CIGAR_INS_CHAR      = 'I';
@@ -66,8 +66,8 @@ const char BAM_CIGAR_PAD_CHAR      = 'P';
  const char BAM_CIGAR_SEQMATCH_CHAR = '=';
  const char BAM_CIGAR_MISMATCH_CHAR = 'X';
  
-const int BAM_CIGAR_SHIFT    = 4;
-const int BAM_CIGAR_MASK     = ((1 << BAM_CIGAR_SHIFT) - 1);
+const int BAM_CIGAR_SHIFT = 4;
+const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);
  
  // BAM tag types
  const char BAM_TAG_TYPE_ASCII  = 'A';
@@ -82,46 +82,66 @@ const char BAM_TAG_TYPE_STRING = 'Z';
  const char BAM_TAG_TYPE_HEX    = 'H';
  const char BAM_TAG_TYPE_ARRAY  = 'B';
  
-const size_t BAM_TAG_TAGSIZE  = 2;
-const size_t BAM_TAG_TYPESIZE = 1;
-const int BAM_TAG_ARRAYBASE_SIZE = 8;
+const uint8_t BAM_TAG_TAGSIZE        = 2;
+const uint8_t BAM_TAG_TYPESIZE       = 1;
+const uint8_t BAM_TAG_ARRAYBASE_SIZE = 8;
  
  // DNA bases
  const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
-const unsigned char BAM_BASECODE_EQUAL = 0;
-const unsigned char BAM_BASECODE_A     = 1;
-const unsigned char BAM_BASECODE_C     = 2;
-const unsigned char BAM_BASECODE_G     = 4;
-const unsigned char BAM_BASECODE_T     = 8;
-const unsigned char BAM_BASECODE_N     = 15;
+const uint8_t BAM_BASECODE_EQUAL = 0;
+const uint8_t BAM_BASECODE_A     = 1;
+const uint8_t BAM_BASECODE_C     = 2;
+const uint8_t BAM_BASECODE_M     = 3;
+const uint8_t BAM_BASECODE_G     = 4;
+const uint8_t BAM_BASECODE_R     = 5;
+const uint8_t BAM_BASECODE_S     = 6;
+const uint8_t BAM_BASECODE_V     = 7;
+const uint8_t BAM_BASECODE_T     = 8;
+const uint8_t BAM_BASECODE_W     = 9;
+const uint8_t BAM_BASECODE_Y     = 10;
+const uint8_t BAM_BASECODE_H     = 11;
+const uint8_t BAM_BASECODE_K     = 12;
+const uint8_t BAM_BASECODE_D     = 13;
+const uint8_t BAM_BASECODE_B     = 14;
+const uint8_t BAM_BASECODE_N     = 15;
  
-const char BAM_DNA_EQUAL   = '=';
-const char BAM_DNA_A       = 'A';
-const char BAM_DNA_C       = 'C';
-const char BAM_DNA_G       = 'G';
-const char BAM_DNA_T       = 'T';
-const char BAM_DNA_N       = 'N';
-const char BAM_DNA_DEL     = '-';
-const char BAM_DNA_PAD     = '*';
+const char BAM_DNA_EQUAL = '=';
+const char BAM_DNA_A     = 'A';
+const char BAM_DNA_C     = 'C';
+const char BAM_DNA_M     = 'M';
+const char BAM_DNA_G     = 'G';
+const char BAM_DNA_R     = 'R';
+const char BAM_DNA_S     = 'S';
+const char BAM_DNA_V     = 'V';
+const char BAM_DNA_T     = 'T';
+const char BAM_DNA_W     = 'W';
+const char BAM_DNA_Y     = 'Y';
+const char BAM_DNA_H     = 'H';
+const char BAM_DNA_K     = 'K';
+const char BAM_DNA_D     = 'D';
+const char BAM_DNA_B     = 'B';
+const char BAM_DNA_N     = 'N';
+const char BAM_DNA_DEL   = '-';
+const char BAM_DNA_PAD   = '*';
  
  // zlib constants
-const int GZIP_ID1   = 31;
-const int GZIP_ID2   = 139;
-const int CM_DEFLATE = 8;
-const int FLG_FEXTRA = 4;
-const int OS_UNKNOWN = 255;
-const int BGZF_XLEN  = 6;
-const int BGZF_ID1   = 66;
-const int BGZF_ID2   = 67;
-const int BGZF_LEN   = 2;
-const int GZIP_WINDOW_BITS    = -15;
-const int Z_DEFAULT_MEM_LEVEL = 8;
+const char GZIP_ID1   = 31;
+const char GZIP_ID2   = 139;
+const char CM_DEFLATE = 8;
+const char FLG_FEXTRA = 4;
+const char OS_UNKNOWN = 255;
+const char BGZF_XLEN  = 6;
+const char BGZF_ID1   = 66;
+const char BGZF_ID2   = 67;
+const char BGZF_LEN   = 2;
+const int8_t GZIP_WINDOW_BITS = -15;
+const int8_t Z_DEFAULT_MEM_LEVEL = 8;
  
  // BZGF constants
-const int BGZF_BLOCK_HEADER_LENGTH = 18;
-const int BGZF_BLOCK_FOOTER_LENGTH = 8;
-const int BGZF_MAX_BLOCK_SIZE      = 65536;
-const int BGZF_DEFAULT_BLOCK_SIZE  = 65536;
+const uint8_t  BGZF_BLOCK_HEADER_LENGTH  = 18;
+const uint8_t  BGZF_BLOCK_FOOTER_LENGTH  = 8;
+const uint32_t BGZF_MAX_BLOCK_SIZE       = 65536;
+const uint32_t BGZF_DEFAULT_BLOCK_SIZE   = 65536;
  
  } // namespace Constants
  
diff --git a/src/api/BamIndex.h b/src/api/BamIndex.h

index a52922e76b6d3f0bc4272ba142b7215a02b9c765..b202e921ff9d13c56796469abef5550486241430 100644 (file)
--- a/src/api/BamIndex.h
+++ b/src/api/BamIndex.h
@@ -2,7 +2,7 @@
  // BamIndex.h (c) 2009 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides basic BAM index interface
  // ***************************************************************************
@@ -56,22 +56,36 @@ class API_EXPORT BamIndex {
      // index interface
      public:
          // builds index from associated BAM file & writes out to index file
-        virtual bool Create(void) =0; // creates index file from BAM file
+        virtual bool Create(void) =0;
+
+        // returns a human-readable description of the last error encountered
+        std::string GetErrorString(void) { return m_errorString; }
+
          // returns whether reference has alignments or no
          virtual bool HasAlignments(const int& referenceID) const =0;
+
          // attempts to use index data to jump to @region, returns success/fail
          // a "successful" jump indicates no error, but not whether this region has data
          //   * thus, the method sets a flag to indicate whether there are alignments
          //     available after the jump position
          virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
+
          // loads existing data from file into memory
          virtual bool Load(const std::string& filename) =0;
+
          // change the index caching behavior
          virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0;
  
+    // internal methods
+    protected:
+        void SetErrorString(const std::string& where, const std::string& what) {
+            m_errorString = where + ": " + what;
+        }
+
      // data members
      protected:
-        Internal::BamReaderPrivate* m_reader; // copy, not ownedprivate:
+        Internal::BamReaderPrivate* m_reader; // copy, not owned
+        std::string m_errorString;
  };
  
  } // namespace BamTools
diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp

index 00798714b5e228fbddb5ef2934da6feb054cd87a..562b89964bdc3b98e5810c0c59a89dd0a786b218 100644 (file)
--- a/src/api/BamMultiReader.cpp
+++ b/src/api/BamMultiReader.cpp
@@ -2,7 +2,7 @@
  // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 1 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Convenience class for reading multiple BAM files.
  //
@@ -46,8 +46,8 @@ BamMultiReader::~BamMultiReader(void) {
  
      \sa CloseFile(), IsOpen(), Open(), BamReader::Close()
  */
-void BamMultiReader::Close(void) {
-    d->Close();
+bool BamMultiReader::Close(void) {
+    return d->Close();
  }
  
  /*! \fn void BamMultiReader::CloseFile(const std::string& filename)
@@ -57,8 +57,8 @@ void BamMultiReader::Close(void) {
  
      \sa Close(), IsOpen(), Open(), BamReader::Close()
  */
-void BamMultiReader::CloseFile(const std::string& filename) {
-    d->CloseFile(filename);
+bool BamMultiReader::CloseFile(const std::string& filename) {
+    return d->CloseFile(filename);
  }
  
  /*! \fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)
@@ -86,6 +86,11 @@ const std::vector<std::string> BamMultiReader::Filenames(void) const {
      return d->Filenames();
  }
  
+// returns a description of the last error that occurred
+std::string BamMultiReader::GetErrorString(void) const {
+    return d->GetErrorString();
+}
+
  /*! \fn SamHeader BamMultiReader::GetHeader(void) const
      \brief Returns unified SAM-format header for all files
  
diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h

index 959048c27f1f6dea1ee5fc5522abd53ea019fb90..1b1c27021bc457962d17160b019619932d4ecee6 100644 (file)
--- a/src/api/BamMultiReader.h
+++ b/src/api/BamMultiReader.h
@@ -2,7 +2,7 @@
  // BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 1 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Convenience class for reading multiple BAM files.
  // ***************************************************************************
@@ -38,9 +38,9 @@ class API_EXPORT BamMultiReader {
          // ----------------------
  
          // closes all open BAM files
-        void Close(void);
+        bool Close(void);
          // close only the requested BAM file
-        void CloseFile(const std::string& filename);
+        bool CloseFile(const std::string& filename);
          // returns list of filenames for all open BAM files
          const std::vector<std::string> Filenames(void) const;
          // returns true if multireader has any open BAM files
@@ -100,6 +100,13 @@ class API_EXPORT BamMultiReader {
          // changes the caching behavior of the index data
          void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
  
+        // ----------------------
+        // error handling
+        // ----------------------
+
+        // returns a human-readable description of the last error that occurred
+        std::string GetErrorString(void) const;
+
      // private implementation
      private:
          Internal::BamMultiReaderPrivate* d;
diff --git a/src/api/BamReader.cpp b/src/api/BamReader.cpp

index 4b25b15960c40d351bf7786de75637289ea0ee18..19dd135e7704b844b6ef46a2fb568c1dedf1df90 100644 (file)
--- a/src/api/BamReader.cpp
+++ b/src/api/BamReader.cpp
@@ -2,7 +2,7 @@
  // BamReader.cpp (c) 2009 Derek Barnett, Michael Str�mberg
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides read access to BAM files.
  // ***************************************************************************
@@ -38,15 +38,16 @@ BamReader::~BamReader(void) {
      d = 0;
  }
  
-/*! \fn void BamReader::Close(void)
+/*! \fn bool BamReader::Close(void)
      \brief Closes the current BAM file.
  
      Also clears out all header and reference data.
  
+    \return \c true if file closed OK
      \sa IsOpen(), Open()
  */
-void BamReader::Close(void) {
-    d->Close();
+bool BamReader::Close(void) {
+    return d->Close();
  }
  
  /*! \fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)
@@ -60,6 +61,10 @@ bool BamReader::CreateIndex(const BamIndex::IndexType& type) {
      return d->CreateIndex(type);
  }
  
+string BamReader::GetErrorString(void) const {
+    return d->GetErrorString();
+}
+
  /*! \fn const std::string BamReader::GetFilename(void) const
      \brief Returns name of current BAM file.
  
diff --git a/src/api/BamReader.h b/src/api/BamReader.h

index 836fbf5e3cbac860f62916c3e717c72618089a7b..ce04af65a9edbd364c323dba22acde565dd3ea0c 100644 (file)
--- a/src/api/BamReader.h
+++ b/src/api/BamReader.h
@@ -2,7 +2,7 @@
  // BamReader.h (c) 2009 Derek Barnett, Michael Str�mberg\r
  // Marth Lab, Department of Biology, Boston College\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 7 October 2011 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Provides read access to BAM files.\r
  // ***************************************************************************\r
@@ -37,7 +37,7 @@ class API_EXPORT BamReader {
          // ----------------------\r
  \r
          // closes the current BAM file\r
-        void Close(void);\r
+        bool Close(void);\r
          // returns filename of current BAM file\r
          const std::string GetFilename(void) const;\r
          // returns true if a BAM file is open for reading\r
@@ -102,6 +102,13 @@ class API_EXPORT BamReader {
          // changes the caching behavior of the index data\r
          void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);\r
  \r
+        // ----------------------\r
+        // error handling\r
+        // ----------------------\r
+\r
+        // returns a human-readable description of the last error that occurred\r
+        std::string GetErrorString(void) const;\r
+\r
      // deprecated methods\r
      public:\r
          // returns true if index data is available\r
diff --git a/src/api/BamWriter.cpp b/src/api/BamWriter.cpp

index 8bf6f18c5caf31c404d88cb24ca2020996a128e7..3912245a3f6034180716b79875e7fdd5fa566a0d 100644 (file)
--- a/src/api/BamWriter.cpp
+++ b/src/api/BamWriter.cpp
@@ -2,7 +2,7 @@
  // BamWriter.cpp (c) 2009 Michael Str�mberg, Derek Barnett\r
  // Marth Lab, Department of Biology, Boston College\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 4 October 2011 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Provides the basic functionality for producing BAM files\r
  // ***************************************************************************\r
@@ -13,8 +13,6 @@
  #include <api/internal/BamWriter_p.h>\r
  using namespace BamTools;\r
  using namespace BamTools::Internal;\r
-\r
-#include <iostream>\r
  using namespace std;\r
  \r
  /*! \class BamTools::BamWriter\r
@@ -57,6 +55,11 @@ void BamWriter::Close(void) {
      d->Close();\r
  }\r
  \r
+// returns a human-readable description of the last error that occurred\r
+std::string BamWriter::GetErrorString(void) const {\r
+    return d->GetErrorString();\r
+}\r
+\r
  /*! \fn bool BamWriter::IsOpen(void) const\r
      \brief Returns \c true if BAM file is open for writing.\r
      \sa Open()\r
@@ -115,11 +118,11 @@ bool BamWriter::Open(const std::string& filename,
      \param alignment BamAlignment record to save\r
      \sa BamReader::GetNextAlignment(), BamReader::GetNextAlignmentCore()\r
  */\r
-void BamWriter::SaveAlignment(const BamAlignment& alignment) {\r
-    d->SaveAlignment(alignment);\r
+bool BamWriter::SaveAlignment(const BamAlignment& alignment) {\r
+    return d->SaveAlignment(alignment);\r
  }\r
  \r
-/*! \fn void BamWriter::SetCompressionMode(const CompressionMode& compressionMode)\r
+/*! \fn void BamWriter::SetCompressionMode(const BamWriter::CompressionMode& compressionMode)\r
      \brief Sets the output compression mode.\r
  \r
      Default mode is BamWriter::Compressed.\r
@@ -137,6 +140,6 @@ void BamWriter::SaveAlignment(const BamAlignment& alignment) {
      \param compressionMode desired output compression behavior\r
      \sa IsOpen(), Open()\r
  */\r
-void BamWriter::SetCompressionMode(const CompressionMode& compressionMode) {\r
+void BamWriter::SetCompressionMode(const BamWriter::CompressionMode& compressionMode) {\r
      d->SetWriteCompressed( compressionMode == BamWriter::Compressed );\r
  }\r
diff --git a/src/api/BamWriter.h b/src/api/BamWriter.h

index 56ac30142949c034c209b2e872e70aed9bebb3fa..5e8d21f22fd7a53151cf8366a5d7cd091bbfc5fc 100644 (file)
--- a/src/api/BamWriter.h
+++ b/src/api/BamWriter.h
@@ -2,7 +2,7 @@
  // BamWriter.h (c) 2009 Michael Str�mberg, Derek Barnett\r
  // Marth Lab, Department of Biology, Boston College\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 5 October 2011 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Provides the basic functionality for producing BAM files\r
  // ***************************************************************************\r
@@ -25,9 +25,11 @@ namespace Internal {
  \r
  class API_EXPORT BamWriter {\r
  \r
-    public: enum CompressionMode { Compressed = 0\r
-                                 , Uncompressed\r
-                                 };\r
+    // enums\r
+    public:\r
+        enum CompressionMode { Compressed = 0\r
+                             , Uncompressed\r
+                             };\r
  \r
      // ctor & dtor\r
      public:\r
@@ -38,6 +40,8 @@ class API_EXPORT BamWriter {
      public:\r
          //  closes the current BAM file\r
          void Close(void);\r
+        // returns a human-readable description of the last error that occurred\r
+        std::string GetErrorString(void) const;\r
          // returns true if BAM file is open for writing\r
          bool IsOpen(void) const;\r
          // opens a BAM file for writing\r
@@ -49,9 +53,9 @@ class API_EXPORT BamWriter {
                    const SamHeader& samHeader,\r
                    const RefVector& referenceSequences);\r
          // saves the alignment to the alignment archive\r
-        void SaveAlignment(const BamAlignment& alignment);\r
+        bool SaveAlignment(const BamAlignment& alignment);\r
          // sets the output compression mode\r
-        void SetCompressionMode(const CompressionMode& compressionMode);\r
+        void SetCompressionMode(const BamWriter::CompressionMode& compressionMode);\r
  \r
      // private implementation\r
      private:\r
diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt

index 7306f568f3a1b273f9ef1e4a4701fc1e4a37725d..62d2e7f21c8b963e2d341abb3e48d344c76a20c0 100644 (file)
--- a/src/api/CMakeLists.txt
+++ b/src/api/CMakeLists.txt
@@ -25,6 +25,7 @@ set( BamToolsAPISources
          SamReadGroupDictionary.cpp
          SamSequence.cpp
          SamSequenceDictionary.cpp
+        internal/BamException_p.cpp
          internal/BamHeader_p.cpp
          internal/BamIndexFactory_p.cpp
          internal/BamMultiReader_p.cpp
diff --git a/src/api/SamHeader.cpp b/src/api/SamHeader.cpp

index 77e1c6d4b5fee014aadb6e6e702f4cc60f9f8867..363fb5ce18fa1579cee6885351a1eb2d969cad44 100644 (file)
--- a/src/api/SamHeader.cpp
+++ b/src/api/SamHeader.cpp
@@ -2,13 +2,14 @@
  // SamHeader.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 19 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides direct read/write access to the SAM header data fields.
  // ***************************************************************************
  
  #include <api/SamConstants.h>
  #include <api/SamHeader.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/SamFormatParser_p.h>
  #include <api/internal/SamFormatPrinter_p.h>
  #include <api/internal/SamHeaderValidator_p.h>
@@ -63,8 +64,7 @@ SamHeader::SamHeader(const std::string& headerText)
      , SortOrder(Constants::SAM_HD_SORTORDER_UNKNOWN)
      , GroupOrder("")
  {
-    SamFormatParser parser(*this);
-    parser.Parse(headerText);
+    SetHeaderText(headerText);
  }
  
  /*! \fn SamHeader::SamHeader(const SamHeader& other)
@@ -88,6 +88,8 @@ SamHeader::~SamHeader(void) { }
      \brief Clears all header contents.
  */
  void SamHeader::Clear(void) {
+
+    // clear SAM header components
      Version.clear();
      SortOrder.clear();
      GroupOrder.clear();
@@ -95,6 +97,23 @@ void SamHeader::Clear(void) {
      ReadGroups.Clear();
      Programs.Clear();
      Comments.clear();
+
+    // clear error string
+    m_errorString.clear();
+}
+
+/*! \fn std::string SamHeader::GetErrorString(void) const
+    \brief Returns human-readable description of last error encountered
+*/
+std::string SamHeader::GetErrorString(void) const {
+    return m_errorString;
+}
+
+/*! \fn bool SamHeader::HasError(void) const
+    \brief Returns \c true if header encountered an error
+*/
+bool SamHeader::HasError(void) const {
+    return (!m_errorString.empty());
  }
  
  /*! \fn bool SamHeader::HasVersion(void) const
@@ -149,12 +168,32 @@ bool SamHeader::HasComments(void) const {
  /*! \fn bool SamHeader::IsValid(bool verbose = false) const
      \brief Checks header contents for required data and proper formatting.
      \param verbose If set to true, validation errors & warnings will be printed to stderr.
-                   Otherwise, output is suppressed and only validation check occurs.
+                   Otherwise, messages are available through SamHeader::GetErrorString().
      \return \c true if SAM header is well-formed
  */
  bool SamHeader::IsValid(bool verbose) const {
+
      SamHeaderValidator validator(*this);
-    return validator.Validate(verbose);
+
+    // if SAM header is valid, return success
+    if ( validator.Validate() )
+        return true;
+
+    // otherwiser
+    else {
+
+        // print messages to stderr
+        if ( verbose )
+            validator.PrintMessages(std::cerr);
+
+        // or catch in local error string
+        else {
+            stringstream errorStream("");
+            validator.PrintMessages(errorStream);
+            m_errorString = errorStream.str();
+        }
+        return false;
+    }
  }
  
  /*! \fn void SamHeader::SetHeaderText(const std::string& headerText)
@@ -166,9 +205,18 @@ void SamHeader::SetHeaderText(const std::string& headerText) {
      // clear prior data
      Clear();
  
-    // parse header text into data
-    SamFormatParser parser(*this);
-    parser.Parse(headerText);
+    try {
+        SamFormatParser parser(*this);
+        parser.Parse(headerText);
+    } catch ( BamException& e ) {
+
+        // clear anything parsed so far
+        // no telling what's valid and what's partially parsed
+        Clear();
+
+        // set error string
+        m_errorString = e.what();
+    }
  }
  
  /*! \fn std::string SamHeader::ToString(void) const
diff --git a/src/api/SamHeader.h b/src/api/SamHeader.h

index 3b67621ac4dca6bd94aba4293b7d944bf7eba5e8..707edbe760d23b3fc7ba0a4c52c571aa52ffb66a 100644 (file)
--- a/src/api/SamHeader.h
+++ b/src/api/SamHeader.h
@@ -2,7 +2,7 @@
  // SamHeader.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 18 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides direct read/write access to the SAM header data fields.
  // ***************************************************************************
@@ -28,27 +28,29 @@ struct API_EXPORT SamHeader {
  
      // query/modify entire SamHeader
      void Clear(void);                                   // clears all header contents
+    std::string GetErrorString(void) const;
+    bool HasError(void) const;
      bool IsValid(bool verbose = false) const;           // returns true if SAM header is well-formed
      void SetHeaderText(const std::string& headerText);  // replaces data fields with contents of SAM-formatted text
      std::string ToString(void) const;                   // returns the printable, SAM-formatted header text
  
      // convenience query methods
-    bool HasVersion(void) const;            // returns true if header contains format version entry
-    bool HasSortOrder(void) const;          // returns true if header contains sort order entry
-    bool HasGroupOrder(void) const;         // returns true if header contains group order entry
-    bool HasSequences(void) const;          // returns true if header contains any sequence entries
-    bool HasReadGroups(void) const;         // returns true if header contains any read group entries
-    bool HasPrograms(void) const;           // returns true if header contains any program record entries
-    bool HasComments(void) const;           // returns true if header contains comments
+    bool HasVersion(void) const;       // returns true if header contains format version entry
+    bool HasSortOrder(void) const;     // returns true if header contains sort order entry
+    bool HasGroupOrder(void) const;    // returns true if header contains group order entry
+    bool HasSequences(void) const;     // returns true if header contains any sequence entries
+    bool HasReadGroups(void) const;    // returns true if header contains any read group entries
+    bool HasPrograms(void) const;      // returns true if header contains any program record entries
+    bool HasComments(void) const;      // returns true if header contains comments
  
      // --------------
      // data members
      // --------------
  
      // header metadata (@HD line)
-    std::string Version;                    // VN:<Version>  *Required for valid SAM header, if @HD record is present*
-    std::string SortOrder;                  // SO:<SortOrder>
-    std::string GroupOrder;                 // GO:<GroupOrder>
+    std::string Version;               // VN:<Version>  *Required, if @HD record is present*
+    std::string SortOrder;             // SO:<SortOrder>
+    std::string GroupOrder;            // GO:<GroupOrder>
  
      // header sequences (@SQ entries)
      SamSequenceDictionary Sequences;
@@ -61,6 +63,9 @@ struct API_EXPORT SamHeader {
  
      // header comments (@CO entries)
      std::vector<std::string> Comments;
+
+    private:
+        mutable std::string m_errorString;
  };
  
  } // namespace BamTools
diff --git a/src/api/internal/BamException_p.cpp b/src/api/internal/BamException_p.cpp

new file mode 100644 (file)

index 0000000..38241d8
--- /dev/null
+++ b/src/api/internal/BamException_p.cpp
@@ -0,0 +1,15 @@
+// ***************************************************************************
+// BamException_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 5 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#include <api/internal/BamException_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+const string BamException::SEPARATOR = ": ";
diff --git a/src/api/internal/BamException_p.h b/src/api/internal/BamException_p.h

new file mode 100644 (file)

index 0000000..5199737
--- /dev/null
+++ b/src/api/internal/BamException_p.h
@@ -0,0 +1,51 @@
+// ***************************************************************************
+// BamException_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#ifndef BAMEXCEPTION_P_H
+#define BAMEXCEPTION_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <exception>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BamException : public std::exception {
+
+    public:
+        inline BamException(const std::string& where, const std::string& message)
+            : std::exception()
+            , m_errorString(where + SEPARATOR + message)
+        { }
+
+        inline ~BamException(void) throw() { }
+
+        inline const char* what(void) const throw() {
+            return m_errorString.c_str();
+        }
+
+    private:
+        std::string m_errorString;
+        static const std::string SEPARATOR;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMEXCEPTION_P_H
diff --git a/src/api/internal/BamHeader_p.cpp b/src/api/internal/BamHeader_p.cpp

index 48b8c0908d7f16707b8eb832e98e601a97ddfca5..6ff2f4b50575f33f530f6de00daaf70a4e75c804 100644 (file)
--- a/src/api/internal/BamHeader_p.cpp
+++ b/src/api/internal/BamHeader_p.cpp
@@ -2,24 +2,37 @@
  // BamHeader_p.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the basic functionality for handling BAM headers.
  // ***************************************************************************
  
  #include <api/BamAux.h>
  #include <api/BamConstants.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/BamHeader_p.h>
  #include <api/internal/BgzfStream_p.h>
  using namespace BamTools;
  using namespace BamTools::Internal;
  
-#include <cstdio>
  #include <cstdlib>
  #include <cstring>
-#include <iostream>
  using namespace std;
  
+// ------------------------
+// static utility methods
+// ------------------------
+
+static inline
+bool isValidMagicNumber(const char* buffer) {
+    return ( strncmp(buffer, Constants::BAM_HEADER_MAGIC,
+                     Constants::BAM_HEADER_MAGIC_LENGTH) == 0 );
+}
+
+// --------------------------
+// BamHeader implementation
+// --------------------------
+
  // ctor
  BamHeader::BamHeader(void) { }
  
@@ -27,23 +40,17 @@ BamHeader::BamHeader(void) { }
  BamHeader::~BamHeader(void) { }
  
  // reads magic number from BGZF stream, returns true if valid
-bool BamHeader::CheckMagicNumber(BgzfStream* stream) {
+void BamHeader::CheckMagicNumber(BgzfStream* stream) {
  
      // try to read magic number
      char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
-    if ( stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH) != (int)Constants::BAM_HEADER_MAGIC_LENGTH ) {
-        fprintf(stderr, "BamHeader ERROR: could not read magic number\n");
-        return false;
-    }
+    const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
+    if ( numBytesRead != (int)Constants::BAM_HEADER_MAGIC_LENGTH )
+        throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
  
      // validate magic number
-    if ( strncmp(buffer, Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH) != 0 ) {
-        fprintf(stderr, "BamHeader ERROR: invalid magic number\n");
-        return false;
-    }
-
-    // all checks out
-    return true;
+    if ( !isValidMagicNumber(buffer) )
+        throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
  }
  
  // clear SamHeader data
@@ -57,68 +64,49 @@ bool BamHeader::IsValid(void) const {
  }
  
  // load BAM header ('magic number' and SAM header text) from BGZF stream
-// returns true if all OK
-bool BamHeader::Load(BgzfStream* stream) {
+void BamHeader::Load(BgzfStream* stream) {
  
-    // cannot load if invalid stream
-    if ( stream == 0 )
-        return false;
+    // read & check magic number
+    CheckMagicNumber(stream);
  
-    // cannot load if magic number is invalid
-    if ( !CheckMagicNumber(stream) )
-        return false;
-
-    // cannot load header if cannot read header length
+    // read header (length, then actual text)
      uint32_t length(0);
-    if ( !ReadHeaderLength(stream, length) )
-        return false;
-
-    // cannot load header if cannot read header text
-    if ( !ReadHeaderText(stream, length) )
-        return false;
-
-    // otherwise, everything OK
-    return true;
+    ReadHeaderLength(stream, length);
+    ReadHeaderText(stream, length);
  }
  
  // reads SAM header text length from BGZF stream, stores it in @length
-// returns read success/fail status
-bool BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
+void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
  
-    // attempt to read BAM header text length
+    // read BAM header text length
      char buffer[sizeof(uint32_t)];
-    if ( stream->Read(buffer, sizeof(uint32_t)) != sizeof(uint32_t) ) {
-        fprintf(stderr, "BamHeader ERROR: could not read header length\n");
-        return false;
-    }
+    const size_t numBytesRead = stream->Read(buffer, sizeof(uint32_t));
+    if ( numBytesRead != sizeof(uint32_t) )
+        throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
  
-    // convert char buffer to length, return success
+    // convert char buffer to length
      length = BamTools::UnpackUnsignedInt(buffer);
      if ( BamTools::SystemIsBigEndian() )
          BamTools::SwapEndian_32(length);
-    return true;
  }
  
  // reads SAM header text from BGZF stream, stores in SamHeader object
-// returns read success/fail status
-bool BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
+void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
  
-    // set up destination buffer
+    // read header text
      char* headerText = (char*)calloc(length + 1, 1);
+    const size_t bytesRead = stream->Read(headerText, length);
  
-    // attempt to read header text
-    const unsigned bytesRead = stream->Read(headerText, length);
-    const bool readOk = ( bytesRead == length );
-    if ( readOk )
-        m_header.SetHeaderText( (string)((const char*)headerText) );
-    else
-        fprintf(stderr, "BamHeader ERROR: could not read header text\n");
+    // if error reading, clean up buffer & throw
+    if ( bytesRead != length ) {
+        free(headerText);
+        throw BamException("BamHeader::ReadHeaderText", "could not read header text");
+    }
  
-    // clean up calloc-ed temp variable (on success or fail)
+    // otherwise, text was read OK
+    // store & cleanup
+    m_header.SetHeaderText( (string)((const char*)headerText) );
      free(headerText);
-
-    // return read success
-    return readOk;
  }
  
  // returns *copy* of SamHeader data object
diff --git a/src/api/internal/BamHeader_p.h b/src/api/internal/BamHeader_p.h

index 3738c6066177cec90924394052084aeb5886c27e..42af68ce4f34dc1a4c2b70610469f2f2d9710d21 100644 (file)
--- a/src/api/internal/BamHeader_p.h
+++ b/src/api/internal/BamHeader_p.h
@@ -2,7 +2,7 @@
  // BamHeader_p.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 26 January 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the basic functionality for handling BAM headers.
  // ***************************************************************************
@@ -43,7 +43,7 @@ class BamHeader {
          bool IsValid(void) const;
          // load BAM header ('magic number' and SAM header text) from BGZF stream
          // returns true if all OK
-        bool Load(BgzfStream* stream);
+        void Load(BgzfStream* stream);
          // returns (editable) copy of SamHeader data object
          SamHeader ToSamHeader(void) const;
          // returns SAM-formatted string of header data
@@ -51,14 +51,12 @@ class BamHeader {
  
      // internal methods
      private:
-        // reads magic number from BGZF stream, returns true if valid
-        bool CheckMagicNumber(BgzfStream* stream);
+        // reads magic number from BGZF stream
+        void CheckMagicNumber(BgzfStream* stream);
          // reads SAM header length from BGZF stream, stores it in @length
-        // returns read success/fail status
-        bool ReadHeaderLength(BgzfStream* stream, uint32_t& length);
+        void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
          // reads SAM header text from BGZF stream, stores in SamHeader object
-        // returns read success/fail status
-        bool ReadHeaderText(BgzfStream* stream, const uint32_t& length);
+        void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
  
      // data members
      private:
diff --git a/src/api/internal/BamIndexFactory_p.cpp b/src/api/internal/BamIndexFactory_p.cpp

index c91f43374f1fc0318c43c46e869fe0619afa4b1d..4e9d1f28a4040aee6bc6140ffa23a61fad53f477 100644 (file)
--- a/src/api/internal/BamIndexFactory_p.cpp
+++ b/src/api/internal/BamIndexFactory_p.cpp
@@ -2,7 +2,7 @@
  // BamIndexFactory_p.cpp (c) 2011 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides interface for generating BamIndex implementations
  // ***************************************************************************
@@ -13,8 +13,6 @@
  #include <api/internal/BamToolsIndex_p.h>
  using namespace BamTools;
  using namespace BamTools::Internal;
-
-#include <cstdio>
  using namespace std;
  
  // generates index filename from BAM filename (depending on requested type)
@@ -26,7 +24,6 @@ const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
          case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
          case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
          default :
-            cerr << "BamIndexFactory ERROR: unknown index type" << type << endl;
              return string();
      }
  }
@@ -59,7 +56,6 @@ BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
          case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader);
          case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader);
          default :
-            cerr << "BamIndexFactory ERROR: unknown index type " << type << endl;
              return 0;
      }
  }
@@ -72,7 +68,7 @@ const string BamIndexFactory::FileExtension(const string& filename) {
          return string();
  
      // look for last dot in filename
-    size_t lastDotPosition = filename.find_last_of('.');
+    const size_t lastDotPosition = filename.find_last_of('.');
  
      // if none found, return empty string
      if ( lastDotPosition == string::npos )
@@ -88,6 +84,10 @@ const string BamIndexFactory::FileExtension(const string& filename) {
  const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
                                                  const BamIndex::IndexType& preferredType)
  {
+    // skip if BAM filename provided is empty
+    if ( bamFilename.empty() )
+        return string();
+
      // try to find index of preferred type first
      // return index filename if found
      string indexFilename = CreateIndexFilename(bamFilename, preferredType);
diff --git a/src/api/internal/BamMultiMerger_p.h b/src/api/internal/BamMultiMerger_p.h

index 9248df0eb39a6dca3d2ca99e8b08e5f6b7315259..2b7b110ad1ae4967ef086273f32650ea4fc6d848 100644 (file)
--- a/src/api/internal/BamMultiMerger_p.h
+++ b/src/api/internal/BamMultiMerger_p.h
@@ -121,6 +121,10 @@ class MultiMerger : public IMultiMerger {
  
  template <typename Compare>
  inline void MultiMerger<Compare>::Add(MergeItem item) {
+
+    // N.B. - any future custom Compare types must define this method
+    //        see algorithms/Sort.h
+
      if ( CompareType::UsesCharData() )
          item.Alignment->BuildCharData();
      m_data.insert(item);
diff --git a/src/api/internal/BamMultiReader_p.cpp b/src/api/internal/BamMultiReader_p.cpp

index 292c287352e73dedc7fdb713ee248e26833c2e22..f92258ddb856839d8c68721ae0af77990a8d101a 100644 (file)
--- a/src/api/internal/BamMultiReader_p.cpp
+++ b/src/api/internal/BamMultiReader_p.cpp
@@ -2,7 +2,7 @@
  // BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 3 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Functionality for simultaneously reading multiple BAM files
  // *************************************************************************
@@ -29,24 +29,45 @@ BamMultiReaderPrivate::BamMultiReaderPrivate(void)
  
  // dtor
  BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {
-
-    // close all open BAM readers (& clean up cache)
      Close();
  }
  
  // close all BAM files
-void BamMultiReaderPrivate::Close(void) {
-    CloseFiles( Filenames() );
+bool BamMultiReaderPrivate::Close(void) {
+
+    m_errorString.clear();
+
+    if ( CloseFiles(Filenames()) )
+        return true;
+    else {
+        const string currentError = m_errorString;
+        const string message = string("error encountered while closing all files: \n\t") + currentError;
+        SetErrorString("BamMultiReader::Close", message);
+        return false;
+    }
  }
  
  // close requested BAM file
-void BamMultiReaderPrivate::CloseFile(const string& filename) {
+bool BamMultiReaderPrivate::CloseFile(const string& filename) {
+
+    m_errorString.clear();
+
      vector<string> filenames(1, filename);
-    CloseFiles(filenames);
+    if ( CloseFiles(filenames) )
+        return true;
+    else {
+        const string currentError = m_errorString;
+        const string message = string("error while closing file: ") + filename + "\n" + currentError;
+        SetErrorString("BamMultiReader::CloseFile", message);
+        return false;
+    }
  }
  
  // close requested BAM files
-void BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
+bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
  
      // iterate over filenames
      vector<string>::const_iterator filesIter = filenames.begin();
@@ -70,7 +91,12 @@ void BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
                  m_alignmentCache->Remove(reader);
  
                  // clean up reader & its alignment
-                reader->Close();
+                if ( !reader->Close() ) {
+                    m_errorString.append(1, '\t');
+                    m_errorString.append(reader->GetErrorString());
+                    m_errorString.append(1, '\n');
+                    errorsEncountered = true;
+                }
                  delete reader;
                  reader = 0;
  
@@ -95,12 +121,16 @@ void BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
          delete m_alignmentCache;
          m_alignmentCache = 0;
      }
+
+    // return whether all readers closed OK
+    return !errorsEncountered;
  }
  
  // creates index files for BAM files that don't have them
  bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
  
-    bool result = true;
+    bool errorsEncountered = false;
+    m_errorString.clear();
  
      // iterate over readers
      vector<MergeItem>::iterator itemIter = m_readers.begin();
@@ -111,11 +141,24 @@ bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
          if ( reader == 0 ) continue;
  
          // if reader doesn't have an index, create one
-        if ( !reader->HasIndex() )
-            result &= reader->CreateIndex(type);
+        if ( !reader->HasIndex() ) {
+            if ( !reader->CreateIndex(type) ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
      }
  
-    return result;
+    // check for errors encountered before returning success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("error while creating index files: ") + "\n" + currentError;
+        SetErrorString("BamMultiReader::CreateIndexes", message);
+        return false;
+    } else
+        return true;
  }
  
  IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
@@ -159,6 +202,10 @@ const vector<string> BamMultiReaderPrivate::Filenames(void) const {
      return filenames;
  }
  
+string BamMultiReaderPrivate::GetErrorString(void) const {
+    return m_errorString;
+}
+
  SamHeader BamMultiReaderPrivate::GetHeader(void) const {
      const string& text = GetHeaderText();
      return SamHeader(text);
@@ -225,48 +272,42 @@ bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
  int BamMultiReaderPrivate::GetReferenceCount(void) const {
  
      // handle empty multireader
-    if ( m_readers.empty() )
-        return 0;
+    if ( m_readers.empty() ) return 0;
  
      // return reference count from first reader
      const MergeItem& item = m_readers.front();
      const BamReader* reader = item.Reader;
-    if ( reader ) return reader->GetReferenceCount();
-
-    // invalid reader
-    return 0;
+    if ( reader == 0 ) return 0;
+    else
+        return reader->GetReferenceCount();
  }
  
  // returns vector of reference objects
  const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
  
      // handle empty multireader
-    if ( m_readers.empty() )
-        return RefVector();
+    if ( m_readers.empty() ) return RefVector();
  
      // return reference data from first BamReader
      const MergeItem& item = m_readers.front();
      const BamReader* reader = item.Reader;
-    if ( reader ) return reader->GetReferenceData();
-
-    // invalid reader
-    return RefVector();
+    if ( reader == 0 ) return RefVector();
+    else
+        return reader->GetReferenceData();
  }
  
  // returns refID from reference name
  int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
  
      // handle empty multireader
-    if ( m_readers.empty() )
-        return -1;
+    if ( m_readers.empty() ) return -1;
  
      // return reference ID from first BamReader
      const MergeItem& item = m_readers.front();
      const BamReader* reader = item.Reader;
-    if ( reader ) return reader->GetReferenceID(refName);
-
-    // invalid reader
-    return -1;
+    if ( reader == 0 ) return -1;
+    else
+        return reader->GetReferenceID(refName);
  }
  // ---------------------------------------------------------------------------------------
  
@@ -330,11 +371,8 @@ bool BamMultiReaderPrivate::Jump(int refID, int position) {
          BamReader* reader = item.Reader;
          if ( reader == 0 ) continue;
  
-        // attempt jump() on each
-        if ( !reader->Jump(refID, position) ) {
-            cerr << "BamMultiReader ERROR: could not jump " << reader->GetFilename()
-                 << " to " << refID << ":" << position << endl;
-        }
+        // jump in each BamReader to position of interest
+        reader->Jump(refID, position);
      }
  
      // returns status of cache update
@@ -344,7 +382,8 @@ bool BamMultiReaderPrivate::Jump(int refID, int position) {
  // locate (& load) index files for BAM readers that don't already have one loaded
  bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
  
-    bool result = true;
+    bool errorsEncountered = false;
+    m_errorString.clear();
  
      // iterate over readers
      vector<MergeItem>::iterator readerIter = m_readers.begin();
@@ -355,25 +394,41 @@ bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredTy
          if ( reader == 0 ) continue;
  
          // if reader has no index, try to locate one
-        if ( !reader->HasIndex() )
-            result &= reader->LocateIndex(preferredType);
+        if ( !reader->HasIndex() ) {
+            if ( !reader->LocateIndex(preferredType) ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
      }
  
-    return result;
+    // check for errors encountered before returning success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("error while locating index files: ") + "\n" + currentError;
+        SetErrorString("BamMultiReader::LocatingIndexes", message);
+        return false;
+    } else
+        return true;
  }
  
  // opens BAM files
  bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
  
-    bool openedOk = true;
-
-    // put all current readers back at beginning
-    openedOk &= Rewind();
+    m_errorString.clear();
  
      // put all current readers back at beginning (refreshes alignment cache)
-    Rewind();
+    if ( !Rewind() ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to rewind existing readers: \n\t") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
  
      // iterate over filenames
+    bool errorsEncountered = false;
      vector<string>::const_iterator filenameIter = filenames.begin();
      vector<string>::const_iterator filenameEnd  = filenames.end();
      for ( ; filenameIter != filenameEnd; ++filenameIter ) {
@@ -388,27 +443,48 @@ bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
          if ( readerOpened )
              m_readers.push_back( MergeItem(reader, new BamAlignment) );
  
-        // otherwise clean up invalid reader
-        else delete reader;
+        // otherwise store error & clean up invalid reader
+        else {
+            m_errorString.append(1, '\t');
+            m_errorString += string("unable to open file: ") + filename;
+            m_errorString.append(1, '\n');
+            errorsEncountered = true;
  
-        // update method return status
-        openedOk &= readerOpened;
+            delete reader;
+            reader = 0;
+        }
      }
  
-    // if more than one reader open, check for consistency
-    if ( m_readers.size() > 1 )
-        openedOk &= ValidateReaders();
+    // check for errors while opening
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to open all files: \t\n") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
  
-    // update alignment cache
-    openedOk &= UpdateAlignmentCache();
+    // check for BAM file consistency
+    if ( !ValidateReaders() ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to open inconsistent files: \t\n") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
  
-    // return success
-    return openedOk;
+    // update alignment cache
+    return UpdateAlignmentCache();
  }
  
  bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
      vector<string> filenames(1, filename);
-    return Open(filenames);
+    if ( Open(filenames) )
+        return true;
+    else {
+        const string currentError = m_errorString;
+        const string message = string("could not open file: ") + filename + "\n\t" + currentError;
+        SetErrorString("BamMultiReader::OpenFile", message);
+        return false;
+    }
  }
  
  bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
@@ -418,11 +494,14 @@ bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
      //       first reader without an index?
  
      // make sure same number of index filenames as readers
-    if ( m_readers.size() != indexFilenames.size() )
+    if ( m_readers.size() != indexFilenames.size() ) {
+        const string message("size of index file list does not match current BAM file count");
+        SetErrorString("BamMultiReader::OpenIndexes", message);
          return false;
+    }
  
-    // init result flag
-    bool result = true;
+    bool errorsEncountered = false;
+    m_errorString.clear();
  
      // iterate over BamReaders
      vector<string>::const_iterator indexFilenameIter = indexFilenames.begin();
@@ -436,7 +515,12 @@ bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
          // open index filename on reader
          if ( reader ) {
              const string& indexFilename = (*indexFilenameIter);
-            result &= reader->OpenIndex(indexFilename);
+            if ( !reader->OpenIndex(indexFilename) ) {
+                m_errorString.append(1, '\t');
+                m_errorString += reader->GetErrorString();
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
          }
  
          // increment filename iterator, skip if no more index files to open
@@ -444,13 +528,16 @@ bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
              break;
      }
  
-    // TODO: any validation needed here??
-
      // return success/fail
-    return result;
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("could not open all index files: \n\t") + currentError;
+        SetErrorString("BamMultiReader::OpenIndexes", message);
+        return false;
+    } else
+        return true;
  }
  
-
  bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
  
      // skip if no alignments available
@@ -475,8 +562,6 @@ bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool
  
      // load next alignment from reader & store in cache
      SaveNextAlignment(reader, alignment);
-
-    // return success
      return true;
  }
  
@@ -485,7 +570,9 @@ bool BamMultiReaderPrivate::Rewind(void) {
  
      // attempt to rewind files
      if ( !RewindReaders() ) {
-        cerr << "BamMultiReader ERROR: could not rewind file(s) successfully";
+        const string currentError = m_errorString;
+        const string message = string("could not rewind readers: \n\t") + currentError;
+        SetErrorString("BamMultiReader::Rewind", message);
          return false;
      }
  
@@ -496,7 +583,8 @@ bool BamMultiReaderPrivate::Rewind(void) {
  // returns BAM file pointers to beginning of alignment data
  bool BamMultiReaderPrivate::RewindReaders(void) {
  
-    bool result = true;
+    m_errorString.clear();
+    bool errorsEncountered = false;
  
      // iterate over readers
      vector<MergeItem>::iterator readerIter = m_readers.begin();
@@ -507,19 +595,32 @@ bool BamMultiReaderPrivate::RewindReaders(void) {
          if ( reader == 0 ) continue;
  
          // attempt rewind on BamReader
-        result &= reader->Rewind();
+        if ( !reader->Rewind() ) {
+            m_errorString.append(1, '\t');
+            m_errorString.append( reader->GetErrorString() );
+            m_errorString.append(1, '\n');
+            errorsEncountered = true;
+        }
      }
  
-    return result;
+    return !errorsEncountered;
  }
  
  void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
  
      // if can read alignment from reader, store in cache
-    // N.B. - lazy building of alignment's char data,
-    // only populated on demand by sorting merger or client call to GetNextAlignment()
+    //
+    // N.B. - lazy building of alignment's char data - populated only:
+    //        automatically by alignment cache to maintain its sorting OR
+    //        on demand from client call to future call to GetNextAlignment()
+
      if ( reader->GetNextAlignmentCore(*alignment) )
-        m_alignmentCache->Add(MergeItem(reader, alignment));
+        m_alignmentCache->Add( MergeItem(reader, alignment) );
+}
+
+void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
+    static const string SEPARATOR = ": ";
+    m_errorString = where + SEPARATOR + what;
  }
  
  // sets the index caching mode on the readers
@@ -553,12 +654,8 @@ bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
          BamReader* reader = item.Reader;
          if ( reader == 0 ) continue;
  
-        // attempt to set BamReader's region of interest
-        if ( !reader->SetRegion(region) ) {
-            cerr << "BamMultiReader WARNING: could not jump " << reader->GetFilename() << " to "
-                 << region.LeftRefID  << ":" << region.LeftPosition   << ".."
-                 << region.RightRefID << ":" << region.RightPosition  << endl;
-        }
+        // set region of interest
+        reader->SetRegion(region);
      }
  
      // return status of cache update
@@ -572,7 +669,7 @@ bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
      if ( m_alignmentCache == 0 ) {
          m_alignmentCache = CreateAlignmentCache();
          if ( m_alignmentCache == 0 ) {
-            // set error string
+            SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
              return false;
          }
      }
@@ -603,8 +700,10 @@ bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
  // the multireader is undefined, so we force program exit.
  bool BamMultiReaderPrivate::ValidateReaders(void) const {
  
-    // skip if no readers opened
-    if ( m_readers.empty() )
+    m_errorString.clear();
+
+    // skip if 0 or 1 readers opened
+    if ( m_readers.empty() || (m_readers.size() == 1) )
          return true;
  
      // retrieve first reader
@@ -635,10 +734,10 @@ bool BamMultiReaderPrivate::ValidateReaders(void) const {
  
          // check compatible sort order
          if ( currentReaderSortOrder != firstReaderSortOrder ) {
-            // error string
-            cerr << "BamMultiReader ERROR: mismatched sort order in " << reader->GetFilename()
-                 << ", expected "  << firstReaderSortOrder
-                 << ", but found " << currentReaderSortOrder << endl;
+            const string message = string("mismatched sort order in ") + reader->GetFilename() +
+                                   ", expected " + firstReaderSortOrder +
+                                   ", but found " + currentReaderSortOrder;
+            SetErrorString("BamMultiReader::ValidateReaders", message);
              return false;
          }
  
@@ -656,9 +755,11 @@ bool BamMultiReaderPrivate::ValidateReaders(void) const {
          if ( (currentReaderRefCount != firstReaderRefCount) ||
               (firstReaderRefSize    != currentReaderRefSize) )
          {
-            cerr << "BamMultiReader ERROR: mismatched number of references in " << reader->GetFilename()
-                 << " expected " << firstReaderRefCount
-                 << " reference sequences but only found " << currentReaderRefCount << endl;
+            stringstream s("");
+            s << "mismatched reference count in " << reader->GetFilename()
+              << ", expected " << firstReaderRefCount
+              << ", but found " << currentReaderRefCount;
+            SetErrorString("BamMultiReader::ValidateReaders", s.str());
              return false;
          }
  
@@ -672,27 +773,30 @@ bool BamMultiReaderPrivate::ValidateReaders(void) const {
              if ( (firstRef.RefName   != currentRef.RefName) ||
                   (firstRef.RefLength != currentRef.RefLength) )
              {
-                cerr << "BamMultiReader ERROR: mismatched references found in " << reader->GetFilename()
-                     << " expected: " << endl;
+                stringstream s("");
+                s << "mismatched references found in" << reader->GetFilename()
+                  << "expected: " << endl;
  
                  // print first reader's reference data
                  RefVector::const_iterator refIter = firstReaderRefData.begin();
                  RefVector::const_iterator refEnd  = firstReaderRefData.end();
                  for ( ; refIter != refEnd; ++refIter ) {
                      const RefData& entry = (*refIter);
-                    cerr << entry.RefName << " " << entry.RefLength << endl;
+                    stringstream s("");
+                    s << entry.RefName << " " << endl;
                  }
  
-                cerr << "but found: " << endl;
+                s << "but found: " << endl;
  
                  // print current reader's reference data
                  refIter = currentReaderRefData.begin();
                  refEnd  = currentReaderRefData.end();
                  for ( ; refIter != refEnd; ++refIter ) {
                      const RefData& entry = (*refIter);
-                    cerr << entry.RefName << " " << entry.RefLength << endl;
+                    s << entry.RefName << " " << entry.RefLength << endl;
                  }
  
+                SetErrorString("BamMultiReader::ValidateReaders", s.str());
                  return false;
              }
  
diff --git a/src/api/internal/BamMultiReader_p.h b/src/api/internal/BamMultiReader_p.h

index 4c4e5ea3ba382d2186857449bf8dc1847a3f60ce..bb94db620bd5b6d5d3fb55b387d77bdae14c4b63 100644 (file)
--- a/src/api/internal/BamMultiReader_p.h
+++ b/src/api/internal/BamMultiReader_p.h
@@ -2,7 +2,7 @@
  // BamMultiReader_p.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 3 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Functionality for simultaneously reading multiple BAM files
  // *************************************************************************
@@ -44,9 +44,8 @@ class BamMultiReaderPrivate {
      public:
  
          // file operations
-        void Close(void);
-        void CloseFile(const std::string& filename);
-        void CloseFiles(const std::vector<std::string>& filenames);
+        bool Close(void);
+        bool CloseFile(const std::string& filename);
          const std::vector<std::string> Filenames(void) const;
          bool Jump(int refID, int position = 0);
          bool Open(const std::vector<std::string>& filenames);
@@ -73,13 +72,18 @@ class BamMultiReaderPrivate {
          bool OpenIndexes(const std::vector<std::string>& indexFilenames);
          void SetIndexCacheMode(const BamIndex::IndexCacheMode mode);
  
+        // error handling
+        std::string GetErrorString(void) const;
+
      // 'internal' methods
      public:
  
+        bool CloseFiles(const std::vector<std::string>& filenames);
          IMultiMerger* CreateAlignmentCache(void) const;
          bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
          bool RewindReaders(void);
          void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
+        void SetErrorString(const std::string& where, const std::string& what) const; //
          bool UpdateAlignmentCache(void);
          bool ValidateReaders(void) const;
  
@@ -87,6 +91,7 @@ class BamMultiReaderPrivate {
      public:
          std::vector<MergeItem> m_readers;
          IMultiMerger* m_alignmentCache;
+        mutable std::string m_errorString;
  };
  
  } // namespace Internal
diff --git a/src/api/internal/BamRandomAccessController_p.cpp b/src/api/internal/BamRandomAccessController_p.cpp

index a599f7c48704a7361576e47e7705d486813944b9..7f5c3506a9fc85d4912c0fec99c8fae620f0c759 100644 (file)
--- a/src/api/internal/BamRandomAccessController_p.cpp
+++ b/src/api/internal/BamRandomAccessController_p.cpp
@@ -2,19 +2,21 @@
  // BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 5 April 2011(DB)
+// Last modified: 6 October 2011(DB)
  // ---------------------------------------------------------------------------
  // Manages random access operations in a BAM file
  // **************************************************************************
  
  #include <api/BamIndex.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/BamRandomAccessController_p.h>
  #include <api/internal/BamReader_p.h>
  #include <api/internal/BamIndexFactory_p.h>
  using namespace BamTools;
  using namespace BamTools::Internal;
  
-#include <iostream>
+#include <cassert>
+#include <sstream>
  using namespace std;
  
  BamRandomAccessController::BamRandomAccessController(void)
@@ -133,8 +135,10 @@ void BamRandomAccessController::Close(void) {
  }
  
  void BamRandomAccessController::ClearIndex(void) {
-    delete m_index;
-    m_index = 0;
+    if ( m_index ) {
+        delete m_index;
+        m_index = 0;
+    }
  }
  
  void BamRandomAccessController::ClearRegion(void) {
@@ -143,23 +147,30 @@ void BamRandomAccessController::ClearRegion(void) {
  }
  
  bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
-                                            const BamIndex::IndexType& type) {
-
+                                            const BamIndex::IndexType& type)
+{
      // skip if reader is invalid
-    if ( reader == 0 )
+    assert(reader);
+    if ( !reader->IsOpen() ) {
+        SetErrorString("BamRandomAccessController::CreateIndex",
+                       "cannot create index for unopened reader");
          return false;
+    }
  
      // create new index of requested type
      BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
      if ( newIndex == 0 ) {
-        cerr << "BamRandomAccessController ERROR: could not create index of type " << type << endl;
+        stringstream s("");
+        s << "could not create index of type: " << type;
+        SetErrorString("BamRandomAccessController::CreateIndex", s.str());
          return false;
      }
  
      // attempt to build index from current BamReader file
      if ( !newIndex->Create() ) {
-        cerr << "BamRandomAccessController ERROR: could not create index for BAM file: "
-             << reader->Filename() << endl;
+        const string indexError = newIndex->GetErrorString();
+        const string message = "could not create index: \n\t" + indexError;
+        SetErrorString("BamRandomAccessController::CreateIndex", message);
          return false;
      }
  
@@ -171,6 +182,10 @@ bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
      return true;
  }
  
+string BamRandomAccessController::GetErrorString(void) const {
+    return m_errorString;
+}
+
  bool BamRandomAccessController::HasIndex(void) const {
      return ( m_index != 0 );
  }
@@ -187,13 +202,13 @@ bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,
                                              const BamIndex::IndexType& preferredType)
  {
      // look up index filename, deferring to preferredType if possible
+    assert(reader);
      const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
  
      // if no index file found (of any type)
      if ( indexFilename.empty() ) {
-        cerr << "BamRandomAccessController WARNING: "
-             << "could not find index file for BAM: "
-             << reader->Filename() << endl;
+        const string message = string("could not find index file for:") + reader->Filename();
+        SetErrorString("BamRandomAccessController::LocateIndex", message);
          return false;
      }
  
@@ -206,7 +221,8 @@ bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReader
      // attempt create new index of type based on filename
      BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
      if ( index == 0 ) {
-        cerr << "BamRandomAccessController ERROR: could not create index for file: " << indexFilename << endl;
+        const string message = string("could not open index file: ") + indexFilename;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
          return false;
      }
  
@@ -215,7 +231,10 @@ bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReader
  
      // attempt to load data from index file
      if ( !index->Load(indexFilename) ) {
-        cerr << "BamRandomAccessController ERROR: could not load index data from file: " << indexFilename << endl;
+        const string indexError = index->GetErrorString();
+        const string message = string("could not load index data from file: ") + indexFilename +
+                               "\n\t" + indexError;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
          return false;
      }
  
@@ -228,6 +247,10 @@ bool BamRandomAccessController::RegionHasAlignments(void) const {
      return m_hasAlignmentsInRegion;
  }
  
+void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
+    m_errorString = where + ": " + what;
+}
+
  void BamRandomAccessController::SetIndex(BamIndex* index) {
      if ( m_index )
          ClearIndex();
@@ -240,16 +263,16 @@ void BamRandomAccessController::SetIndexCacheMode(const BamIndex::IndexCacheMode
          m_index->SetCacheMode(mode);
  }
  
-bool BamRandomAccessController::SetRegion(BamReaderPrivate* reader,
-                                          const BamRegion& region,
-                                          const int& referenceCount)
-{
+bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
+
      // store region
      m_region = region;
  
      // cannot jump when no index is available
-    if ( !HasIndex() )
+    if ( !HasIndex() ) {
+        SetErrorString("BamRandomAccessController", "cannot jump if no index data available");
          return false;
+    }
  
      // adjust region as necessary to reflect where data actually begins
      AdjustRegion(referenceCount);
@@ -257,7 +280,7 @@ bool BamRandomAccessController::SetRegion(BamReaderPrivate* reader,
      // if no data present, return true
      //   * Not an error, but future attempts to access alignments in this region will not return data
      //     Returning true is useful in a BamMultiReader setting where some BAM files may
-    //     lack alignments in regions where other BAMs do have data.
+    //     lack alignments in regions where other files still have data available.
      if ( !m_hasAlignmentsInRegion )
          return true;
  
@@ -267,6 +290,13 @@ bool BamRandomAccessController::SetRegion(BamReaderPrivate* reader,
      //    This covers 'corner case' where a region is requested that lies beyond the last
      //    alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
      //    will not return data. BamMultiReader will still be able to successfully pull alignments
-    //    from a region from multiple files even if one or more have no data.
-    return m_index->Jump(m_region, &m_hasAlignmentsInRegion);
+    //    from a region from other files even if this one has no data.
+    if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
+        const string indexError = m_index->GetErrorString();
+        const string message = string("could not set region\n\t") + indexError;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+    else
+        return true;
  }
diff --git a/src/api/internal/BamRandomAccessController_p.h b/src/api/internal/BamRandomAccessController_p.h

index 86af28c26cbd36efda32f1fde8ec58580927086d..ff902b3077fd9bd73dcce70c8a2a26005de534b3 100644 (file)
--- a/src/api/internal/BamRandomAccessController_p.h
+++ b/src/api/internal/BamRandomAccessController_p.h
@@ -2,7 +2,7 @@
  // BamRandomAccessController_p.h (c) 2011 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 24 February 2011(DB)
+// Last modified: 6 October 2011(DB)
  // ---------------------------------------------------------------------------
  // Manages random access operations in a BAM file
  // ***************************************************************************
@@ -44,13 +44,10 @@ class BamRandomAccessController {
          BamRandomAccessController(void);
          ~BamRandomAccessController(void);
  
-    // general interface
+    // BamRandomAccessController interface
      public:
-        void Close(void);
  
-    // index operations
-    public:
-        //
+        // index methods
          void ClearIndex(void);
          bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);
          bool HasIndex(void) const;
@@ -60,31 +57,37 @@ class BamRandomAccessController {
          void SetIndex(BamIndex* index);
          void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
  
-    // region operations
-    public:
+        // region methods
          void ClearRegion(void);
          bool HasRegion(void) const;
          RegionState AlignmentState(const BamAlignment& alignment) const;
          bool RegionHasAlignments(void) const;
-        bool SetRegion(BamReaderPrivate* reader,
-                       const BamRegion& region,
-                       const int& referenceCount);
+        bool SetRegion(const BamRegion& region, const int& referenceCount);
  
-    // 'internal' methods
-    public:
+        // general methods
+        void Close(void);
+        std::string GetErrorString(void) const;
+
+    // internal methods
+    private:
          // adjusts requested region if necessary (depending on where data actually begins)
          void AdjustRegion(const int& referenceCount);
+        // error-string handling
+        void SetErrorString(const std::string& where, const std::string& what);
  
      // data members
      private:
  
          // index data
-        BamIndex* m_index;  // owns index, not a copy - responsible for deleting
+        BamIndex* m_index;  // owns the index, not a copy - responsible for deleting
          BamIndex::IndexCacheMode m_indexCacheMode;
  
          // region data
          BamRegion m_region;
          bool m_hasAlignmentsInRegion;
+
+        // general data
+        std::string m_errorString;
  };
  
  } // namespace Internal
diff --git a/src/api/internal/BamReader_p.cpp b/src/api/internal/BamReader_p.cpp

index 65b19f330a60c7fe9ac00cc1b992a98ce7b186d6..770701770b5855e935a2db01bb85fccbf2845691 100644 (file)
--- a/src/api/internal/BamReader_p.cpp
+++ b/src/api/internal/BamReader_p.cpp
@@ -2,13 +2,14 @@
  // BamReader_p.cpp (c) 2009 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 10 May 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the basic functionality for reading BAM files
  // ***************************************************************************
  
  #include <api/BamConstants.h>
  #include <api/BamReader.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/BamHeader_p.h>
  #include <api/internal/BamRandomAccessController_p.h>
  #include <api/internal/BamReader_p.h>
@@ -19,6 +20,7 @@ using namespace BamTools;
  using namespace BamTools::Internal;
  
  #include <algorithm>
+#include <cassert>
  #include <iostream>
  #include <iterator>
  #include <vector>
@@ -38,24 +40,52 @@ BamReaderPrivate::~BamReaderPrivate(void) {
  }
  
  // closes the BAM file
-void BamReaderPrivate::Close(void) {
+bool BamReaderPrivate::Close(void) {
  
-    // clear header & reference data
+    // clear BAM metadata
      m_references.clear();
      m_header.Clear();
  
-    // close internal
-    m_randomAccessController.Close();
-    m_stream.Close();
-
      // clear filename
      m_filename.clear();
+
+    // close random access controller
+    m_randomAccessController.Close();
+
+    // if stream is open, attempt close
+    if ( IsOpen() ) {
+        try {
+            m_stream.Close();
+        } catch ( BamException& e ) {
+            const string streamError = e.what();
+            const string message = string("encountered error closing BAM file: \n\t") + streamError;
+            SetErrorString("BamReader::Close", message);
+            return false;
+        }
+    }
+
+    // return success
+    return true;
  }
  
  // creates an index file of requested type on current BAM file
  bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
-    if ( !IsOpen() ) return false;
-    return m_randomAccessController.CreateIndex(this, type);
+
+    // skip if BAM file not open
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
+        return false;
+    }
+
+    // attempt to create index
+    if ( m_randomAccessController.CreateIndex(this, type) )
+        return true;
+    else {
+        const string bracError = m_randomAccessController.GetErrorString();
+        const string message = string("could not create index: \n\t") + bracError;
+        SetErrorString("BamReader::CreateIndex", message);
+        return false;
+    }
  }
  
  // return path & filename of current BAM file
@@ -63,6 +93,10 @@ const string BamReaderPrivate::Filename(void) const {
      return m_filename;
  }
  
+string BamReaderPrivate::GetErrorString(void) const {
+    return m_errorString;
+}
+
  // return header data as std::string
  string BamReaderPrivate::GetHeaderText(void) const {
      return m_header.ToString();
@@ -83,7 +117,14 @@ bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
          alignment.Filename = m_filename;
  
          // return success/failure of parsing char data
-        return alignment.BuildCharData();
+        if ( alignment.BuildCharData() )
+            return true;
+        else {
+            const string alError = alignment.GetErrorString();
+            const string message = string("could not populate alignment data: \n\t") + alError;
+            SetErrorString("BamReader::GetNextAlignment", message);
+            return false;
+        }
      }
  
      // no valid alignment found
@@ -96,43 +137,52 @@ bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
  // useful for operations requiring ONLY positional or other alignment-related information
  bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
  
-    // skip if region is set but has no alignments
-    if ( m_randomAccessController.HasRegion() &&
-         !m_randomAccessController.RegionHasAlignments() )
-    {
-        return false;
-    }
-
-    // if can't read next alignment
-    if ( !LoadNextAlignment(alignment) )
-        return false;
-
-    // check alignment's region-overlap state
-    BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
-
-    // if alignment starts after region, no need to keep reading
-    if ( state == BamRandomAccessController::AfterRegion )
-        return false;
+    try {
  
-    // read until overlap is found
-    while ( state != BamRandomAccessController::OverlapsRegion ) {
+        // skip if region is set but has no alignments
+        if ( m_randomAccessController.HasRegion() &&
+             !m_randomAccessController.RegionHasAlignments() )
+        {
+            return false;
+        }
  
          // if can't read next alignment
          if ( !LoadNextAlignment(alignment) )
              return false;
  
          // check alignment's region-overlap state
-        state = m_randomAccessController.AlignmentState(alignment);
+        BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
  
          // if alignment starts after region, no need to keep reading
          if ( state == BamRandomAccessController::AfterRegion )
              return false;
-    }
  
-    // if we get here, we found the next 'valid' alignment
-    // (e.g. overlaps current region if one was set, simply the next alignment if not)
-    alignment.SupportData.HasCoreOnly = true;
-    return true;
+        // read until overlap is found
+        while ( state != BamRandomAccessController::OverlapsRegion ) {
+
+            // if can't read next alignment
+            if ( !LoadNextAlignment(alignment) )
+                return false;
+
+            // check alignment's region-overlap state
+            state = m_randomAccessController.AlignmentState(alignment);
+
+            // if alignment starts after region, no need to keep reading
+            if ( state == BamRandomAccessController::AfterRegion )
+                return false;
+        }
+
+        // if we get here, we found the next 'valid' alignment
+        // (e.g. overlaps current region if one was set, simply the next alignment if not)
+        alignment.SupportData.HasCoreOnly = true;
+        return true;
+
+    } catch ( BamException& e ) {
+        const string streamError = e.what();
+        const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
+        SetErrorString("BamReader::GetNextAlignmentCore", message);
+        return false;
+    }
  }
  
  int BamReaderPrivate::GetReferenceCount(void) const {
@@ -168,8 +218,8 @@ bool BamReaderPrivate::IsOpen(void) const {
  }
  
  // load BAM header data
-bool BamReaderPrivate::LoadHeaderData(void) {
-     return m_header.Load(&m_stream);
+void BamReaderPrivate::LoadHeaderData(void) {
+    m_header.Load(&m_stream);
  }
  
  // populates BamAlignment with alignment data under file pointer, returns success/fail
@@ -180,7 +230,8 @@ bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
      m_stream.Read(buffer, sizeof(uint32_t));
      alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
      if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
-    if ( alignment.SupportData.BlockLength == 0 ) return false;
+    if ( alignment.SupportData.BlockLength == 0 )
+        return false;
  
      // read in core alignment data, make sure the right size of data was read
      char x[Constants::BAM_CORE_SIZE];
@@ -217,12 +268,12 @@ bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
      // read in character data - make sure proper data size was read
      bool readCharDataOK = false;
      const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
-    char* allCharData = (char*)calloc(sizeof(char), dataLength);
+    RaiiBuffer allCharData(dataLength);
  
-    if ( m_stream.Read(allCharData, dataLength) == (signed int)dataLength ) {
+    if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) {
  
          // store 'allCharData' in supportData structure
-        alignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength);
+        alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
  
          // set success flag
          readCharDataOK = true;
@@ -231,7 +282,7 @@ bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
          // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
          // even when GetNextAlignmentCore() is called
          const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
-        uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset);
+        uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
          CigarOp op;
          alignment.CigarData.clear();
          alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
@@ -249,8 +300,7 @@ bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
          }
      }
  
-    // clean up & return parsing success/failure
-    free(allCharData);
+    // return success/failure
      return readCharDataOK;
  }
  
@@ -271,22 +321,19 @@ bool BamReaderPrivate::LoadReferenceData(void) {
          m_stream.Read(buffer, sizeof(uint32_t));
          uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
          if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
-        char* refName = (char*)calloc(refNameLength, 1);
+        RaiiBuffer refName(refNameLength);
  
          // get reference name and reference sequence length
-        m_stream.Read(refName, refNameLength);
+        m_stream.Read(refName.Buffer, refNameLength);
          m_stream.Read(buffer, sizeof(int32_t));
          int32_t refLength = BamTools::UnpackSignedInt(buffer);
          if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
  
          // store data for reference
          RefData aReference;
-        aReference.RefName   = (string)((const char*)refName);
+        aReference.RefName   = (string)((const char*)refName.Buffer);
          aReference.RefLength = refLength;
          m_references.push_back(aReference);
-
-        // clean up calloc-ed temp variable
-        free(refName);
      }
  
      // return success
@@ -294,70 +341,106 @@ bool BamReaderPrivate::LoadReferenceData(void) {
  }
  
  bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
-    return m_randomAccessController.LocateIndex(this, preferredType);
+
+    if ( m_randomAccessController.LocateIndex(this, preferredType) )
+        return true;
+    else {
+        const string bracError = m_randomAccessController.GetErrorString();
+        const string message = string("could not locate index: \n\t") + bracError;
+        SetErrorString("BamReader::LocateIndex", message);
+        return false;
+    }
  }
  
  // opens BAM file (and index)
  bool BamReaderPrivate::Open(const string& filename) {
  
-    // close current BAM file if open
-    if ( m_stream.IsOpen )
-        Close();
+    bool result;
  
-    // attempt to open BgzfStream for reading
-    if ( !m_stream.Open(filename, "rb") ) {
-        cerr << "BamReader ERROR: Could not open BGZF stream for " << filename << endl;
-        return false;
-    }
+    try {
  
-    // attempt to load header data
-    if ( !LoadHeaderData() ) {
-        cerr << "BamReader ERROR: Could not load header data for " << filename << endl;
+        // make sure we're starting with fresh state
          Close();
-        return false;
-    }
  
-    // attempt to load reference data
-    if ( !LoadReferenceData() ) {
-        cerr << "BamReader ERROR: Could not load reference data for " << filename << endl;
-        Close();
+        // open BgzfStream
+        m_stream.Open(filename, "rb");
+        assert(m_stream);
+
+        // load BAM metadata
+        LoadHeaderData();
+        LoadReferenceData();
+
+        // store filename & offset of first alignment
+        m_filename = filename;
+        m_alignmentsBeginOffset = m_stream.Tell();
+
+        // set flag
+        result = true;
+
+    } catch ( BamException& e ) {
+        const string error = e.what();
+        const string message = string("could not open file: ") + filename +
+                               "\n\t" + error;
+        SetErrorString("BamReader::Open", message);
          return false;
      }
  
-    // if all OK, store filename & offset of first alignment
-    m_filename = filename;
-    m_alignmentsBeginOffset = m_stream.Tell();
-
-    // return success
-    return true;
+    // return success/failure
+    return result;
  }
  
  bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
-    return m_randomAccessController.OpenIndex(indexFilename, this);
+
+    if ( m_randomAccessController.OpenIndex(indexFilename, this) )
+        return true;
+    else {
+        const string bracError = m_randomAccessController.GetErrorString();
+        const string message = string("could not open index: \n\t") + bracError;
+        SetErrorString("BamReader::OpenIndex", message);
+        return false;
+    }
  }
  
  // returns BAM file pointer to beginning of alignment data
  bool BamReaderPrivate::Rewind(void) {
  
-    // attempt rewind to first alignment
-    if ( !m_stream.Seek(m_alignmentsBeginOffset) )
-        return false;
-
-    // verify that we can read first alignment
-    BamAlignment al;
-    if ( !LoadNextAlignment(al) )
-        return false;
-
      // reset region
      m_randomAccessController.ClearRegion();
  
-    // rewind back to beginning of first alignment
-    // return success/fail of seek
-    return m_stream.Seek(m_alignmentsBeginOffset);
+    // return status of seeking back to first alignment
+    if ( Seek(m_alignmentsBeginOffset) )
+        return true;
+    else {
+        const string currentError = m_errorString;
+        const string message = string("could not rewind: \n\t") + currentError;
+        SetErrorString("BamReader::Rewind", message);
+        return false;
+    }
  }
  
  bool BamReaderPrivate::Seek(const int64_t& position) {
-    return m_stream.Seek(position);
+
+    // skip if BAM file not open
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
+        return false;
+    }
+
+    try {
+        m_stream.Seek(position);
+        return true;
+    }
+    catch ( BamException& e ) {
+        const string streamError = e.what();
+        const string message = string("could not seek in BAM file: \n\t") + streamError;
+        SetErrorString("BamReader::Seek", message);
+        return false;
+    }
+}
+
+void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
+    static const string SEPARATOR = ": ";
+    m_errorString = where + SEPARATOR + what;
  }
  
  void BamReaderPrivate::SetIndex(BamIndex* index) {
@@ -372,7 +455,15 @@ void BamReaderPrivate::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {
  // sets current region & attempts to jump to it
  // returns success/failure
  bool BamReaderPrivate::SetRegion(const BamRegion& region) {
-    return m_randomAccessController.SetRegion(this, region, m_references.size());
+
+    if ( m_randomAccessController.SetRegion(region, m_references.size()) )
+        return true;
+    else {
+        const string bracError = m_randomAccessController.GetErrorString();
+        const string message = string("could not set region: \n\t") + bracError;
+        SetErrorString("BamReader::SetRegion", message);
+        return false;
+    }
  }
  
  int64_t BamReaderPrivate::Tell(void) const {
diff --git a/src/api/internal/BamReader_p.h b/src/api/internal/BamReader_p.h

index 2901e54d2edca656d77810322672c0c0ae08aa76..ccc3835246a85b7372b1660be961339d5d4b8183 100644 (file)
--- a/src/api/internal/BamReader_p.h
+++ b/src/api/internal/BamReader_p.h
@@ -2,7 +2,7 @@
  // BamReader_p.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 7 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the basic functionality for reading BAM files
  // ***************************************************************************
@@ -43,7 +43,7 @@ class BamReaderPrivate {
      public:
  
          // file operations
-        void Close(void);
+        bool Close(void);
          const std::string Filename(void) const;
          bool IsOpen(void) const;
          bool Open(const std::string& filename);
@@ -69,13 +69,17 @@ class BamReaderPrivate {
          void SetIndex(BamIndex* index);
          void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
  
+        // error handling
+        std::string GetErrorString(void) const;
+        void SetErrorString(const std::string& where, const std::string& what);
+
      // internal methods, but available as a BamReaderPrivate 'interface'
      //
      // these methods should only be used by BamTools::Internal classes
      // (currently only used by the BamIndex subclasses)
      public:
          // retrieves header text from BAM file
-        bool LoadHeaderData(void);
+        void LoadHeaderData(void);
          // retrieves BAM alignment under file pointer
          // (does no overlap checking or character data parsing)
          bool LoadNextAlignment(BamAlignment& alignment);
@@ -104,6 +108,9 @@ class BamReaderPrivate {
          BamHeader m_header;
          BamRandomAccessController m_randomAccessController;
          BgzfStream m_stream;
+
+        // error handling
+        std::string m_errorString;
  };
  
  } // namespace Internal
diff --git a/src/api/internal/BamStandardIndex_p.cpp b/src/api/internal/BamStandardIndex_p.cpp

index 43f12ebf8838ce505180e11891e749c5313fcfd4..ad21fec3a581116c89091bdd5ba90eadf6876c46 100644 (file)
--- a/src/api/internal/BamStandardIndex_p.cpp
+++ b/src/api/internal/BamStandardIndex_p.cpp
@@ -2,12 +2,13 @@
  // BamStandardIndex.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 24 June 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides index operations for the standardized BAM index format (".bai")
  // ***************************************************************************
  
  #include <api/BamAlignment.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/BamReader_p.h>
  #include <api/internal/BamStandardIndex_p.h>
  using namespace BamTools;
@@ -17,10 +18,13 @@ using namespace BamTools::Internal;
  #include <cstdlib>
  #include <cstring>
  #include <algorithm>
-#include <iostream>
+#include <sstream>
  using namespace std;
  
+// -----------------------------------
  // static BamStandardIndex constants
+// -----------------------------------
+
  const int BamStandardIndex::MAX_BIN               = 37450;  // =(8^6-1)/7+1
  const int BamStandardIndex::BAM_LIDX_SHIFT        = 14;
  const string BamStandardIndex::BAI_EXTENSION      = ".bai";
@@ -29,12 +33,36 @@ const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
  const int BamStandardIndex::SIZEOF_BINCORE        = sizeof(uint32_t) + sizeof(int32_t);
  const int BamStandardIndex::SIZEOF_LINEAROFFSET   = sizeof(uint64_t);
  
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
+    : IndexStream(0)
+    , Buffer(0)
+{ }
+
+BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+    if ( IndexStream ) {
+        fclose(IndexStream);
+        IndexStream = 0;
+    }
+
+    if ( Buffer ) {
+        delete[] Buffer;
+        Buffer = 0;
+    }
+}
+
+// ---------------------------------
+// BamStandardIndex implementation
+// ---------------------------------
+
  // ctor
  BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
      : BamIndex(reader)
-    , m_indexStream(0)
      , m_cacheMode(BamIndex::LimitedIndexCaching)
-    , m_buffer(0)
      , m_bufferLength(0)
  {
       m_isBigEndian = BamTools::SystemIsBigEndian();
@@ -45,14 +73,14 @@ BamStandardIndex::~BamStandardIndex(void) {
      CloseFile();
  }
  
-bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
+void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
  
      // retrieve references from reader
      const RefVector& references = m_reader->GetReferenceData();
  
      // make sure left-bound position is valid
      if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )
-        return false;
+        throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
  
      // set region 'begin'
      begin = (unsigned int)region.LeftPosition;
@@ -64,9 +92,6 @@ bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, ui
  
      // otherwise, set region 'end' to last reference base
      else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1;
-
-    // return success
-    return true;
  }
  
  void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
@@ -85,14 +110,13 @@ void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
      for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
  }
  
-bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
                                                   const uint64_t& minOffset,
                                                   set<uint16_t>& candidateBins,
                                                   vector<int64_t>& offsets)
  {
-    // attempt seek to first bin
-    if ( !Seek(refSummary.FirstBinFilePosition, SEEK_SET) )
-        return false;
+    // seek to first bin
+    Seek(refSummary.FirstBinFilePosition, SEEK_SET);
  
      // iterate over reference bins
      uint32_t binId;
@@ -101,8 +125,7 @@ bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refS
      for ( int i = 0; i < refSummary.NumBins; ++i ) {
  
          // read bin contents (if successful, alignment chunks are now in m_buffer)
-        if ( !ReadBinIntoBuffer(binId, numAlignmentChunks) )
-            return false;
+        ReadBinIntoBuffer(binId, numAlignmentChunks);
  
          // see if bin is a 'candidate bin'
          candidateBinIter = candidateBins.find(binId);
@@ -114,17 +137,17 @@ bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refS
          // otherwise, check bin's contents against for overlap
          else {
  
-            unsigned int offset = 0;
+            size_t offset = 0;
              uint64_t chunkStart;
              uint64_t chunkStop;
  
              // iterate over alignment chunks
-            for (int j = 0; j < numAlignmentChunks; ++j ) {
+            for ( int j = 0; j < numAlignmentChunks; ++j ) {
  
                  // read chunk start & stop from buffer
-                memcpy((char*)&chunkStart, m_buffer+offset, sizeof(uint64_t));
+                memcpy((char*)&chunkStart, Resources.Buffer+offset, sizeof(uint64_t));
                  offset += sizeof(uint64_t);
-                memcpy((char*)&chunkStop, m_buffer+offset, sizeof(uint64_t));
+                memcpy((char*)&chunkStop, Resources.Buffer+offset, sizeof(uint64_t));
                  offset += sizeof(uint64_t);
  
                  // swap endian-ness if necessary
@@ -147,9 +170,6 @@ bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refS
                  break;
          }
      }
-
-    // return success
-    return true;
  }
  
  uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
@@ -178,10 +198,10 @@ void BamStandardIndex::CheckBufferSize(char*& buffer,
              delete[] buffer;
              buffer = new char[bufferLength];
          }
-    } catch ( std::bad_alloc ) {
-        cerr << "BamStandardIndex ERROR: out of memory when allocating "
-             << requestedBytes << " byes" << endl;
-        exit(1);
+    } catch ( std::bad_alloc&  ) {
+        stringstream s("");
+        s << "out of memory when allocating " << requestedBytes << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
      }
  }
  
@@ -195,31 +215,24 @@ void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
              delete[] buffer;
              buffer = new unsigned char[bufferLength];
          }
-    } catch ( std::bad_alloc ) {
-        cerr << "BamStandardIndex ERROR: out of memory when allocating "
-             << requestedBytes << " byes" << endl;
-        exit(1);
+    } catch ( std::bad_alloc& ) {
+        stringstream s("");
+        s << "out of memory when allocating " << requestedBytes << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
      }
  }
  
-bool BamStandardIndex::CheckMagicNumber(void) {
+void BamStandardIndex::CheckMagicNumber(void) {
  
      // check 'magic number' to see if file is BAI index
      char magic[4];
-    size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
-    if ( elementsRead != 4 ) {
-        cerr << "BamStandardIndex ERROR: could not read format 'magic number'" << endl;
-        return false;
-    }
+    const size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
+    if ( elementsRead != 4 )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
  
      // compare to expected value
-    if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 ) {
-        cerr << "BamStandardIndex ERROR: invalid format" << endl;
-        return false;
-    }
-
-    // otherwise OK
-    return true;
+    if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
  }
  
  void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
@@ -231,169 +244,176 @@ void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
  void BamStandardIndex::CloseFile(void) {
  
      // close file stream
-    if ( IsFileOpen() )
-        fclose(m_indexStream);
+    if ( IsFileOpen() ) {
+        fclose(Resources.IndexStream);
+        Resources.IndexStream = 0;
+    }
  
      // clear index file summary data
      m_indexFileSummary.clear();
  
      // clean up I/O buffer
-    delete[] m_buffer;
-    m_buffer = 0;
+    delete[] Resources.Buffer;
+    Resources.Buffer = 0;
      m_bufferLength = 0;
  }
  
  // builds index from associated BAM file & writes out to index file
  bool BamStandardIndex::Create(void) {
  
-    // return false if BamReader is invalid or not open
+    // skip if BamReader is invalid or not open
      if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        cerr << "BamStandardIndex ERROR: BamReader is not open"
-             << ", aborting index creation" << endl;
+        SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
          return false;
      }
  
      // rewind BamReader
      if ( !m_reader->Rewind() ) {
-        cerr << "BamStandardIndex ERROR: could not rewind BamReader to create index"
-             << ", aborting index creation" << endl;
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
          return false;
      }
  
-    // open new index file (read & write)
-    string indexFilename = m_reader->Filename() + Extension();
-    if ( !OpenFile(indexFilename, "w+b") ) {
-        cerr << "BamStandardIndex ERROR: could not open output index file: " << indexFilename
-             << ", aborting index creation" << endl;
-        return false;
-    }
+    try {
  
-    // initialize BaiFileSummary with number of references
-    const int& numReferences = m_reader->GetReferenceCount();
-    ReserveForSummary(numReferences);
+        // open new index file (read & write)
+        string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, "w+b");
+
+        // initialize BaiFileSummary with number of references
+        const int& numReferences = m_reader->GetReferenceCount();
+        ReserveForSummary(numReferences);
+
+        // initialize output file
+        WriteHeader();
+
+        // set up bin, ID, offset, & coordinate markers
+        const uint32_t defaultValue = 0xffffffffu;
+        uint32_t currentBin    = defaultValue;
+        uint32_t lastBin       = defaultValue;
+        int32_t  currentRefID  = defaultValue;
+        int32_t  lastRefID     = defaultValue;
+        uint64_t currentOffset = (uint64_t)m_reader->Tell();
+        uint64_t lastOffset    = currentOffset;
+        int32_t  lastPosition  = defaultValue;
+
+        // iterate through alignments in BAM file
+        BamAlignment al;
+        BaiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
+
+            // changed to new reference
+            if ( lastRefID != al.RefID ) {
+
+                // if not first reference, save previous reference data
+                if ( lastRefID != (int32_t)defaultValue ) {
+
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+                    WriteReferenceEntry(refEntry);
+                    ClearReferenceEntry(refEntry);
+
+                    // write any empty references between (but *NOT* including) lastRefID & al.RefID
+                    for ( int i = lastRefID+1; i < al.RefID; ++i ) {
+                        BaiReferenceEntry emptyEntry(i);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+
+                    // update bin markers
+                    currentOffset = lastOffset;
+                    currentBin    = al.Bin;
+                    lastBin       = al.Bin;
+                    currentRefID  = al.RefID;
+                }
  
-    // initialize output file
-    bool createdOk = true;
-    createdOk &= WriteHeader();
-
-    // set up bin, ID, offset, & coordinate markers
-    const uint32_t defaultValue = 0xffffffffu;
-    uint32_t currentBin    = defaultValue;
-    uint32_t lastBin       = defaultValue;
-    int32_t  currentRefID  = defaultValue;
-    int32_t  lastRefID     = defaultValue;
-    uint64_t currentOffset = (uint64_t)m_reader->Tell();
-    uint64_t lastOffset    = currentOffset;
-    int32_t  lastPosition  = defaultValue;
-
-    // iterate through alignments in BAM file
-    BamAlignment al;
-    BaiReferenceEntry refEntry;
-    while ( m_reader->LoadNextAlignment(al) ) {
+                // otherwise, this is first pass
+                // be sure to write any empty references up to (but *NOT* including) current RefID
+                else {
+                    for ( int i = 0; i < al.RefID; ++i ) {
+                        BaiReferenceEntry emptyEntry(i);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+                }
  
-        // changed to new reference
-        if ( lastRefID != al.RefID ) {
+                // update reference markers
+                refEntry.ID = al.RefID;
+                lastRefID   = al.RefID;
+                lastBin     = defaultValue;
+            }
  
-            // if not first reference, save previous reference data
-            if ( lastRefID != (int32_t)defaultValue ) {
+            // if lastPosition greater than current alignment position - file not sorted properly
+            else if ( lastPosition > al.Position ) {
+                stringstream s("");
+                s << "BAM file is not properly sorted by coordinate" << endl
+                  << "Current alignment position: " << al.Position
+                  << " < previous alignment position: " << lastPosition
+                  << " on reference ID: " << al.RefID << endl;
+                SetErrorString("BamStandardIndex::Create", s.str());
+                return false;
+            }
  
-                SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-                createdOk &= WriteReferenceEntry(refEntry);
-                ClearReferenceEntry(refEntry);
+            // if alignment's ref ID is valid & its bin is not a 'leaf'
+            if ( (al.RefID >= 0) && (al.Bin < 4681) )
+                SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
  
-                // write any empty references between (but *NOT* including) lastRefID & al.RefID
-                for ( int i = lastRefID+1; i < al.RefID; ++i ) {
-                    BaiReferenceEntry emptyEntry(i);
-                    createdOk &= WriteReferenceEntry(emptyEntry);
-                }
+            // changed to new BAI bin
+            if ( al.Bin != lastBin ) {
  
-                // update bin markers
+                // if not first bin on reference, save previous bin data
+                if ( currentBin != defaultValue )
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+
+                // update markers
                  currentOffset = lastOffset;
                  currentBin    = al.Bin;
                  lastBin       = al.Bin;
                  currentRefID  = al.RefID;
-            }
  
-            // first pass
-            // write any empty references up to (but *NOT* including) al.RefID
-            else {
-                for ( int i = 0; i < al.RefID; ++i ) {
-                    BaiReferenceEntry emptyEntry(i);
-                    createdOk &= WriteReferenceEntry(emptyEntry);
-                }
+                // if invalid RefID, break out
+                if ( currentRefID < 0 )
+                    break;
              }
  
-            // update reference markers
-            refEntry.ID = al.RefID;
-            lastRefID   = al.RefID;
-            lastBin     = defaultValue;
-        }
+            // make sure that current file pointer is beyond lastOffset
+            if ( m_reader->Tell() <= (int64_t)lastOffset ) {
+                SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
+                return false;
+            }
  
-        // if lastPosition greater than current alignment position - file not sorted properly
-        else if ( lastPosition > al.Position ) {
-            cerr << "BamStandardIndex ERROR: BAM file is not properly sorted by coordinate"
-                 << ", aborting index creation"
-                 << endl
-                 << "At alignment: " << al.Name
-                 << " : previous position " << lastPosition
-                 << " > this alignment position " << al.Position
-                 << " on reference id: " << al.RefID << endl;
-            return false;
+            // update lastOffset & lastPosition
+            lastOffset   = m_reader->Tell();
+            lastPosition = al.Position;
          }
  
-        // if alignment's ref ID is valid & its bin is not a 'leaf'
-        if ( (al.RefID >= 0) && (al.Bin < 4681) )
-            SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
-
-        // changed to new BAI bin
-        if ( al.Bin != lastBin ) {
-
-            // if not first bin on reference, save previous bin data
-            if ( currentBin != defaultValue )
-                SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+        // after finishing alignments, if any data was read, check:
+        if ( currentRefID >= 0 ) {
  
-            // update markers
-            currentOffset = lastOffset;
-            currentBin    = al.Bin;
-            lastBin       = al.Bin;
-            currentRefID  = al.RefID;
+            // store last alignment chunk to its bin, then write last reference entry with data
+            SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+            WriteReferenceEntry(refEntry);
  
-            // if invalid RefID, break out
-            if ( currentRefID < 0 )
-                break;
-        }
-
-        // make sure that current file pointer is beyond lastOffset
-        if ( m_reader->Tell() <= (int64_t)lastOffset ) {
-            cerr << "BamStandardIndex ERROR: calculating offsets failed"
-                 << ", aborting index creation" << endl;
-            return false;
+            // then write any empty references remaining at end of file
+            for ( int i = currentRefID+1; i < numReferences; ++i ) {
+                BaiReferenceEntry emptyEntry(i);
+                WriteReferenceEntry(emptyEntry);
+            }
          }
  
-        // update lastOffset & lastPosition
-        lastOffset   = m_reader->Tell();
-        lastPosition = al.Position;
+    } catch ( BamException& e) {
+        m_errorString = e.what();
+        return false;
      }
  
-    // after finishing alignments, if any data was read, check:
-    if ( currentRefID >= 0 ) {
-
-        // store last alignment chunk to its bin, then write last reference entry with data
-        SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-        createdOk &= WriteReferenceEntry(refEntry);
-
-        // then write any empty references remaining at end of file
-        for ( int i = currentRefID+1; i < numReferences; ++i ) {
-            BaiReferenceEntry emptyEntry(i);
-            createdOk &= WriteReferenceEntry(emptyEntry);
-        }
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
+        return false;
      }
  
-    // rewind reader now that we're done building
-    createdOk &= m_reader->Rewind();
-
-    // return result
-    return createdOk;
+    // return success
+    return true;
  }
  
  // returns format's file extension
@@ -401,11 +421,11 @@ const string BamStandardIndex::Extension(void) {
      return BamStandardIndex::BAI_EXTENSION;
  }
  
-bool BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
  
      // cannot calculate offsets if unknown/invalid reference ID requested
      if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
-        return false;
+        throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
  
      // retrieve index summary for left bound reference
      const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
@@ -413,10 +433,7 @@ bool BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool*
      // set up region boundaries based on actual BamReader data
      uint32_t begin;
      uint32_t end;
-    if ( !AdjustRegion(region, begin, end) ) {
-        cerr << "BamStandardIndex ERROR: cannot calculate offsets on invalid region" << endl;
-        return false;
-    }
+    AdjustRegion(region, begin, end);
  
      // retrieve all candidate bin IDs for region
      set<uint16_t> candidateBins;
@@ -427,18 +444,11 @@ bool BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool*
      const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
  
      // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
-    // no data should not be error
+    // no data should not be error, just bail
      vector<int64_t> offsets;
-    if ( !CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets) ) {
-        cerr << "BamStandardIndex ERROR: could not calculate candidate offsets for requested region" << endl;
-        return false;
-    }
-
-    // if not candidate offsets are present in the indexed (most likely sparce coverage)
-    // then silently bail
-    if( offsets.size() == 0 ) {
-        return false;
-    }
+    CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
+    if ( offsets.empty() )
+        return;
      
      // ensure that offsets are sorted before processing
      sort( offsets.begin(), offsets.end() );
@@ -459,9 +469,9 @@ bool BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool*
          // attempt seek to candidate offset
          const int64_t& candidateOffset = (*offsetIter);
          if ( !m_reader->Seek(candidateOffset) ) {
-            cerr << "BamStandardIndex ERROR: could not jump"
-                 << ", there was a problem seeking in BAM file" << endl;
-            return false;
+            const string readerError = m_reader->GetErrorString();
+            const string message = "could not seek in BAM file: \n\t" + readerError;
+            throw BamException("BamToolsIndex::GetOffset", message);
          }
  
          // load first available alignment, setting flag to true if data exists
@@ -474,13 +484,10 @@ bool BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool*
          } else count = step;
      }
  
-    // seek back to the offset before the 'current offset' (to cover overlaps)
+    // step back to the offset before the 'current offset' (to make sure we cover overlaps)
      if ( offsetIter != offsets.begin() )
          --offsetIter;
      offset = (*offsetIter);
-
-    // return succes
-    return true;
  }
  
  // returns whether reference has alignments or no
@@ -492,7 +499,7 @@ bool BamStandardIndex::HasAlignments(const int& referenceID) const {
  }
  
  bool BamStandardIndex::IsFileOpen(void) const {
-    return ( m_indexStream != 0 );
+    return ( Resources.IndexStream != 0 );
  }
  
  // attempts to use index data to jump to @region, returns success/fail
@@ -504,15 +511,18 @@ bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion
      // clear out flag
      *hasAlignmentsInRegion = false;
  
-    // skip if reader is not valid or is not open
-    if ( m_reader == 0 || !m_reader->IsOpen() )
+    // skip if invalid reader or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
          return false;
+    }
  
      // calculate nearest offset to jump to
      int64_t offset;
-    if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
-        cerr << "BamStandardIndex ERROR: could not jump"
-             << ", unable to calculate offset for specified region" << endl;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
          return false;
      }
  
@@ -528,31 +538,24 @@ bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion
  // loads existing data from file into memory
  bool BamStandardIndex::Load(const std::string& filename) {
  
-    // attempt open index file (read-only)
-    if ( !OpenFile(filename, "rb") ) {
-        cerr << "BamStandardIndex ERROR: could not open input index file: " << filename
-             << ", aborting index load" << endl;
-        return false;
-    }
+    try {
  
-    // if invalid format 'magic number', close & return failure
-    if ( !CheckMagicNumber() ) {
-        cerr << "BamStandardIndex ERROR: unexpected format for index file: " << filename
-             << ", aborting index load" << endl;
-        CloseFile();
-        return false;
-    }
+        // attempt to open file (read-only)
+        OpenFile(filename, "rb");
+
+        // validate format
+        CheckMagicNumber();
+
+        // load in-memory summary of index data
+        SummarizeIndexFile();
  
-    // attempt to load index file summary, return success/failure
-    if ( !SummarizeIndexFile() ) {
-        cerr << "BamStandardIndex ERROR: could not generate a summary of index file " << filename
-             << ", aborting index load" << endl;
-        CloseFile();
+        // return success
+        return true;
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
          return false;
      }
-
-    // if we get here, index summary is loaded OK
-    return true;
  }
  
  uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
@@ -560,13 +563,11 @@ uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSumm
      // attempt seek to proper index file position
      const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
                                               index*BamStandardIndex::SIZEOF_LINEAROFFSET;
-    if ( !Seek(linearOffsetFilePosition, SEEK_SET) )
-        return 0;
+    Seek(linearOffsetFilePosition, SEEK_SET);
  
      // read linear offset from BAI file
-    uint64_t linearOffset(0);
-    if ( !ReadLinearOffset(linearOffset) )
-        return 0;
+    uint64_t linearOffset;
+    ReadLinearOffset(linearOffset);
      return linearOffset;
  }
  
@@ -611,82 +612,85 @@ void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
      chunks = mergedChunks;
  }
  
-bool BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
+void BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
  
      // make sure any previous index file is closed
      CloseFile();
  
      // attempt to open file
-    m_indexStream = fopen(filename.c_str(), mode);
-    return IsFileOpen();
+    Resources.IndexStream = fopen(filename.c_str(), mode);
+    if ( !IsFileOpen() ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamStandardIndex::OpenFile", message);
+    }
  }
  
-bool BamStandardIndex::ReadBinID(uint32_t& binId) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&binId, sizeof(binId), 1, m_indexStream);
+void BamStandardIndex::ReadBinID(uint32_t& binId) {
+    const size_t elementsRead = fread(&binId, sizeof(binId), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(binId);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
  }
  
-bool BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
-
-    bool readOk = true;
+void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
  
      // read bin header
-    readOk &= ReadBinID(binId);
-    readOk &= ReadNumAlignmentChunks(numAlignmentChunks);
+    ReadBinID(binId);
+    ReadNumAlignmentChunks(numAlignmentChunks);
  
      // read bin contents
      const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
-    readOk &= ReadIntoBuffer(bytesRequested);
-
-    // return success/failure
-    return readOk;
+    ReadIntoBuffer(bytesRequested);
  }
  
-bool BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
+void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
  
      // ensure that our buffer is big enough for request
-    BamStandardIndex::CheckBufferSize(m_buffer, m_bufferLength, bytesRequested);
+    BamStandardIndex::CheckBufferSize(Resources.Buffer, m_bufferLength, bytesRequested);
  
      // read from BAI file stream
-    size_t bytesRead = fread( m_buffer, sizeof(char), bytesRequested, m_indexStream );
-    return ( bytesRead == (size_t)bytesRequested );
+    const size_t bytesRead = fread( Resources.Buffer, sizeof(char), bytesRequested, Resources.IndexStream );
+    if ( bytesRead != (size_t)bytesRequested ) {
+        stringstream s("");
+        s << "expected to read: " << bytesRequested << " bytes, "
+          << "but instead read: " << bytesRead;
+        throw BamException("BamStandardIndex::ReadIntoBuffer", s.str());
+    }
  }
  
-bool BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
+void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
+    const size_t elementsRead = fread(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_64(linearOffset);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
  }
  
-bool BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, m_indexStream);
+void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
+    const size_t elementsRead = fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
  }
  
-bool BamStandardIndex::ReadNumBins(int& numBins) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&numBins, sizeof(numBins), 1, m_indexStream);
+void BamStandardIndex::ReadNumBins(int& numBins) {
+    const size_t elementsRead = fread(&numBins, sizeof(numBins), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(numBins);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
  }
  
-bool BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, m_indexStream);
+void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
+    const size_t elementsRead = fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI linear offset count");
  }
  
-bool BamStandardIndex::ReadNumReferences(int& numReferences) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+void BamStandardIndex::ReadNumReferences(int& numReferences) {
+    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
  }
  
  void BamStandardIndex::ReserveForSummary(const int& numReferences) {
@@ -695,15 +699,13 @@ void BamStandardIndex::ReserveForSummary(const int& numReferences) {
  }
  
  void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
-                                    const uint32_t& currentBin,
-                                    const uint64_t& currentOffset,
-                                    const uint64_t& lastOffset)
+                                               const uint32_t& currentBin,
+                                               const uint64_t& currentOffset,
+                                               const uint64_t& lastOffset)
  {
      // create new alignment chunk
      BaiAlignmentChunk newChunk(currentOffset, lastOffset);
  
-
-
      // if no entry exists yet for this bin, create one and store alignment chunk
      BaiBinMap::iterator binIter = binMap.find(currentBin);
      if ( binIter == binMap.end() ) {
@@ -754,8 +756,9 @@ void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& num
  }
  
  // seek to position in index file stream
-bool BamStandardIndex::Seek(const int64_t& position, const int& origin) {
-    return ( fseek64(m_indexStream, position, origin) == 0 );
+void BamStandardIndex::Seek(const int64_t& position, const int& origin) {
+    if ( fseek64(Resources.IndexStream, position, origin) != 0 )
+        throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
  }
  
  // change the index caching behavior
@@ -764,99 +767,77 @@ void BamStandardIndex::SetCacheMode(const BamIndex::IndexCacheMode& mode) {
      // do nothing else here ? cache mode will be ignored from now on, most likely
  }
  
-bool BamStandardIndex::SkipBins(const int& numBins) {
+void BamStandardIndex::SkipBins(const int& numBins) {
      uint32_t binId;
      int32_t numAlignmentChunks;
-    bool skippedOk = true;
      for (int i = 0; i < numBins; ++i)
-        skippedOk &= ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
-    return skippedOk;
+        ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
  }
  
-bool BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
+void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
      const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
-    return ReadIntoBuffer(bytesRequested);
+    ReadIntoBuffer(bytesRequested);
  }
  
  void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
      sort( linearOffsets.begin(), linearOffsets.end() );
  }
  
-bool BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
+void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
  
      // load number of bins
      int numBins;
-    if ( !ReadNumBins(numBins) )
-        return false;
+    ReadNumBins(numBins);
  
      // store bins summary for this reference
      refSummary.NumBins = numBins;
      refSummary.FirstBinFilePosition = Tell();
  
-    // attempt skip reference bins, return success/failure
-    if ( !SkipBins(numBins) )
-        return false;
-
-    // if we get here, bin summarized OK
-    return true;
+    // skip this reference's bins
+    SkipBins(numBins);
  }
  
-bool BamStandardIndex::SummarizeIndexFile(void) {
+void BamStandardIndex::SummarizeIndexFile(void) {
  
      // load number of reference sequences
      int numReferences;
-    if ( !ReadNumReferences(numReferences) )
-        return false;
+    ReadNumReferences(numReferences);
  
      // initialize file summary data
      ReserveForSummary(numReferences);
  
      // iterate over reference entries
-    bool loadedOk = true;
      BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
      BaiFileSummary::iterator summaryEnd  = m_indexFileSummary.end();
      for ( int i = 0; summaryIter != summaryEnd; ++summaryIter, ++i )
-        loadedOk &= SummarizeReference(*summaryIter);
-
-    // return result
-    return loadedOk;
+        SummarizeReference(*summaryIter);
  }
  
-bool BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
+void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
  
      // load number of linear offsets
      int numLinearOffsets;
-    if ( !ReadNumLinearOffsets(numLinearOffsets) )
-        return false;
+    ReadNumLinearOffsets(numLinearOffsets);
  
      // store bin summary data for this reference
      refSummary.NumLinearOffsets = numLinearOffsets;
      refSummary.FirstLinearOffsetFilePosition = Tell();
  
      // skip linear offsets in index file
-    if ( !SkipLinearOffsets(numLinearOffsets) )
-        return false;
-
-    // if get here, linear offsets summarized OK
-    return true;
+    SkipLinearOffsets(numLinearOffsets);
  }
  
-bool BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
-
-    bool loadedOk = true;
-    loadedOk &= SummarizeBins(refSummary);
-    loadedOk &= SummarizeLinearOffsets(refSummary);
-    return loadedOk;
+void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
+    SummarizeBins(refSummary);
+    SummarizeLinearOffsets(refSummary);
  }
  
  // return position of file pointer in index file stream
  int64_t BamStandardIndex::Tell(void) const {
-    return ftell64(m_indexStream);
+    return ftell64(Resources.IndexStream);
  }
  
-bool BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
-
-    size_t elementsWritten = 0;
+void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
  
      // localize alignment chunk offsets
      uint64_t start = chunk.Start;
@@ -869,92 +850,81 @@ bool BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
      }
  
      // write to index file
-    elementsWritten += fwrite(&start, sizeof(start), 1, m_indexStream);
-    elementsWritten += fwrite(&stop,  sizeof(stop),  1, m_indexStream);
-
-    // return success/failure of write
-    return ( elementsWritten == 2 );
+    size_t elementsWritten = 0;
+    elementsWritten += fwrite(&start, sizeof(start), 1, Resources.IndexStream);
+    elementsWritten += fwrite(&stop,  sizeof(stop),  1, Resources.IndexStream);
+    if ( elementsWritten != 2 )
+        throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
  }
  
-bool BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
  
      // make sure chunks are merged (simplified) before writing & saving summary
      MergeAlignmentChunks(chunks);
  
-    size_t elementsWritten = 0;
-
      // write chunks
      int32_t chunkCount = chunks.size();
      if ( m_isBigEndian ) SwapEndian_32(chunkCount);
-    elementsWritten += fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);
+    const size_t elementsWritten = fwrite(&chunkCount, sizeof(chunkCount), 1, Resources.IndexStream);
+    if ( elementsWritten != 1 )
+        throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
  
      // iterate over chunks
-    bool chunksOk = true;
      BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();
      BaiAlignmentChunkVector::const_iterator chunkEnd  = chunks.end();
      for ( ; chunkIter != chunkEnd; ++chunkIter )
-        chunksOk &= WriteAlignmentChunk( (*chunkIter) );
-
-    // return success/failure of write
-    return ( (elementsWritten == 1) && chunksOk );
+        WriteAlignmentChunk( (*chunkIter) );
  }
  
-bool BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
-
-    size_t elementsWritten = 0;
+void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
  
      // write BAM bin ID
      uint32_t binKey = binId;
      if ( m_isBigEndian ) SwapEndian_32(binKey);
-    elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);
+    const size_t elementsWritten = fwrite(&binKey, sizeof(binKey), 1, Resources.IndexStream);
+    if ( elementsWritten != 1 )
+        throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
  
      // write bin's alignment chunks
-    bool chunksOk = WriteAlignmentChunks(chunks);
-
-    // return success/failure of write
-    return ( (elementsWritten == 1) && chunksOk );
+    WriteAlignmentChunks(chunks);
  }
  
-bool BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
-
-    size_t elementsWritten = 0;
+void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
  
      // write number of bins
      int32_t binCount = bins.size();
      if ( m_isBigEndian ) SwapEndian_32(binCount);
-    elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);
+    const size_t elementsWritten = fwrite(&binCount, sizeof(binCount), 1, Resources.IndexStream);
+    if ( elementsWritten != 1 )
+        throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
  
      // save summary for reference's bins
      SaveBinsSummary(refId, bins.size());
  
      // iterate over bins
-    bool binsOk = true;
      BaiBinMap::iterator binIter = bins.begin();
      BaiBinMap::iterator binEnd  = bins.end();
      for ( ; binIter != binEnd; ++binIter )
-        binsOk &= WriteBin( (*binIter).first, (*binIter).second );
-
-    // return success/failure of write
-    return ( (elementsWritten == 1) && binsOk );
+        WriteBin( (*binIter).first, (*binIter).second );
  }
  
-bool BamStandardIndex::WriteHeader(void) {
+void BamStandardIndex::WriteHeader(void) {
  
      size_t elementsWritten = 0;
  
      // write magic number
-    elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, m_indexStream);
+    elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, Resources.IndexStream);
  
      // write number of reference sequences
      int32_t numReferences = m_indexFileSummary.size();
      if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
+    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
  
-    // return success/failure of write
-    return (elementsWritten == 5);
+    if ( elementsWritten != 5 )
+        throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
  }
  
-bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
+void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
  
      // make sure linear offsets are sorted before writing & saving summary
      SortLinearOffsets(linearOffsets);
@@ -964,7 +934,7 @@ bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVecto
      // write number of linear offsets
      int32_t offsetCount = linearOffsets.size();
      if ( m_isBigEndian ) SwapEndian_32(offsetCount);
-    elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);
+    elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, Resources.IndexStream);
  
      // save summary for reference's linear offsets
      SaveLinearOffsetsSummary(refId, linearOffsets.size());
@@ -977,16 +947,14 @@ bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVecto
          // write linear offset
          uint64_t linearOffset = (*offsetIter);
          if ( m_isBigEndian ) SwapEndian_64(linearOffset);
-        elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
+        elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
      }
  
-    // return success/failure of write
-    return ( elementsWritten == (size_t)(linearOffsets.size() + 1) );
+    if ( elementsWritten != (linearOffsets.size() + 1) )
+        throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
  }
  
-bool BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
-    bool refOk = true;
-    refOk &= WriteBins(refEntry.ID, refEntry.Bins);
-    refOk &= WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
-    return refOk;
+void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
+    WriteBins(refEntry.ID, refEntry.Bins);
+    WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
  }
diff --git a/src/api/internal/BamStandardIndex_p.h b/src/api/internal/BamStandardIndex_p.h

index 3fbc7772404c26cfa3a440c204c293c9a7ae69d8..1bd36c886e7a26a28d40c2df5e16ff40fe8c6758 100644 (file)
--- a/src/api/internal/BamStandardIndex_p.h
+++ b/src/api/internal/BamStandardIndex_p.h
@@ -2,7 +2,7 @@
  // BamStandardIndex.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 24 June 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides index operations for the standardized BAM index format (".bai")
  // ***************************************************************************
@@ -128,17 +128,18 @@ class BamStandardIndex : public BamIndex {
          // returns format's file extension
          static const std::string Extension(void);
  
-    // internal file ops
+    // internal methods
      private:
-        bool CheckMagicNumber(void);
+
+        // index file ops
+        void CheckMagicNumber(void);
          void CloseFile(void);
          bool IsFileOpen(void) const;
-        bool OpenFile(const std::string& filename, const char* mode);
-        bool Seek(const int64_t& position, const int& origin);
+        void OpenFile(const std::string& filename, const char* mode);
+        void Seek(const int64_t& position, const int& origin);
          int64_t Tell(void) const;
  
-    // internal BAI index building methods
-    private:
+        // BAI index building methods
          void ClearReferenceEntry(BaiReferenceEntry& refEntry);
          void SaveAlignmentChunkToBin(BaiBinMap& binMap,
                                       const uint32_t& currentBin,
@@ -149,66 +150,68 @@ class BamStandardIndex : public BamIndex {
                                     const int& alignmentStopPosition,
                                     const uint64_t& lastOffset);
  
-    // internal random-access methods
-    private:
-        bool AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+        // random-access methods
+        void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
          void CalculateCandidateBins(const uint32_t& begin,
                                      const uint32_t& end,
                                      std::set<uint16_t>& candidateBins);
-        bool CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+        void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
                                         const uint64_t& minOffset,
                                         std::set<uint16_t>& candidateBins,
                                         std::vector<int64_t>& offsets);
          uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
-        bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
          uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
  
-    // internal BAI summary (create/load) methods
-    private:
+        // BAI summary (create/load) methods
          void ReserveForSummary(const int& numReferences);
          void SaveBinsSummary(const int& refId, const int& numBins);
          void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
-        bool SkipBins(const int& numBins);
-        bool SkipLinearOffsets(const int& numLinearOffsets);
-        bool SummarizeBins(BaiReferenceSummary& refSummary);
-        bool SummarizeIndexFile(void);
-        bool SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
-        bool SummarizeReference(BaiReferenceSummary& refSummary);
-
-    // internal BAI full index input methods
-    private:
-        bool ReadBinID(uint32_t& binId);
-        bool ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
-        bool ReadIntoBuffer(const unsigned int& bytesRequested);
-        bool ReadLinearOffset(uint64_t& linearOffset);
-        bool ReadNumAlignmentChunks(int& numAlignmentChunks);
-        bool ReadNumBins(int& numBins);
-        bool ReadNumLinearOffsets(int& numLinearOffsets);
-        bool ReadNumReferences(int& numReferences);
-
-    // internal BAI full index output methods
-    private:
+        void SkipBins(const int& numBins);
+        void SkipLinearOffsets(const int& numLinearOffsets);
+        void SummarizeBins(BaiReferenceSummary& refSummary);
+        void SummarizeIndexFile(void);
+        void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+        void SummarizeReference(BaiReferenceSummary& refSummary);
+
+        // BAI full index input methods
+        void ReadBinID(uint32_t& binId);
+        void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+        void ReadIntoBuffer(const unsigned int& bytesRequested);
+        void ReadLinearOffset(uint64_t& linearOffset);
+        void ReadNumAlignmentChunks(int& numAlignmentChunks);
+        void ReadNumBins(int& numBins);
+        void ReadNumLinearOffsets(int& numLinearOffsets);
+        void ReadNumReferences(int& numReferences);
+
+        // BAI full index output methods
          void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
          void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
-        bool WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
-        bool WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
-        bool WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
-        bool WriteBins(const int& refId, BaiBinMap& bins);
-        bool WriteHeader(void);
-        bool WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
-        bool WriteReferenceEntry(BaiReferenceEntry& refEntry);
+        void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+        void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+        void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+        void WriteBins(const int& refId, BaiBinMap& bins);
+        void WriteHeader(void);
+        void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+        void WriteReferenceEntry(BaiReferenceEntry& refEntry);
  
      // data members
      private:
-        FILE* m_indexStream;
-        bool  m_isBigEndian;
+        bool m_isBigEndian;
          BamIndex::IndexCacheMode m_cacheMode;
          BaiFileSummary m_indexFileSummary;
  
          // our input buffer
-        char* m_buffer;
          unsigned int m_bufferLength;
  
+        struct RaiiWrapper {
+            FILE* IndexStream;
+            char* Buffer;
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+        };
+        RaiiWrapper Resources;
+
      // static methods
      private:
          // checks if the buffer is large enough to accomodate the requested size
diff --git a/src/api/internal/BamToolsIndex_p.cpp b/src/api/internal/BamToolsIndex_p.cpp

index 047ca6fec8df26683d0644cd228861e78e3279ac..09f74d10306028032a32322b3009f8dd81ca9b1d 100644 (file)
--- a/src/api/internal/BamToolsIndex_p.cpp
+++ b/src/api/internal/BamToolsIndex_p.cpp
@@ -2,12 +2,13 @@
  // BamToolsIndex.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 27 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides index operations for the BamTools index format (".bti")
  // ***************************************************************************
  
  #include <api/BamAlignment.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/BamReader_p.h>
  #include <api/internal/BamToolsIndex_p.h>
  #include <api/internal/BgzfStream_p.h>
@@ -23,16 +24,35 @@ using namespace BamTools::Internal;
  #include <map>
  using namespace std;
  
+// --------------------------------
  // static BamToolsIndex constants
-const int BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
+// --------------------------------
+
+const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
  const string BamToolsIndex::BTI_EXTENSION     = ".bti";
  const char* const BamToolsIndex::BTI_MAGIC    = "BTI\1";
  const int BamToolsIndex::SIZEOF_BLOCK         = sizeof(int32_t)*2 + sizeof(int64_t);
  
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+BamToolsIndex::RaiiWrapper::RaiiWrapper(void)
+    : IndexStream(0)
+{ }
+
+BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
+    if ( IndexStream )
+        fclose(IndexStream);
+}
+
+// ------------------------------
+// BamToolsIndex implementation
+// ------------------------------
+
  // ctor
  BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
      : BamIndex(reader)
-    , m_indexStream(0)
      , m_cacheMode(BamIndex::LimitedIndexCaching)
      , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
      , m_inputVersion(0)
@@ -46,71 +66,56 @@ BamToolsIndex::~BamToolsIndex(void) {
      CloseFile();
  }
  
-bool BamToolsIndex::CheckMagicNumber(void) {
+void BamToolsIndex::CheckMagicNumber(void) {
  
-    // check 'magic number' to see if file is BTI index
+    // read magic number
      char magic[4];
-    size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
-    if ( elementsRead != 4 ) {
-        cerr << "BamToolsIndex ERROR: could not read format 'magic' number" << endl;
-        return false;
-    }
-
-    if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 ) {
-        cerr << "BamToolsIndex ERROR: invalid format" << endl;
-        return false;
-    }
+    size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
+    if ( elementsRead != 4 )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
  
-    // otherwise ok
-    return true;
+    // validate expected magic number
+    if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
  }
  
  // check index file version, return true if OK
-bool BamToolsIndex::CheckVersion(void) {
+void BamToolsIndex::CheckVersion(void) {
  
      // read version from file
-    size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream);
-    if ( elementsRead != 1 ) return false;
+    size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, Resources.IndexStream);
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
      if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
  
      // if version is negative, or zero
-    if ( m_inputVersion <= 0 ) {
-        cerr << "BamToolsIndex ERROR: could not load index file: invalid version."
-             << endl;
-        return false;
-    }
+    if ( m_inputVersion <= 0 )
+        throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
  
      // if version is newer than can be supported by this version of bamtools
      else if ( m_inputVersion > m_outputVersion ) {
-        cerr << "BamToolsIndex ERROR: could not load index file. This version of BamTools does not recognize new index file version"
-             << endl
-             << "Please update BamTools to a more recent version to support this index file."
-             << endl;
-        return false;
+        const string message = "unsupported format: this index was created by a newer version of BamTools. "
+                               "Update your local version of BamTools to use the index file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
      }
  
      // ------------------------------------------------------------------
      // check for deprecated, unsupported versions
-    // (typically whose format did not accomodate a particular bug fix)
+    // (the format had to be modified to accomodate a particular bug fix)
  
      else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_0 ) {
-        cerr << "BamToolsIndex ERROR: could not load index file. This version of the index contains a bug related to accessing data near reference ends."
-             << endl << endl
-             << "Please run 'bamtools index -bti -in yourData.bam' to generate an up-to-date, fixed BTI file."
-             << endl << endl;
-        return false;
+        const string message = "unsupported format: this version of the index may not properly handle "
+                               "reference ends. Please run 'bamtools index -bti -in yourData.bam' to "
+                               "generate an up-to-date, fixed BTI file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
      }
  
      else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_1 ) {
-        cerr << "BamToolsIndex ERROR: could not load index file. This version of the index contains a bug related to handling empty references."
-             << endl << endl
-             << "Please run 'bamtools index -bti -in yourData.bam' to generate an up-to-date, fixed BTI file."
-             << endl << endl;
-        return false;
+        const string message = "unsupported format: this version of the index may not properly handle "
+                               "empty references. Please run 'bamtools index -bti -in yourData.bam to "
+                               "generate an up-to-date, fixed BTI file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
      }
-
-    // otherwise ok
-    else return true;
  }
  
  void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
@@ -119,151 +124,155 @@ void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
  }
  
  void BamToolsIndex::CloseFile(void) {
-    if ( IsFileOpen() )
-        fclose(m_indexStream);
+    if ( IsFileOpen() ) {
+        fclose(Resources.IndexStream);
+        Resources.IndexStream = 0;
+    }
      m_indexFileSummary.clear();
  }
  
  // builds index from associated BAM file & writes out to index file
  bool BamToolsIndex::Create(void) {
  
-    // return false if BamReader is invalid or not open
+    // skip if BamReader is invalid or not open
      if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        cerr << "BamToolsIndex ERROR: BamReader is not open"
-             << ", aborting index creation" << endl;
+        SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
          return false;
      }
  
      // rewind BamReader
      if ( !m_reader->Rewind() ) {
-        cerr << "BamToolsIndex ERROR: could not rewind BamReader to create index"
-             << ", aborting index creation" << endl;
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
          return false;
      }
  
-    // open new index file (read & write)
-    string indexFilename = m_reader->Filename() + Extension();
-    if ( !OpenFile(indexFilename, "w+b") ) {
-        cerr << "BamToolsIndex ERROR: could not open output index file " << indexFilename
-             << ", aborting index creation" << endl;
-        return false;
-    }
+    try {
+        // open new index file (read & write)
+        string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, "w+b");
  
-    // initialize BtiFileSummary with number of references
-    const int& numReferences = m_reader->GetReferenceCount();
-    InitializeFileSummary(numReferences);
+        // initialize BtiFileSummary with number of references
+        const int& numReferences = m_reader->GetReferenceCount();
+        InitializeFileSummary(numReferences);
  
-    // initialize output file
-    bool createdOk = true;
-    createdOk &= WriteHeader();
+        // intialize output file header
+        WriteHeader();
  
-    // index building markers
-    int32_t currentBlockCount      = 0;
-    int64_t currentAlignmentOffset = m_reader->Tell();
-    int32_t blockRefId             = -1;
-    int32_t blockMaxEndPosition    = -1;
-    int64_t blockStartOffset       = currentAlignmentOffset;
-    int32_t blockStartPosition     = -1;
+        // index building markers
+        uint32_t currentBlockCount      = 0;
+        int64_t currentAlignmentOffset  = m_reader->Tell();
+        int32_t blockRefId              = -1;
+        int32_t blockMaxEndPosition     = -1;
+        int64_t blockStartOffset        = currentAlignmentOffset;
+        int32_t blockStartPosition      = -1;
  
-    // plow through alignments, storing index entries
-    BamAlignment al;
-    BtiReferenceEntry refEntry;
-    while ( m_reader->LoadNextAlignment(al) ) {
+        // plow through alignments, storing index entries
+        BamAlignment al;
+        BtiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
  
-        // if moved to new reference
-        if ( al.RefID != blockRefId ) {
+            // if moved to new reference
+            if ( al.RefID != blockRefId ) {
  
-            // if first pass, check:
-            if ( currentBlockCount == 0 ) {
+                // if first pass, check:
+                if ( currentBlockCount == 0 ) {
  
-                // write any empty references up to (but not including) al.RefID
-                for ( int i = 0; i < al.RefID; ++i ) {
-                    BtiReferenceEntry emptyEntry(i);
-                    createdOk &= WriteReferenceEntry(emptyEntry);
+                    // write any empty references up to (but not including) al.RefID
+                    for ( int i = 0; i < al.RefID; ++i )
+                        WriteReferenceEntry( BtiReferenceEntry(i) );
                  }
-            }
  
-            // not first pass:
-            else {
+                // not first pass:
+                else {
  
-                // store previous BTI block data in reference entry
-                BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-                refEntry.Blocks.push_back(block);
+                    // store previous BTI block data in reference entry
+                    BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                    refEntry.Blocks.push_back(block);
  
-                // write reference entry, then clear
-                createdOk &= WriteReferenceEntry(refEntry);
-                ClearReferenceEntry(refEntry);
+                    // write reference entry, then clear
+                    WriteReferenceEntry(refEntry);
+                    ClearReferenceEntry(refEntry);
  
-                // write any empty references between (but not including) the last blockRefID and current al.RefID
-                for ( int i = blockRefId+1; i < al.RefID; ++i ) {
-                    BtiReferenceEntry emptyEntry(i);
-                    createdOk &= WriteReferenceEntry(emptyEntry);
+                    // write any empty references between (but not including)
+                    // the last blockRefID and current al.RefID
+                    for ( int i = blockRefId+1; i < al.RefID; ++i )
+                        WriteReferenceEntry( BtiReferenceEntry(i) );
+
+                    // reset block count
+                    currentBlockCount = 0;
                  }
  
-                // reset block count
-                currentBlockCount = 0;
+                // set ID for new reference entry
+                refEntry.ID = al.RefID;
              }
  
-            // set ID for new reference entry
-            refEntry.ID = al.RefID;
-        }
+            // if beginning of block, update counters
+            if ( currentBlockCount == 0 ) {
+                blockRefId          = al.RefID;
+                blockStartOffset    = currentAlignmentOffset;
+                blockStartPosition  = al.Position;
+                blockMaxEndPosition = al.GetEndPosition();
+            }
  
-        // if beginning of block, update counters
-        if ( currentBlockCount == 0 ) {
-            blockRefId          = al.RefID;
-            blockStartOffset    = currentAlignmentOffset;
-            blockStartPosition  = al.Position;
-            blockMaxEndPosition = al.GetEndPosition();
-        }
+            // increment block counter
+            ++currentBlockCount;
  
-        // increment block counter
-        ++currentBlockCount;
+            // check end position
+            int32_t alignmentEndPosition = al.GetEndPosition();
+            if ( alignmentEndPosition > blockMaxEndPosition )
+                blockMaxEndPosition = alignmentEndPosition;
  
-        // check end position
-        int32_t alignmentEndPosition = al.GetEndPosition();
-        if ( alignmentEndPosition > blockMaxEndPosition )
-            blockMaxEndPosition = alignmentEndPosition;
+            // if block is full, get offset for next block, reset currentBlockCount
+            if ( currentBlockCount == m_blockSize ) {
  
-        // if block is full, get offset for next block, reset currentBlockCount
-        if ( currentBlockCount == m_blockSize ) {
+                // store previous block data in reference entry
+                BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                refEntry.Blocks.push_back(block);
  
-            // store previous block data in reference entry
-            BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-            refEntry.Blocks.push_back(block);
+                // update markers
+                blockStartOffset  = m_reader->Tell();
+                currentBlockCount = 0;
+            }
  
-            // update markers
-            blockStartOffset  = m_reader->Tell();
-            currentBlockCount = 0;
+            // not the best name, but for the next iteration, this value will be the offset of the
+            // *current* alignment. this is necessary because we won't know if this next alignment
+            // is on a new reference until we actually read it
+            currentAlignmentOffset = m_reader->Tell();
          }
  
-        // not the best name, but for the next iteration, this value will be the offset of the *current* alignment
-        // necessary because we won't know if this next alignment is on a new reference until we actually read it
-        currentAlignmentOffset = m_reader->Tell();
-    }
+        // after finishing alignments, if any data was read, check:
+        if ( blockRefId >= 0 ) {
  
-    // after finishing alignments, if any data was read, check:
-    if ( blockRefId >= 0 ) {
-
-        // store last BTI block data in reference entry
-        BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-        refEntry.Blocks.push_back(block);
+            // store last BTI block data in reference entry
+            BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+            refEntry.Blocks.push_back(block);
  
-        // write last reference entry, then clear
-        createdOk &= WriteReferenceEntry(refEntry);
-        ClearReferenceEntry(refEntry);
+            // write last reference entry, then clear
+            WriteReferenceEntry(refEntry);
+            ClearReferenceEntry(refEntry);
  
-        // then write any empty references remaining at end of file
-        for ( int i = blockRefId+1; i < numReferences; ++i ) {
-            BtiReferenceEntry emptyEntry(i);
-            createdOk &= WriteReferenceEntry(emptyEntry);
+            // then write any empty references remaining at end of file
+            for ( int i = blockRefId+1; i < numReferences; ++i )
+                WriteReferenceEntry( BtiReferenceEntry(i) );
          }
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
      }
  
-    // rewind reader & return result
-    createdOk &= m_reader->Rewind();
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
+        return false;
+    }
  
-    // return result
-    return createdOk;
+    // return success
+    return true;
  }
  
  // returns format's file extension
@@ -271,18 +280,15 @@ const std::string BamToolsIndex::Extension(void) {
      return BamToolsIndex::BTI_EXTENSION;
  }
  
-bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
  
      // return false ref ID is not a valid index in file summary data
      if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
-        return false;
+        throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
  
      // retrieve reference index data for left bound reference
      BtiReferenceEntry refEntry(region.LeftRefID);
-    if ( !ReadReferenceEntry(refEntry) ) {
-        cerr << "BamToolsIndex ERROR: could not retrieve index data from BTI file" << endl;
-        return false;
-    }
+    ReadReferenceEntry(refEntry);
  
      // binary search for an overlapping block (may not be first one though)
      bool found = false;
@@ -337,9 +343,6 @@ bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* ha
  
      // sets to false if blocks container is empty, or if no matching block could be found
      *hasAlignmentsInRegion = found;
-
-    // return success
-    return true;
  }
  
  // returns whether reference has alignments or no
@@ -350,14 +353,16 @@ bool BamToolsIndex::HasAlignments(const int& referenceID) const {
      return ( refSummary.NumBlocks > 0 );
  }
  
+// pre-allocates space for each reference's summary data
  void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
      m_indexFileSummary.clear();
      for ( int i = 0; i < numReferences; ++i )
          m_indexFileSummary.push_back( BtiReferenceSummary() );
  }
  
+// returns true if the index stream is open
  bool BamToolsIndex::IsFileOpen(void) const {
-    return ( m_indexStream != 0 );
+    return ( Resources.IndexStream != 0 );
  }
  
  // attempts to use index data to jump to @region, returns success/fail
@@ -370,19 +375,24 @@ bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsI
      *hasAlignmentsInRegion = false;
  
      // skip if invalid reader or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() )
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
          return false;
+    }
  
      // make sure left-bound position is valid
      const RefVector& references = m_reader->GetReferenceData();
-    if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )
+    if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) {
+        SetErrorString("BamToolsIndex::Jump", "could not create index: invalid region requested");
          return false;
+    }
  
      // calculate nearest offset to jump to
      int64_t offset;
-    if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
-        cerr << "BamToolsIndex ERROR: could not jump"
-             << ", unable to calculate offset for specified region" << endl;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
          return false;
      }
  
@@ -393,116 +403,101 @@ bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsI
  // loads existing data from file into memory
  bool BamToolsIndex::Load(const std::string& filename) {
  
-    // attempt open index file (read-only)
-    if ( !OpenFile(filename, "rb") ) {
-        cerr << "BamToolsIndex ERROR: could not open input index file " << filename
-             << ", aborting index load" << endl;
-        return false;
-    }
+    try {
  
-    // attempt to load & validate BTI header data
-    if ( !LoadHeader() ) {
-        cerr << "BamToolsIndex ERROR: could load header from index file " << filename
-             << ", aborting index load" << endl;
-        CloseFile();
-        return false;
-    }
+        // attempt to open file (read-only)
+        OpenFile(filename, "rb");
+
+        // load metadata & generate in-memory summary
+        LoadHeader();
+        LoadFileSummary();
+
+        // return success
+        return true;
  
-    // attempt to load index file summary
-    if ( !LoadFileSummary() ) {
-        cerr << "BamToolsIndex ERROR: could not generate a summary of index file " << filename
-             << ", aborting index load" << endl;
-        CloseFile();
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
          return false;
      }
-
-    // if we get here, index summary is loaded OK
-    return true;
  }
  
-bool BamToolsIndex::LoadFileSummary(void) {
+void BamToolsIndex::LoadFileSummary(void) {
  
      // load number of reference sequences
      int numReferences;
-    if ( !LoadNumReferences(numReferences) )
-        return false;
+    LoadNumReferences(numReferences);
  
      // initialize file summary data
      InitializeFileSummary(numReferences);
  
-    // iterate over reference entries
-    bool loadedOk = true;
+    // load summary for each reference
      BtiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
      BtiFileSummary::iterator summaryEnd  = m_indexFileSummary.end();
      for ( ; summaryIter != summaryEnd; ++summaryIter )
-        loadedOk &= LoadReferenceSummary(*summaryIter);
-
-    // return result
-    return loadedOk;
+        LoadReferenceSummary(*summaryIter);
  }
  
-bool BamToolsIndex::LoadHeader(void) {
+void BamToolsIndex::LoadHeader(void) {
  
-    // if invalid format 'magic number'
-    if ( !CheckMagicNumber() )
-        return false;
-
-    // if invalid BTI version
-    if ( !CheckVersion() )
-        return false;
+    // check BTI file metadata
+    CheckMagicNumber();
+    CheckVersion();
  
      // use file's BTI block size to set member variable
-    size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, m_indexStream);
+    const size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
  }
  
-bool BamToolsIndex::LoadNumBlocks(int& numBlocks) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&numBlocks, sizeof(numBlocks), 1, m_indexStream);
+void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
+    const size_t elementsRead = fread(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(numBlocks);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
  }
  
-bool BamToolsIndex::LoadNumReferences(int& numReferences) {
-    size_t elementsRead = 0;
-    elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+void BamToolsIndex::LoadNumReferences(int& numReferences) {
+    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
      if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    return ( elementsRead == 1 );
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
  }
  
-bool BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
+void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
  
      // load number of blocks
      int numBlocks;
-    if ( !LoadNumBlocks(numBlocks) )
-        return false;
+    LoadNumBlocks(numBlocks);
  
      // store block summary data for this reference
      refSummary.NumBlocks = numBlocks;
      refSummary.FirstBlockFilePosition = Tell();
  
-    // skip blocks in index file (and return status)
-    return SkipBlocks(numBlocks);
+    // skip reference's blocks
+    SkipBlocks(numBlocks);
  }
  
-bool BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
+void BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
  
      // make sure any previous index file is closed
      CloseFile();
  
      // attempt to open file
-    m_indexStream = fopen(filename.c_str(), mode);
-    return IsFileOpen();
+    Resources.IndexStream = fopen(filename.c_str(), mode);
+    if ( !IsFileOpen() ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamToolsIndex::OpenFile", message);
+    }
  }
  
-bool BamToolsIndex::ReadBlock(BtiBlock& block) {
+void BamToolsIndex::ReadBlock(BtiBlock& block) {
  
      // read in block data members
      size_t elementsRead = 0;
-    elementsRead += fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, m_indexStream);
-    elementsRead += fread(&block.StartOffset,    sizeof(block.StartOffset),    1, m_indexStream);
-    elementsRead += fread(&block.StartPosition,  sizeof(block.StartPosition),  1, m_indexStream);
+    elementsRead += fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, Resources.IndexStream);
+    elementsRead += fread(&block.StartOffset,    sizeof(block.StartOffset),    1, Resources.IndexStream);
+    elementsRead += fread(&block.StartPosition,  sizeof(block.StartPosition),  1, Resources.IndexStream);
  
      // swap endian-ness if necessary
      if ( m_isBigEndian ) {
@@ -511,46 +506,41 @@ bool BamToolsIndex::ReadBlock(BtiBlock& block) {
          SwapEndian_32(block.StartPosition);
      }
  
-    // return success/failure
-    return ( elementsRead == 3 );
+    if ( elementsRead != 3 )
+        throw BamException("BamToolsIndex::ReadBlock", "could not read block");
  }
  
-bool BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
+void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
  
      // prep blocks container
      blocks.clear();
      blocks.reserve(refSummary.NumBlocks);
  
      // skip to first block entry
-    if ( !Seek( refSummary.FirstBlockFilePosition, SEEK_SET ) ) {
-        cerr << "BamToolsIndex ERROR: could not seek to position "
-             << refSummary.FirstBlockFilePosition << endl;
-        return false;
-    }
+    Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
  
      // read & store block entries
-    bool readOk = true;
      BtiBlock block;
      for ( int i = 0; i < refSummary.NumBlocks; ++i ) {
-        readOk &= ReadBlock(block);
+        ReadBlock(block);
          blocks.push_back(block);
      }
-    return readOk;
  }
  
-bool BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
+void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
  
      // return false if refId not valid index in file summary structure
      if ( refEntry.ID < 0 || refEntry.ID >= (int)m_indexFileSummary.size() )
-        return false;
+        throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
  
      // use index summary to assist reading the reference's BTI blocks
      const BtiReferenceSummary& refSummary = m_indexFileSummary.at(refEntry.ID);
-    return ReadBlocks(refSummary, refEntry.Blocks);
+    ReadBlocks(refSummary, refEntry.Blocks);
  }
  
-bool BamToolsIndex::Seek(const int64_t& position, const int& origin) {
-    return ( fseek64(m_indexStream, position, origin) == 0 );
+void BamToolsIndex::Seek(const int64_t& position, const int& origin) {
+    if ( fseek64(Resources.IndexStream, position, origin) != 0 )
+        throw BamException("BamToolsIndex::Seek", "could not seek in BAI file");
  }
  
  // change the index caching behavior
@@ -559,15 +549,15 @@ void BamToolsIndex::SetCacheMode(const BamIndex::IndexCacheMode& mode) {
      // do nothing else here ? cache mode will be ignored from now on, most likely
  }
  
-bool BamToolsIndex::SkipBlocks(const int& numBlocks) {
-    return Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );
+void BamToolsIndex::SkipBlocks(const int& numBlocks) {
+    Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );
  }
  
  int64_t BamToolsIndex::Tell(void) const {
-    return ftell64(m_indexStream);
+    return ftell64(Resources.IndexStream);
  }
  
-bool BamToolsIndex::WriteBlock(const BtiBlock& block) {
+void BamToolsIndex::WriteBlock(const BtiBlock& block) {
  
      // copy entry data
      int32_t maxEndPosition = block.MaxEndPosition;
@@ -583,59 +573,55 @@ bool BamToolsIndex::WriteBlock(const BtiBlock& block) {
  
      // write the reference index entry
      size_t elementsWritten = 0;
-    elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, m_indexStream);
-    elementsWritten += fwrite(&startOffset,    sizeof(startOffset),    1, m_indexStream);
-    elementsWritten += fwrite(&startPosition,  sizeof(startPosition),  1, m_indexStream);
-    return ( elementsWritten == 3 );
+    elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, Resources.IndexStream);
+    elementsWritten += fwrite(&startOffset,    sizeof(startOffset),    1, Resources.IndexStream);
+    elementsWritten += fwrite(&startPosition,  sizeof(startPosition),  1, Resources.IndexStream);
+    if ( elementsWritten != 3 )
+        throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
  }
  
-bool BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
-    bool writtenOk = true;
+void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
      BtiBlockVector::const_iterator blockIter = blocks.begin();
      BtiBlockVector::const_iterator blockEnd  = blocks.end();
      for ( ; blockIter != blockEnd; ++blockIter )
-        writtenOk &= WriteBlock(*blockIter);
-    return writtenOk;
+        WriteBlock(*blockIter);
  }
  
-bool BamToolsIndex::WriteHeader(void) {
+void BamToolsIndex::WriteHeader(void) {
  
      size_t elementsWritten = 0;
  
      // write BTI index format 'magic number'
-    elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, m_indexStream);
+    elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, Resources.IndexStream);
  
      // write BTI index format version
      int32_t currentVersion = (int32_t)m_outputVersion;
      if ( m_isBigEndian ) SwapEndian_32(currentVersion);
-    elementsWritten += fwrite(&currentVersion, sizeof(currentVersion), 1, m_indexStream);
+    elementsWritten += fwrite(&currentVersion, sizeof(currentVersion), 1, Resources.IndexStream);
  
      // write block size
-    int32_t blockSize = m_blockSize;
+    uint32_t blockSize = m_blockSize;
      if ( m_isBigEndian ) SwapEndian_32(blockSize);
-    elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, m_indexStream);
+    elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, Resources.IndexStream);
  
      // write number of references
      int32_t numReferences = m_indexFileSummary.size();
      if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
+    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
  
-    // return success/failure of write
-    return ( elementsWritten == 7 );
+    if ( elementsWritten != 7 )
+        throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
  }
  
-bool BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
-
-    size_t elementsWritten = 0;
+void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
  
      // write number of blocks this reference
      uint32_t numBlocks = refEntry.Blocks.size();
      if ( m_isBigEndian ) SwapEndian_32(numBlocks);
-    elementsWritten += fwrite(&numBlocks, sizeof(numBlocks), 1, m_indexStream);
+    const size_t elementsWritten = fwrite(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
+    if ( elementsWritten != 1 )
+        throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
  
      // write actual block entries
-    const bool blocksOk = WriteBlocks(refEntry.Blocks);
-
-    // return success/fail
-    return ( elementsWritten == 1) && blocksOk;
+    WriteBlocks(refEntry.Blocks);
  }
diff --git a/src/api/internal/BamToolsIndex_p.h b/src/api/internal/BamToolsIndex_p.h

index 269792ed979152e09131a232d512b83dd35aa523..ec846c5e1770ef55a8653c3377ec6ca71fbab755 100644 (file)
--- a/src/api/internal/BamToolsIndex_p.h
+++ b/src/api/internal/BamToolsIndex_p.h
@@ -2,7 +2,7 @@
  // BamToolsIndex.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides index operations for the BamTools index format (".bti")
  // ***************************************************************************
@@ -122,60 +122,59 @@ class BamToolsIndex : public BamIndex {
          // returns format's file extension
          static const std::string Extension(void);
  
-    // internal file ops
+    // internal methods
      private:
-        bool CheckMagicNumber(void);
-        bool CheckVersion(void);
+
+        // index file ops
+        void CheckMagicNumber(void);
+        void CheckVersion(void);
          void CloseFile(void);
          bool IsFileOpen(void) const;
-        bool OpenFile(const std::string& filename, const char* mode);
-        bool Seek(const int64_t& position, const int& origin);
+        void OpenFile(const std::string& filename, const char* mode);
+        void Seek(const int64_t& position, const int& origin);
          int64_t Tell(void) const;
  
-    // internal BTI index building methods
-    private:
+        // index-creation methods
          void ClearReferenceEntry(BtiReferenceEntry& refEntry);
-
-    // internal random-access methods
-    private:
-        bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
-
-    // internal BTI summary data methods
-    private:
+        void WriteBlock(const BtiBlock& block);
+        void WriteBlocks(const BtiBlockVector& blocks);
+        void WriteHeader(void);
+        void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+
+        // random-access methods
+        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+        void ReadBlock(BtiBlock& block);
+        void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
+        void ReadReferenceEntry(BtiReferenceEntry& refEntry);
+
+        // BTI summary data methods
          void InitializeFileSummary(const int& numReferences);
-        bool LoadFileSummary(void);
-        bool LoadHeader(void);
-        bool LoadNumBlocks(int& numBlocks);
-        bool LoadNumReferences(int& numReferences);
-        bool LoadReferenceSummary(BtiReferenceSummary& refSummary);
-        bool SkipBlocks(const int& numBlocks);
-
-    // internal BTI full index input methods
-    private:
-        bool ReadBlock(BtiBlock& block);
-        bool ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
-        bool ReadReferenceEntry(BtiReferenceEntry& refEntry);
-
-    // internal BTI full index output methods
-    private:
-        bool WriteBlock(const BtiBlock& block);
-        bool WriteBlocks(const BtiBlockVector& blocks);
-        bool WriteHeader(void);
-        bool WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+        void LoadFileSummary(void);
+        void LoadHeader(void);
+        void LoadNumBlocks(int& numBlocks);
+        void LoadNumReferences(int& numReferences);
+        void LoadReferenceSummary(BtiReferenceSummary& refSummary);
+        void SkipBlocks(const int& numBlocks);
  
      // data members
      private:
-        FILE* m_indexStream;
          bool  m_isBigEndian;
          BamIndex::IndexCacheMode m_cacheMode;
          BtiFileSummary m_indexFileSummary;
-        int m_blockSize;
+        uint32_t m_blockSize;
          int32_t m_inputVersion; // Version is serialized as int
          Version m_outputVersion;
  
+        struct RaiiWrapper {
+            FILE* IndexStream;
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+        };
+        RaiiWrapper Resources;
+
      // static constants
      private:
-        static const int DEFAULT_BLOCK_LENGTH;
+        static const uint32_t DEFAULT_BLOCK_LENGTH;
          static const std::string BTI_EXTENSION;
          static const char* const BTI_MAGIC;
          static const int SIZEOF_BLOCK;
diff --git a/src/api/internal/BamWriter_p.cpp b/src/api/internal/BamWriter_p.cpp

index e77c099f8a53b6488e39c6c2e3a630c3e1884570..a8fe370b5a374a192b1264894e932218bda58d28 100644 (file)
--- a/src/api/internal/BamWriter_p.cpp
+++ b/src/api/internal/BamWriter_p.cpp
@@ -2,18 +2,18 @@
  // BamWriter_p.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 16 June 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the basic functionality for producing BAM files
  // ***************************************************************************
  
  #include <api/BamAlignment.h>
  #include <api/BamConstants.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/BamWriter_p.h>
  using namespace BamTools;
  using namespace BamTools::Internal;
  
-#include <cstdio>
  #include <cstdlib>
  #include <cstring>
  using namespace std;
@@ -25,11 +25,11 @@ BamWriterPrivate::BamWriterPrivate(void)
  
  // dtor
  BamWriterPrivate::~BamWriterPrivate(void) {
-    m_stream.Close();
+    Close();
  }
  
  // calculates minimum bin for a BAM alignment interval
-unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
+uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
      --end;
      if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);
      if ( (begin >> 17) == (end >> 17) ) return  585 + (begin >> 17);
@@ -41,14 +41,23 @@ unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) con
  
  // closes the alignment archive
  void BamWriterPrivate::Close(void) {
-    m_stream.Close();
+
+    // skip if file not open
+    if ( !IsOpen() ) return;
+
+    // close output stream
+    try {
+        m_stream.Close();
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+    }
  }
  
  // creates a cigar string from the supplied alignment
  void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
  
      // initialize
-    const unsigned int numCigarOperations = cigarOperations.size();
+    const size_t numCigarOperations = cigarOperations.size();
      packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
  
      // pack the cigar data into the string
@@ -60,7 +69,7 @@ void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations,
      for ( ; coIter != coEnd; ++coIter ) {
  
          // store op in packedCigar
-        unsigned int cigarOp;
+        uint8_t cigarOp;
          switch ( coIter->Type ) {
              case (Constants::BAM_CIGAR_MATCH_CHAR)    : cigarOp = Constants::BAM_CIGAR_MATCH;    break;
              case (Constants::BAM_CIGAR_INS_CHAR)      : cigarOp = Constants::BAM_CIGAR_INS;      break;
@@ -72,8 +81,8 @@ void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations,
              case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
              case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
              default:
-              fprintf(stderr, "BamWriter ERROR: unknown cigar operation found: %c\n", coIter->Type);
-              exit(1);
+                const string message = string("invalid CIGAR operation type") + coIter->Type;
+                throw BamException("BamWriter::CreatePackedCigar", message);
          }
  
          *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
@@ -85,26 +94,36 @@ void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations,
  void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
  
      // prepare the encoded query string
-    const unsigned int queryLen = query.size();
-    const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);
-    encodedQuery.resize(encodedQueryLen);
+    const size_t queryLength = query.size();
+    const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
+    encodedQuery.resize(encodedQueryLength);
      char* pEncodedQuery = (char*)encodedQuery.data();
      const char* pQuery = (const char*)query.data();
  
+    // walk through original query sequence, encoding its bases
      unsigned char nucleotideCode;
      bool useHighWord = true;
-
      while ( *pQuery ) {
          switch ( *pQuery ) {
              case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
              case (Constants::BAM_DNA_A)     : nucleotideCode = Constants::BAM_BASECODE_A;     break;
              case (Constants::BAM_DNA_C)     : nucleotideCode = Constants::BAM_BASECODE_C;     break;
+            case (Constants::BAM_DNA_M)     : nucleotideCode = Constants::BAM_BASECODE_M;     break;
              case (Constants::BAM_DNA_G)     : nucleotideCode = Constants::BAM_BASECODE_G;     break;
+            case (Constants::BAM_DNA_R)     : nucleotideCode = Constants::BAM_BASECODE_R;     break;
+            case (Constants::BAM_DNA_S)     : nucleotideCode = Constants::BAM_BASECODE_S;     break;
+            case (Constants::BAM_DNA_V)     : nucleotideCode = Constants::BAM_BASECODE_V;     break;
              case (Constants::BAM_DNA_T)     : nucleotideCode = Constants::BAM_BASECODE_T;     break;
+            case (Constants::BAM_DNA_W)     : nucleotideCode = Constants::BAM_BASECODE_W;     break;
+            case (Constants::BAM_DNA_Y)     : nucleotideCode = Constants::BAM_BASECODE_Y;     break;
+            case (Constants::BAM_DNA_H)     : nucleotideCode = Constants::BAM_BASECODE_H;     break;
+            case (Constants::BAM_DNA_K)     : nucleotideCode = Constants::BAM_BASECODE_K;     break;
+            case (Constants::BAM_DNA_D)     : nucleotideCode = Constants::BAM_BASECODE_D;     break;
+            case (Constants::BAM_DNA_B)     : nucleotideCode = Constants::BAM_BASECODE_B;     break;
              case (Constants::BAM_DNA_N)     : nucleotideCode = Constants::BAM_BASECODE_N;     break;
              default:
-                fprintf(stderr, "BamWriter ERROR: only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery);
-                exit(1);
+                const string message = string("invalid base: ") + *pQuery;
+                throw BamException("BamWriter::EncodeQuerySequence", message);
          }
  
          // pack the nucleotide code
@@ -122,6 +141,11 @@ void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQ
      }
  }
  
+// returns a description of the last error that occurred
+std::string BamWriterPrivate::GetErrorString(void) const {
+    return m_errorString;
+}
+
  // returns whether BAM file is open for writing or not
  bool BamWriterPrivate::IsOpen(void) const {
      return m_stream.IsOpen;
@@ -132,251 +156,265 @@ bool BamWriterPrivate::Open(const string& filename,
                              const string& samHeaderText,
                              const RefVector& referenceSequences)
  {
-    // open the BGZF file for writing, return failure if error
-    if ( !m_stream.Open(filename, "wb") )
-        return false;
+    try {
  
-    // write BAM file 'metadata' components
-    WriteMagicNumber();
-    WriteSamHeaderText(samHeaderText);
-    WriteReferences(referenceSequences);
-    return true;
-}
+        // open the BGZF file for writing, return failure if error
+        m_stream.Open(filename, "wb");
  
-// saves the alignment to the alignment archive
-void BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
-
-    // if BamAlignment contains only the core data and a raw char data buffer
-    // (as a result of BamReader::GetNextAlignmentCore())
-    if ( al.SupportData.HasCoreOnly ) {
-
-        // write the block size
-        unsigned int blockSize = al.SupportData.BlockLength;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-        m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-        // re-calculate bin (in case BamAlignment's position has been previously modified)
-        const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
-        // assign the BAM core data
-        uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-        buffer[0] = al.RefID;
-        buffer[1] = al.Position;
-        buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
-        buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
-        buffer[4] = al.SupportData.QuerySequenceLength;
-        buffer[5] = al.MateRefID;
-        buffer[6] = al.MatePosition;
-        buffer[7] = al.InsertSize;
-
-        // swap BAM core endian-ness, if necessary
-        if ( m_isBigEndian ) {
-            for ( int i = 0; i < 8; ++i )
-                BamTools::SwapEndian_32(buffer[i]);
-        }
+        // write BAM file 'metadata' components
+        WriteMagicNumber();
+        WriteSamHeaderText(samHeaderText);
+        WriteReferences(referenceSequences);
  
-        // write the BAM core
-        m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+        // return success
+        return true;
  
-        // write the raw char data
-        m_stream.Write((char*)al.SupportData.AllCharData.data(),
-                       al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
      }
+}
  
-    // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
-    // ( resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code )
-    else {
-
-        // calculate char lengths
-        const unsigned int nameLength         = al.Name.size() + 1;
-        const unsigned int numCigarOperations = al.CigarData.size();
-        const unsigned int queryLength        = al.QueryBases.size();
-        const unsigned int tagDataLength      = al.TagData.size();
-
-        // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
-        // force calculation of Bin before storing
-        const int endPosition = al.GetEndPosition();
-        const unsigned int alignmentBin = CalculateMinimumBin(al.Position, endPosition);
-
-        // create our packed cigar string
-        string packedCigar;
-        CreatePackedCigar(al.CigarData, packedCigar);
-        const unsigned int packedCigarLength = packedCigar.size();
-
-        // encode the query
-        string encodedQuery;
-        EncodeQuerySequence(al.QueryBases, encodedQuery);
-        const unsigned int encodedQueryLength = encodedQuery.size();
-
-        // write the block size
-        const unsigned int dataBlockSize = nameLength +
-                                           packedCigarLength +
-                                           encodedQueryLength +
-                                           queryLength +
-                                           tagDataLength;
-        unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-        m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-        // assign the BAM core data
-        uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-        buffer[0] = al.RefID;
-        buffer[1] = al.Position;
-        buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
-        buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
-        buffer[4] = queryLength;
-        buffer[5] = al.MateRefID;
-        buffer[6] = al.MatePosition;
-        buffer[7] = al.InsertSize;
-
-        // swap BAM core endian-ness, if necessary
-        if ( m_isBigEndian ) {
-            for ( int i = 0; i < 8; ++i )
-                BamTools::SwapEndian_32(buffer[i]);
-        }
-
-        // write the BAM core
-        m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
-        // write the query name
-        m_stream.Write(al.Name.c_str(), nameLength);
-
-        // write the packed cigar
-        if ( m_isBigEndian ) {
-            char* cigarData = (char*)calloc(sizeof(char), packedCigarLength);
-            memcpy(cigarData, packedCigar.data(), packedCigarLength);
-            if ( m_isBigEndian ) {
-                for ( unsigned int i = 0; i < packedCigarLength; ++i )
-                    BamTools::SwapEndian_32p(&cigarData[i]);
-            }
-            m_stream.Write(cigarData, packedCigarLength);
-            free(cigarData);
-        }
-        else
-            m_stream.Write(packedCigar.data(), packedCigarLength);
+// saves the alignment to the alignment archive
+bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
  
-        // write the encoded query sequence
-        m_stream.Write(encodedQuery.data(), encodedQueryLength);
+    try {
  
-        // write the base qualities
-        char* pBaseQualities = (char*)al.Qualities.data();
-        for ( unsigned int i = 0; i < queryLength; ++i )
-            pBaseQualities[i] -= 33; // FASTQ conversion
-        m_stream.Write(pBaseQualities, queryLength);
+        // if BamAlignment contains only the core data and a raw char data buffer
+        // (as a result of BamReader::GetNextAlignmentCore())
+        if ( al.SupportData.HasCoreOnly )
+            WriteCoreAlignment(al);
  
-        // write the read group tag
-        if ( m_isBigEndian ) {
+        // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
+        // (resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code)
+        else WriteAlignment(al);
  
-            char* tagData = (char*)calloc(sizeof(char), tagDataLength);
-            memcpy(tagData, al.TagData.data(), tagDataLength);
+        // if we get here, everything OK
+        return true;
  
-            int i = 0;
-            while ( (unsigned int)i < tagDataLength ) {
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+}
  
-                i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
-                const char type = tagData[i];     // get tag type at position i
-                ++i;
+void BamWriterPrivate::SetWriteCompressed(bool ok) {
+    // modifying compression is not allowed if BAM file is open
+    if ( !IsOpen() )
+        m_stream.SetWriteCompressed(ok);
+}
  
-                switch ( type ) {
+void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
+
+    // calculate char lengths
+    const unsigned int nameLength         = al.Name.size() + 1;
+    const unsigned int numCigarOperations = al.CigarData.size();
+    const unsigned int queryLength        = al.QueryBases.size();
+    const unsigned int tagDataLength      = al.TagData.size();
+
+    // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
+    // force calculation of Bin before storing
+    const int endPosition = al.GetEndPosition();
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, endPosition);
+
+    // create our packed cigar string
+    string packedCigar;
+    CreatePackedCigar(al.CigarData, packedCigar);
+    const unsigned int packedCigarLength = packedCigar.size();
+
+    // encode the query
+    string encodedQuery;
+    EncodeQuerySequence(al.QueryBases, encodedQuery);
+    const unsigned int encodedQueryLength = encodedQuery.size();
+
+    // write the block size
+    const unsigned int dataBlockSize = nameLength +
+                                       packedCigarLength +
+                                       encodedQueryLength +
+                                       queryLength +
+                                       tagDataLength;
+    unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+    buffer[4] = queryLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < 8; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
+    }
  
-                    case(Constants::BAM_TAG_TYPE_ASCII) :
-                    case(Constants::BAM_TAG_TYPE_INT8)  :
-                    case(Constants::BAM_TAG_TYPE_UINT8) :
-                        ++i;
-                        break;
-
-                    case(Constants::BAM_TAG_TYPE_INT16)  :
-                    case(Constants::BAM_TAG_TYPE_UINT16) :
-                        BamTools::SwapEndian_16p(&tagData[i]);
-                        i += sizeof(uint16_t);
-                        break;
-
-                    case(Constants::BAM_TAG_TYPE_FLOAT)  :
-                    case(Constants::BAM_TAG_TYPE_INT32)  :
-                    case(Constants::BAM_TAG_TYPE_UINT32) :
-                        BamTools::SwapEndian_32p(&tagData[i]);
-                        i += sizeof(uint32_t);
-                        break;
-
-                    case(Constants::BAM_TAG_TYPE_HEX) :
-                    case(Constants::BAM_TAG_TYPE_STRING) :
-                        // no endian swapping necessary for hex-string/string data
-                        while ( tagData[i] )
-                            ++i;
-                        // increment one more for null terminator
-                        ++i;
-                        break;
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
  
-                    case(Constants::BAM_TAG_TYPE_ARRAY) :
+    // write the query name
+    m_stream.Write(al.Name.c_str(), nameLength);
  
-                    {
-                        // read array type
-                        const char arrayType = tagData[i];
+    // write the packed cigar
+    if ( m_isBigEndian ) {
+        char* cigarData = new char[packedCigarLength]();
+        memcpy(cigarData, packedCigar.data(), packedCigarLength);
+        if ( m_isBigEndian ) {
+            for ( size_t i = 0; i < packedCigarLength; ++i )
+                BamTools::SwapEndian_32p(&cigarData[i]);
+        }
+        m_stream.Write(cigarData, packedCigarLength);
+        delete[] cigarData; // TODO: cleanup on Write exception thrown?
+    }
+    else
+        m_stream.Write(packedCigar.data(), packedCigarLength);
+
+    // write the encoded query sequence
+    m_stream.Write(encodedQuery.data(), encodedQueryLength);
+
+    // write the base qualities
+    char* pBaseQualities = (char*)al.Qualities.data();
+    for ( size_t i = 0; i < queryLength; ++i )
+        pBaseQualities[i] -= 33; // FASTQ conversion
+    m_stream.Write(pBaseQualities, queryLength);
+
+    // write the read group tag
+    if ( m_isBigEndian ) {
+
+        char* tagData = new char[tagDataLength]();
+        memcpy(tagData, al.TagData.data(), tagDataLength);
+
+        size_t i = 0;
+        while ( i < tagDataLength ) {
+
+            i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
+            const char type = tagData[i];     // get tag type at position i
+            ++i;
+
+            switch ( type ) {
+
+                case(Constants::BAM_TAG_TYPE_ASCII) :
+                case(Constants::BAM_TAG_TYPE_INT8)  :
+                case(Constants::BAM_TAG_TYPE_UINT8) :
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_INT16)  :
+                case(Constants::BAM_TAG_TYPE_UINT16) :
+                    BamTools::SwapEndian_16p(&tagData[i]);
+                    i += sizeof(uint16_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_FLOAT)  :
+                case(Constants::BAM_TAG_TYPE_INT32)  :
+                case(Constants::BAM_TAG_TYPE_UINT32) :
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    i += sizeof(uint32_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_HEX) :
+                case(Constants::BAM_TAG_TYPE_STRING) :
+                    // no endian swapping necessary for hex-string/string data
+                    while ( tagData[i] )
                          ++i;
-
-                        // swap endian-ness of number of elements in place, then retrieve for loop
-                        BamTools::SwapEndian_32p(&tagData[i]);
-                        int32_t numElements;
-                        memcpy(&numElements, &tagData[i], sizeof(uint32_t));
-                        i += sizeof(uint32_t);
-
-                        // swap endian-ness of array elements
-                        for ( int j = 0; j < numElements; ++j ) {
-                            switch (arrayType) {
-                                case (Constants::BAM_TAG_TYPE_INT8)  :
-                                case (Constants::BAM_TAG_TYPE_UINT8) :
-                                    // no endian-swapping necessary
-                                    ++i;
-                                    break;
-                                case (Constants::BAM_TAG_TYPE_INT16)  :
-                                case (Constants::BAM_TAG_TYPE_UINT16) :
-                                    BamTools::SwapEndian_16p(&tagData[i]);
-                                    i += sizeof(uint16_t);
-                                    break;
-                                case (Constants::BAM_TAG_TYPE_FLOAT)  :
-                                case (Constants::BAM_TAG_TYPE_INT32)  :
-                                case (Constants::BAM_TAG_TYPE_UINT32) :
-                                    BamTools::SwapEndian_32p(&tagData[i]);
-                                    i += sizeof(uint32_t);
-                                    break;
-                                default:
-                                    // error case
-                                    fprintf(stderr,
-                                            "BamWriter ERROR: unknown binary array type encountered: [%c]\n",
-                                            arrayType);
-                                    exit(1);
-                            }
+                    // increment one more for null terminator
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+                {
+                    // read array type
+                    const char arrayType = tagData[i];
+                    ++i;
+
+                    // swap endian-ness of number of elements in place, then retrieve for loop
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    int32_t numElements;
+                    memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+                    i += sizeof(uint32_t);
+
+                    // swap endian-ness of array elements
+                    for ( int j = 0; j < numElements; ++j ) {
+                        switch (arrayType) {
+                            case (Constants::BAM_TAG_TYPE_INT8)  :
+                            case (Constants::BAM_TAG_TYPE_UINT8) :
+                                // no endian-swapping necessary
+                                ++i;
+                                break;
+                            case (Constants::BAM_TAG_TYPE_INT16)  :
+                            case (Constants::BAM_TAG_TYPE_UINT16) :
+                                BamTools::SwapEndian_16p(&tagData[i]);
+                                i += sizeof(uint16_t);
+                                break;
+                            case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                            case (Constants::BAM_TAG_TYPE_INT32)  :
+                            case (Constants::BAM_TAG_TYPE_UINT32) :
+                                BamTools::SwapEndian_32p(&tagData[i]);
+                                i += sizeof(uint32_t);
+                                break;
+                            default:
+                                delete[] tagData;
+                                const string message = string("invalid binary array type: ") + arrayType;
+                                throw BamException("BamWriter::SaveAlignment", message);
                          }
-
-                        break;
                      }
  
-                    default :
-                        fprintf(stderr, "BamWriter ERROR: invalid tag value type\n"); // shouldn't get here
-                        free(tagData);
-                        exit(1);
+                    break;
                  }
+
+                default :
+                    delete[] tagData;
+                    const string message = string("invalid tag type: ") + type;
+                    throw BamException("BamWriter::SaveAlignment", message);
              }
-            m_stream.Write(tagData, tagDataLength);
-            free(tagData);
          }
-        else
-            m_stream.Write(al.TagData.data(), tagDataLength);
+
+        m_stream.Write(tagData, tagDataLength);
+        delete[] tagData; // TODO: cleanup on Write exception thrown?
      }
+    else
+        m_stream.Write(al.TagData.data(), tagDataLength);
  }
  
-void BamWriterPrivate::SetWriteCompressed(bool ok) {
-
-    // warn if BAM file is already open
-    // modifying compression is not allowed in this case
-    if ( IsOpen() ) {
-        cerr << "BamWriter WARNING: attempting to change compression mode on an open BAM file is not allowed. "
-             << "Ignoring request." << endl;
-        return;
+void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
+
+    // write the block size
+    unsigned int blockSize = al.SupportData.BlockLength;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // re-calculate bin (in case BamAlignment's position has been previously modified)
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+    buffer[4] = al.SupportData.QuerySequenceLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < 8; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
      }
  
-    // set BgzfStream compression mode
-    m_stream.SetWriteCompressed(ok);
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+
+    // write the raw char data
+    m_stream.Write((char*)al.SupportData.AllCharData.data(),
+                   al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
  }
  
  void BamWriterPrivate::WriteMagicNumber(void) {
diff --git a/src/api/internal/BamWriter_p.h b/src/api/internal/BamWriter_p.h

index caa440eaee91e777f760c35ae7505e5ab6729644..e3547fe43061ba1646090e0a2a9643a3ac85ad3a 100644 (file)
--- a/src/api/internal/BamWriter_p.h
+++ b/src/api/internal/BamWriter_p.h
@@ -2,7 +2,7 @@
  // BamWriter_p.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 24 February 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the basic functionality for producing BAM files
  // ***************************************************************************
@@ -26,6 +26,9 @@
  #include <vector>
  
  namespace BamTools {
+
+class BamAlignment;
+
  namespace Internal {
  
  class BamWriterPrivate {
@@ -38,18 +41,21 @@ class BamWriterPrivate {
      // interface methods
      public:
          void Close(void);
+        std::string GetErrorString(void) const;
          bool IsOpen(void) const;
          bool Open(const std::string& filename,
                    const std::string& samHeaderText,
                    const BamTools::RefVector& referenceSequences);
-        void SaveAlignment(const BamAlignment& al);
+        bool SaveAlignment(const BamAlignment& al);
          void SetWriteCompressed(bool ok);
  
      // 'internal' methods
      public:
-        unsigned int CalculateMinimumBin(const int begin, int end) const;
+        uint32_t CalculateMinimumBin(const int begin, int end) const;
          void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
          void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
+        void WriteAlignment(const BamAlignment& al);
+        void WriteCoreAlignment(const BamAlignment& al);
          void WriteMagicNumber(void);
          void WriteReferences(const BamTools::RefVector& referenceSequences);
          void WriteSamHeaderText(const std::string& samHeaderText);
@@ -58,6 +64,7 @@ class BamWriterPrivate {
      private:
          BgzfStream m_stream;
          bool m_isBigEndian;
+        std::string m_errorString;
  };
  
  } // namespace Internal
diff --git a/src/api/internal/BgzfStream_p.cpp b/src/api/internal/BgzfStream_p.cpp

index 36599b5b363fd6241fcd9fe6dff9a5c3b168235d..69592d655e60811f87416315e8c86d4889c28a66 100644 (file)
--- a/src/api/internal/BgzfStream_p.cpp
+++ b/src/api/internal/BgzfStream_p.cpp
@@ -2,84 +2,120 @@
  // BgzfStream_p.cpp (c) 2011 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 2 September 2011(DB)
+// Last modified: 6 October 2011(DB)
  // ---------------------------------------------------------------------------
  // Based on BGZF routines developed at the Broad Institute.
  // Provides the basic functionality for reading & writing BGZF files
  // Replaces the old BGZF.* files to avoid clashing with other toolkits
  // ***************************************************************************
  
+#include <api/internal/BamException_p.h>
  #include <api/internal/BgzfStream_p.h>
  using namespace BamTools;
  using namespace BamTools::Internal;
  
  #include <cstring>
  #include <algorithm>
+#include <sstream>
  using namespace std;
  
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+BgzfStream::RaiiWrapper::RaiiWrapper(void)
+    : Stream(0)
+{
+    CompressedBlock   = new char[Constants::BGZF_MAX_BLOCK_SIZE];
+    UncompressedBlock = new char[Constants::BGZF_DEFAULT_BLOCK_SIZE];
+}
+
+BgzfStream::RaiiWrapper::~RaiiWrapper(void) {
+
+    // clean up buffers
+    delete[] CompressedBlock;
+    delete[] UncompressedBlock;
+    CompressedBlock = 0;
+    UncompressedBlock = 0;
+
+    if ( Stream ) {
+        fflush(Stream);
+        fclose(Stream);
+        Stream = 0;
+    }
+}
+
+// ---------------------------
+// BgzfStream implementation
+// ---------------------------
+
  // constructor
  BgzfStream::BgzfStream(void)
-    : UncompressedBlockSize(Constants::BGZF_DEFAULT_BLOCK_SIZE)
-    , CompressedBlockSize(Constants::BGZF_MAX_BLOCK_SIZE)
-    , BlockLength(0)
+    : BlockLength(0)
      , BlockOffset(0)
      , BlockAddress(0)
      , IsOpen(false)
      , IsWriteOnly(false)
      , IsWriteCompressed(true)
-    , Stream(NULL)
-    , UncompressedBlock(NULL)
-    , CompressedBlock(NULL)
-{
-    try {
-        CompressedBlock   = new char[CompressedBlockSize];
-        UncompressedBlock = new char[UncompressedBlockSize];
-    } catch( std::bad_alloc& ba ) {
-        fprintf(stderr, "BgzfStream ERROR: unable to allocate memory\n");
-        exit(1);
-    }
-}
+{ }
  
  // destructor
  BgzfStream::~BgzfStream(void) {
-    if( CompressedBlock   ) delete[] CompressedBlock;
-    if( UncompressedBlock ) delete[] UncompressedBlock;
+    Close();
+}
+
+// checks BGZF block header
+bool BgzfStream::CheckBlockHeader(char* header) {
+    return (header[0] == Constants::GZIP_ID1 &&
+            header[1] == Constants::GZIP_ID2 &&
+            header[2] == Z_DEFLATED &&
+            (header[3] & Constants::FLG_FEXTRA) != 0 &&
+            BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
+            header[12] == Constants::BGZF_ID1 &&
+            header[13] == Constants::BGZF_ID2 &&
+            BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
  }
  
  // closes BGZF file
  void BgzfStream::Close(void) {
  
      // skip if file not open
-    if ( !IsOpen ) return;
+    if ( !IsOpen )
+        return;
  
      // if writing to file, flush the current BGZF block,
      // then write an empty block (as EOF marker)
      if ( IsWriteOnly ) {
          FlushBlock();
-        int blockLength = DeflateBlock();
-        fwrite(CompressedBlock, 1, blockLength, Stream);
+        const size_t blockLength = DeflateBlock();
+        fwrite(Resources.CompressedBlock, 1, blockLength, Resources.Stream);
      }
  
      // flush and close stream
-    fflush(Stream);
-    fclose(Stream);
-
-    // reset flags
-    IsWriteCompressed = true;
+    fflush(Resources.Stream);
+    fclose(Resources.Stream);
+    Resources.Stream = 0;
+
+    // reset initial state
+    BlockLength = 0;
+    BlockOffset = 0;
+    BlockAddress = 0;
      IsOpen = false;
+    IsWriteOnly = false;
+    IsWriteCompressed = true;
  }
  
  // compresses the current block
-int BgzfStream::DeflateBlock(void) {
+size_t BgzfStream::DeflateBlock(void) {
  
      // initialize the gzip header
-    char* buffer = CompressedBlock;
+    char* buffer = Resources.CompressedBlock;
      memset(buffer, 0, 18);
      buffer[0]  = Constants::GZIP_ID1;
-    buffer[1]  = (char)Constants::GZIP_ID2;
+    buffer[1]  = Constants::GZIP_ID2;
      buffer[2]  = Constants::CM_DEFLATE;
      buffer[3]  = Constants::FLG_FEXTRA;
-    buffer[9]  = (char)Constants::OS_UNKNOWN;
+    buffer[9]  = Constants::OS_UNKNOWN;
      buffer[10] = Constants::BGZF_XLEN;
      buffer[12] = Constants::BGZF_ID1;
      buffer[13] = Constants::BGZF_ID2;
@@ -90,8 +126,8 @@ int BgzfStream::DeflateBlock(void) {
  
      // loop to retry for blocks that do not compress enough
      int inputLength = BlockOffset;
-    int compressedLength = 0;
-    unsigned int bufferSize = CompressedBlockSize;
+    size_t compressedLength = 0;
+    const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
  
      while ( true ) {
  
@@ -99,82 +135,78 @@ int BgzfStream::DeflateBlock(void) {
          z_stream zs;
          zs.zalloc    = NULL;
          zs.zfree     = NULL;
-        zs.next_in   = (Bytef*)UncompressedBlock;
+        zs.next_in   = (Bytef*)Resources.UncompressedBlock;
          zs.avail_in  = inputLength;
          zs.next_out  = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
-        zs.avail_out = bufferSize - Constants::BGZF_BLOCK_HEADER_LENGTH - Constants::BGZF_BLOCK_FOOTER_LENGTH;
+        zs.avail_out = bufferSize -
+                       Constants::BGZF_BLOCK_HEADER_LENGTH -
+                       Constants::BGZF_BLOCK_FOOTER_LENGTH;
  
          // initialize the zlib compression algorithm
-        if ( deflateInit2(&zs,
-                          compressionLevel,
-                          Z_DEFLATED,
-                          Constants::GZIP_WINDOW_BITS,
-                          Constants::Z_DEFAULT_MEM_LEVEL,
-                          Z_DEFAULT_STRATEGY) != Z_OK )
-        {
-            fprintf(stderr, "BgzfStream ERROR: zlib deflate initialization failed\n");
-            exit(1);
-        }
+        int status = deflateInit2(&zs,
+                                  compressionLevel,
+                                  Z_DEFLATED,
+                                  Constants::GZIP_WINDOW_BITS,
+                                  Constants::Z_DEFAULT_MEM_LEVEL,
+                                  Z_DEFAULT_STRATEGY);
+        if ( status != Z_OK )
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
  
          // compress the data
-        int status = deflate(&zs, Z_FINISH);
+        status = deflate(&zs, Z_FINISH);
+
+        // if not at stream end
          if ( status != Z_STREAM_END ) {
  
              deflateEnd(&zs);
  
-            // reduce the input length and try again
-            if ( status == Z_OK ) {
-                inputLength -= 1024;
-                if ( inputLength < 0 ) {
-                    fprintf(stderr, "BgzfStream ERROR: input reduction failed\n");
-                    exit(1);
-                }
-                continue;
-            }
-
-            fprintf(stderr, "BgzfStream ERROR: zlib::deflateEnd() failed\n");
-            exit(1);
-        }
-
-        // finalize the compression routine
-        if ( deflateEnd(&zs) != Z_OK ) {
-            fprintf(stderr, "BgzfStream ERROR: zlib::deflateEnd() failed\n");
-            exit(1);
-        }
+            // if error status
+            if ( status != Z_OK )
+                throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
  
-        compressedLength = zs.total_out;
-        compressedLength += Constants::BGZF_BLOCK_HEADER_LENGTH + Constants::BGZF_BLOCK_FOOTER_LENGTH;
-        if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE ) {
-            fprintf(stderr, "BgzfStream ERROR: deflate overflow\n");
-            exit(1);
+            // not enough space available in buffer
+            // try to reduce the input length & re-start loop
+            inputLength -= 1024;
+            if ( inputLength <= 0 )
+                throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
+            continue;
          }
  
+        // finalize the compression routine
+        status = deflateEnd(&zs);
+        if ( status != Z_OK )
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
+
+        // update compressedLength
+        compressedLength = zs.total_out +
+                           Constants::BGZF_BLOCK_HEADER_LENGTH +
+                           Constants::BGZF_BLOCK_FOOTER_LENGTH;
+        if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
+            throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
+
+        // quit while loop
          break;
      }
  
      // store the compressed length
-    BamTools::PackUnsignedShort(&buffer[16], (unsigned short)(compressedLength - 1));
+    BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
  
      // store the CRC32 checksum
-    unsigned int crc = crc32(0, NULL, 0);
-    crc = crc32(crc, (Bytef*)UncompressedBlock, inputLength);
+    uint32_t crc = crc32(0, NULL, 0);
+    crc = crc32(crc, (Bytef*)Resources.UncompressedBlock, inputLength);
      BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
      BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
  
      // ensure that we have less than a block of data left
      int remaining = BlockOffset - inputLength;
      if ( remaining > 0 ) {
-        if ( remaining > inputLength ) {
-            fprintf(stderr, "BgzfStream ERROR: after deflate, remainder too large\n");
-            exit(1);
-        }
-        memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);
+        if ( remaining > inputLength )
+            throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
+        memcpy(Resources.UncompressedBlock, Resources.UncompressedBlock + inputLength, remaining);
      }
  
-    // update block data
+    // update block data & return compressedlength
      BlockOffset = remaining;
-
-    // return result
      return compressedLength;
  }
  
@@ -185,14 +217,15 @@ void BgzfStream::FlushBlock(void) {
      while ( BlockOffset > 0 ) {
  
          // compress the data block
-        int blockLength = DeflateBlock();
+        const size_t blockLength = DeflateBlock();
  
          // flush the data to our output stream
-        int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);
+        const size_t numBytesWritten = fwrite(Resources.CompressedBlock, 1, blockLength, Resources.Stream);
          if ( numBytesWritten != blockLength ) {
-            fprintf(stderr, "BgzfStream ERROR: expected to write %u bytes during flushing, but wrote %u bytes\n",
-                    blockLength, numBytesWritten);
-            exit(1);
+            stringstream s("");
+            s << "expected to write " << blockLength
+              << " bytes during flushing, but wrote " << numBytesWritten;
+            throw BamException("BgzfStream::FlushBlock", s.str());
          }
  
          // update block data
@@ -201,34 +234,34 @@ void BgzfStream::FlushBlock(void) {
  }
  
  // decompresses the current block
-int BgzfStream::InflateBlock(const int& blockLength) {
+size_t BgzfStream::InflateBlock(const size_t& blockLength) {
  
-    // inflate the data from compressed buffer into uncompressed buffer
+    // setup zlib stream object
      z_stream zs;
      zs.zalloc    = NULL;
      zs.zfree     = NULL;
-    zs.next_in   = (Bytef*)CompressedBlock + 18;
+    zs.next_in   = (Bytef*)Resources.CompressedBlock + 18;
      zs.avail_in  = blockLength - 16;
-    zs.next_out  = (Bytef*)UncompressedBlock;
-    zs.avail_out = UncompressedBlockSize;
+    zs.next_out  = (Bytef*)Resources.UncompressedBlock;
+    zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
  
+    // initialize
      int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
-    if ( status != Z_OK ) {
-        fprintf(stderr, "BgzfStream ERROR: could not decompress block - zlib::inflateInit() failed\n");
-        return -1;
-    }
+    if ( status != Z_OK )
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
  
+    // decompress
      status = inflate(&zs, Z_FINISH);
      if ( status != Z_STREAM_END ) {
          inflateEnd(&zs);
-        fprintf(stderr, "BgzfStream ERROR: could not decompress block - zlib::inflate() failed\n");
-        return -1;
+        throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
      }
  
+    // finalize
      status = inflateEnd(&zs);
      if ( status != Z_OK ) {
-        fprintf(stderr, "BgzfStream ERROR: could not decompress block - zlib::inflateEnd() failed\n");
-        return -1;
+        inflateEnd(&zs);
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
      }
  
      // return result
@@ -236,10 +269,11 @@ int BgzfStream::InflateBlock(const int& blockLength) {
  }
  
  // opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing)
-bool BgzfStream::Open(const string& filename, const char* mode) {
+void BgzfStream::Open(const string& filename, const char* mode) {
  
-    // close current stream, if necessary, before opening next
-    if ( IsOpen ) Close();
+    // make sure we're starting with fresh state
+    if ( IsOpen )
+        Close();
  
      // determine open mode
      if ( strcmp(mode, "rb") == 0 )
@@ -247,42 +281,41 @@ bool BgzfStream::Open(const string& filename, const char* mode) {
      else if ( strcmp(mode, "wb") == 0)
          IsWriteOnly = true;
      else {
-        fprintf(stderr, "BgzfStream ERROR: unknown file mode: %s\n", mode);
-        return false;
+        const string message = string("unknown file mode: ") + mode;
+        throw BamException("BgzfStream::Open", message);
      }
  
      // open BGZF stream on a file
      if ( (filename != "stdin") && (filename != "stdout") && (filename != "-"))
-        Stream = fopen(filename.c_str(), mode);
+        Resources.Stream = fopen(filename.c_str(), mode);
  
      // open BGZF stream on stdin
      else if ( (filename == "stdin" || filename == "-") && (strcmp(mode, "rb") == 0 ) )
-        Stream = freopen(NULL, mode, stdin);
+        Resources.Stream = freopen(NULL, mode, stdin);
  
      // open BGZF stream on stdout
      else if ( (filename == "stdout" || filename == "-") && (strcmp(mode, "wb") == 0) )
-        Stream = freopen(NULL, mode, stdout);
+        Resources.Stream = freopen(NULL, mode, stdout);
  
-    if ( !Stream ) {
-        fprintf(stderr, "BgzfStream ERROR: unable to open file %s\n", filename.c_str() );
-        return false;
+    // ensure valid Stream
+    if ( !Resources.Stream ) {
+        const string message = string("unable to open file: ") + filename;
+        throw BamException("BgzfStream::Open", message);
      }
  
-    // set flag & return success
+    // set flag
      IsOpen = true;
-    return true;
  }
  
  // reads BGZF data into a byte buffer
-int BgzfStream::Read(char* data, const unsigned int dataLength) {
+size_t BgzfStream::Read(char* data, const size_t dataLength) {
  
      // if stream not open for reading (or empty request)
      if ( !IsOpen || IsWriteOnly || dataLength == 0 )
          return 0;
  
      // read blocks as needed until desired data length is retrieved
-    char* output = data;
-    unsigned int numBytesRead = 0;
+    size_t numBytesRead = 0;
      while ( numBytesRead < dataLength ) {
  
          // determine bytes available in current block
@@ -290,110 +323,95 @@ int BgzfStream::Read(char* data, const unsigned int dataLength) {
  
          // read (and decompress) next block if needed
          if ( bytesAvailable <= 0 ) {
-            if ( !ReadBlock() ) return -1;
+            ReadBlock();
              bytesAvailable = BlockLength - BlockOffset;
-            if ( bytesAvailable <= 0 ) break;
+            if ( bytesAvailable <= 0 )
+                break;
          }
  
          // copy data from uncompressed source buffer into data destination buffer
-        char* buffer   = UncompressedBlock;
-        int copyLength = min( (int)(dataLength-numBytesRead), bytesAvailable );
-        memcpy(output, buffer + BlockOffset, copyLength);
+        const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable );
+        memcpy(data, Resources.UncompressedBlock + BlockOffset, copyLength);
  
          // update counters
          BlockOffset  += copyLength;
-        output       += copyLength;
+        data         += copyLength;
          numBytesRead += copyLength;
      }
  
      // update block data
      if ( BlockOffset == BlockLength ) {
-        BlockAddress = ftell64(Stream);
+        BlockAddress = ftell64(Resources.Stream);
          BlockOffset  = 0;
          BlockLength  = 0;
      }
  
+    // return actual number of bytes read
      return numBytesRead;
  }
  
  // reads a BGZF block
-bool BgzfStream::ReadBlock(void) {
+void BgzfStream::ReadBlock(void) {
  
-    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
-    int64_t blockAddress = ftell64(Stream);
+    // store block start
+    int64_t blockAddress = ftell64(Resources.Stream);
  
      // read block header from file
-    int count = fread(header, 1, sizeof(header), Stream);
+    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
+    size_t count = fread(header, 1, Constants::BGZF_BLOCK_HEADER_LENGTH, Resources.Stream);
  
-    // if block header empty
+    // if block header empty, set marker & skip rest of method
      if ( count == 0 ) {
          BlockLength = 0;
-        return true;
+        return;
      }
  
      // if block header invalid size
-    if ( count != sizeof(header) ) {
-        fprintf(stderr, "BgzfStream ERROR: read block failed - could not read block header\n");
-        return false;
-    }
+    if ( count != sizeof(header) )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header size");
  
      // validate block header contents
-    if ( !BgzfStream::CheckBlockHeader(header) ) {
-        fprintf(stderr, "BgzfStream ERROR: read block failed - invalid block header\n");
-        return false;
-    }
+    if ( !BgzfStream::CheckBlockHeader(header) )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
  
      // copy header contents to compressed buffer
-    int blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
-    char* compressedBlock = CompressedBlock;
-    memcpy(compressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
-    int remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
+    const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
+    memcpy(Resources.CompressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
  
      // read remainder of block
-    count = fread(&compressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], 1, remaining, Stream);
-    if ( count != remaining ) {
-        fprintf(stderr, "BgzfStream ERROR: read block failed - could not read data from block\n");
-        return false;
-    }
+    const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
+    count = fread(&Resources.CompressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], 1, remaining, Resources.Stream);
+    if ( count != remaining )
+        throw BamException("BgzfStream::ReadBlock", "could not read data from block");
  
      // decompress block data
      count = InflateBlock(blockLength);
-    if ( count < 0 ) {
-        fprintf(stderr, "BgzfStream ERROR: read block failed - could not decompress block data\n");
-        return false;
-    }
  
-    // update block data
+    // update block metadata
      if ( BlockLength != 0 )
          BlockOffset = 0;
      BlockAddress = blockAddress;
      BlockLength  = count;
-
-    // return success
-    return true;
  }
  
  // seek to position in BGZF file
-bool BgzfStream::Seek(const int64_t& position) {
-
-    // skip if not open
-    if ( !IsOpen ) return false;
+void BgzfStream::Seek(const int64_t& position) {
  
      // determine adjusted offset & address
      int     blockOffset  = (position & 0xFFFF);
      int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
  
      // attempt seek in file
-    if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {
-        fprintf(stderr, "BgzfStream ERROR: unable to seek in file\n");
-        return false;
+    if ( fseek64(Resources.Stream, blockAddress, SEEK_SET) != 0 ) {
+        stringstream s("");
+        s << "unable to seek to position: " << position;
+        throw BamException("BgzfStream::Seek", s.str());
      }
  
-    // update block data & return success
+    // if successful, update block metadata
      BlockLength  = 0;
      BlockAddress = blockAddress;
      BlockOffset  = blockOffset;
-    return true;
  }
  
  void BgzfStream::SetWriteCompressed(bool ok) {
@@ -408,31 +426,33 @@ int64_t BgzfStream::Tell(void) const {
  }
  
  // writes the supplied data into the BGZF buffer
-unsigned int BgzfStream::Write(const char* data, const unsigned int dataLen) {
+size_t BgzfStream::Write(const char* data, const size_t dataLength) {
  
      // skip if file not open for writing
-    if ( !IsOpen || !IsWriteOnly ) return false;
+    if ( !IsOpen || !IsWriteOnly )
+        return false;
  
      // write blocks as needed til all data is written
-    unsigned int numBytesWritten = 0;
+    size_t numBytesWritten = 0;
      const char* input = data;
-    unsigned int blockLength = UncompressedBlockSize;
-    while ( numBytesWritten < dataLen ) {
+    const size_t blockLength = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+    while ( numBytesWritten < dataLength ) {
  
          // copy data contents to uncompressed output buffer
-        unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);
-        char* buffer = UncompressedBlock;
+        const size_t copyLength = min(blockLength - BlockOffset, dataLength - numBytesWritten);
+        char* buffer = Resources.UncompressedBlock;
          memcpy(buffer + BlockOffset, input, copyLength);
  
-        // update counter
+        // update counters
          BlockOffset     += copyLength;
          input           += copyLength;
          numBytesWritten += copyLength;
  
          // flush (& compress) output buffer when full
-        if ( BlockOffset == blockLength ) FlushBlock();
+        if ( BlockOffset == blockLength )
+            FlushBlock();
      }
  
-    // return result
+    // return actual number of bytes written
      return numBytesWritten;
  }
diff --git a/src/api/internal/BgzfStream_p.h b/src/api/internal/BgzfStream_p.h

index 838f30cde3f2f2a163fa72cd2852ea00315a4887..7ebd647e339819e7544176a548e5694293b8f37f 100644 (file)
--- a/src/api/internal/BgzfStream_p.h
+++ b/src/api/internal/BgzfStream_p.h
@@ -26,6 +26,7 @@
  #include <api/BamConstants.h>
  #include "zlib.h"
  #include <cstdio>
+#include <memory>
  #include <string>
  
  namespace BamTools {
@@ -43,64 +44,53 @@ class BgzfStream {
          // closes BGZF file
          void Close(void);
          // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
-        bool Open(const std::string& filename, const char* mode);
+        void Open(const std::string& filename, const char* mode);
          // reads BGZF data into a byte buffer
-        int Read(char* data, const unsigned int dataLength);
+        size_t Read(char* data, const size_t dataLength);
          // seek to position in BGZF file
-        bool Seek(const int64_t& position);
+        void Seek(const int64_t& position);
          // enable/disable compressed output
          void SetWriteCompressed(bool ok);
          // get file position in BGZF file
          int64_t Tell(void) const;
          // writes the supplied data into the BGZF buffer
-        unsigned int Write(const char* data, const unsigned int dataLen);
+        size_t Write(const char* data, const size_t dataLength);
  
      // internal methods
      private:
          // compresses the current block
-        int DeflateBlock(void);
+        size_t DeflateBlock(void);
          // flushes the data in the BGZF block
          void FlushBlock(void);
          // de-compresses the current block
-        int InflateBlock(const int& blockLength);
+        size_t InflateBlock(const size_t& blockLength);
          // reads a BGZF block
-        bool ReadBlock(void);
+        void ReadBlock(void);
  
      // static 'utility' methods
      public:
          // checks BGZF block header
-        static inline bool CheckBlockHeader(char* header);
+        static bool CheckBlockHeader(char* header);
  
      // data members
      public:
-        unsigned int UncompressedBlockSize;
-        unsigned int CompressedBlockSize;
          unsigned int BlockLength;
          unsigned int BlockOffset;
-        uint64_t BlockAddress;
+        int64_t BlockAddress;
          bool IsOpen;
          bool IsWriteOnly;
          bool IsWriteCompressed;
-        FILE* Stream;
-        char* UncompressedBlock;
-        char* CompressedBlock;
-};
  
-// -------------------------------------------------------------
-// static 'utility' method implementations
+        struct RaiiWrapper {
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+            char* UncompressedBlock;
+            char* CompressedBlock;
+            FILE* Stream;
+        };
+        RaiiWrapper Resources;
  
-// checks BGZF block header
-inline
-bool BgzfStream::CheckBlockHeader(char* header) {
-    return (header[0] == Constants::GZIP_ID1 &&
-            header[1] == (char)Constants::GZIP_ID2 &&
-            header[2] == Z_DEFLATED &&
-            (header[3] & Constants::FLG_FEXTRA) != 0 &&
-            BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
-            header[12] == Constants::BGZF_ID1 &&
-            header[13] == Constants::BGZF_ID2 &&
-            BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
-}
+};
  
  } // namespace Internal
  } // namespace BamTools
diff --git a/src/api/internal/SamFormatParser_p.cpp b/src/api/internal/SamFormatParser_p.cpp

index 1431b7a2753a49095b6974db528963cf41f29a5c..b92e6fdd0989fa0a5c874c9c009225a3c24482ef 100644 (file)
--- a/src/api/internal/SamFormatParser_p.cpp
+++ b/src/api/internal/SamFormatParser_p.cpp
@@ -2,13 +2,14 @@
  // SamFormatParser.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 19 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides functionality for parsing SAM header text into SamHeader object
  // ***************************************************************************
  
  #include <api/SamConstants.h>
  #include <api/SamHeader.h>
+#include <api/internal/BamException_p.h>
  #include <api/internal/SamFormatParser_p.h>
  using namespace BamTools;
  using namespace BamTools::Internal;
@@ -43,7 +44,7 @@ void SamFormatParser::Parse(const string& headerText) {
  void SamFormatParser::ParseSamLine(const string& line) {
  
      // skip if line is not long enough to contain true values
-    if (line.length() < 5 ) return;
+    if ( line.length() < 5 ) return;
  
      // determine token at beginning of line
      const string firstToken = line.substr(0,3);
@@ -53,8 +54,10 @@ void SamFormatParser::ParseSamLine(const string& line) {
      else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);
      else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);
      else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
-    else
-        cerr << "SamFormatParser ERROR: unknown token: " << firstToken << endl;
+    else {
+        const string message = string("unknown token: ") + firstToken;
+        throw BamException("SamFormatParser::ParseSamLine", message);
+    }
  }
  
  void SamFormatParser::ParseHDLine(const string& line) {
@@ -75,13 +78,15 @@ void SamFormatParser::ParseHDLine(const string& line) {
          if      ( tokenTag == Constants::SAM_HD_VERSION_TAG    ) m_header.Version    = tokenValue;
          else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG  ) m_header.SortOrder  = tokenValue;
          else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
-        else
-            cerr << "SamFormatParser ERROR: unknown HD tag: " << tokenTag << endl;
+        else {
+            const string message = string("unknown HD tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseHDLine", message);
+        }
      }
  
-    // if @HD line exists, VN must be provided
+    // check for required tags
      if ( !m_header.HasVersion() )
-        cerr << "SamFormatParser ERROR: @HD line is missing VN tag" << endl;
+        throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
  }
  
  void SamFormatParser::ParseSQLine(const string& line) {
@@ -107,27 +112,20 @@ void SamFormatParser::ParseSQLine(const string& line) {
          else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG   ) seq.Checksum = tokenValue;
          else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG    ) seq.Species = tokenValue;
          else if ( tokenTag == Constants::SAM_SQ_URI_TAG        ) seq.URI = tokenValue;
-        else
-            cerr << "SamFormatParser ERROR: unknown SQ tag: " << tokenTag << endl;
+        else {
+            const string message = string("unknown SQ tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseSQLine", message);
+        }
      }
  
-    bool isMissingRequiredFields = false;
-
-    // if @SQ line exists, SN must be provided
-    if ( !seq.HasName() ) {
-        isMissingRequiredFields = true;
-        cerr << "SamFormatParser ERROR: @SQ line is missing SN tag" << endl;
-    }
-
-    // if @SQ line exists, LN must be provided
-    if ( !seq.HasLength() ) {
-        isMissingRequiredFields = true;
-        cerr << "SamFormatParser ERROR: @SQ line is missing LN tag" << endl;
-    }
+    // check for required tags
+    if ( !seq.HasName() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
+    if ( !seq.HasLength() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
  
      // store SAM sequence entry
-    if ( !isMissingRequiredFields )
-        m_header.Sequences.Add(seq);
+    m_header.Sequences.Add(seq);
  }
  
  void SamFormatParser::ParseRGLine(const string& line) {
@@ -159,21 +157,18 @@ void SamFormatParser::ParseRGLine(const string& line) {
          else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG              ) rg.Sample = tokenValue;
          else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG           ) rg.SequencingCenter = tokenValue;
          else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG       ) rg.SequencingTechnology = tokenValue;
-        else
-            cerr << "SamFormatParser ERROR: unknown RG tag: " << tokenTag << endl;
+        else {
+            const string message = string("unknown RG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseRGLine", message);
+        }
      }
  
-    bool isMissingRequiredFields = false;
-
-    // if @RG line exists, ID must be provided
-    if ( !rg.HasID() ) {
-        isMissingRequiredFields = true;
-        cerr << "SamFormatParser ERROR: @RG line is missing ID tag" << endl;
-    }
+    // check for required tags
+    if ( !rg.HasID() )
+        throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
  
      // store SAM read group entry
-    if ( !isMissingRequiredFields )
-        m_header.ReadGroups.Add(rg);
+    m_header.ReadGroups.Add(rg);
  }
  
  void SamFormatParser::ParsePGLine(const string& line) {
@@ -198,21 +193,18 @@ void SamFormatParser::ParsePGLine(const string& line) {
          else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG     ) pg.CommandLine = tokenValue;
          else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
          else if ( tokenTag == Constants::SAM_PG_VERSION_TAG         ) pg.Version = tokenValue;
-        else
-            cerr << "SamFormatParser ERROR: unknown PG tag: " << tokenTag << endl;
+        else {
+            const string message = string("unknown PG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParsePGLine", message);
+        }
      }
  
-    bool isMissingRequiredFields = false;
-
-    // if @PG line exists, ID must be provided
-    if ( !pg.HasID() ) {
-        isMissingRequiredFields = true;
-        cerr << "SamFormatParser ERROR: @PG line is missing ID tag" << endl;
-    }
+    // check for required tags
+    if ( !pg.HasID() )
+        throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
  
-    // store SAM program record
-    if ( !isMissingRequiredFields )
-        m_header.Programs.Add(pg);
+    // store SAM program entry
+    m_header.Programs.Add(pg);
  }
  
  void SamFormatParser::ParseCOLine(const string& line) {
diff --git a/src/api/internal/SamFormatPrinter_p.cpp b/src/api/internal/SamFormatPrinter_p.cpp

index df13fe55b9419cfc1eb19aee3ec9c68b8c4eb274..d81db5d450918e4ca97ed04f5e8144b899b6ac4b 100644 (file)
--- a/src/api/internal/SamFormatPrinter_p.cpp
+++ b/src/api/internal/SamFormatPrinter_p.cpp
@@ -2,7 +2,7 @@
  // SamFormatPrinter.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 19 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides functionality for printing formatted SAM header to string
  // ***************************************************************************
@@ -18,16 +18,25 @@ using namespace BamTools::Internal;
  #include <vector>
  using namespace std;
  
+// ------------------------
+// static utility methods
+// ------------------------
+
+static inline
+const string FormatTag(const string& tag, const string& value) {
+    return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value);
+}
+
+// ---------------------------------
+// SamFormatPrinter implementation
+// ---------------------------------
+
  SamFormatPrinter::SamFormatPrinter(const SamHeader& header)
      : m_header(header)
  { }
  
  SamFormatPrinter::~SamFormatPrinter(void) { }
  
-const string SamFormatPrinter::FormatTag(const string &tag, const string &value) const {
-    return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value);
-}
-
  const string SamFormatPrinter::ToString(void) const {
  
      // clear out stream
diff --git a/src/api/internal/SamFormatPrinter_p.h b/src/api/internal/SamFormatPrinter_p.h

index 8250db660cab02e545f4fbbc505a4083a1b6064b..ea29181a69988f8cd460c607be2190ad58d0c7a5 100644 (file)
--- a/src/api/internal/SamFormatPrinter_p.h
+++ b/src/api/internal/SamFormatPrinter_p.h
@@ -2,7 +2,7 @@
  // SamFormatPrinter.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides functionality for printing formatted SAM header to string
  // ***************************************************************************
@@ -42,7 +42,6 @@ class SamFormatPrinter {
  
      // internal methods
      private:
-        const std::string FormatTag(const std::string& tag, const std::string& value) const;
          void PrintHD(std::stringstream& out) const;
          void PrintSQ(std::stringstream& out) const;
          void PrintRG(std::stringstream& out) const;
diff --git a/src/api/internal/SamHeaderValidator_p.cpp b/src/api/internal/SamHeaderValidator_p.cpp

index 936da49c04bf13ae431ee01f875f1eab450cdeb6..75e2c135ff9f35ff1d929b800691af64d8dedb1c 100644 (file)
--- a/src/api/internal/SamHeaderValidator_p.cpp
+++ b/src/api/internal/SamHeaderValidator_p.cpp
@@ -2,7 +2,7 @@
  // SamHeaderValidator.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 18 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides functionality for validating SamHeader data
  // ***************************************************************************
@@ -15,14 +15,15 @@ using namespace BamTools;
  using namespace BamTools::Internal;
  
  #include <cctype>
-#include <iostream>
  #include <set>
  #include <sstream>
  using namespace std;
  
-namespace BamTools {
-namespace Internal {
+// ------------------------
+// static utility methods
+// -------------------------
  
+static
  bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
  
      // can omit checking chars if lengths not equal
@@ -41,9 +42,6 @@ bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
      return true;
  }
  
-} // namespace Internal
-} // namespace BamTools
-
  // ------------------------------------------------------------------------
  // Allow validation rules to vary, as needed, between SAM header versions
  //
@@ -76,25 +74,62 @@ SamHeaderValidator::SamHeaderValidator(const SamHeader& header)
  
  SamHeaderValidator::~SamHeaderValidator(void) { }
  
-bool SamHeaderValidator::Validate(bool verbose) {
+void SamHeaderValidator::AddError(const string& message) {
+    m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);
+}
+
+void SamHeaderValidator::AddWarning(const string& message) {
+    m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);
+}
+
+void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
+
+    // skip if no error messages
+    if ( m_errorMessages.empty() )
+        return;
+
+    // print error header line
+    stream << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
+
+    // print each error message
+    vector<string>::const_iterator errorIter = m_errorMessages.begin();
+    vector<string>::const_iterator errorEnd  = m_errorMessages.end();
+    for ( ; errorIter != errorEnd; ++errorIter )
+        stream << (*errorIter);
+}
+
+void SamHeaderValidator::PrintMessages(ostream& stream) {
+    PrintErrorMessages(stream);
+    PrintWarningMessages(stream);
+}
+
+void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
+
+    // skip if no warning messages
+    if ( m_warningMessages.empty() )
+        return;
+
+    // print warning header line
+    stream << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;
+
+    // print each warning message
+    vector<string>::const_iterator warnIter = m_warningMessages.begin();
+    vector<string>::const_iterator warnEnd  = m_warningMessages.end();
+    for ( ; warnIter != warnEnd; ++warnIter )
+        stream << (*warnIter);
+}
  
-    // validate header components
+// entry point for validation
+bool SamHeaderValidator::Validate(void) {
      bool isValid = true;
      isValid &= ValidateMetadata();
      isValid &= ValidateSequenceDictionary();
      isValid &= ValidateReadGroupDictionary();
      isValid &= ValidateProgramChain();
-
-    // report errors if desired
-    if ( verbose ) {
-        PrintErrorMessages();
-        PrintWarningMessages();
-    }
-
-    // return validation status
      return isValid;
  }
  
+// check all SAM header 'metadata'
  bool SamHeaderValidator::ValidateMetadata(void) {
      bool isValid = true;
      isValid &= ValidateVersion();
@@ -103,6 +138,7 @@ bool SamHeaderValidator::ValidateMetadata(void) {
      return isValid;
  }
  
+// check SAM header version tag
  bool SamHeaderValidator::ValidateVersion(void) {
  
      const string& version = m_header.Version;
@@ -147,6 +183,7 @@ bool SamHeaderValidator::ContainsOnlyDigits(const string& s) {
      return ( nonDigitPosition == string::npos ) ;
  }
  
+// validate SAM header sort order tag
  bool SamHeaderValidator::ValidateSortOrder(void) {
  
      const string& sortOrder = m_header.SortOrder;
@@ -171,6 +208,7 @@ bool SamHeaderValidator::ValidateSortOrder(void) {
      return false;
  }
  
+// validate SAM header group order tag
  bool SamHeaderValidator::ValidateGroupOrder(void) {
  
      const string& groupOrder = m_header.GroupOrder;
@@ -193,6 +231,7 @@ bool SamHeaderValidator::ValidateGroupOrder(void) {
      return false;
  }
  
+// validate SAM header sequence dictionary
  bool SamHeaderValidator::ValidateSequenceDictionary(void) {
  
      bool isValid = true;
@@ -213,6 +252,7 @@ bool SamHeaderValidator::ValidateSequenceDictionary(void) {
      return isValid;
  }
  
+// make sure all SQ names are unique
  bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
  
      bool isValid = true;
@@ -244,6 +284,7 @@ bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
      return isValid;
  }
  
+// validate SAM header sequence entry
  bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
      bool isValid = true;
      isValid &= CheckNameFormat(seq.Name);
@@ -251,6 +292,7 @@ bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
      return isValid;
  }
  
+// check sequence name is valid format
  bool SamHeaderValidator::CheckNameFormat(const string& name) {
  
      // invalid if name is empty
@@ -269,6 +311,7 @@ bool SamHeaderValidator::CheckNameFormat(const string& name) {
      return true;
  }
  
+// check that sequence length is within accepted range
  bool SamHeaderValidator::CheckLengthInRange(const string& length) {
  
      // invalid if empty
@@ -292,6 +335,7 @@ bool SamHeaderValidator::CheckLengthInRange(const string& length) {
      return true;
  }
  
+// validate SAM header read group dictionary
  bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
  
      bool isValid = true;
@@ -312,6 +356,7 @@ bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
      return isValid;
  }
  
+// make sure RG IDs and platform units are unique
  bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
  
      bool isValid = true;
@@ -364,6 +409,7 @@ bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
      return isValid;
  }
  
+// validate SAM header read group entry
  bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
      bool isValid = true;
      isValid &= CheckReadGroupID(rg.ID);
@@ -371,6 +417,7 @@ bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
      return isValid;
  }
  
+// make sure RG ID exists
  bool SamHeaderValidator::CheckReadGroupID(const string& id) {
  
      // invalid if empty
@@ -383,6 +430,7 @@ bool SamHeaderValidator::CheckReadGroupID(const string& id) {
      return true;
  }
  
+// make sure RG sequencing tech is one of the accepted keywords
  bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
  
      // if no technology provided, no problem, just return OK
@@ -407,6 +455,7 @@ bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
      return false;
  }
  
+// validate the SAM header "program chain"
  bool SamHeaderValidator::ValidateProgramChain(void) {
      bool isValid = true;
      isValid &= ContainsUniqueProgramIds();
@@ -414,6 +463,7 @@ bool SamHeaderValidator::ValidateProgramChain(void) {
      return isValid;
  }
  
+// make sure all PG IDs are unique
  bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
  
      bool isValid = true;
@@ -445,6 +495,7 @@ bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
      return isValid;
  }
  
+// make sure that any PP tags present point to existing @PG IDs
  bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
  
      bool isValid = true;
@@ -471,40 +522,3 @@ bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
      // return validation state
      return isValid;
  }
-void SamHeaderValidator::AddError(const string& message) {
-    m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::AddWarning(const string& message) {
-    m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::PrintErrorMessages(void) {
-
-    // skip if no error messages
-    if ( m_errorMessages.empty() ) return;
-
-    // print error header line
-    cerr << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
-
-    // print each error message
-    vector<string>::const_iterator errorIter = m_errorMessages.begin();
-    vector<string>::const_iterator errorEnd  = m_errorMessages.end();
-    for ( ; errorIter != errorEnd; ++errorIter )
-        cerr << (*errorIter);
-}
-
-void SamHeaderValidator::PrintWarningMessages(void) {
-
-    // skip if no warning messages
-    if ( m_warningMessages.empty() ) return;
-
-    // print warning header line
-    cerr << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;
-
-    // print each warning message
-    vector<string>::const_iterator warnIter = m_warningMessages.begin();
-    vector<string>::const_iterator warnEnd  = m_warningMessages.end();
-    for ( ; warnIter != warnEnd; ++warnIter )
-        cerr << (*warnIter);
-}
diff --git a/src/api/internal/SamHeaderValidator_p.h b/src/api/internal/SamHeaderValidator_p.h

index 1fc1644dd5e1033019724bff5826e2218e246c24..7d0c60aa32e464f1774c882848413d27cd034f8e 100644 (file)
--- a/src/api/internal/SamHeaderValidator_p.h
+++ b/src/api/internal/SamHeaderValidator_p.h
@@ -2,7 +2,7 @@
  // SamHeaderValidator.h (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 13 January 2011 (DB)
+// Last modified: 6 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides functionality for validating SamHeader data
  // ***************************************************************************
@@ -20,6 +20,7 @@
  //
  // We mean it.
  
+#include <iostream>
  #include <string>
  #include <vector>
  
@@ -40,9 +41,12 @@ class SamHeaderValidator {
  
      // SamHeaderValidator interface
      public:
+
+        // prints error & warning messages
+        void PrintMessages(std::ostream& stream);
+
          // validates SamHeader data, returns true/false accordingly
-        // prints error & warning messages to stderr when @verbose is true
-        bool Validate(bool verbose = false);
+        bool Validate(void);
  
      // internal methods
      private:
@@ -76,8 +80,8 @@ class SamHeaderValidator {
          // error reporting
          void AddError(const std::string& message);
          void AddWarning(const std::string& message);
-        void PrintErrorMessages(void);
-        void PrintWarningMessages(void);
+        void PrintErrorMessages(std::ostream& stream);
+        void PrintWarningMessages(std::ostream& stream);
  
      // data members
      private:
author	derek <derekwbarnett@gmail.com>
	Fri, 7 Oct 2011 19:12:57 +0000 (15:12 -0400)
committer	derek <derekwbarnett@gmail.com>
	Fri, 7 Oct 2011 19:12:57 +0000 (15:12 -0400)
src/api/BamAlignment.cpp		patch \| blob \| history
src/api/BamAlignment.h		patch \| blob \| history
src/api/BamAux.h		patch \| blob \| history
src/api/BamConstants.h		patch \| blob \| history
src/api/BamIndex.h		patch \| blob \| history
src/api/BamMultiReader.cpp		patch \| blob \| history
src/api/BamMultiReader.h		patch \| blob \| history
src/api/BamReader.cpp		patch \| blob \| history
src/api/BamReader.h		patch \| blob \| history
src/api/BamWriter.cpp		patch \| blob \| history
src/api/BamWriter.h		patch \| blob \| history
src/api/CMakeLists.txt		patch \| blob \| history
src/api/SamHeader.cpp		patch \| blob \| history
src/api/SamHeader.h		patch \| blob \| history
src/api/internal/BamException_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/BamException_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/BamHeader_p.cpp		patch \| blob \| history
src/api/internal/BamHeader_p.h		patch \| blob \| history
src/api/internal/BamIndexFactory_p.cpp		patch \| blob \| history
src/api/internal/BamMultiMerger_p.h		patch \| blob \| history
src/api/internal/BamMultiReader_p.cpp		patch \| blob \| history
src/api/internal/BamMultiReader_p.h		patch \| blob \| history
src/api/internal/BamRandomAccessController_p.cpp		patch \| blob \| history
src/api/internal/BamRandomAccessController_p.h		patch \| blob \| history
src/api/internal/BamReader_p.cpp		patch \| blob \| history
src/api/internal/BamReader_p.h		patch \| blob \| history
src/api/internal/BamStandardIndex_p.cpp		patch \| blob \| history
src/api/internal/BamStandardIndex_p.h		patch \| blob \| history
src/api/internal/BamToolsIndex_p.cpp		patch \| blob \| history
src/api/internal/BamToolsIndex_p.h		patch \| blob \| history
src/api/internal/BamWriter_p.cpp		patch \| blob \| history
src/api/internal/BamWriter_p.h		patch \| blob \| history
src/api/internal/BgzfStream_p.cpp		patch \| blob \| history
src/api/internal/BgzfStream_p.h		patch \| blob \| history
src/api/internal/SamFormatParser_p.cpp		patch \| blob \| history
src/api/internal/SamFormatPrinter_p.cpp		patch \| blob \| history
src/api/internal/SamFormatPrinter_p.h		patch \| blob \| history
src/api/internal/SamHeaderValidator_p.cpp		patch \| blob \| history
src/api/internal/SamHeaderValidator_p.h		patch \| blob \| history