merge with remoteio branch

author derek <derekwbarnett@gmail.com>

Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)

committer derek <derekwbarnett@gmail.com>

Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)
author derek <derekwbarnett@gmail.com>
Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)
committer derek <derekwbarnett@gmail.com>
Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)
diff --git a/src/api/BamAux.h b/src/api/BamAux.h

index f4511259ea67136934e736a3cebf0846353d65ed..0dd3e993377ea609cf3e573d14b4ba1952700a44 100644 (file)
--- a/src/api/BamAux.h
+++ b/src/api/BamAux.h
@@ -2,7 +2,7 @@
  // BamAux.h (c) 2009 Derek Barnett, Michael Strömberg\r
  // Marth Lab, Department of Biology, Boston College\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 10 October 2011 (DB)\r
+// Last modified: 25 October 2011 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Provides data structures & utility methods that are used throughout the API.\r
  // ***************************************************************************\r
@@ -11,6 +11,7 @@
  #define BAMAUX_H\r
  \r
  #include "api/api_global.h"\r
+#include <cstring>\r
  #include <fstream> \r
  #include <iostream>\r
  #include <string>\r
@@ -441,13 +442,25 @@ API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {
      \internal\r
  */\r
  struct RaiiBuffer {\r
+\r
+    // data members\r
+    char* Buffer;\r
+    const size_t NumBytes;\r
+\r
+    // ctor & dtor\r
      RaiiBuffer(const size_t n)\r
          : Buffer( new char[n]() )\r
+        , NumBytes(n)\r
      { }\r
+\r
      ~RaiiBuffer(void) {\r
          delete[] Buffer;\r
      }\r
-    char* Buffer;\r
+\r
+    // add'l methods\r
+    void Clear(void) {\r
+        memset(Buffer, 0, NumBytes);\r
+    }\r
  };\r
  \r
  } // namespace BamTools\r
diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp

index ef38469651c292e49fdbfec9317cc903791c24f3..f61aa2648f372c7214df85a8cb38aa0f14631d07 100644 (file)
--- a/src/api/BamMultiReader.cpp
+++ b/src/api/BamMultiReader.cpp
@@ -2,7 +2,7 @@
  // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Convenience class for reading multiple BAM files.
  //
@@ -13,7 +13,7 @@
  // ***************************************************************************
  
  #include "api/BamMultiReader.h"
-#include "api/internal/BamMultiReader_p.h"
+#include "api/internal/bam/BamMultiReader_p.h"
  using namespace BamTools;
  
  #include <string>
diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h

index ea068d0c0ad58a9736fde8b4abb34de6632fcefe..e5fc9c987fb3d1f78be383cd355126205aa2524c 100644 (file)
--- a/src/api/BamMultiReader.h
+++ b/src/api/BamMultiReader.h
@@ -2,7 +2,7 @@
  // BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Convenience class for reading multiple BAM files.
  // ***************************************************************************
diff --git a/src/api/BamReader.cpp b/src/api/BamReader.cpp

index 6080b3656198d29fa22f6d6cb82d64f102ae5edf..ae2adec94e498dadc97115807ab0d52dbe178804 100644 (file)
--- a/src/api/BamReader.cpp
+++ b/src/api/BamReader.cpp
@@ -2,13 +2,13 @@
  // BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides read access to BAM files.
  // ***************************************************************************
  
  #include "api/BamReader.h"
-#include "api/internal/BamReader_p.h"
+#include "api/internal/bam/BamReader_p.h"
  using namespace BamTools;
  using namespace BamTools::Internal;
  
diff --git a/src/api/BamWriter.cpp b/src/api/BamWriter.cpp

index b1582a86a8236ff4d9bca170b06a4148f4870dd5..cbbfdae7f02eef0b6f49c1205bcd8e0dd55cfdf2 100644 (file)
--- a/src/api/BamWriter.cpp
+++ b/src/api/BamWriter.cpp
@@ -2,7 +2,7 @@
  // BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett\r
  // Marth Lab, Department of Biology, Boston College\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 10 October 2011 (DB)\r
+// Last modified: 25 October 2011 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Provides the basic functionality for producing BAM files\r
  // ***************************************************************************\r
@@ -10,7 +10,7 @@
  #include "api/BamAlignment.h"\r
  #include "api/BamWriter.h"\r
  #include "api/SamHeader.h"\r
-#include "api/internal/BamWriter_p.h"\r
+#include "api/internal/bam/BamWriter_p.h"\r
  using namespace BamTools;\r
  using namespace BamTools::Internal;\r
  using namespace std;\r
diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt

index c8504edba46e3023c35a1f51ac9d66bf7e109e4d..539feca0317a4e801ce14fa34626c0a98849e654 100644 (file)
--- a/src/api/CMakeLists.txt
+++ b/src/api/CMakeLists.txt
@@ -12,7 +12,10 @@ include_directories( ${BamTools_SOURCE_DIR}/src )
  add_definitions( -DBAMTOOLS_API_LIBRARY ) # (for proper exporting of library symbols)
  add_definitions( -fPIC ) # (attempt to force PIC compiling on CentOS, not being set on shared libs by CMake)
  
-# list of all BamTools API source (.cpp) files
+# fetch all internal source files
+add_subdirectory ( internal )
+
+# make list of all API source files
  set( BamToolsAPISources
          BamAlignment.cpp
          BamMultiReader.cpp
@@ -25,26 +28,7 @@ set( BamToolsAPISources
          SamReadGroupDictionary.cpp
          SamSequence.cpp
          SamSequenceDictionary.cpp
-        internal/BamDeviceFactory_p.cpp
-        internal/BamException_p.cpp
-        internal/BamFile_p.cpp
-        internal/BamFtp_p.cpp
-        internal/BamHeader_p.cpp
-        internal/BamHttp_p.cpp
-        internal/BamIndexFactory_p.cpp
-        internal/BamMultiReader_p.cpp
-        internal/BamPipe_p.cpp
-        internal/BamRandomAccessController_p.cpp
-        internal/BamReader_p.cpp
-        internal/BamStandardIndex_p.cpp
-        internal/BamToolsIndex_p.cpp
-        internal/BamWriter_p.cpp
-        internal/BgzfStream_p.cpp
-        internal/ILocalIODevice_p.cpp
-        internal/IRemoteIODevice_p.cpp
-        internal/SamFormatParser_p.cpp
-        internal/SamFormatPrinter_p.cpp
-        internal/SamHeaderValidator_p.cpp
+        ${InternalSources}
  )
  
  # create main BamTools API shared library
@@ -52,38 +36,48 @@ add_library( BamTools SHARED ${BamToolsAPISources} )
  set_target_properties( BamTools PROPERTIES
                         SOVERSION "2.0.5"
                         OUTPUT_NAME "bamtools" )
-target_link_libraries( BamTools z )
-install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin" )
  
  # create main BamTools API static library
  add_library( BamTools-static STATIC ${BamToolsAPISources} )
-set_target_properties( BamTools-static PROPERTIES
-                       OUTPUT_NAME "bamtools"
+set_target_properties( BamTools-static PROPERTIES 
+                       OUTPUT_NAME "bamtools" 
                         PREFIX "lib" )
-target_link_libraries( BamTools-static z )
-install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools" )
+
+# link libraries with zlib automatically (plus Winsock on Windows; CMake defines WIN32, not _WIN32)
+if ( WIN32 )
+    set( APILibs z ws2_32 )
+else ( WIN32 )
+    set( APILibs z )
+endif ( WIN32 )
+
+target_link_libraries( BamTools ${APILibs} )
+target_link_libraries( BamTools-static ${APILibs} )
+
+# set library install destinations
+install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin")
+install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools")
  
  # export API headers
-include( ../ExportHeader.cmake )
-set( ApiIncludeDir "api" )
-ExportHeader( APIHeaders api_global.h             ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamAlgorithms.h          ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamAlignment.h           ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamAux.h                 ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamConstants.h           ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamIndex.h               ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamMultiReader.h         ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamReader.h              ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamWriter.h              ${ApiIncludeDir} )
-ExportHeader( APIHeaders IBamIODevice.h           ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamConstants.h           ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamHeader.h              ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamProgram.h             ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamProgramChain.h        ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamReadGroup.h           ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamSequence.h            ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamSequenceDictionary.h  ${ApiIncludeDir} )
+include(../ExportHeader.cmake)
+set(ApiIncludeDir "api")
+ExportHeader(APIHeaders api_global.h             ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAlgorithms.h          ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAlignment.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAux.h                 ${ApiIncludeDir})
+ExportHeader(APIHeaders BamConstants.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders BamIndex.h               ${ApiIncludeDir})
+ExportHeader(APIHeaders BamMultiReader.h         ${ApiIncludeDir})
+ExportHeader(APIHeaders BamReader.h              ${ApiIncludeDir})
+ExportHeader(APIHeaders BamWriter.h              ${ApiIncludeDir})
+ExportHeader(APIHeaders IBamIODevice.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders SamConstants.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders SamHeader.h              ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgram.h             ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgramChain.h        ${ApiIncludeDir})
+ExportHeader(APIHeaders SamReadGroup.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamSequence.h            ${ApiIncludeDir})
+ExportHeader(APIHeaders SamSequenceDictionary.h  ${ApiIncludeDir})
  
  set( AlgorithmsIncludeDir "api/algorithms" )
  ExportHeader( AlgorithmsHeaders algorithms/Sort.h ${AlgorithmsIncludeDir} )
diff --git a/src/api/IBamIODevice.h b/src/api/IBamIODevice.h

index b34e449a04ed966756c8c1bc3cb71a60f7bd970f..cf641298a9df17d19b5cbd71b5f9027fbaeb334a 100644 (file)
--- a/src/api/IBamIODevice.h
+++ b/src/api/IBamIODevice.h
@@ -2,7 +2,7 @@
  // IBamIODevice.h (c) 2011 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 10 November 2011 (DB)
  // ---------------------------------------------------------------------------
  // Base class for all BAM I/O devices (e.g. local file, pipe, HTTP, FTP, etc.)
  //
@@ -19,6 +19,7 @@
  #define IBAMIODEVICE_H
  
  #include "api/api_global.h"
+#include <cstdio>
  #include <string>
  
  namespace BamTools {
@@ -26,9 +27,10 @@ namespace BamTools {
  class API_EXPORT IBamIODevice {
  
      // enums
-    public: enum OpenMode { NotOpen = 0
-                          , ReadOnly
-                          , WriteOnly
+    public: enum OpenMode { NotOpen   = 0x0000
+                          , ReadOnly  = 0x0001
+                          , WriteOnly = 0x0002
+                          , ReadWrite = ReadOnly | WriteOnly
                            };
  
      // ctor & dtor
@@ -38,14 +40,16 @@ class API_EXPORT IBamIODevice {
      // IBamIODevice interface
      public:
  
+        // TODO: add seek(pos, *from*)
+
          // pure virtuals
          virtual void Close(void) =0;
          virtual bool IsRandomAccess(void) const =0;
          virtual bool Open(const OpenMode mode) =0;
-        virtual size_t Read(char* data, const unsigned int numBytes) =0;
-        virtual bool Seek(const int64_t& position) =0;
+        virtual int64_t Read(char* data, const unsigned int numBytes) =0;
+        virtual bool Seek(const int64_t& position, const int origin = SEEK_SET) =0;
          virtual int64_t Tell(void) const =0;
-        virtual size_t Write(const char* data, const unsigned int numBytes) =0;
+        virtual int64_t Write(const char* data, const unsigned int numBytes) =0;
  
          // default implementation provided
          virtual std::string GetErrorString(void);
diff --git a/src/api/SamHeader.cpp b/src/api/SamHeader.cpp

index 5de2abcd9006d6eb86693973c416b0a61b9b2998..b2925f2a5d6575729ba785e4926c85bab8bb9fd5 100644 (file)
--- a/src/api/SamHeader.cpp
+++ b/src/api/SamHeader.cpp
@@ -2,17 +2,17 @@
  // SamHeader.cpp (c) 2010 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides direct read/write access to the SAM header data fields.
  // ***************************************************************************
  
  #include "api/SamConstants.h"
  #include "api/SamHeader.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/SamFormatParser_p.h"
-#include "api/internal/SamFormatPrinter_p.h"
-#include "api/internal/SamHeaderValidator_p.h"
+#include "api/internal/utils/BamException_p.h"
+#include "api/internal/sam/SamFormatParser_p.h"
+#include "api/internal/sam/SamFormatPrinter_p.h"
+#include "api/internal/sam/SamHeaderValidator_p.h"
  using namespace BamTools;
  using namespace BamTools::Internal;
  using namespace std;
diff --git a/src/api/internal/BamDeviceFactory_p.cpp b/src/api/internal/BamDeviceFactory_p.cpp

deleted file mode 100644 (file)

index 895d08c..0000000
--- a/src/api/internal/BamDeviceFactory_p.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// ***************************************************************************
-// BamDeviceFactory_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 September 2011 (DB)
-// ---------------------------------------------------------------------------
-// Creates built-in concrete implementations of IBamIODevices
-// ***************************************************************************
-
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamFile_p.h"
-#include "api/internal/BamFtp_p.h"
-#include "api/internal/BamHttp_p.h"
-#include "api/internal/BamPipe_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-using namespace std;
-
-IBamIODevice* BamDeviceFactory::CreateDevice(const string& source) {
-
-    // check for requested pipe
-    if ( source == "-" || source == "stdin" || source == "stdout" )
-        return new BamPipe;
-
-    // check for HTTP prefix
-    if ( source.find("http://") == 0 )
-        return new BamHttp(source);
-
-    // check for FTP prefix
-    if ( source.find("ftp://") == 0 )
-        return new BamFtp(source);
-
-    // otherwise assume a "normal" file
-    return new BamFile(source);
-}
diff --git a/src/api/internal/BamDeviceFactory_p.h b/src/api/internal/BamDeviceFactory_p.h

deleted file mode 100644 (file)

index 1d48533..0000000
--- a/src/api/internal/BamDeviceFactory_p.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// ***************************************************************************
-// BamDeviceFactory_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Creates built-in concrete implementations of IBamIODevices
-// ***************************************************************************
-
-#ifndef BAMDEVICEFACTORY_P_H
-#define BAMDEVICEFACTORY_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamDeviceFactory {
-    public:
-        static IBamIODevice* CreateDevice(const std::string& source);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMDEVICEFACTORY_P_H
diff --git a/src/api/internal/BamException_p.cpp b/src/api/internal/BamException_p.cpp

deleted file mode 100644 (file)

index 38469e7..0000000
--- a/src/api/internal/BamException_p.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-// ***************************************************************************
-// BamException_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides a basic exception class for BamTools internals
-// ***************************************************************************
-
-#include "api/internal/BamException_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-using namespace std;
-
-const string BamException::SEPARATOR = ": ";
diff --git a/src/api/internal/BamException_p.h b/src/api/internal/BamException_p.h

deleted file mode 100644 (file)

index 5199737..0000000
--- a/src/api/internal/BamException_p.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// ***************************************************************************
-// BamException_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides a basic exception class for BamTools internals
-// ***************************************************************************
-
-#ifndef BAMEXCEPTION_P_H
-#define BAMEXCEPTION_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <exception>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamException : public std::exception {
-
-    public:
-        inline BamException(const std::string& where, const std::string& message)
-            : std::exception()
-            , m_errorString(where + SEPARATOR + message)
-        { }
-
-        inline ~BamException(void) throw() { }
-
-        inline const char* what(void) const throw() {
-            return m_errorString.c_str();
-        }
-
-    private:
-        std::string m_errorString;
-        static const std::string SEPARATOR;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMEXCEPTION_P_H
diff --git a/src/api/internal/BamFile_p.cpp b/src/api/internal/BamFile_p.cpp

deleted file mode 100644 (file)

index 74c4ed6..0000000
--- a/src/api/internal/BamFile_p.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-// ***************************************************************************
-// BamFile_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM file-specific IO behavior
-// ***************************************************************************
-
-#include "api/internal/BamFile_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <iostream>
-using namespace std;
-
-BamFile::BamFile(const string& filename)
-    : ILocalIODevice()
-    , m_filename(filename)
-{ }
-
-BamFile::~BamFile(void) { }
-
-void BamFile::Close(void) {
-    if ( IsOpen() ) {
-        m_filename.clear();
-        ILocalIODevice::Close();
-    }
-}
-
-bool BamFile::IsRandomAccess(void) const {
-    return true;
-}
-
-bool BamFile::Open(const IBamIODevice::OpenMode mode) {
-
-    // make sure we're starting with a fresh file stream
-    Close();
-
-    // attempt to open FILE* depending on requested openmode
-    if ( mode == IBamIODevice::ReadOnly )
-        m_stream = fopen(m_filename.c_str(), "rb");
-    else if ( mode == IBamIODevice::WriteOnly )
-        m_stream = fopen(m_filename.c_str(), "wb");
-    else {
-        SetErrorString("BamFile::Open", "unknown open mode requested");
-        return false;
-    }
-
-    // check that we obtained a valid FILE*
-    if ( m_stream == 0 ) {
-        const string message_base = string("could not open file handle for ");
-        const string message = message_base + ( (m_filename.empty()) ? "empty filename" : m_filename );
-        SetErrorString("BamFile::Open", message);
-        return false;
-    }
-
-    // store current IO mode & return success
-    m_mode = mode;
-    return true;
-}
-
-bool BamFile::Seek(const int64_t& position) {
-    BT_ASSERT_X( m_stream, "BamFile::Seek() - null stream" );
-    return ( fseek64(m_stream, position, SEEK_SET) == 0 );
-}
diff --git a/src/api/internal/BamFile_p.h b/src/api/internal/BamFile_p.h

deleted file mode 100644 (file)

index 873e71a..0000000
--- a/src/api/internal/BamFile_p.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// ***************************************************************************
-// BamFile_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM file-specific IO behavior
-// ***************************************************************************
-
-#ifndef BAMFILE_P_H
-#define BAMFILE_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/internal/ILocalIODevice_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamFile : public ILocalIODevice {
-
-    // ctor & dtor
-    public:
-        BamFile(const std::string& filename);
-        ~BamFile(void);
-
-    // ILocalIODevice implementation
-    public:
-        void Close(void);
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        bool Seek(const int64_t& position);
-
-    // data members
-    private:
-        std::string m_filename;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMFILE_P_H
diff --git a/src/api/internal/BamFtp_p.cpp b/src/api/internal/BamFtp_p.cpp

deleted file mode 100644 (file)

index 779d099..0000000
--- a/src/api/internal/BamFtp_p.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamFtp_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on FTP server
-// ***************************************************************************
-
-#include "api/internal/BamFtp_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-using namespace std;
-
-BamFtp::BamFtp(const string& url)
-    : IBamIODevice()
-{
-    BT_ASSERT_X(false, "BamFtp not yet implemented");
-}
-
-BamFtp::~BamFtp(void) { }
-
-void BamFtp::Close(void) {
-    return ;
-}
-
-bool BamFtp::IsRandomAccess(void) const {
-    return true;
-}
-
-bool BamFtp::Open(const IBamIODevice::OpenMode mode) {
-    (void) mode;
-    return true;
-}
-
-size_t BamFtp::Read(char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
-
-bool BamFtp::Seek(const int64_t& position) {
-    (void)position;
-    return true;
-}
-
-int64_t BamFtp::Tell(void) const {
-    return -1;
-}
-
-size_t BamFtp::Write(const char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
diff --git a/src/api/internal/BamFtp_p.h b/src/api/internal/BamFtp_p.h

deleted file mode 100644 (file)

index 1f5ee0f..0000000
--- a/src/api/internal/BamFtp_p.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamFtp_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on FTP server
-// ***************************************************************************
-
-#ifndef BAMFTP_P_H
-#define BAMFTP_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamFtp : public IBamIODevice {
-
-    // ctor & dtor
-    public:
-        BamFtp(const std::string& url);
-        ~BamFtp(void);
-
-    // IBamIODevice implementation
-    public:
-        void Close(void);
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        size_t Read(char* data, const unsigned int numBytes);
-        bool Seek(const int64_t& position);
-        int64_t Tell(void) const;
-        size_t Write(const char* data, const unsigned int numBytes);
-
-    // internal methods
-    private:
-
-    // data members
-    private:
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMFTP_P_H
diff --git a/src/api/internal/BamHeader_p.cpp b/src/api/internal/BamHeader_p.cpp

deleted file mode 100644 (file)

index dc734bf..0000000
--- a/src/api/internal/BamHeader_p.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-// ***************************************************************************
-// BamHeader_p.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for handling BAM headers.
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/BamConstants.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdlib>
-#include <cstring>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// ------------------------
-
-static inline
-bool isValidMagicNumber(const char* buffer) {
-    return ( strncmp(buffer, Constants::BAM_HEADER_MAGIC,
-                     Constants::BAM_HEADER_MAGIC_LENGTH) == 0 );
-}
-
-// --------------------------
-// BamHeader implementation
-// --------------------------
-
-// ctor
-BamHeader::BamHeader(void) { }
-
-// dtor
-BamHeader::~BamHeader(void) { }
-
-// reads magic number from BGZF stream, returns true if valid
-void BamHeader::CheckMagicNumber(BgzfStream* stream) {
-
-    // try to read magic number
-    char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
-    const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
-    if ( numBytesRead != (int)Constants::BAM_HEADER_MAGIC_LENGTH )
-        throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
-
-    // validate magic number
-    if ( !isValidMagicNumber(buffer) )
-        throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
-}
-
-// clear SamHeader data
-void BamHeader::Clear(void) {
-    m_header.Clear();
-}
-
-// return true if SamHeader data is valid
-bool BamHeader::IsValid(void) const {
-    return m_header.IsValid();
-}
-
-// load BAM header ('magic number' and SAM header text) from BGZF stream
-void BamHeader::Load(BgzfStream* stream) {
-
-    // read & check magic number
-    CheckMagicNumber(stream);
-
-    // read header (length, then actual text)
-    uint32_t length(0);
-    ReadHeaderLength(stream, length);
-    ReadHeaderText(stream, length);
-}
-
-// reads SAM header text length from BGZF stream, stores it in @length
-void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
-
-    // read BAM header text length
-    char buffer[sizeof(uint32_t)];
-    const size_t numBytesRead = stream->Read(buffer, sizeof(uint32_t));
-    if ( numBytesRead != sizeof(uint32_t) )
-        throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
-
-    // convert char buffer to length
-    length = BamTools::UnpackUnsignedInt(buffer);
-    if ( BamTools::SystemIsBigEndian() )
-        BamTools::SwapEndian_32(length);
-}
-
-// reads SAM header text from BGZF stream, stores in SamHeader object
-void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
-
-    // read header text
-    char* headerText = (char*)calloc(length + 1, 1);
-    const size_t bytesRead = stream->Read(headerText, length);
-
-    // if error reading, clean up buffer & throw
-    if ( bytesRead != length ) {
-        free(headerText);
-        throw BamException("BamHeader::ReadHeaderText", "could not read header text");
-    }
-
-    // otherwise, text was read OK
-    // store & cleanup
-    m_header.SetHeaderText( (string)((const char*)headerText) );
-    free(headerText);
-}
-
-// returns *copy* of SamHeader data object
-SamHeader BamHeader::ToSamHeader(void) const {
-    return m_header;
-}
-
-// returns SAM-formatted string of header data
-string BamHeader::ToString(void) const {
-    return m_header.ToString();
-}
diff --git a/src/api/internal/BamHeader_p.h b/src/api/internal/BamHeader_p.h

deleted file mode 100644 (file)

index 499ad96..0000000
--- a/src/api/internal/BamHeader_p.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// ***************************************************************************
-// BamHeader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for handling BAM headers.
-// ***************************************************************************
-
-#ifndef BAMHEADER_P_H
-#define BAMHEADER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamHeader.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BgzfStream;
-
-class BamHeader {
-
-    // ctor & dtor
-    public:
-        BamHeader(void);
-        ~BamHeader(void);
-
-    // BamHeader interface
-    public:
-        // clear SamHeader data
-        void Clear(void);
-        // return true if SamHeader data is valid
-        bool IsValid(void) const;
-        // load BAM header ('magic number' and SAM header text) from BGZF stream
-        // returns true if all OK
-        void Load(BgzfStream* stream);
-        // returns (editable) copy of SamHeader data object
-        SamHeader ToSamHeader(void) const;
-        // returns SAM-formatted string of header data
-        std::string ToString(void) const;
-
-    // internal methods
-    private:
-        // reads magic number from BGZF stream
-        void CheckMagicNumber(BgzfStream* stream);
-        // reads SAM header length from BGZF stream, stores it in @length
-        void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
-        // reads SAM header text from BGZF stream, stores in SamHeader object
-        void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
-
-    // data members
-    private:
-        SamHeader m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMHEADER_P_H
diff --git a/src/api/internal/BamHttp_p.cpp b/src/api/internal/BamHttp_p.cpp

deleted file mode 100644 (file)

index 83b4c3b..0000000
--- a/src/api/internal/BamHttp_p.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamHttp_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on HTTP server
-// ***************************************************************************
-
-#include "api/internal/BamHttp_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-using namespace std;
-
-BamHttp::BamHttp(const string& url)
-    : IBamIODevice()
-{
-    BT_ASSERT_X(false, "BamHttp not yet implemented");
-}
-
-BamHttp::~BamHttp(void) { }
-
-void BamHttp::Close(void) {
-    return ;
-}
-
-bool BamHttp::IsRandomAccess(void) const {
-    return true;
-}
-
-bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
-    (void) mode;
-    return true;
-}
-
-size_t BamHttp::Read(char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
-
-bool BamHttp::Seek(const int64_t& position) {
-    (void)position;
-    return true;
-}
-
-int64_t BamHttp::Tell(void) const {
-    return -1;
-}
-
-size_t BamHttp::Write(const char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
diff --git a/src/api/internal/BamHttp_p.h b/src/api/internal/BamHttp_p.h

deleted file mode 100644 (file)

index 38e94b7..0000000
--- a/src/api/internal/BamHttp_p.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamHttp_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on HTTP server
-// ***************************************************************************
-
-#ifndef BAMHTTP_P_H
-#define BAMHTTP_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamHttp : public IBamIODevice {
-
-    // ctor & dtor
-    public:
-        BamHttp(const std::string& url);
-        ~BamHttp(void);
-
-    // IBamIODevice implementation
-    public:
-        void Close(void);
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        size_t Read(char* data, const unsigned int numBytes);
-        bool Seek(const int64_t& position);
-        int64_t Tell(void) const;
-        size_t Write(const char* data, const unsigned int numBytes);
-
-    // internal methods
-    private:
-
-    // data members
-    private:
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMHTTP_P_H
diff --git a/src/api/internal/BamIndexFactory_p.cpp b/src/api/internal/BamIndexFactory_p.cpp

deleted file mode 100644 (file)

index 2cf871f..0000000
--- a/src/api/internal/BamIndexFactory_p.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-// ***************************************************************************
-// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides interface for generating BamIndex implementations
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/internal/BamIndexFactory_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-using namespace std;
-
-// generates index filename from BAM filename (depending on requested type)
-// if type is unknown, returns empty string
-const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
-                                                  const BamIndex::IndexType& type)
-{
-    switch ( type ) {
-        case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
-        case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
-        default :
-            return string();
-    }
-}
-
-// creates a new BamIndex object, depending on extension of @indexFilename
-BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) {
-
-    // if file doesn't exist, return null index
-    if ( !BamTools::FileExists(indexFilename) )
-        return 0;
-
-    // get file extension from index filename, including dot (".EXT")
-    // if can't get file extension, return null index
-    const string extension = FileExtension(indexFilename);
-    if ( extension.empty() )
-        return 0;
-
-    // create index based on extension
-    if      ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader);
-    else if ( extension == BamToolsIndex::Extension()    ) return new BamToolsIndex(reader);
-    else
-        return 0;
-}
-
-// creates a new BamIndex, object of requested @type
-BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
-                                             BamReaderPrivate* reader)
-{
-    switch ( type ) {
-        case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader);
-        case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader);
-        default :
-            return 0;
-    }
-}
-
-// retrieves file extension (including '.')
-const string BamIndexFactory::FileExtension(const string& filename) {
-
-    // if filename cannot contain valid path + extension, return empty string
-    if ( filename.empty() || filename.length() <= 4 )
-        return string();
-
-    // look for last dot in filename
-    const size_t lastDotPosition = filename.find_last_of('.');
-
-    // if none found, return empty string
-    if ( lastDotPosition == string::npos )
-        return string();
-
-    // return substring from last dot position
-    return filename.substr(lastDotPosition);
-}
-
-// returns name of existing index file that corresponds to @bamFilename
-// will defer to @preferredType if possible, if not will attempt to load any supported type
-// returns empty string if not found
-const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
-                                                const BamIndex::IndexType& preferredType)
-{
-    // skip if BAM filename provided is empty
-    if ( bamFilename.empty() )
-        return string();
-
-    // try to find index of preferred type first
-    // return index filename if found
-    string indexFilename = CreateIndexFilename(bamFilename, preferredType);
-    if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
-        return indexFilename;
-
-    // couldn't find preferred type, try the other supported types
-    // return index filename if found
-    if ( preferredType != BamIndex::STANDARD ) {
-        indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD);
-        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
-            return indexFilename;
-    }
-    if ( preferredType != BamIndex::BAMTOOLS ) {
-        indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS);
-        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
-            return indexFilename;
-    }
-
-    // otherwise couldn't find any index matching this filename
-    return string();
-}
diff --git a/src/api/internal/BamIndexFactory_p.h b/src/api/internal/BamIndexFactory_p.h

deleted file mode 100644 (file)

index 4e4f1cf..0000000
--- a/src/api/internal/BamIndexFactory_p.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// ***************************************************************************
-// BamIndexFactory_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides interface for generating BamIndex implementations
-// ***************************************************************************
-
-#ifndef BAMINDEX_FACTORY_P_H
-#define BAMINDEX_FACTORY_P_H
-
-#include "api/BamIndex.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamIndexFactory {
-
-    // static interface methods
-    public:
-        // creates a new BamIndex object, depending on extension of @indexFilename
-        static BamIndex* CreateIndexFromFilename(const std::string& indexFilename,
-                                                 BamReaderPrivate* reader);
-        // creates a new BamIndex object, of requested @type
-        static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type,
-                                           BamReaderPrivate* reader);
-        // returns name of existing index file that corresponds to @bamFilename
-        // will defer to @preferredType if possible
-        // if @preferredType not found, will attempt to load any supported index type
-        // returns empty string if no index file (of any type) is found
-        static const std::string FindIndexFilename(const std::string& bamFilename,
-                                                   const BamIndex::IndexType& preferredType);
-
-    // internal methods
-    public:
-        // generates index filename from BAM filename (depending on requested type)
-        // if type is unknown, returns empty string
-        static const std::string CreateIndexFilename(const std::string& bamFilename,
-                                                     const BamIndex::IndexType& type);
-        // retrieves file extension (including '.')
-        static const std::string FileExtension(const std::string& filename);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMINDEX_FACTORY_P_H
diff --git a/src/api/internal/BamMultiMerger_p.h b/src/api/internal/BamMultiMerger_p.h

deleted file mode 100644 (file)

index 3000097..0000000
--- a/src/api/internal/BamMultiMerger_p.h
+++ /dev/null
@@ -1,266 +0,0 @@
-// ***************************************************************************
-// BamMultiMerger_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides merging functionality for BamMultiReader.  At this point, supports
-// sorting results by (refId, position) or by read name.
-// ***************************************************************************
-
-#ifndef BAMMULTIMERGER_P_H
-#define BAMMULTIMERGER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAlignment.h"
-#include "api/BamReader.h"
-#include "api/algorithms/Sort.h"
-#include <deque>
-#include <functional>
-#include <set>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-struct MergeItem {
-
-    // data members
-    BamReader*    Reader;
-    BamAlignment* Alignment;
-
-    // ctors & dtor
-    MergeItem(BamReader* reader = 0,
-              BamAlignment* alignment = 0)
-        : Reader(reader)
-        , Alignment(alignment)
-    { }
-
-    MergeItem(const MergeItem& other)
-        : Reader(other.Reader)
-        , Alignment(other.Alignment)
-    { }
-
-    ~MergeItem(void) { }
-};
-
-template<typename Compare>
-struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool> {
-
-    public:
-        MergeItemSorter(const Compare& comp = Compare())
-            : m_comp(comp)
-        { }
-
-        bool operator()(const MergeItem& lhs, const MergeItem& rhs) {
-            const BamAlignment& l = *lhs.Alignment;
-            const BamAlignment& r = *rhs.Alignment;
-            return m_comp(l,r);
-        }
-
-    private:
-        Compare m_comp;
-};
-
-// pure ABC so we can just work polymorphically with any specific merger implementation
-class IMultiMerger {
-
-    public:
-        IMultiMerger(void) { }
-        virtual ~IMultiMerger(void) { }
-    public:
-        virtual void Add(MergeItem item) =0;
-        virtual void Clear(void) =0;
-        virtual const MergeItem& First(void) const =0;
-        virtual bool IsEmpty(void) const =0;
-        virtual void Remove(BamReader* reader) =0;
-        virtual int Size(void) const =0;
-        virtual MergeItem TakeFirst(void) =0;
-};
-
-// general merger
-template<typename Compare>
-class MultiMerger : public IMultiMerger {
-
-    public:
-        typedef Compare                      CompareType;
-        typedef MergeItemSorter<CompareType> MergeType;
-
-    public:
-        explicit MultiMerger(const Compare& comp = Compare())
-            : IMultiMerger()
-            , m_data( MergeType(comp) )
-        { }
-        ~MultiMerger(void) { }
-
-    public:
-        void Add(MergeItem item);
-        void Clear(void);
-        const MergeItem& First(void) const;
-        bool IsEmpty(void) const;
-        void Remove(BamReader* reader);
-        int Size(void) const;
-        MergeItem TakeFirst(void);
-
-    private:
-        typedef MergeItem                              ValueType;
-        typedef std::multiset<ValueType, MergeType>    ContainerType;
-        typedef typename ContainerType::iterator       DataIterator;
-        typedef typename ContainerType::const_iterator DataConstIterator;
-        ContainerType m_data;
-};
-
-template <typename Compare>
-inline void MultiMerger<Compare>::Add(MergeItem item) {
-
-    // N.B. - any future custom Compare types must define this method
-    //        see algorithms/Sort.h
-
-    if ( CompareType::UsesCharData() )
-        item.Alignment->BuildCharData();
-    m_data.insert(item);
-}
-
-template <typename Compare>
-inline void MultiMerger<Compare>::Clear(void) {
-    m_data.clear();
-}
-
-template <typename Compare>
-inline const MergeItem& MultiMerger<Compare>::First(void) const {
-    const ValueType& entry = (*m_data.begin());
-    return entry;
-}
-
-template <typename Compare>
-inline bool MultiMerger<Compare>::IsEmpty(void) const {
-    return m_data.empty();
-}
-template <typename Compare>
-inline void MultiMerger<Compare>::Remove(BamReader* reader) {
-
-    if ( reader == 0 ) return;
-    const std::string& filenameToRemove = reader->GetFilename();
-
-    // iterate over readers in cache
-    DataIterator dataIter = m_data.begin();
-    DataIterator dataEnd  = m_data.end();
-    for ( ; dataIter != dataEnd; ++dataIter ) {
-        const MergeItem& item = (*dataIter);
-        const BamReader* itemReader = item.Reader;
-        if ( itemReader == 0 ) continue;
-
-        // remove iterator on match
-        if ( itemReader->GetFilename() == filenameToRemove ) {
-            m_data.erase(dataIter);
-            return;
-        }
-    }
-}
-template <typename Compare>
-inline int MultiMerger<Compare>::Size(void) const {
-    return m_data.size();
-}
-
-template <typename Compare>
-inline MergeItem MultiMerger<Compare>::TakeFirst(void) {
-    DataIterator firstIter = m_data.begin();
-    MergeItem    firstItem = (*firstIter);
-    m_data.erase(firstIter);
-    return firstItem;
-}
-
-// unsorted "merger"
-template<>
-class MultiMerger<Algorithms::Sort::Unsorted> : public IMultiMerger {
-
-    public:
-        explicit MultiMerger(const Algorithms::Sort::Unsorted& comp = Algorithms::Sort::Unsorted())
-            : IMultiMerger()
-        { }
-        ~MultiMerger(void) { }
-
-    public:
-        void Add(MergeItem item);
-        void Clear(void);
-        const MergeItem& First(void) const;
-        bool IsEmpty(void) const;
-        void Remove(BamReader* reader);
-        int Size(void) const;
-        MergeItem TakeFirst(void);
-
-    private:
-        typedef MergeItem                     ValueType;
-        typedef std::deque<ValueType>         ContainerType;
-        typedef ContainerType::iterator       DataIterator;
-        typedef ContainerType::const_iterator DataConstIterator;
-        ContainerType m_data;
-};
-
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Add(MergeItem item) {
-    m_data.push_back(item);
-}
-
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Clear(void) {
-    m_data.clear();
-}
-
-inline
-const MergeItem& MultiMerger<Algorithms::Sort::Unsorted>::First(void) const {
-    return m_data.front();
-}
-
-inline
-bool MultiMerger<Algorithms::Sort::Unsorted>::IsEmpty(void) const {
-    return m_data.empty();
-}
-
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Remove(BamReader* reader) {
-
-    if ( reader == 0 ) return;
-    const std::string filenameToRemove = reader->GetFilename();
-
-    // iterate over readers in cache
-    DataIterator dataIter = m_data.begin();
-    DataIterator dataEnd  = m_data.end();
-    for ( ; dataIter != dataEnd; ++dataIter ) {
-        const MergeItem& item = (*dataIter);
-        const BamReader* itemReader = item.Reader;
-        if ( itemReader == 0 ) continue;
-
-        // remove iterator on match
-        if ( itemReader->GetFilename() == filenameToRemove ) {
-            m_data.erase(dataIter);
-            return;
-        }
-    }
-}
-
-inline
-int MultiMerger<Algorithms::Sort::Unsorted>::Size(void) const {
-    return m_data.size();
-}
-
-inline
-MergeItem MultiMerger<Algorithms::Sort::Unsorted>::TakeFirst(void) {
-    MergeItem firstItem = m_data.front();
-    m_data.pop_front();
-    return firstItem;
-}
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMMULTIMERGER_P_H
diff --git a/src/api/internal/BamMultiReader_p.cpp b/src/api/internal/BamMultiReader_p.cpp

deleted file mode 100644 (file)

index 55ae615..0000000
--- a/src/api/internal/BamMultiReader_p.cpp
+++ /dev/null
@@ -1,799 +0,0 @@
-// ***************************************************************************
-// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Functionality for simultaneously reading multiple BAM files
-// *************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/BamMultiReader.h"
-#include "api/SamConstants.h"
-#include "api/algorithms/Sort.h"
-#include "api/internal/BamMultiReader_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <sstream>
-using namespace std;
-
-// ctor
-BamMultiReaderPrivate::BamMultiReaderPrivate(void)
-    : m_alignmentCache(0)
-{ }
-
-// dtor
-BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {
-    Close();
-}
-
-// close all BAM files
-bool BamMultiReaderPrivate::Close(void) {
-
-    m_errorString.clear();
-
-    if ( CloseFiles(Filenames()) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("error encountered while closing all files: \n\t") + currentError;
-        SetErrorString("BamMultiReader::Close", message);
-        return false;
-    }
-}
-
-// close requested BAM file
-bool BamMultiReaderPrivate::CloseFile(const string& filename) {
-
-    m_errorString.clear();
-
-    vector<string> filenames(1, filename);
-    if ( CloseFiles(filenames) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("error while closing file: ") + filename + "\n" + currentError;
-        SetErrorString("BamMultiReader::CloseFile", message);
-        return false;
-    }
-}
-
-// close requested BAM files
-bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over filenames
-    vector<string>::const_iterator filesIter = filenames.begin();
-    vector<string>::const_iterator filesEnd  = filenames.end();
-    for ( ; filesIter != filesEnd; ++filesIter ) {
-        const string& filename = (*filesIter);
-        if ( filename.empty() ) continue;
-
-        // iterate over readers
-        vector<MergeItem>::iterator readerIter = m_readers.begin();
-        vector<MergeItem>::iterator readerEnd  = m_readers.end();
-        for ( ; readerIter != readerEnd; ++readerIter ) {
-            MergeItem& item = (*readerIter);
-            BamReader* reader = item.Reader;
-            if ( reader == 0 ) continue;
-
-            // if reader matches requested filename
-            if ( reader->GetFilename() == filename ) {
-
-                // remove reader's entry from alignment cache
-                m_alignmentCache->Remove(reader);
-
-                // clean up reader & its alignment
-                if ( !reader->Close() ) {
-                    m_errorString.append(1, '\t');
-                    m_errorString.append(reader->GetErrorString());
-                    m_errorString.append(1, '\n');
-                    errorsEncountered = true;
-                }
-                delete reader;
-                reader = 0;
-
-                // delete reader's alignment entry
-                BamAlignment* alignment = item.Alignment;
-                delete alignment;
-                alignment = 0;
-
-                // remove reader from reader list
-                m_readers.erase(readerIter);
-
-                // on match, just go on to next filename
-                // (no need to keep looking and item iterator is invalid now anyway)
-                break;
-            }
-        }
-    }
-
-    // make sure alignment cache is cleaned up if all readers closed
-    if ( m_readers.empty() && m_alignmentCache ) {
-        m_alignmentCache->Clear();
-        delete m_alignmentCache;
-        m_alignmentCache = 0;
-    }
-
-    // return whether all readers closed OK
-    return !errorsEncountered;
-}
-
-// creates index files for BAM files that don't have them
-bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over readers
-    vector<MergeItem>::iterator itemIter = m_readers.begin();
-    vector<MergeItem>::iterator itemEnd  = m_readers.end();
-    for ( ; itemIter != itemEnd; ++itemIter ) {
-        MergeItem& item = (*itemIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // if reader doesn't have an index, create one
-        if ( !reader->HasIndex() ) {
-            if ( !reader->CreateIndex(type) ) {
-                m_errorString.append(1, '\t');
-                m_errorString.append(reader->GetErrorString());
-                m_errorString.append(1, '\n');
-                errorsEncountered = true;
-            }
-        }
-    }
-
-    // check for errors encountered before returning success/fail
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("error while creating index files: ") + "\n" + currentError;
-        SetErrorString("BamMultiReader::CreateIndexes", message);
-        return false;
-    } else
-        return true;
-}
-
-IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
-
-    // fetch SamHeader
-    SamHeader header = GetHeader();
-
-    // if BAM files are sorted by position
-    if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
-        return new MultiMerger<Algorithms::Sort::ByPosition>();
-
-    // if BAM files are sorted by read name
-    if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
-        return new MultiMerger<Algorithms::Sort::ByName>();
-
-    // otherwise "unknown" or "unsorted", use unsorted merger and just read in
-    return new MultiMerger<Algorithms::Sort::Unsorted>();
-}
-
-const vector<string> BamMultiReaderPrivate::Filenames(void) const {
-
-    // init filename container
-    vector<string> filenames;
-    filenames.reserve( m_readers.size() );
-
-    // iterate over readers
-    vector<MergeItem>::const_iterator itemIter = m_readers.begin();
-    vector<MergeItem>::const_iterator itemEnd  = m_readers.end();
-    for ( ; itemIter != itemEnd; ++itemIter ) {
-        const MergeItem& item = (*itemIter);
-        const BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // store filename if not empty
-        const string& filename = reader->GetFilename();
-        if ( !filename.empty() )
-            filenames.push_back(filename);
-    }
-
-    // return result
-    return filenames;
-}
-
-string BamMultiReaderPrivate::GetErrorString(void) const {
-    return m_errorString;
-}
-
-SamHeader BamMultiReaderPrivate::GetHeader(void) const {
-    const string& text = GetHeaderText();
-    return SamHeader(text);
-}
-
-// makes a virtual, unified header for all the bam files in the multireader
-string BamMultiReaderPrivate::GetHeaderText(void) const {
-
-    // N.B. - right now, simply copies all header data from first BAM,
-    //        and then appends RG's from other BAM files
-    // TODO: make this more intelligent wrt other header lines/fields
-
-    // if no readers open
-    const size_t numReaders = m_readers.size();
-    if ( numReaders == 0 ) return string();
-
-    // retrieve first reader's header
-    const MergeItem& firstItem = m_readers.front();
-    const BamReader* reader = firstItem.Reader;
-    if ( reader == 0 ) return string();
-    SamHeader mergedHeader = reader->GetHeader();
-
-    // iterate over any remaining readers (skipping the first)
-    for ( size_t i = 1; i < numReaders; ++i ) {
-        const MergeItem& item = m_readers.at(i);
-        const BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // retrieve current reader's header
-        const SamHeader currentHeader = reader->GetHeader();
-
-        // append current reader's RG entries to merged header
-        // N.B. - SamReadGroupDictionary handles duplicate-checking
-        mergedHeader.ReadGroups.Add(currentHeader.ReadGroups);
-
-        // TODO: merge anything else??
-    }
-
-    // return stringified header
-    return mergedHeader.ToString();
-}
-
-// get next alignment among all files
-bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
-    return PopNextCachedAlignment(al, true);
-}
-
-// get next alignment among all files without parsing character data from alignments
-bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
-    return PopNextCachedAlignment(al, false);
-}
-
-// ---------------------------------------------------------------------------------------
-//
-// NB: The following GetReferenceX() functions assume that we have identical
-// references for all BAM files.  We enforce this by invoking the
-// ValidateReaders() method to verify that our reference data is the same
-// across all files on Open - so we will not encounter a situation in which
-// there is a mismatch and we are still live.
-//
-// ---------------------------------------------------------------------------------------
-
-// returns the number of reference sequences
-int BamMultiReaderPrivate::GetReferenceCount(void) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() ) return 0;
-
-    // return reference count from first reader
-    const MergeItem& item = m_readers.front();
-    const BamReader* reader = item.Reader;
-    if ( reader == 0 ) return 0;
-    else
-        return reader->GetReferenceCount();
-}
-
-// returns vector of reference objects
-const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() ) return RefVector();
-
-    // return reference data from first BamReader
-    const MergeItem& item = m_readers.front();
-    const BamReader* reader = item.Reader;
-    if ( reader == 0 ) return RefVector();
-    else
-        return reader->GetReferenceData();
-}
-
-// returns refID from reference name
-int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() ) return -1;
-
-    // return reference ID from first BamReader
-    const MergeItem& item = m_readers.front();
-    const BamReader* reader = item.Reader;
-    if ( reader == 0 ) return -1;
-    else
-        return reader->GetReferenceID(refName);
-}
-// ---------------------------------------------------------------------------------------
-
-// returns true if all readers have index data available
-// this is useful to indicate whether Jump() or SetRegion() are possible
-bool BamMultiReaderPrivate::HasIndexes(void) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() )
-        return false;
-
-    bool result = true;
-
-    // iterate over readers
-    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
-    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        const MergeItem& item = (*readerIter);
-        const BamReader* reader = item.Reader;
-        if ( reader  == 0 ) continue;
-
-        // see if current reader has index data
-        result &= reader->HasIndex();
-    }
-
-    return result;
-}
-
-// returns true if multireader has open readers
-bool BamMultiReaderPrivate::HasOpenReaders(void) {
-
-    // iterate over readers
-    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
-    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        const MergeItem& item = (*readerIter);
-        const BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // return true whenever an open reader is found
-        if ( reader->IsOpen() ) return true;
-    }
-
-    // no readers open
-    return false;
-}
-
-// performs random-access jump using (refID, position) as a left-bound
-bool BamMultiReaderPrivate::Jump(int refID, int position) {
-
-    // NB: While it may make sense to track readers in which we can
-    // successfully Jump, in practice a failure of Jump means "no
-    // alignments here."  It makes sense to simply accept the failure,
-    // UpdateAlignments(), and continue.
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // jump in each BamReader to position of interest
-        reader->Jump(refID, position);
-    }
-
-    // returns status of cache update
-    return UpdateAlignmentCache();
-}
-
-// locate (& load) index files for BAM readers that don't already have one loaded
-bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // if reader has no index, try to locate one
-        if ( !reader->HasIndex() ) {
-            if ( !reader->LocateIndex(preferredType) ) {
-                m_errorString.append(1, '\t');
-                m_errorString.append(reader->GetErrorString());
-                m_errorString.append(1, '\n');
-                errorsEncountered = true;
-            }
-        }
-    }
-
-    // check for errors encountered before returning success/fail
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("error while locating index files: ") + "\n" + currentError;
-        SetErrorString("BamMultiReader::LocatingIndexes", message);
-        return false;
-    } else
-        return true;
-}
-
-// opens BAM files
-bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
-
-    m_errorString.clear();
-
-    // put all current readers back at beginning (refreshes alignment cache)
-    if ( !Rewind() ) {
-        const string currentError = m_errorString;
-        const string message = string("unable to rewind existing readers: \n\t") + currentError;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // iterate over filenames
-    bool errorsEncountered = false;
-    vector<string>::const_iterator filenameIter = filenames.begin();
-    vector<string>::const_iterator filenameEnd  = filenames.end();
-    for ( ; filenameIter != filenameEnd; ++filenameIter ) {
-        const string& filename = (*filenameIter);
-        if ( filename.empty() ) continue;
-
-        // attempt to open BamReader
-        BamReader* reader = new BamReader;
-        const bool readerOpened = reader->Open(filename);
-
-        // if opened OK, store it
-        if ( readerOpened )
-            m_readers.push_back( MergeItem(reader, new BamAlignment) );
-
-        // otherwise store error & clean up invalid reader
-        else {
-            m_errorString.append(1, '\t');
-            m_errorString += string("unable to open file: ") + filename;
-            m_errorString.append(1, '\n');
-            errorsEncountered = true;
-
-            delete reader;
-            reader = 0;
-        }
-    }
-
-    // check for errors while opening
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("unable to open all files: \t\n") + currentError;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // check for BAM file consistency
-    if ( !ValidateReaders() ) {
-        const string currentError = m_errorString;
-        const string message = string("unable to open inconsistent files: \t\n") + currentError;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // update alignment cache
-    return UpdateAlignmentCache();
-}
-
-bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
-    vector<string> filenames(1, filename);
-    if ( Open(filenames) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("could not open file: ") + filename + "\n\t" + currentError;
-        SetErrorString("BamMultiReader::OpenFile", message);
-        return false;
-    }
-}
-
-bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
-
-    // TODO: This needs to be cleaner - should not assume same order.
-    //       And either way, shouldn't start at first reader.  Should start at
-    //       first reader without an index?
-
-    // make sure same number of index filenames as readers
-    if ( m_readers.size() != indexFilenames.size() ) {
-        const string message("size of index file list does not match current BAM file count");
-        SetErrorString("BamMultiReader::OpenIndexes", message);
-        return false;
-    }
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over BamReaders
-    vector<string>::const_iterator indexFilenameIter = indexFilenames.begin();
-    vector<string>::const_iterator indexFilenameEnd  = indexFilenames.end();
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-
-        // open index filename on reader
-        if ( reader ) {
-            const string& indexFilename = (*indexFilenameIter);
-            if ( !reader->OpenIndex(indexFilename) ) {
-                m_errorString.append(1, '\t');
-                m_errorString += reader->GetErrorString();
-                m_errorString.append(1, '\n');
-                errorsEncountered = true;
-            }
-        }
-
-        // increment filename iterator, skip if no more index files to open
-        if ( ++indexFilenameIter == indexFilenameEnd )
-            break;
-    }
-
-    // return success/fail
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("could not open all index files: \n\t") + currentError;
-        SetErrorString("BamMultiReader::OpenIndexes", message);
-        return false;
-    } else
-        return true;
-}
-
-bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
-
-    // skip if no alignments available
-    if ( m_alignmentCache == 0 || m_alignmentCache->IsEmpty() )
-        return false;
-
-    // pop next merge item entry from cache
-    MergeItem item = m_alignmentCache->TakeFirst();
-    BamReader* reader = item.Reader;
-    BamAlignment* alignment = item.Alignment;
-    if ( reader == 0 || alignment == 0 )
-        return false;
-
-    // set char data if requested
-    if ( needCharData ) {
-        alignment->BuildCharData();
-        alignment->Filename = reader->GetFilename();
-    }
-
-    // store cached alignment into destination parameter (by copy)
-    al = *alignment;
-
-    // load next alignment from reader & store in cache
-    SaveNextAlignment(reader, alignment);
-    return true;
-}
-
-// returns BAM file pointers to beginning of alignment data & resets alignment cache
-bool BamMultiReaderPrivate::Rewind(void) {
-
-    // skip if no readers open
-    if ( m_readers.empty() )
-        return true;
-
-    // attempt to rewind files
-    if ( !RewindReaders() ) {
-        const string currentError = m_errorString;
-        const string message = string("could not rewind readers: \n\t") + currentError;
-        SetErrorString("BamMultiReader::Rewind", message);
-        return false;
-    }
-
-    // return status of cache update
-    return UpdateAlignmentCache();
-}
-
-// returns BAM file pointers to beginning of alignment data
-bool BamMultiReaderPrivate::RewindReaders(void) {
-
-    m_errorString.clear();
-    bool errorsEncountered = false;
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // attempt rewind on BamReader
-        if ( !reader->Rewind() ) {
-            m_errorString.append(1, '\t');
-            m_errorString.append( reader->GetErrorString() );
-            m_errorString.append(1, '\n');
-            errorsEncountered = true;
-        }
-    }
-
-    return !errorsEncountered;
-}
-
-void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
-
-    // if can read alignment from reader, store in cache
-    //
-    // N.B. - lazy building of alignment's char data - populated only:
-    //        automatically by alignment cache to maintain its sorting OR
-    //        on demand from client call to future call to GetNextAlignment()
-
-    if ( reader->GetNextAlignmentCore(*alignment) )
-        m_alignmentCache->Add( MergeItem(reader, alignment) );
-}
-
-void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
-    static const string SEPARATOR = ": ";
-    m_errorString = where + SEPARATOR + what;
-}
-
-bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
-
-    // NB: While it may make sense to track readers in which we can
-    // successfully SetRegion, In practice a failure of SetRegion means "no
-    // alignments here."  It makes sense to simply accept the failure,
-    // UpdateAlignments(), and continue.
-
-    // iterate over alignments
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // set region of interest
-        reader->SetRegion(region);
-    }
-
-    // return status of cache update
-    return UpdateAlignmentCache();
-}
-
-// updates our alignment cache
-bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
-
-    // create alignment cache if not created yet
-    if ( m_alignmentCache == 0 ) {
-        m_alignmentCache = CreateAlignmentCache();
-        if ( m_alignmentCache == 0 ) {
-            SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
-            return false;
-        }
-    }
-
-    // clear any prior cache data
-    m_alignmentCache->Clear();
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        BamAlignment* alignment = item.Alignment;
-        if ( reader == 0 || alignment == 0 ) continue;
-
-        // save next alignment from each reader in cache
-        SaveNextAlignment(reader, alignment);
-    }
-
-    // if we get here, ok
-    return true;
-}
-
-// ValidateReaders checks that all the readers point to BAM files representing
-// alignments against the same set of reference sequences, and that the
-// sequences are identically ordered.  If these checks fail the operation of
-// the multireader is undefined, so we force program exit.
-bool BamMultiReaderPrivate::ValidateReaders(void) const {
-
-    m_errorString.clear();
-
-    // skip if 0 or 1 readers opened
-    if ( m_readers.empty() || (m_readers.size() == 1) )
-        return true;
-
-    // retrieve first reader
-    const MergeItem& firstItem = m_readers.front();
-    const BamReader* firstReader = firstItem.Reader;
-    if ( firstReader == 0 ) return false;
-
-    // retrieve first reader's header data
-    const SamHeader& firstReaderHeader = firstReader->GetHeader();
-    const string& firstReaderSortOrder = firstReaderHeader.SortOrder;
-
-    // retrieve first reader's reference data
-    const RefVector& firstReaderRefData = firstReader->GetReferenceData();
-    const int firstReaderRefCount = firstReader->GetReferenceCount();
-    const int firstReaderRefSize = firstReaderRefData.size();
-
-    // iterate over all readers
-    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
-    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        const MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // get current reader's header data
-        const SamHeader& currentReaderHeader = reader->GetHeader();
-        const string& currentReaderSortOrder = currentReaderHeader.SortOrder;
-
-        // check compatible sort order
-        if ( currentReaderSortOrder != firstReaderSortOrder ) {
-            const string message = string("mismatched sort order in ") + reader->GetFilename() +
-                                   ", expected " + firstReaderSortOrder +
-                                   ", but found " + currentReaderSortOrder;
-            SetErrorString("BamMultiReader::ValidateReaders", message);
-            return false;
-        }
-
-        // get current reader's reference data
-        const RefVector currentReaderRefData = reader->GetReferenceData();
-        const int currentReaderRefCount = reader->GetReferenceCount();
-        const int currentReaderRefSize  = currentReaderRefData.size();
-
-        // init reference data iterators
-        RefVector::const_iterator firstRefIter   = firstReaderRefData.begin();
-        RefVector::const_iterator firstRefEnd    = firstReaderRefData.end();
-        RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
-
-        // compare reference counts from BamReader ( & container size, in case of BR error)
-        if ( (currentReaderRefCount != firstReaderRefCount) ||
-             (firstReaderRefSize    != currentReaderRefSize) )
-        {
-            stringstream s("");
-            s << "mismatched reference count in " << reader->GetFilename()
-              << ", expected " << firstReaderRefCount
-              << ", but found " << currentReaderRefCount;
-            SetErrorString("BamMultiReader::ValidateReaders", s.str());
-            return false;
-        }
-
-        // this will be ok; we just checked above that we have identically-sized sets of references
-        // here we simply check if they are all, in fact, equal in content
-        while ( firstRefIter != firstRefEnd ) {
-            const RefData& firstRef   = (*firstRefIter);
-            const RefData& currentRef = (*currentRefIter);
-
-            // compare reference name & length
-            if ( (firstRef.RefName   != currentRef.RefName) ||
-                 (firstRef.RefLength != currentRef.RefLength) )
-            {
-                stringstream s("");
-                s << "mismatched references found in" << reader->GetFilename()
-                  << "expected: " << endl;
-
-                // print first reader's reference data
-                RefVector::const_iterator refIter = firstReaderRefData.begin();
-                RefVector::const_iterator refEnd  = firstReaderRefData.end();
-                for ( ; refIter != refEnd; ++refIter ) {
-                    const RefData& entry = (*refIter);
-                    stringstream s("");
-                    s << entry.RefName << " " << endl;
-                }
-
-                s << "but found: " << endl;
-
-                // print current reader's reference data
-                refIter = currentReaderRefData.begin();
-                refEnd  = currentReaderRefData.end();
-                for ( ; refIter != refEnd; ++refIter ) {
-                    const RefData& entry = (*refIter);
-                    s << entry.RefName << " " << entry.RefLength << endl;
-                }
-
-                SetErrorString("BamMultiReader::ValidateReaders", s.str());
-                return false;
-            }
-
-            // update iterators
-            ++firstRefIter;
-            ++currentRefIter;
-        }
-    }
-
-    // if we get here, everything checks out
-    return true;
-}
diff --git a/src/api/internal/BamMultiReader_p.h b/src/api/internal/BamMultiReader_p.h

deleted file mode 100644 (file)

index 9d001f5..0000000
--- a/src/api/internal/BamMultiReader_p.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// ***************************************************************************
-// BamMultiReader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Functionality for simultaneously reading multiple BAM files
-// *************************************************************************
-
-#ifndef BAMMULTIREADER_P_H
-#define BAMMULTIREADER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamHeader.h"
-#include "api/BamMultiReader.h"
-#include "api/internal/BamMultiMerger_p.h"
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-class BamMultiReaderPrivate {
-
-    // typedefs
-    public:
-        typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
-
-    // constructor / destructor
-    public:
-        BamMultiReaderPrivate(void);
-        ~BamMultiReaderPrivate(void);
-
-    // public interface
-    public:
-
-        // file operations
-        bool Close(void);
-        bool CloseFile(const std::string& filename);
-        const std::vector<std::string> Filenames(void) const;
-        bool Jump(int refID, int position = 0);
-        bool Open(const std::vector<std::string>& filenames);
-        bool OpenFile(const std::string& filename);
-        bool Rewind(void);
-        bool SetRegion(const BamRegion& region);
-
-        // access alignment data
-        bool GetNextAlignment(BamAlignment& al);
-        bool GetNextAlignmentCore(BamAlignment& al);
-        bool HasOpenReaders(void);
-
-        // access auxiliary data
-        SamHeader GetHeader(void) const;
-        std::string GetHeaderText(void) const;
-        int GetReferenceCount(void) const;
-        const BamTools::RefVector GetReferenceData(void) const;
-        int GetReferenceID(const std::string& refName) const;
-
-        // BAM index operations
-        bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);
-        bool HasIndexes(void) const;
-        bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
-        bool OpenIndexes(const std::vector<std::string>& indexFilenames);
-
-        // error handling
-        std::string GetErrorString(void) const;
-
-    // 'internal' methods
-    public:
-
-        bool CloseFiles(const std::vector<std::string>& filenames);
-        IMultiMerger* CreateAlignmentCache(void) const;
-        bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
-        bool RewindReaders(void);
-        void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
-        void SetErrorString(const std::string& where, const std::string& what) const; //
-        bool UpdateAlignmentCache(void);
-        bool ValidateReaders(void) const;
-
-    // data members
-    public:
-        std::vector<MergeItem> m_readers;
-        IMultiMerger* m_alignmentCache;
-        mutable std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMMULTIREADER_P_H
diff --git a/src/api/internal/BamPipe_p.cpp b/src/api/internal/BamPipe_p.cpp

deleted file mode 100644 (file)

index e13ad7c..0000000
--- a/src/api/internal/BamPipe_p.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// ***************************************************************************
-// BamPipe_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM pipe-specific IO behavior
-// ***************************************************************************
-
-#include "api/internal/BamPipe_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <iostream>
-using namespace std;
-
-BamPipe::BamPipe(void) : ILocalIODevice() { }
-
-BamPipe::~BamPipe(void) { }
-
-bool BamPipe::IsRandomAccess(void) const {
-    return false;
-}
-
-bool BamPipe::Open(const IBamIODevice::OpenMode mode) {
-
-    // make sure we're starting with a fresh pipe
-    Close();
-
-    // open stdin/stdout depending on requested openmode
-    if ( mode == IBamIODevice::ReadOnly )
-        m_stream = freopen(0, "rb", stdin);
-    else if ( mode == IBamIODevice::WriteOnly )
-        m_stream = freopen(0, "wb", stdout);
-    else {
-        SetErrorString("BamPipe::Open", "unknown open mode requested");
-        return false;
-    }
-
-    // check that we obtained a valid FILE*
-    if ( m_stream == 0 ) {
-        const string message_base = string("could not open handle on ");
-        const string message = message_base + ( (mode == IBamIODevice::ReadOnly) ? "stdin" : "stdout" );
-        SetErrorString("BamPipe::Open", message);
-        return false;
-    }
-
-    // store current IO mode & return success
-    m_mode = mode;
-    return true;
-}
-
-bool BamPipe::Seek(const int64_t& ) {
-    SetErrorString("BamPipe::Seek", "random access not allowed in FIFO pipe");
-    return false;
-}
diff --git a/src/api/internal/BamPipe_p.h b/src/api/internal/BamPipe_p.h

deleted file mode 100644 (file)

index 8996766..0000000
--- a/src/api/internal/BamPipe_p.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// ***************************************************************************
-// BamPipe_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM pipe-specific IO behavior
-// ***************************************************************************
-
-#ifndef BAMPIPE_P_H
-#define BAMPIPE_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/internal/ILocalIODevice_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamPipe : public ILocalIODevice {
-
-    // ctor & dtor
-    public:
-        BamPipe(void);
-        ~BamPipe(void);
-
-    // IBamIODevice implementation
-    public:
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        bool Seek(const int64_t& position);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMPIPE_P_H
diff --git a/src/api/internal/BamRandomAccessController_p.cpp b/src/api/internal/BamRandomAccessController_p.cpp

deleted file mode 100644 (file)

index c223ed7..0000000
--- a/src/api/internal/BamRandomAccessController_p.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-// ***************************************************************************
-// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Manages random access operations in a BAM file
-// **************************************************************************
-
-#include "api/BamIndex.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamIndexFactory_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cassert>
-#include <sstream>
-using namespace std;
-
-BamRandomAccessController::BamRandomAccessController(void)
-    : m_index(0)
-    , m_hasAlignmentsInRegion(true)
-{ }
-
-BamRandomAccessController::~BamRandomAccessController(void) {
-    Close();
-}
-
-void BamRandomAccessController::AdjustRegion(const int& referenceCount) {
-
-    // skip if no index available
-    if ( m_index == 0 )
-        return;
-
-    // see if any references in region have alignments
-    m_hasAlignmentsInRegion = false;
-    int currentId = m_region.LeftRefID;
-    const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? m_region.RightRefID : referenceCount - 1 );
-    while ( currentId <= rightBoundRefId ) {
-        m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);
-        if ( m_hasAlignmentsInRegion ) break;
-        ++currentId;
-    }
-
-    // if no data found on any reference in region
-    if ( !m_hasAlignmentsInRegion )
-        return;
-
-    // if left bound of desired region had no data, use first reference that had data
-    // otherwise, leave requested region as-is
-    if ( currentId != m_region.LeftRefID ) {
-        m_region.LeftRefID = currentId;
-        m_region.LeftPosition = 0;
-    }
-}
-
-// returns alignments' "RegionState": { Before|Overlaps|After } current region
-BamRandomAccessController::RegionState
-BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {
-
-    // if region has no left bound at all
-    if ( !m_region.isLeftBoundSpecified() )
-        return OverlapsRegion;
-
-    // handle unmapped reads - return AFTER region to halt processing
-    if ( alignment.RefID == -1 )
-        return AfterRegion;
-
-    // if alignment is on any reference before left bound reference
-    if ( alignment.RefID < m_region.LeftRefID )
-        return BeforeRegion;
-
-    // if alignment is on left bound reference
-    else if ( alignment.RefID == m_region.LeftRefID ) {
-
-        // if alignment starts at or after left bound position
-        if ( alignment.Position >= m_region.LeftPosition) {
-
-            if ( m_region.isRightBoundSpecified() &&             // right bound is specified AND
-                 m_region.LeftRefID == m_region.RightRefID &&    // left & right bounds on same reference AND
-                 alignment.Position >= m_region.RightPosition )  // alignment starts on or after right bound position
-                return AfterRegion;
-
-            // otherwise, alignment overlaps region
-            else return OverlapsRegion;
-        }
-
-        // alignment starts before left bound position
-        else {
-
-            // if alignment overlaps left bound position
-            if ( alignment.GetEndPosition() > m_region.LeftPosition )
-                return OverlapsRegion;
-            else
-                return BeforeRegion;
-        }
-    }
-
-    // otherwise alignment is on a reference after left bound reference
-    else {
-
-        // if region has a right bound
-        if ( m_region.isRightBoundSpecified() ) {
-
-            // alignment is on any reference between boundaries
-            if ( alignment.RefID < m_region.RightRefID )
-                return OverlapsRegion;
-
-            // alignment is on any reference after right boundary
-            else if ( alignment.RefID > m_region.RightRefID )
-                return AfterRegion;
-
-            // alignment is on right bound reference
-            else {
-
-                // if alignment starts before right bound position
-                if ( alignment.Position < m_region.RightPosition )
-                    return OverlapsRegion;
-                else
-                    return AfterRegion;
-            }
-        }
-
-        // otherwise, alignment starts after left bound and there is no right bound given
-        else return OverlapsRegion;
-    }
-}
-
-void BamRandomAccessController::Close(void) {
-    ClearIndex();
-    ClearRegion();
-}
-
-void BamRandomAccessController::ClearIndex(void) {
-    if ( m_index ) {
-        delete m_index;
-        m_index = 0;
-    }
-}
-
-void BamRandomAccessController::ClearRegion(void) {
-    m_region.clear();
-    m_hasAlignmentsInRegion = true;
-}
-
-bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
-                                            const BamIndex::IndexType& type)
-{
-    // skip if reader is invalid
-    assert(reader);
-    if ( !reader->IsOpen() ) {
-        SetErrorString("BamRandomAccessController::CreateIndex",
-                       "cannot create index for unopened reader");
-        return false;
-    }
-
-    // create new index of requested type
-    BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
-    if ( newIndex == 0 ) {
-        stringstream s("");
-        s << "could not create index of type: " << type;
-        SetErrorString("BamRandomAccessController::CreateIndex", s.str());
-        return false;
-    }
-
-    // attempt to build index from current BamReader file
-    if ( !newIndex->Create() ) {
-        const string indexError = newIndex->GetErrorString();
-        const string message = "could not create index: \n\t" + indexError;
-        SetErrorString("BamRandomAccessController::CreateIndex", message);
-        return false;
-    }
-
-    // save new index & return success
-    SetIndex(newIndex);
-    return true;
-}
-
-string BamRandomAccessController::GetErrorString(void) const {
-    return m_errorString;
-}
-
-bool BamRandomAccessController::HasIndex(void) const {
-    return ( m_index != 0 );
-}
-
-bool BamRandomAccessController::HasRegion(void) const  {
-    return ( !m_region.isNull() );
-}
-
-bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {
-    return m_index->HasAlignments(refId);
-}
-
-bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,
-                                            const BamIndex::IndexType& preferredType)
-{
-    // look up index filename, deferring to preferredType if possible
-    assert(reader);
-    const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
-
-    // if no index file found (of any type)
-    if ( indexFilename.empty() ) {
-        const string message = string("could not find index file for:") + reader->Filename();
-        SetErrorString("BamRandomAccessController::LocateIndex", message);
-        return false;
-    }
-
-    // otherwise open & use index file that was found
-    return OpenIndex(indexFilename, reader);
-}
-
-bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {
-
-    // attempt create new index of type based on filename
-    BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
-    if ( index == 0 ) {
-        const string message = string("could not open index file: ") + indexFilename;
-        SetErrorString("BamRandomAccessController::OpenIndex", message);
-        return false;
-    }
-
-    // attempt to load data from index file
-    if ( !index->Load(indexFilename) ) {
-        const string indexError = index->GetErrorString();
-        const string message = string("could not load index data from file: ") + indexFilename +
-                               "\n\t" + indexError;
-        SetErrorString("BamRandomAccessController::OpenIndex", message);
-        return false;
-    }
-
-    // save new index & return success
-    SetIndex(index);
-    return true;
-}
-
-bool BamRandomAccessController::RegionHasAlignments(void) const {
-    return m_hasAlignmentsInRegion;
-}
-
-void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
-    m_errorString = where + ": " + what;
-}
-
-void BamRandomAccessController::SetIndex(BamIndex* index) {
-    if ( m_index )
-        ClearIndex();
-    m_index = index;
-}
-
-bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
-
-    // store region
-    m_region = region;
-
-    // cannot jump when no index is available
-    if ( !HasIndex() ) {
-        SetErrorString("BamRandomAccessController", "cannot jump if no index data available");
-        return false;
-    }
-
-    // adjust region as necessary to reflect where data actually begins
-    AdjustRegion(referenceCount);
-
-    // if no data present, return true
-    //   * Not an error, but future attempts to access alignments in this region will not return data
-    //     Returning true is useful in a BamMultiReader setting where some BAM files may
-    //     lack alignments in regions where other files still have data available.
-    if ( !m_hasAlignmentsInRegion )
-        return true;
-
-    // return success/failure of jump to specified region,
-    //
-    //  * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag
-    //    This covers 'corner case' where a region is requested that lies beyond the last
-    //    alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
-    //    will not return data. BamMultiReader will still be able to successfully pull alignments
-    //    from a region from other files even if this one has no data.
-    if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
-        const string indexError = m_index->GetErrorString();
-        const string message = string("could not set region\n\t") + indexError;
-        SetErrorString("BamRandomAccessController::OpenIndex", message);
-        return false;
-    }
-    else
-        return true;
-}
diff --git a/src/api/internal/BamRandomAccessController_p.h b/src/api/internal/BamRandomAccessController_p.h

deleted file mode 100644 (file)

index 9262a61..0000000
--- a/src/api/internal/BamRandomAccessController_p.h
+++ /dev/null
@@ -1,94 +0,0 @@
-// ***************************************************************************
-// BamRandomAccessController_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Manages random access operations in a BAM file
-// ***************************************************************************
-
-#ifndef BAMRACONTROLLER_P_H
-#define BAMRACONTROLLER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-
-namespace BamTools {
-
-class BamAlignment;
-
-namespace Internal {
-
-class BamReaderPrivate;
-
-class BamRandomAccessController {
-
-    // enums
-    public: enum RegionState { BeforeRegion = 0
-                             , OverlapsRegion
-                             , AfterRegion
-                             };
-
-    // ctor & dtor
-    public:
-        BamRandomAccessController(void);
-        ~BamRandomAccessController(void);
-
-    // BamRandomAccessController interface
-    public:
-
-        // index methods
-        void ClearIndex(void);
-        bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);
-        bool HasIndex(void) const;
-        bool IndexHasAlignmentsForReference(const int& refId);
-        bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType);
-        bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader);
-        void SetIndex(BamIndex* index);
-
-        // region methods
-        void ClearRegion(void);
-        bool HasRegion(void) const;
-        RegionState AlignmentState(const BamAlignment& alignment) const;
-        bool RegionHasAlignments(void) const;
-        bool SetRegion(const BamRegion& region, const int& referenceCount);
-
-        // general methods
-        void Close(void);
-        std::string GetErrorString(void) const;
-
-    // internal methods
-    private:
-        // adjusts requested region if necessary (depending on where data actually begins)
-        void AdjustRegion(const int& referenceCount);
-        // error-string handling
-        void SetErrorString(const std::string& where, const std::string& what);
-
-    // data members
-    private:
-
-        // index data
-        BamIndex* m_index;  // owns the index, not a copy - responsible for deleting
-
-        // region data
-        BamRegion m_region;
-        bool m_hasAlignmentsInRegion;
-
-        // general data
-        std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMRACONTROLLER_P_H
diff --git a/src/api/internal/BamReader_p.cpp b/src/api/internal/BamReader_p.cpp

deleted file mode 100644 (file)

index a344358..0000000
--- a/src/api/internal/BamReader_p.cpp
+++ /dev/null
@@ -1,465 +0,0 @@
-// ***************************************************************************
-// BamReader_p.cpp (c) 2009 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 November 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-#include "api/BamConstants.h"
-#include "api/BamReader.h"
-#include "api/IBamIODevice.h"
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-#include <iterator>
-#include <vector>
-using namespace std;
-
-// constructor
-BamReaderPrivate::BamReaderPrivate(BamReader* parent)
-    : m_alignmentsBeginOffset(0)
-    , m_parent(parent)
-{
-    m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// destructor
-BamReaderPrivate::~BamReaderPrivate(void) {
-    Close();
-}
-
-// closes the BAM file
-bool BamReaderPrivate::Close(void) {
-
-    // clear BAM metadata
-    m_references.clear();
-    m_header.Clear();
-
-    // clear filename
-    m_filename.clear();
-
-    // close random access controller
-    m_randomAccessController.Close();
-
-    // if stream is open, attempt close
-    if ( IsOpen() ) {
-        try {
-            m_stream.Close();
-        } catch ( BamException& e ) {
-            const string streamError = e.what();
-            const string message = string("encountered error closing BAM file: \n\t") + streamError;
-            SetErrorString("BamReader::Close", message);
-            return false;
-        }
-    }
-
-    // return success
-    return true;
-}
-
-// creates an index file of requested type on current BAM file
-bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
-
-    // skip if BAM file not open
-    if ( !IsOpen() ) {
-        SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
-        return false;
-    }
-
-    // attempt to create index
-    if ( m_randomAccessController.CreateIndex(this, type) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not create index: \n\t") + bracError;
-        SetErrorString("BamReader::CreateIndex", message);
-        return false;
-    }
-}
-
-// return path & filename of current BAM file
-const string BamReaderPrivate::Filename(void) const {
-    return m_filename;
-}
-
-string BamReaderPrivate::GetErrorString(void) const {
-    return m_errorString;
-}
-
-// return header data as std::string
-string BamReaderPrivate::GetHeaderText(void) const {
-    return m_header.ToString();
-}
-
-// return header data as SamHeader object
-SamHeader BamReaderPrivate::GetSamHeader(void) const {
-    return m_header.ToSamHeader();
-}
-
-// get next alignment (with character data fully parsed)
-bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
-
-    // if valid alignment found
-    if ( GetNextAlignmentCore(alignment) ) {
-
-        // store alignment's "source" filename
-        alignment.Filename = m_filename;
-
-        // return success/failure of parsing char data
-        if ( alignment.BuildCharData() )
-            return true;
-        else {
-            const string alError = alignment.GetErrorString();
-            const string message = string("could not populate alignment data: \n\t") + alError;
-            SetErrorString("BamReader::GetNextAlignment", message);
-            return false;
-        }
-    }
-
-    // no valid alignment found
-    return false;
-}
-
-// retrieves next available alignment core data (returns success/fail)
-// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)
-//    these can be accessed, if necessary, from the supportData
-// useful for operations requiring ONLY positional or other alignment-related information
-bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
-
-    // skip if stream not opened
-    if ( !m_stream.IsOpen() )
-        return false;
-
-    try {
-
-        // skip if region is set but has no alignments
-        if ( m_randomAccessController.HasRegion() &&
-             !m_randomAccessController.RegionHasAlignments() )
-        {
-            return false;
-        }
-
-        // if can't read next alignment
-        if ( !LoadNextAlignment(alignment) )
-            return false;
-
-        // check alignment's region-overlap state
-        BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
-
-        // if alignment starts after region, no need to keep reading
-        if ( state == BamRandomAccessController::AfterRegion )
-            return false;
-
-        // read until overlap is found
-        while ( state != BamRandomAccessController::OverlapsRegion ) {
-
-            // if can't read next alignment
-            if ( !LoadNextAlignment(alignment) )
-                return false;
-
-            // check alignment's region-overlap state
-            state = m_randomAccessController.AlignmentState(alignment);
-
-            // if alignment starts after region, no need to keep reading
-            if ( state == BamRandomAccessController::AfterRegion )
-                return false;
-        }
-
-        // if we get here, we found the next 'valid' alignment
-        // (e.g. overlaps current region if one was set, simply the next alignment if not)
-        alignment.SupportData.HasCoreOnly = true;
-        return true;
-
-    } catch ( BamException& e ) {
-        const string streamError = e.what();
-        const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
-        SetErrorString("BamReader::GetNextAlignmentCore", message);
-        return false;
-    }
-}
-
-int BamReaderPrivate::GetReferenceCount(void) const {
-    return m_references.size();
-}
-
-const RefVector& BamReaderPrivate::GetReferenceData(void) const {
-    return m_references;
-}
-
-// returns RefID for given RefName (returns References.size() if not found)
-int BamReaderPrivate::GetReferenceID(const string& refName) const {
-
-    // retrieve names from reference data
-    vector<string> refNames;
-    RefVector::const_iterator refIter = m_references.begin();
-    RefVector::const_iterator refEnd  = m_references.end();
-    for ( ; refIter != refEnd; ++refIter)
-        refNames.push_back( (*refIter).RefName );
-
-    // return 'index-of' refName (or -1 if not found)
-    int index = distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));
-    if ( index == (int)m_references.size() ) return -1;
-    else return index;
-}
-
-bool BamReaderPrivate::HasIndex(void) const {
-    return m_randomAccessController.HasIndex();
-}
-
-bool BamReaderPrivate::IsOpen(void) const {
-    return m_stream.IsOpen();
-}
-
-// load BAM header data
-void BamReaderPrivate::LoadHeaderData(void) {
-    m_header.Load(&m_stream);
-}
-
-// populates BamAlignment with alignment data under file pointer, returns success/fail
-bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
-
-    // read in the 'block length' value, make sure it's not zero
-    char buffer[sizeof(uint32_t)];
-    m_stream.Read(buffer, sizeof(uint32_t));
-    alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
-    if ( alignment.SupportData.BlockLength == 0 )
-        return false;
-
-    // read in core alignment data, make sure the right size of data was read
-    char x[Constants::BAM_CORE_SIZE];
-    if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
-        return false;
-
-    // swap core endian-ness if necessary
-    if ( m_isBigEndian ) {
-        for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
-            BamTools::SwapEndian_32p(&x[i]);
-    }
-
-    // set BamAlignment 'core' and 'support' data
-    alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
-    alignment.Position = BamTools::UnpackSignedInt(&x[4]);
-
-    unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
-    alignment.Bin        = tempValue >> 16;
-    alignment.MapQuality = tempValue >> 8 & 0xff;
-    alignment.SupportData.QueryNameLength = tempValue & 0xff;
-
-    tempValue = BamTools::UnpackUnsignedInt(&x[12]);
-    alignment.AlignmentFlag = tempValue >> 16;
-    alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
-
-    alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
-    alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
-    alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
-    alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);
-
-    // set BamAlignment length
-    alignment.Length = alignment.SupportData.QuerySequenceLength;
-
-    // read in character data - make sure proper data size was read
-    bool readCharDataOK = false;
-    const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
-    RaiiBuffer allCharData(dataLength);
-
-    if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) {
-
-        // store 'allCharData' in supportData structure
-        alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
-
-        // set success flag
-        readCharDataOK = true;
-
-        // save CIGAR ops
-        // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
-        // even when GetNextAlignmentCore() is called
-        const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
-        uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
-        CigarOp op;
-        alignment.CigarData.clear();
-        alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
-        for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
-
-            // swap endian-ness if necessary
-            if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);
-
-            // build CigarOp structure
-            op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
-            op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];
-
-            // save CigarOp
-            alignment.CigarData.push_back(op);
-        }
-    }
-
-    // return success/failure
-    return readCharDataOK;
-}
-
-// loads reference data from BAM file
-bool BamReaderPrivate::LoadReferenceData(void) {
-
-    // get number of reference sequences
-    char buffer[sizeof(uint32_t)];
-    m_stream.Read(buffer, sizeof(uint32_t));
-    uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
-    m_references.reserve((int)numberRefSeqs);
-
-    // iterate over all references in header
-    for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {
-
-        // get length of reference name
-        m_stream.Read(buffer, sizeof(uint32_t));
-        uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
-        RaiiBuffer refName(refNameLength);
-
-        // get reference name and reference sequence length
-        m_stream.Read(refName.Buffer, refNameLength);
-        m_stream.Read(buffer, sizeof(int32_t));
-        int32_t refLength = BamTools::UnpackSignedInt(buffer);
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
-
-        // store data for reference
-        RefData aReference;
-        aReference.RefName   = (string)((const char*)refName.Buffer);
-        aReference.RefLength = refLength;
-        m_references.push_back(aReference);
-    }
-
-    // return success
-    return true;
-}
-
-bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
-
-    if ( m_randomAccessController.LocateIndex(this, preferredType) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not locate index: \n\t") + bracError;
-        SetErrorString("BamReader::LocateIndex", message);
-        return false;
-    }
-}
-
-// opens BAM file (and index)
-bool BamReaderPrivate::Open(const string& filename) {
-
-    try {
-
-        // make sure we're starting with fresh state
-        Close();
-
-        // open BgzfStream
-        m_stream.Open(filename, IBamIODevice::ReadOnly);
-
-        // load BAM metadata
-        LoadHeaderData();
-        LoadReferenceData();
-
-        // store filename & offset of first alignment
-        m_filename = filename;
-        m_alignmentsBeginOffset = m_stream.Tell();
-
-        // return success
-        return true;
-
-    } catch ( BamException& e ) {
-        const string error = e.what();
-        const string message = string("could not open file: ") + filename +
-                               "\n\t" + error;
-        SetErrorString("BamReader::Open", message);
-        return false;
-    }
-}
-
-bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
-
-    if ( m_randomAccessController.OpenIndex(indexFilename, this) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not open index: \n\t") + bracError;
-        SetErrorString("BamReader::OpenIndex", message);
-        return false;
-    }
-}
-
-// returns BAM file pointer to beginning of alignment data
-bool BamReaderPrivate::Rewind(void) {
-
-    // reset region
-    m_randomAccessController.ClearRegion();
-
-    // return status of seeking back to first alignment
-    if ( Seek(m_alignmentsBeginOffset) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("could not rewind: \n\t") + currentError;
-        SetErrorString("BamReader::Rewind", message);
-        return false;
-    }
-}
-
-bool BamReaderPrivate::Seek(const int64_t& position) {
-
-    // skip if BAM file not open
-    if ( !IsOpen() ) {
-        SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
-        return false;
-    }
-
-    try {
-        m_stream.Seek(position);
-        return true;
-    }
-    catch ( BamException& e ) {
-        const string streamError = e.what();
-        const string message = string("could not seek in BAM file: \n\t") + streamError;
-        SetErrorString("BamReader::Seek", message);
-        return false;
-    }
-}
-
-void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
-    static const string SEPARATOR = ": ";
-    m_errorString = where + SEPARATOR + what;
-}
-
-void BamReaderPrivate::SetIndex(BamIndex* index) {
-    m_randomAccessController.SetIndex(index);
-}
-
-// sets current region & attempts to jump to it
-// returns success/failure
-bool BamReaderPrivate::SetRegion(const BamRegion& region) {
-
-    if ( m_randomAccessController.SetRegion(region, m_references.size()) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not set region: \n\t") + bracError;
-        SetErrorString("BamReader::SetRegion", message);
-        return false;
-    }
-}
-
-int64_t BamReaderPrivate::Tell(void) const {
-    return m_stream.Tell();
-}
diff --git a/src/api/internal/BamReader_p.h b/src/api/internal/BamReader_p.h

deleted file mode 100644 (file)

index f928273..0000000
--- a/src/api/internal/BamReader_p.h
+++ /dev/null
@@ -1,118 +0,0 @@
-// ***************************************************************************
-// BamReader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-#ifndef BAMREADER_P_H
-#define BAMREADER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAlignment.h"
-#include "api/BamIndex.h"
-#include "api/BamReader.h"
-#include "api/SamHeader.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BgzfStream_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamReaderPrivate {
-
-    // ctor & dtor
-    public:
-        BamReaderPrivate(BamReader* parent);
-        ~BamReaderPrivate(void);
-
-    // BamReader interface
-    public:
-
-        // file operations
-        bool Close(void);
-        const std::string Filename(void) const;
-        bool IsOpen(void) const;
-        bool Open(const std::string& filename);
-        bool Rewind(void);
-        bool SetRegion(const BamRegion& region);
-
-        // access alignment data
-        bool GetNextAlignment(BamAlignment& alignment);
-        bool GetNextAlignmentCore(BamAlignment& alignment);
-
-        // access auxiliary data
-        std::string GetHeaderText(void) const;
-        SamHeader GetSamHeader(void) const;
-        int GetReferenceCount(void) const;
-        const RefVector& GetReferenceData(void) const;
-        int GetReferenceID(const std::string& refName) const;
-
-        // index operations
-        bool CreateIndex(const BamIndex::IndexType& type);
-        bool HasIndex(void) const;
-        bool LocateIndex(const BamIndex::IndexType& preferredType);
-        bool OpenIndex(const std::string& indexFilename);
-        void SetIndex(BamIndex* index);
-
-        // error handling
-        std::string GetErrorString(void) const;
-        void SetErrorString(const std::string& where, const std::string& what);
-
-    // internal methods, but available as a BamReaderPrivate 'interface'
-    //
-    // these methods should only be used by BamTools::Internal classes
-    // (currently only used by the BamIndex subclasses)
-    public:
-        // retrieves header text from BAM file
-        void LoadHeaderData(void);
-        // retrieves BAM alignment under file pointer
-        // (does no overlap checking or character data parsing)
-        bool LoadNextAlignment(BamAlignment& alignment);
-        // builds reference data structure from BAM file
-        bool LoadReferenceData(void);
-        // seek reader to file position
-        bool Seek(const int64_t& position);
-        // return reader's file position
-        int64_t Tell(void) const;
-
-    // data members
-    public:
-
-        // general BAM file data
-        int64_t     m_alignmentsBeginOffset;
-        std::string m_filename;
-        RefVector   m_references;
-
-        // system data
-        bool m_isBigEndian;
-
-        // parent BamReader
-        BamReader* m_parent;
-
-        // BamReaderPrivate components
-        BamHeader m_header;
-        BamRandomAccessController m_randomAccessController;
-        BgzfStream m_stream;
-
-        // error handling
-        std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMREADER_P_H
diff --git a/src/api/internal/BamStandardIndex_p.cpp b/src/api/internal/BamStandardIndex_p.cpp

deleted file mode 100644 (file)

index 8b23f74..0000000
--- a/src/api/internal/BamStandardIndex_p.cpp
+++ /dev/null
@@ -1,954 +0,0 @@
-// ***************************************************************************
-// BamStandardIndex.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the standardized BAM index format (".bai")
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-#include <sstream>
-using namespace std;
-
-// -----------------------------------
-// static BamStandardIndex constants
-// -----------------------------------
-
-const int BamStandardIndex::MAX_BIN               = 37450;  // =(8^6-1)/7+1
-const int BamStandardIndex::BAM_LIDX_SHIFT        = 14;
-const string BamStandardIndex::BAI_EXTENSION      = ".bai";
-const char* const BamStandardIndex::BAI_MAGIC     = "BAI\1";
-const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
-const int BamStandardIndex::SIZEOF_BINCORE        = sizeof(uint32_t) + sizeof(int32_t);
-const int BamStandardIndex::SIZEOF_LINEAROFFSET   = sizeof(uint64_t);
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
-    : IndexStream(0)
-    , Buffer(0)
-{ }
-
-BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
-
-    if ( IndexStream ) {
-        fclose(IndexStream);
-        IndexStream = 0;
-    }
-
-    if ( Buffer ) {
-        delete[] Buffer;
-        Buffer = 0;
-    }
-}
-
-// ---------------------------------
-// BamStandardIndex implementation
-// ---------------------------------
-
-// ctor
-BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
-    : BamIndex(reader)
-    , m_bufferLength(0)
-{
-     m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// dtor
-BamStandardIndex::~BamStandardIndex(void) {
-    CloseFile();
-}
-
-void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
-
-    // retrieve references from reader
-    const RefVector& references = m_reader->GetReferenceData();
-
-    // LeftPosition cannot be greater than or equal to reference length
-    if ( region.LeftPosition >= references.at(region.LeftRefID).RefLength )
-        throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
-
-    // set region 'begin'
-    begin = (unsigned int)region.LeftPosition;
-
-    // if right bound specified AND left&right bounds are on same reference
-    // OK to use right bound position as region 'end'
-    if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )
-        end = (unsigned int)region.RightPosition;
-
-    // otherwise, set region 'end' to last reference base
-    else end = (unsigned int)references.at(region.LeftRefID).RefLength;
-}
-
-// [begin, end)
-void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
-                                              const uint32_t& end,
-                                              set<uint16_t>& candidateBins)
-{
-    // initialize list, bin '0' is always a valid bin
-    candidateBins.insert(0);
-
-    // get rest of bins that contain this region
-    unsigned int k;
-    for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { candidateBins.insert(k); }
-    for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { candidateBins.insert(k); }
-    for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { candidateBins.insert(k); }
-    for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { candidateBins.insert(k); }
-    for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
-}
-
-void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
-                                                 const uint64_t& minOffset,
-                                                 set<uint16_t>& candidateBins,
-                                                 vector<int64_t>& offsets)
-{
-    // seek to first bin
-    Seek(refSummary.FirstBinFilePosition, SEEK_SET);
-
-    // iterate over reference bins
-    uint32_t binId;
-    int32_t numAlignmentChunks;
-    set<uint16_t>::iterator candidateBinIter;
-    for ( int i = 0; i < refSummary.NumBins; ++i ) {
-
-        // read bin contents (if successful, alignment chunks are now in m_buffer)
-        ReadBinIntoBuffer(binId, numAlignmentChunks);
-
-        // see if bin is a 'candidate bin'
-        candidateBinIter = candidateBins.find(binId);
-
-        // if not, move on to next bin
-        if ( candidateBinIter == candidateBins.end() )
-            continue;
-
-        // otherwise, check bin's contents against for overlap
-        else {
-
-            size_t offset = 0;
-            uint64_t chunkStart;
-            uint64_t chunkStop;
-
-            // iterate over alignment chunks
-            for ( int j = 0; j < numAlignmentChunks; ++j ) {
-
-                // read chunk start & stop from buffer
-                memcpy((char*)&chunkStart, Resources.Buffer+offset, sizeof(uint64_t));
-                offset += sizeof(uint64_t);
-                memcpy((char*)&chunkStop, Resources.Buffer+offset, sizeof(uint64_t));
-                offset += sizeof(uint64_t);
-
-                // swap endian-ness if necessary
-                if ( m_isBigEndian ) {
-                    SwapEndian_64(chunkStart);
-                    SwapEndian_64(chunkStop);
-                }
-
-                // store alignment chunk's start offset
-                // if its stop offset is larger than our 'minOffset'
-                if ( chunkStop >= minOffset )
-                    offsets.push_back(chunkStart);
-            }
-
-            // 'pop' bin ID from candidate bins set
-            candidateBins.erase(candidateBinIter);
-
-            // quit if no more candidates
-            if ( candidateBins.empty() )
-                break;
-        }
-    }
-}
-
-uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
-                                              const uint32_t& begin)
-{
-    // if no linear offsets exist, return 0
-    if ( refSummary.NumLinearOffsets == 0 )
-        return 0;
-
-    // if 'begin' starts beyond last linear offset, use the last linear offset as minimum
-    // else use the offset corresponding to the requested start position
-    const int shiftedBegin = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
-    if ( shiftedBegin >= refSummary.NumLinearOffsets )
-        return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
-    else
-        return LookupLinearOffset( refSummary, shiftedBegin );
-}
-
-void BamStandardIndex::CheckBufferSize(char*& buffer,
-                                       unsigned int& bufferLength,
-                                       const unsigned int& requestedBytes)
-{
-    try {
-        if ( requestedBytes > bufferLength ) {
-            bufferLength = requestedBytes + 10;
-            delete[] buffer;
-            buffer = new char[bufferLength];
-        }
-    } catch ( std::bad_alloc&  ) {
-        stringstream s("");
-        s << "out of memory when allocating " << requestedBytes << " bytes";
-        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
-    }
-}
-
-void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
-                                       unsigned int& bufferLength,
-                                       const unsigned int& requestedBytes)
-{
-    try {
-        if ( requestedBytes > bufferLength ) {
-            bufferLength = requestedBytes + 10;
-            delete[] buffer;
-            buffer = new unsigned char[bufferLength];
-        }
-    } catch ( std::bad_alloc& ) {
-        stringstream s("");
-        s << "out of memory when allocating " << requestedBytes << " bytes";
-        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
-    }
-}
-
-void BamStandardIndex::CheckMagicNumber(void) {
-
-    // check 'magic number' to see if file is BAI index
-    char magic[4];
-    const size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
-    if ( elementsRead != 4 )
-        throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
-
-    // compare to expected value
-    if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 )
-        throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
-}
-
-void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
-    refEntry.ID = -1;
-    refEntry.Bins.clear();
-    refEntry.LinearOffsets.clear();
-}
-
-void BamStandardIndex::CloseFile(void) {
-
-    // close file stream
-    if ( IsFileOpen() ) {
-        fclose(Resources.IndexStream);
-        Resources.IndexStream = 0;
-    }
-
-    // clear index file summary data
-    m_indexFileSummary.clear();
-
-    // clean up I/O buffer
-    delete[] Resources.Buffer;
-    Resources.Buffer = 0;
-    m_bufferLength = 0;
-}
-
-// builds index from associated BAM file & writes out to index file
-bool BamStandardIndex::Create(void) {
-
-    // skip if BamReader is invalid or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamStandardIndex::Create", message);
-        return false;
-    }
-
-    try {
-
-        // open new index file (read & write)
-        string indexFilename = m_reader->Filename() + Extension();
-        OpenFile(indexFilename, "w+b");
-
-        // initialize BaiFileSummary with number of references
-        const int& numReferences = m_reader->GetReferenceCount();
-        ReserveForSummary(numReferences);
-
-        // initialize output file
-        WriteHeader();
-
-        // set up bin, ID, offset, & coordinate markers
-        const uint32_t defaultValue = 0xffffffffu;
-        uint32_t currentBin    = defaultValue;
-        uint32_t lastBin       = defaultValue;
-        int32_t  currentRefID  = defaultValue;
-        int32_t  lastRefID     = defaultValue;
-        uint64_t currentOffset = (uint64_t)m_reader->Tell();
-        uint64_t lastOffset    = currentOffset;
-        int32_t  lastPosition  = defaultValue;
-
-        // iterate through alignments in BAM file
-        BamAlignment al;
-        BaiReferenceEntry refEntry;
-        while ( m_reader->LoadNextAlignment(al) ) {
-
-            // changed to new reference
-            if ( lastRefID != al.RefID ) {
-
-                // if not first reference, save previous reference data
-                if ( lastRefID != (int32_t)defaultValue ) {
-
-                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-                    WriteReferenceEntry(refEntry);
-                    ClearReferenceEntry(refEntry);
-
-                    // write any empty references between (but *NOT* including) lastRefID & al.RefID
-                    for ( int i = lastRefID+1; i < al.RefID; ++i ) {
-                        BaiReferenceEntry emptyEntry(i);
-                        WriteReferenceEntry(emptyEntry);
-                    }
-
-                    // update bin markers
-                    currentOffset = lastOffset;
-                    currentBin    = al.Bin;
-                    lastBin       = al.Bin;
-                    currentRefID  = al.RefID;
-                }
-
-                // otherwise, this is first pass
-                // be sure to write any empty references up to (but *NOT* including) current RefID
-                else {
-                    for ( int i = 0; i < al.RefID; ++i ) {
-                        BaiReferenceEntry emptyEntry(i);
-                        WriteReferenceEntry(emptyEntry);
-                    }
-                }
-
-                // update reference markers
-                refEntry.ID = al.RefID;
-                lastRefID   = al.RefID;
-                lastBin     = defaultValue;
-            }
-
-            // if lastPosition greater than current alignment position - file not sorted properly
-            else if ( lastPosition > al.Position ) {
-                stringstream s("");
-                s << "BAM file is not properly sorted by coordinate" << endl
-                  << "Current alignment position: " << al.Position
-                  << " < previous alignment position: " << lastPosition
-                  << " on reference ID: " << al.RefID << endl;
-                SetErrorString("BamStandardIndex::Create", s.str());
-                return false;
-            }
-
-            // if alignment's ref ID is valid & its bin is not a 'leaf'
-            if ( (al.RefID >= 0) && (al.Bin < 4681) )
-                SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
-
-            // changed to new BAI bin
-            if ( al.Bin != lastBin ) {
-
-                // if not first bin on reference, save previous bin data
-                if ( currentBin != defaultValue )
-                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-
-                // update markers
-                currentOffset = lastOffset;
-                currentBin    = al.Bin;
-                lastBin       = al.Bin;
-                currentRefID  = al.RefID;
-
-                // if invalid RefID, break out
-                if ( currentRefID < 0 )
-                    break;
-            }
-
-            // make sure that current file pointer is beyond lastOffset
-            if ( m_reader->Tell() <= (int64_t)lastOffset ) {
-                SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
-                return false;
-            }
-
-            // update lastOffset & lastPosition
-            lastOffset   = m_reader->Tell();
-            lastPosition = al.Position;
-        }
-
-        // after finishing alignments, if any data was read, check:
-        if ( currentRefID >= 0 ) {
-
-            // store last alignment chunk to its bin, then write last reference entry with data
-            SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-            WriteReferenceEntry(refEntry);
-
-            // then write any empty references remaining at end of file
-            for ( int i = currentRefID+1; i < numReferences; ++i ) {
-                BaiReferenceEntry emptyEntry(i);
-                WriteReferenceEntry(emptyEntry);
-            }
-        }
-
-    } catch ( BamException& e) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamStandardIndex::Create", message);
-        return false;
-    }
-
-    // return success
-    return true;
-}
-
-// returns format's file extension
-const string BamStandardIndex::Extension(void) {
-    return BamStandardIndex::BAI_EXTENSION;
-}
-
-void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
-
-    // cannot calculate offsets if unknown/invalid reference ID requested
-    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
-        throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
-
-    // retrieve index summary for left bound reference
-    const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
-
-    // set up region boundaries based on actual BamReader data
-    uint32_t begin;
-    uint32_t end;
-    AdjustRegion(region, begin, end);
-
-    // retrieve all candidate bin IDs for region
-    set<uint16_t> candidateBins;
-    CalculateCandidateBins(begin, end, candidateBins);
-
-    // use reference's linear offsets to calculate the minimum offset
-    // that must be considered to find overlap
-    const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
-
-    // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
-    // no data should not be error, just bail
-    vector<int64_t> offsets;
-    CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
-    if ( offsets.empty() )
-        return;
-    
-    // ensure that offsets are sorted before processing
-    sort( offsets.begin(), offsets.end() );
-
-    // binary search for an overlapping block (may not be first one though)
-    BamAlignment al;
-    typedef vector<int64_t>::const_iterator OffsetConstIterator;
-    OffsetConstIterator offsetFirst = offsets.begin();
-    OffsetConstIterator offsetIter  = offsetFirst;
-    OffsetConstIterator offsetLast  = offsets.end();
-    iterator_traits<OffsetConstIterator>::difference_type count = distance(offsetFirst, offsetLast);
-    iterator_traits<OffsetConstIterator>::difference_type step;
-    while ( count > 0 ) {
-        offsetIter = offsetFirst;
-        step = count/2;
-        advance(offsetIter, step);
-
-        // attempt seek to candidate offset
-        const int64_t& candidateOffset = (*offsetIter);
-        if ( !m_reader->Seek(candidateOffset) ) {
-            const string readerError = m_reader->GetErrorString();
-            const string message = "could not seek in BAM file: \n\t" + readerError;
-            throw BamException("BamToolsIndex::GetOffset", message);
-        }
-
-        // load first available alignment, setting flag to true if data exists
-        *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
-
-        // check alignment against region
-        if ( al.GetEndPosition() <= region.LeftPosition ) {
-            offsetFirst = ++offsetIter;
-            count -= step+1;
-        } else count = step;
-    }
-
-    // step back to the offset before the 'current offset' (to make sure we cover overlaps)
-    if ( offsetIter != offsets.begin() )
-        --offsetIter;
-    offset = (*offsetIter);
-}
-
-// returns whether reference has alignments or no
-bool BamStandardIndex::HasAlignments(const int& referenceID) const {
-    if ( referenceID < 0 || referenceID >= (int)m_indexFileSummary.size() )
-        return false;
-    const BaiReferenceSummary& refSummary = m_indexFileSummary.at(referenceID);
-    return ( refSummary.NumBins > 0 );
-}
-
-bool BamStandardIndex::IsFileOpen(void) const {
-    return ( Resources.IndexStream != 0 );
-}
-
-// attempts to use index data to jump to @region, returns success/fail
-// a "successful" jump indicates no error, but not whether this region has data
-//   * thus, the method sets a flag to indicate whether there are alignments
-//     available after the jump position
-bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
-
-    // clear out flag
-    *hasAlignmentsInRegion = false;
-
-    // skip if invalid reader or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
-        return false;
-    }
-
-    // calculate nearest offset to jump to
-    int64_t offset;
-    try {
-        GetOffset(region, offset, hasAlignmentsInRegion);
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // if region has alignments, return success/fail of seeking there
-    if ( *hasAlignmentsInRegion )
-        return m_reader->Seek(offset);
-
-    // otherwise, simply return true (but hasAlignmentsInRegion flag has been set to false)
-    // (this is OK, BamReader will check this flag before trying to load data)
-    return true;
-}
-
-// loads existing data from file into memory
-bool BamStandardIndex::Load(const std::string& filename) {
-
-    try {
-
-        // attempt to open file (read-only)
-        OpenFile(filename, "rb");
-
-        // validate format
-        CheckMagicNumber();
-
-        // load in-memory summary of index data
-        SummarizeIndexFile();
-
-        // return success
-        return true;
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-}
-
-uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
-
-    // attempt seek to proper index file position
-    const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
-                                             index*BamStandardIndex::SIZEOF_LINEAROFFSET;
-    Seek(linearOffsetFilePosition, SEEK_SET);
-
-    // read linear offset from BAI file
-    uint64_t linearOffset;
-    ReadLinearOffset(linearOffset);
-    return linearOffset;
-}
-
-void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
-
-    // skip if chunks are empty, nothing to merge
-    if ( chunks.empty() )
-        return;
-
-    // set up merged alignment chunk container
-    BaiAlignmentChunkVector mergedChunks;
-    mergedChunks.push_back( chunks[0] );
-
-    // iterate over chunks
-    int i = 0;
-    BaiAlignmentChunkVector::iterator chunkIter = chunks.begin();
-    BaiAlignmentChunkVector::iterator chunkEnd  = chunks.end();
-    for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
-
-        // get 'currentMergeChunk' based on numeric index
-        BaiAlignmentChunk& currentMergeChunk = mergedChunks[i];
-
-        // get sourceChunk based on source vector iterator
-        BaiAlignmentChunk& sourceChunk = (*chunkIter);
-
-        // if currentMergeChunk ends where sourceChunk starts, then merge the two
-        if ( currentMergeChunk.Stop>>16 == sourceChunk.Start>>16 )
-            currentMergeChunk.Stop = sourceChunk.Stop;
-
-        // otherwise
-        else {
-            // append sourceChunk after currentMergeChunk
-            mergedChunks.push_back(sourceChunk);
-
-            // update i, so the next iteration will consider the
-            // recently-appended sourceChunk as new mergeChunk candidate
-            ++i;
-        }
-    }
-
-    // saved newly-merged chunks into (parameter) chunks
-    chunks = mergedChunks;
-}
-
-void BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
-
-    // make sure any previous index file is closed
-    CloseFile();
-
-    // attempt to open file
-    Resources.IndexStream = fopen(filename.c_str(), mode);
-    if ( !IsFileOpen() ) {
-        const string message = string("could not open file: ") + filename;
-        throw BamException("BamStandardIndex::OpenFile", message);
-    }
-}
-
-void BamStandardIndex::ReadBinID(uint32_t& binId) {
-    const size_t elementsRead = fread(&binId, sizeof(binId), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(binId);
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
-}
-
-void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
-
-    // read bin header
-    ReadBinID(binId);
-    ReadNumAlignmentChunks(numAlignmentChunks);
-
-    // read bin contents
-    const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
-    ReadIntoBuffer(bytesRequested);
-}
-
-void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
-
-    // ensure that our buffer is big enough for request
-    BamStandardIndex::CheckBufferSize(Resources.Buffer, m_bufferLength, bytesRequested);
-
-    // read from BAI file stream
-    const size_t bytesRead = fread( Resources.Buffer, sizeof(char), bytesRequested, Resources.IndexStream );
-    if ( bytesRead != (size_t)bytesRequested ) {
-        stringstream s("");
-        s << "expected to read: " << bytesRequested << " bytes, "
-          << "but instead read: " << bytesRead;
-        throw BamException("BamStandardIndex::ReadIntoBuffer", s.str());
-    }
-}
-
-void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
-    const size_t elementsRead = fread(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_64(linearOffset);
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
-}
-
-void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
-    const size_t elementsRead = fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
-}
-
-void BamStandardIndex::ReadNumBins(int& numBins) {
-    const size_t elementsRead = fread(&numBins, sizeof(numBins), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(numBins);
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
-}
-
-void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
-    const size_t elementsRead = fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI linear offset count");
-}
-
-void BamStandardIndex::ReadNumReferences(int& numReferences) {
-    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
-}
-
-void BamStandardIndex::ReserveForSummary(const int& numReferences) {
-    m_indexFileSummary.clear();
-    m_indexFileSummary.assign( numReferences, BaiReferenceSummary() );
-}
-
-void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
-                                               const uint32_t& currentBin,
-                                               const uint64_t& currentOffset,
-                                               const uint64_t& lastOffset)
-{
-    // create new alignment chunk
-    BaiAlignmentChunk newChunk(currentOffset, lastOffset);
-
-    // if no entry exists yet for this bin, create one and store alignment chunk
-    BaiBinMap::iterator binIter = binMap.find(currentBin);
-    if ( binIter == binMap.end() ) {
-        BaiAlignmentChunkVector newChunks;
-        newChunks.push_back(newChunk);
-        binMap.insert( pair<uint32_t, BaiAlignmentChunkVector>(currentBin, newChunks));
-    }
-
-    // otherwise, just append alignment chunk
-    else {
-        BaiAlignmentChunkVector& binChunks = (*binIter).second;
-        binChunks.push_back( newChunk );
-    }
-}
-
-void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
-    BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
-    refSummary.NumBins = numBins;
-    refSummary.FirstBinFilePosition = Tell();
-}
-
-void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
-                                             const int& alignmentStartPosition,
-                                             const int& alignmentStopPosition,
-                                             const uint64_t& lastOffset)
-{
-    // get converted offsets
-    const int beginOffset = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
-    const int endOffset   = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
-
-    // resize vector if necessary
-    int oldSize = offsets.size();
-    int newSize = endOffset + 1;
-    if ( oldSize < newSize )
-        offsets.resize(newSize, 0);
-
-    // store offset
-    for( int i = beginOffset + 1; i <= endOffset; ++i ) {
-        if ( offsets[i] == 0 )
-            offsets[i] = lastOffset;
-    }
-}
-
-void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
-    BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
-    refSummary.NumLinearOffsets = numLinearOffsets;
-    refSummary.FirstLinearOffsetFilePosition = Tell();
-}
-
-// seek to position in index file stream
-void BamStandardIndex::Seek(const int64_t& position, const int& origin) {
-    if ( fseek64(Resources.IndexStream, position, origin) != 0 )
-        throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
-}
-
-void BamStandardIndex::SkipBins(const int& numBins) {
-    uint32_t binId;
-    int32_t numAlignmentChunks;
-    for (int i = 0; i < numBins; ++i)
-        ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
-}
-
-void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
-    const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
-    ReadIntoBuffer(bytesRequested);
-}
-
-void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
-    sort( linearOffsets.begin(), linearOffsets.end() );
-}
-
-void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
-
-    // load number of bins
-    int numBins;
-    ReadNumBins(numBins);
-
-    // store bins summary for this reference
-    refSummary.NumBins = numBins;
-    refSummary.FirstBinFilePosition = Tell();
-
-    // skip this reference's bins
-    SkipBins(numBins);
-}
-
-void BamStandardIndex::SummarizeIndexFile(void) {
-
-    // load number of reference sequences
-    int numReferences;
-    ReadNumReferences(numReferences);
-
-    // initialize file summary data
-    ReserveForSummary(numReferences);
-
-    // iterate over reference entries
-    BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
-    BaiFileSummary::iterator summaryEnd  = m_indexFileSummary.end();
-    for ( int i = 0; summaryIter != summaryEnd; ++summaryIter, ++i )
-        SummarizeReference(*summaryIter);
-}
-
-void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
-
-    // load number of linear offsets
-    int numLinearOffsets;
-    ReadNumLinearOffsets(numLinearOffsets);
-
-    // store bin summary data for this reference
-    refSummary.NumLinearOffsets = numLinearOffsets;
-    refSummary.FirstLinearOffsetFilePosition = Tell();
-
-    // skip linear offsets in index file
-    SkipLinearOffsets(numLinearOffsets);
-}
-
-void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
-    SummarizeBins(refSummary);
-    SummarizeLinearOffsets(refSummary);
-}
-
-// return position of file pointer in index file stream
-int64_t BamStandardIndex::Tell(void) const {
-    return ftell64(Resources.IndexStream);
-}
-
-void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
-
-    // localize alignment chunk offsets
-    uint64_t start = chunk.Start;
-    uint64_t stop  = chunk.Stop;
-
-    // swap endian-ness if necessary
-    if ( m_isBigEndian ) {
-        SwapEndian_64(start);
-        SwapEndian_64(stop);
-    }
-
-    // write to index file
-    size_t elementsWritten = 0;
-    elementsWritten += fwrite(&start, sizeof(start), 1, Resources.IndexStream);
-    elementsWritten += fwrite(&stop,  sizeof(stop),  1, Resources.IndexStream);
-    if ( elementsWritten != 2 )
-        throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
-}
-
-void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
-
-    // make sure chunks are merged (simplified) before writing & saving summary
-    MergeAlignmentChunks(chunks);
-
-    // write chunks
-    int32_t chunkCount = chunks.size();
-    if ( m_isBigEndian ) SwapEndian_32(chunkCount);
-    const size_t elementsWritten = fwrite(&chunkCount, sizeof(chunkCount), 1, Resources.IndexStream);
-    if ( elementsWritten != 1 )
-        throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
-
-    // iterate over chunks
-    BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();
-    BaiAlignmentChunkVector::const_iterator chunkEnd  = chunks.end();
-    for ( ; chunkIter != chunkEnd; ++chunkIter )
-        WriteAlignmentChunk( (*chunkIter) );
-}
-
-void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
-
-    // write BAM bin ID
-    uint32_t binKey = binId;
-    if ( m_isBigEndian ) SwapEndian_32(binKey);
-    const size_t elementsWritten = fwrite(&binKey, sizeof(binKey), 1, Resources.IndexStream);
-    if ( elementsWritten != 1 )
-        throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
-
-    // write bin's alignment chunks
-    WriteAlignmentChunks(chunks);
-}
-
-void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
-
-    // write number of bins
-    int32_t binCount = bins.size();
-    if ( m_isBigEndian ) SwapEndian_32(binCount);
-    const size_t elementsWritten = fwrite(&binCount, sizeof(binCount), 1, Resources.IndexStream);
-    if ( elementsWritten != 1 )
-        throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
-
-    // save summary for reference's bins
-    SaveBinsSummary(refId, bins.size());
-
-    // iterate over bins
-    BaiBinMap::iterator binIter = bins.begin();
-    BaiBinMap::iterator binEnd  = bins.end();
-    for ( ; binIter != binEnd; ++binIter )
-        WriteBin( (*binIter).first, (*binIter).second );
-}
-
-void BamStandardIndex::WriteHeader(void) {
-
-    size_t elementsWritten = 0;
-
-    // write magic number
-    elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, Resources.IndexStream);
-
-    // write number of reference sequences
-    int32_t numReferences = m_indexFileSummary.size();
-    if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-
-    if ( elementsWritten != 5 )
-        throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
-}
-
-void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
-
-    // make sure linear offsets are sorted before writing & saving summary
-    SortLinearOffsets(linearOffsets);
-
-    size_t elementsWritten = 0;
-
-    // write number of linear offsets
-    int32_t offsetCount = linearOffsets.size();
-    if ( m_isBigEndian ) SwapEndian_32(offsetCount);
-    elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, Resources.IndexStream);
-
-    // save summary for reference's linear offsets
-    SaveLinearOffsetsSummary(refId, linearOffsets.size());
-
-    // iterate over linear offsets
-    BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();
-    BaiLinearOffsetVector::const_iterator offsetEnd  = linearOffsets.end();
-    for ( ; offsetIter != offsetEnd; ++offsetIter ) {
-
-        // write linear offset
-        uint64_t linearOffset = (*offsetIter);
-        if ( m_isBigEndian ) SwapEndian_64(linearOffset);
-        elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
-    }
-
-    if ( elementsWritten != (linearOffsets.size() + 1) )
-        throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
-}
-
-void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
-    WriteBins(refEntry.ID, refEntry.Bins);
-    WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
-}
diff --git a/src/api/internal/BamStandardIndex_p.h b/src/api/internal/BamStandardIndex_p.h

deleted file mode 100644 (file)

index e49bc26..0000000
--- a/src/api/internal/BamStandardIndex_p.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// ***************************************************************************
-// BamStandardIndex.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the standardized BAM index format (".bai")
-// ***************************************************************************
-
-#ifndef BAM_STANDARD_INDEX_FORMAT_H
-#define BAM_STANDARD_INDEX_FORMAT_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail.  This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-// -----------------------------------------------------------------------------
-// BamStandardIndex data structures
-
-// defines start and end of a contiguous run of alignments
-struct BaiAlignmentChunk {
-
-    // data members
-    uint64_t Start;
-    uint64_t Stop;
-
-    // constructor
-    BaiAlignmentChunk(const uint64_t& start = 0,
-                      const uint64_t& stop = 0)
-        : Start(start)
-        , Stop(stop)
-    { }
-};
-
-// comparison operator (for sorting)
-inline
-bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
-    return lhs.Start < rhs.Start;
-}
-
-// convenience typedef for a list of all alignment 'chunks' in a BAI bin
-typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
-
-// convenience typedef for a map of all BAI bins in a reference (ID => chunks)
-typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
-
-// convenience typedef for a list of all 'linear offsets' in a reference
-typedef std::vector<uint64_t> BaiLinearOffsetVector;
-
-// contains all fields necessary for building, loading, & writing
-// full BAI index data for a single reference
-struct BaiReferenceEntry {
-
-    // data members
-    int32_t ID;
-    BaiBinMap Bins;
-    BaiLinearOffsetVector LinearOffsets;
-
-    // ctor
-    BaiReferenceEntry(const int32_t& id = -1)
-        : ID(id)
-    { }
-};
-
-// provides (persistent) summary of BaiReferenceEntry's index data
-struct BaiReferenceSummary {
-
-    // data members
-    int NumBins;
-    int NumLinearOffsets;
-    uint64_t FirstBinFilePosition;
-    uint64_t FirstLinearOffsetFilePosition;
-
-    // ctor
-    BaiReferenceSummary(void)
-        : NumBins(0)
-        , NumLinearOffsets(0)
-        , FirstBinFilePosition(0)
-        , FirstLinearOffsetFilePosition(0)
-    { }
-};
-
-// convenience typedef for describing a full BAI index file summary
-typedef std::vector<BaiReferenceSummary> BaiFileSummary;
-
-// end BamStandardIndex data structures
-// -----------------------------------------------------------------------------
-
-class BamStandardIndex : public BamIndex {
-
-    // ctor & dtor
-    public:
-        BamStandardIndex(Internal::BamReaderPrivate* reader);
-        ~BamStandardIndex(void);
-
-    // BamIndex implementation
-    public:
-        // builds index from associated BAM file & writes out to index file
-        bool Create(void);
-        // returns whether reference has alignments or no
-        bool HasAlignments(const int& referenceID) const;
-        // attempts to use index data to jump to @region, returns success/fail
-        // a "successful" jump indicates no error, but not whether this region has data
-        //   * thus, the method sets a flag to indicate whether there are alignments
-        //     available after the jump position
-        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
-        // loads existing data from file into memory
-        bool Load(const std::string& filename);
-        BamIndex::IndexType Type(void) const { return BamIndex::STANDARD; }
-    public:
-        // returns format's file extension
-        static const std::string Extension(void);
-
-    // internal methods
-    private:
-
-        // index file ops
-        void CheckMagicNumber(void);
-        void CloseFile(void);
-        bool IsFileOpen(void) const;
-        void OpenFile(const std::string& filename, const char* mode);
-        void Seek(const int64_t& position, const int& origin);
-        int64_t Tell(void) const;
-
-        // BAI index building methods
-        void ClearReferenceEntry(BaiReferenceEntry& refEntry);
-        void SaveAlignmentChunkToBin(BaiBinMap& binMap,
-                                     const uint32_t& currentBin,
-                                     const uint64_t& currentOffset,
-                                     const uint64_t& lastOffset);
-        void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
-                                   const int& alignmentStartPosition,
-                                   const int& alignmentStopPosition,
-                                   const uint64_t& lastOffset);
-
-        // random-access methods
-        void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
-        void CalculateCandidateBins(const uint32_t& begin,
-                                    const uint32_t& end,
-                                    std::set<uint16_t>& candidateBins);
-        void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
-                                       const uint64_t& minOffset,
-                                       std::set<uint16_t>& candidateBins,
-                                       std::vector<int64_t>& offsets);
-        uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
-        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
-        uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
-
-        // BAI summary (create/load) methods
-        void ReserveForSummary(const int& numReferences);
-        void SaveBinsSummary(const int& refId, const int& numBins);
-        void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
-        void SkipBins(const int& numBins);
-        void SkipLinearOffsets(const int& numLinearOffsets);
-        void SummarizeBins(BaiReferenceSummary& refSummary);
-        void SummarizeIndexFile(void);
-        void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
-        void SummarizeReference(BaiReferenceSummary& refSummary);
-
-        // BAI full index input methods
-        void ReadBinID(uint32_t& binId);
-        void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
-        void ReadIntoBuffer(const unsigned int& bytesRequested);
-        void ReadLinearOffset(uint64_t& linearOffset);
-        void ReadNumAlignmentChunks(int& numAlignmentChunks);
-        void ReadNumBins(int& numBins);
-        void ReadNumLinearOffsets(int& numLinearOffsets);
-        void ReadNumReferences(int& numReferences);
-
-        // BAI full index output methods
-        void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
-        void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
-        void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
-        void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
-        void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
-        void WriteBins(const int& refId, BaiBinMap& bins);
-        void WriteHeader(void);
-        void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
-        void WriteReferenceEntry(BaiReferenceEntry& refEntry);
-
-    // data members
-    private:
-        bool m_isBigEndian;
-        BaiFileSummary m_indexFileSummary;
-
-        // our input buffer
-        unsigned int m_bufferLength;
-
-        struct RaiiWrapper {
-            FILE* IndexStream;
-            char* Buffer;
-            RaiiWrapper(void);
-            ~RaiiWrapper(void);
-        };
-        RaiiWrapper Resources;
-
-    // static methods
-    private:
-        // checks if the buffer is large enough to accomodate the requested size
-        static void CheckBufferSize(char*& buffer,
-                                    unsigned int& bufferLength,
-                                    const unsigned int& requestedBytes);
-        // checks if the buffer is large enough to accomodate the requested size
-        static void CheckBufferSize(unsigned char*& buffer,
-                                    unsigned int& bufferLength,
-                                    const unsigned int& requestedBytes);
-    // static constants
-    private:
-        static const int MAX_BIN;
-        static const int BAM_LIDX_SHIFT;
-        static const std::string BAI_EXTENSION;
-        static const char* const BAI_MAGIC;
-        static const int SIZEOF_ALIGNMENTCHUNK;
-        static const int SIZEOF_BINCORE;
-        static const int SIZEOF_LINEAROFFSET;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAM_STANDARD_INDEX_FORMAT_H
diff --git a/src/api/internal/BamToolsIndex_p.cpp b/src/api/internal/BamToolsIndex_p.cpp

deleted file mode 100644 (file)

index cdf3d10..0000000
--- a/src/api/internal/BamToolsIndex_p.cpp
+++ /dev/null
@@ -1,615 +0,0 @@
-// ***************************************************************************
-// BamToolsIndex.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the BamTools index format (".bti")
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-#include <iostream>
-#include <iterator>
-#include <map>
-using namespace std;
-
-// --------------------------------
-// static BamToolsIndex constants
-// --------------------------------
-
-const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
-const string BamToolsIndex::BTI_EXTENSION     = ".bti";
-const char* const BamToolsIndex::BTI_MAGIC    = "BTI\1";
-const int BamToolsIndex::SIZEOF_BLOCK         = sizeof(int32_t)*2 + sizeof(int64_t);
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BamToolsIndex::RaiiWrapper::RaiiWrapper(void)
-    : IndexStream(0)
-{ }
-
-BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
-    if ( IndexStream )
-        fclose(IndexStream);
-}
-
-// ------------------------------
-// BamToolsIndex implementation
-// ------------------------------
-
-// ctor
-BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
-    : BamIndex(reader)
-    , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
-    , m_inputVersion(0)
-    , m_outputVersion(BTI_2_0) // latest version - used for writing new index files
-{
-    m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// dtor
-BamToolsIndex::~BamToolsIndex(void) {
-    CloseFile();
-}
-
-void BamToolsIndex::CheckMagicNumber(void) {
-
-    // read magic number
-    char magic[4];
-    size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
-    if ( elementsRead != 4 )
-        throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
-
-    // validate expected magic number
-    if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 )
-        throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
-}
-
-// check index file version, return true if OK
-void BamToolsIndex::CheckVersion(void) {
-
-    // read version from file
-    size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, Resources.IndexStream);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
-    if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
-
-    // if version is negative, or zero
-    if ( m_inputVersion <= 0 )
-        throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
-
-    // if version is newer than can be supported by this version of bamtools
-    else if ( m_inputVersion > m_outputVersion ) {
-        const string message = "unsupported format: this index was created by a newer version of BamTools. "
-                               "Update your local version of BamTools to use the index file.";
-        throw BamException("BamToolsIndex::CheckVersion", message);
-    }
-
-    // ------------------------------------------------------------------
-    // check for deprecated, unsupported versions
-    // (the format had to be modified to accomodate a particular bug fix)
-
-    // Version 2.0: introduced support for half-open intervals, instead of the old closed intervals
-    //   respondBy: throwing exception - we're not going to try to handle the old BTI files.
-    else if ( (Version)m_inputVersion < BamToolsIndex::BTI_2_0 ) {
-        const string message = "unsupported format: this version of the index may not properly handle "
-                               "coordinate intervals. Please run 'bamtools index -bti -in yourData.bam' "
-                               "to generate an up-to-date, fixed BTI file.";
-        throw BamException("BamToolsIndex::CheckVersion", message);
-    }
-}
-
-void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
-    refEntry.ID = -1;
-    refEntry.Blocks.clear();
-}
-
-void BamToolsIndex::CloseFile(void) {
-    if ( IsFileOpen() ) {
-        fclose(Resources.IndexStream);
-        Resources.IndexStream = 0;
-    }
-    m_indexFileSummary.clear();
-}
-
-// builds index from associated BAM file & writes out to index file
-bool BamToolsIndex::Create(void) {
-
-    // skip if BamReader is invalid or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamToolsIndex::Create", message);
-        return false;
-    }
-
-    try {
-        // open new index file (read & write)
-        const string indexFilename = m_reader->Filename() + Extension();
-        OpenFile(indexFilename, "w+b");
-
-        // initialize BtiFileSummary with number of references
-        const int& numReferences = m_reader->GetReferenceCount();
-        InitializeFileSummary(numReferences);
-
-        // intialize output file header
-        WriteHeader();
-
-        // index building markers
-        uint32_t currentBlockCount      = 0;
-        int64_t currentAlignmentOffset  = m_reader->Tell();
-        int32_t blockRefId              = -1;
-        int32_t blockMaxEndPosition     = -1;
-        int64_t blockStartOffset        = currentAlignmentOffset;
-        int32_t blockStartPosition      = -1;
-
-        // plow through alignments, storing index entries
-        BamAlignment al;
-        BtiReferenceEntry refEntry;
-        while ( m_reader->LoadNextAlignment(al) ) {
-
-            // if moved to new reference
-            if ( al.RefID != blockRefId ) {
-
-                // if first pass, check:
-                if ( currentBlockCount == 0 ) {
-
-                    // write any empty references up to (but not including) al.RefID
-                    for ( int i = 0; i < al.RefID; ++i )
-                        WriteReferenceEntry( BtiReferenceEntry(i) );
-                }
-
-                // not first pass:
-                else {
-
-                    // store previous BTI block data in reference entry
-                    const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-                    refEntry.Blocks.push_back(block);
-
-                    // write reference entry, then clear
-                    WriteReferenceEntry(refEntry);
-                    ClearReferenceEntry(refEntry);
-
-                    // write any empty references between (but not including)
-                    // the last blockRefID and current al.RefID
-                    for ( int i = blockRefId+1; i < al.RefID; ++i )
-                        WriteReferenceEntry( BtiReferenceEntry(i) );
-
-                    // reset block count
-                    currentBlockCount = 0;
-                }
-
-                // set ID for new reference entry
-                refEntry.ID = al.RefID;
-            }
-
-            // if beginning of block, update counters
-            if ( currentBlockCount == 0 ) {
-                blockRefId          = al.RefID;
-                blockStartOffset    = currentAlignmentOffset;
-                blockStartPosition  = al.Position;
-                blockMaxEndPosition = al.GetEndPosition();
-            }
-
-            // increment block counter
-            ++currentBlockCount;
-
-            // check end position
-            const int32_t alignmentEndPosition = al.GetEndPosition();
-            if ( alignmentEndPosition > blockMaxEndPosition )
-                blockMaxEndPosition = alignmentEndPosition;
-
-            // if block is full, get offset for next block, reset currentBlockCount
-            if ( currentBlockCount == m_blockSize ) {
-
-                // store previous block data in reference entry
-                const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-                refEntry.Blocks.push_back(block);
-
-                // update markers
-                blockStartOffset  = m_reader->Tell();
-                currentBlockCount = 0;
-            }
-
-            // not the best name, but for the next iteration, this value will be the offset of the
-            // *current* alignment. this is necessary because we won't know if this next alignment
-            // is on a new reference until we actually read it
-            currentAlignmentOffset = m_reader->Tell();
-        }
-
-        // after finishing alignments, if any data was read, check:
-        if ( blockRefId >= 0 ) {
-
-            // store last BTI block data in reference entry
-            const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-            refEntry.Blocks.push_back(block);
-
-            // write last reference entry, then clear
-            WriteReferenceEntry(refEntry);
-            ClearReferenceEntry(refEntry);
-
-            // then write any empty references remaining at end of file
-            for ( int i = blockRefId+1; i < numReferences; ++i )
-                WriteReferenceEntry( BtiReferenceEntry(i) );
-        }
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamToolsIndex::Create", message);
-        return false;
-    }
-
-    // return success
-    return true;
-}
-
-// returns format's file extension
-const std::string BamToolsIndex::Extension(void) {
-    return BamToolsIndex::BTI_EXTENSION;
-}
-
-void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
-
-    // return false ref ID is not a valid index in file summary data
-    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
-        throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
-
-    // retrieve reference index data for left bound reference
-    BtiReferenceEntry refEntry(region.LeftRefID);
-    ReadReferenceEntry(refEntry);
-
-    // binary search for an overlapping block (may not be first one though)
-    bool found = false;
-    typedef BtiBlockVector::const_iterator BtiBlockConstIterator;
-    BtiBlockConstIterator blockFirst = refEntry.Blocks.begin();
-    BtiBlockConstIterator blockIter  = blockFirst;
-    BtiBlockConstIterator blockLast  = refEntry.Blocks.end();
-    iterator_traits<BtiBlockConstIterator>::difference_type count = distance(blockFirst, blockLast);
-    iterator_traits<BtiBlockConstIterator>::difference_type step;
-    while ( count > 0 ) {
-        blockIter = blockFirst;
-        step = count/2;
-        advance(blockIter, step);
-
-        const BtiBlock& block = (*blockIter);
-        if ( block.StartPosition <= region.RightPosition ) {
-            if ( block.MaxEndPosition > region.LeftPosition ) {
-                offset = block.StartOffset;
-                break;
-            }
-            blockFirst = ++blockIter;
-            count -= step+1;
-        }
-        else count = step;
-    }
-
-    // if we didn't search "off the end" of the blocks
-    if ( blockIter != blockLast ) {
-
-        // "walk back" until we've gone too far
-        while ( blockIter != blockFirst ) {
-            const BtiBlock& currentBlock = (*blockIter);
-
-            --blockIter;
-            const BtiBlock& previousBlock = (*blockIter);
-            if ( previousBlock.MaxEndPosition <= region.LeftPosition ) {
-                offset = currentBlock.StartOffset;
-                found = true;
-                break;
-            }
-        }
-
-        // if we walked all the way to first block, just return that and let the reader's
-        // region overlap parsing do the rest
-        if ( blockIter == blockFirst ) {
-            const BtiBlock& block = (*blockIter);
-            offset = block.StartOffset;
-            found = true;
-        }
-    }
-
-
-    // sets to false if blocks container is empty, or if no matching block could be found
-    *hasAlignmentsInRegion = found;
-}
-
-// returns whether reference has alignments or no
-bool BamToolsIndex::HasAlignments(const int& referenceID) const {
-    if ( referenceID < 0 || referenceID >= (int)m_indexFileSummary.size() )
-        return false;
-    const BtiReferenceSummary& refSummary = m_indexFileSummary.at(referenceID);
-    return ( refSummary.NumBlocks > 0 );
-}
-
-// pre-allocates space for each reference's summary data
-void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
-    m_indexFileSummary.clear();
-    for ( int i = 0; i < numReferences; ++i )
-        m_indexFileSummary.push_back( BtiReferenceSummary() );
-}
-
-// returns true if the index stream is open
-bool BamToolsIndex::IsFileOpen(void) const {
-    return ( Resources.IndexStream != 0 );
-}
-
-// attempts to use index data to jump to @region, returns success/fail
-// a "successful" jump indicates no error, but not whether this region has data
-//   * thus, the method sets a flag to indicate whether there are alignments
-//     available after the jump position
-bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) {
-
-    // clear flag
-    *hasAlignmentsInRegion = false;
-
-    // skip if invalid reader or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
-        return false;
-    }
-
-    // make sure left-bound position is valid
-    const RefVector& references = m_reader->GetReferenceData();
-    if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) {
-        SetErrorString("BamToolsIndex::Jump", "could not create index: invalid region requested");
-        return false;
-    }
-
-    // calculate nearest offset to jump to
-    int64_t offset;
-    try {
-        GetOffset(region, offset, hasAlignmentsInRegion);
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // return success/failure of seek
-    return m_reader->Seek(offset);
-}
-
-// loads existing data from file into memory
-bool BamToolsIndex::Load(const std::string& filename) {
-
-    try {
-
-        // attempt to open file (read-only)
-        OpenFile(filename, "rb");
-
-        // load metadata & generate in-memory summary
-        LoadHeader();
-        LoadFileSummary();
-
-        // return success
-        return true;
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-}
-
-void BamToolsIndex::LoadFileSummary(void) {
-
-    // load number of reference sequences
-    int numReferences;
-    LoadNumReferences(numReferences);
-
-    // initialize file summary data
-    InitializeFileSummary(numReferences);
-
-    // load summary for each reference
-    BtiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
-    BtiFileSummary::iterator summaryEnd  = m_indexFileSummary.end();
-    for ( ; summaryIter != summaryEnd; ++summaryIter )
-        LoadReferenceSummary(*summaryIter);
-}
-
-void BamToolsIndex::LoadHeader(void) {
-
-    // check BTI file metadata
-    CheckMagicNumber();
-    CheckVersion();
-
-    // use file's BTI block size to set member variable
-    const size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
-}
-
-void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
-    const size_t elementsRead = fread(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(numBlocks);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
-}
-
-void BamToolsIndex::LoadNumReferences(int& numReferences) {
-    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-    if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
-}
-
-void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
-
-    // load number of blocks
-    int numBlocks;
-    LoadNumBlocks(numBlocks);
-
-    // store block summary data for this reference
-    refSummary.NumBlocks = numBlocks;
-    refSummary.FirstBlockFilePosition = Tell();
-
-    // skip reference's blocks
-    SkipBlocks(numBlocks);
-}
-
-void BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
-
-    // make sure any previous index file is closed
-    CloseFile();
-
-    // attempt to open file
-    Resources.IndexStream = fopen(filename.c_str(), mode);
-    if ( !IsFileOpen() ) {
-        const string message = string("could not open file: ") + filename;
-        throw BamException("BamToolsIndex::OpenFile", message);
-    }
-}
-
-void BamToolsIndex::ReadBlock(BtiBlock& block) {
-
-    // read in block data members
-    size_t elementsRead = 0;
-    elementsRead += fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, Resources.IndexStream);
-    elementsRead += fread(&block.StartOffset,    sizeof(block.StartOffset),    1, Resources.IndexStream);
-    elementsRead += fread(&block.StartPosition,  sizeof(block.StartPosition),  1, Resources.IndexStream);
-
-    // swap endian-ness if necessary
-    if ( m_isBigEndian ) {
-        SwapEndian_32(block.MaxEndPosition);
-        SwapEndian_64(block.StartOffset);
-        SwapEndian_32(block.StartPosition);
-    }
-
-    if ( elementsRead != 3 )
-        throw BamException("BamToolsIndex::ReadBlock", "could not read block");
-}
-
-void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
-
-    // prep blocks container
-    blocks.clear();
-    blocks.reserve(refSummary.NumBlocks);
-
-    // skip to first block entry
-    Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
-
-    // read & store block entries
-    BtiBlock block;
-    for ( int i = 0; i < refSummary.NumBlocks; ++i ) {
-        ReadBlock(block);
-        blocks.push_back(block);
-    }
-}
-
-void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
-
-    // return false if refId not valid index in file summary structure
-    if ( refEntry.ID < 0 || refEntry.ID >= (int)m_indexFileSummary.size() )
-        throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
-
-    // use index summary to assist reading the reference's BTI blocks
-    const BtiReferenceSummary& refSummary = m_indexFileSummary.at(refEntry.ID);
-    ReadBlocks(refSummary, refEntry.Blocks);
-}
-
-void BamToolsIndex::Seek(const int64_t& position, const int& origin) {
-    if ( fseek64(Resources.IndexStream, position, origin) != 0 )
-        throw BamException("BamToolsIndex::Seek", "could not seek in BAI file");
-}
-
-void BamToolsIndex::SkipBlocks(const int& numBlocks) {
-    Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );
-}
-
-int64_t BamToolsIndex::Tell(void) const {
-    return ftell64(Resources.IndexStream);
-}
-
-void BamToolsIndex::WriteBlock(const BtiBlock& block) {
-
-    // copy entry data
-    int32_t maxEndPosition = block.MaxEndPosition;
-    int64_t startOffset    = block.StartOffset;
-    int32_t startPosition  = block.StartPosition;
-
-    // swap endian-ness if necessary
-    if ( m_isBigEndian ) {
-        SwapEndian_32(maxEndPosition);
-        SwapEndian_64(startOffset);
-        SwapEndian_32(startPosition);
-    }
-
-    // write the reference index entry
-    size_t elementsWritten = 0;
-    elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, Resources.IndexStream);
-    elementsWritten += fwrite(&startOffset,    sizeof(startOffset),    1, Resources.IndexStream);
-    elementsWritten += fwrite(&startPosition,  sizeof(startPosition),  1, Resources.IndexStream);
-    if ( elementsWritten != 3 )
-        throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
-}
-
-void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
-    BtiBlockVector::const_iterator blockIter = blocks.begin();
-    BtiBlockVector::const_iterator blockEnd  = blocks.end();
-    for ( ; blockIter != blockEnd; ++blockIter )
-        WriteBlock(*blockIter);
-}
-
-void BamToolsIndex::WriteHeader(void) {
-
-    size_t elementsWritten = 0;
-
-    // write BTI index format 'magic number'
-    elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, Resources.IndexStream);
-
-    // write BTI index format version
-    int32_t currentVersion = (int32_t)m_outputVersion;
-    if ( m_isBigEndian ) SwapEndian_32(currentVersion);
-    elementsWritten += fwrite(&currentVersion, sizeof(currentVersion), 1, Resources.IndexStream);
-
-    // write block size
-    uint32_t blockSize = m_blockSize;
-    if ( m_isBigEndian ) SwapEndian_32(blockSize);
-    elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, Resources.IndexStream);
-
-    // write number of references
-    int32_t numReferences = m_indexFileSummary.size();
-    if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-
-    if ( elementsWritten != 7 )
-        throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
-}
-
-void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
-
-    // write number of blocks this reference
-    uint32_t numBlocks = refEntry.Blocks.size();
-    if ( m_isBigEndian ) SwapEndian_32(numBlocks);
-    const size_t elementsWritten = fwrite(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
-    if ( elementsWritten != 1 )
-        throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
-
-    // write actual block entries
-    WriteBlocks(refEntry.Blocks);
-}
diff --git a/src/api/internal/BamToolsIndex_p.h b/src/api/internal/BamToolsIndex_p.h

deleted file mode 100644 (file)

index 1e9ec18..0000000
--- a/src/api/internal/BamToolsIndex_p.h
+++ /dev/null
@@ -1,185 +0,0 @@
-// ***************************************************************************
-// BamToolsIndex.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the BamTools index format (".bti")
-// ***************************************************************************
-
-#ifndef BAMTOOLS_INDEX_FORMAT_H
-#define BAMTOOLS_INDEX_FORMAT_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail.  This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-#include <map>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-// contains data for each 'block' in a BTI index
-struct BtiBlock {
-
-    // data members
-    int32_t MaxEndPosition;
-    int64_t StartOffset;
-    int32_t StartPosition;
-
-    // ctor
-    BtiBlock(const int32_t& maxEndPosition = 0,
-             const int64_t& startOffset    = 0,
-             const int32_t& startPosition  = 0)
-        : MaxEndPosition(maxEndPosition)
-        , StartOffset(startOffset)
-        , StartPosition(startPosition)
-    { }
-};
-
-// convenience typedef for describing a a list of BTI blocks on a reference
-typedef std::vector<BtiBlock> BtiBlockVector;
-
-// contains all fields necessary for building, loading, & writing
-// full BTI index data for a single reference
-struct BtiReferenceEntry {
-
-    // data members
-    int32_t ID;
-    BtiBlockVector Blocks;
-
-    // ctor
-    BtiReferenceEntry(const int& id = -1)
-        : ID(id)
-    { }
-};
-
-// provides (persistent) summary of BtiReferenceEntry's index data
-struct BtiReferenceSummary {
-
-    // data members
-    int NumBlocks;
-    uint64_t FirstBlockFilePosition;
-
-    // ctor
-    BtiReferenceSummary(void)
-        : NumBlocks(0)
-        , FirstBlockFilePosition(0)
-    { }
-};
-
-// convenience typedef for describing a full BTI index file summary
-typedef std::vector<BtiReferenceSummary> BtiFileSummary;
-
-class BamToolsIndex : public BamIndex {
-
-    // keep a list of any supported versions here
-    // (might be useful later to handle any 'legacy' versions if the format changes)
-    // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
-    //
-    // so a change introduced in BTI_1_2 may be handled from then on by:
-    //
-    // if ( indexVersion >= BTI_1_2 )
-    //   do something new
-    // else
-    //   do the old thing
-    enum Version { BTI_1_0 = 1
-                 , BTI_1_1
-                 , BTI_1_2
-                 , BTI_2_0
-                 };
-
-    // ctor & dtor
-    public:
-        BamToolsIndex(Internal::BamReaderPrivate* reader);
-        ~BamToolsIndex(void);
-
-    // BamIndex implementation
-    public:
-        // builds index from associated BAM file & writes out to index file
-        bool Create(void);
-        // returns whether reference has alignments or no
-        bool HasAlignments(const int& referenceID) const;
-        // attempts to use index data to jump to @region, returns success/fail
-        // a "successful" jump indicates no error, but not whether this region has data
-        //   * thus, the method sets a flag to indicate whether there are alignments
-        //     available after the jump position
-        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
-        // loads existing data from file into memory
-        bool Load(const std::string& filename);
-        BamIndex::IndexType Type(void) const { return BamIndex::BAMTOOLS; }
-    public:
-        // returns format's file extension
-        static const std::string Extension(void);
-
-    // internal methods
-    private:
-
-        // index file ops
-        void CheckMagicNumber(void);
-        void CheckVersion(void);
-        void CloseFile(void);
-        bool IsFileOpen(void) const;
-        void OpenFile(const std::string& filename, const char* mode);
-        void Seek(const int64_t& position, const int& origin);
-        int64_t Tell(void) const;
-
-        // index-creation methods
-        void ClearReferenceEntry(BtiReferenceEntry& refEntry);
-        void WriteBlock(const BtiBlock& block);
-        void WriteBlocks(const BtiBlockVector& blocks);
-        void WriteHeader(void);
-        void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
-
-        // random-access methods
-        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
-        void ReadBlock(BtiBlock& block);
-        void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
-        void ReadReferenceEntry(BtiReferenceEntry& refEntry);
-
-        // BTI summary data methods
-        void InitializeFileSummary(const int& numReferences);
-        void LoadFileSummary(void);
-        void LoadHeader(void);
-        void LoadNumBlocks(int& numBlocks);
-        void LoadNumReferences(int& numReferences);
-        void LoadReferenceSummary(BtiReferenceSummary& refSummary);
-        void SkipBlocks(const int& numBlocks);
-
-    // data members
-    private:
-        bool  m_isBigEndian;
-        BtiFileSummary m_indexFileSummary;
-        uint32_t m_blockSize;
-        int32_t m_inputVersion; // Version is serialized as int
-        Version m_outputVersion;
-
-        struct RaiiWrapper {
-            FILE* IndexStream;
-            RaiiWrapper(void);
-            ~RaiiWrapper(void);
-        };
-        RaiiWrapper Resources;
-
-    // static constants
-    private:
-        static const uint32_t DEFAULT_BLOCK_LENGTH;
-        static const std::string BTI_EXTENSION;
-        static const char* const BTI_MAGIC;
-        static const int SIZEOF_BLOCK;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMTOOLS_INDEX_FORMAT_H
diff --git a/src/api/internal/BamWriter_p.cpp b/src/api/internal/BamWriter_p.cpp

deleted file mode 100644 (file)

index 1b1a3f2..0000000
--- a/src/api/internal/BamWriter_p.cpp
+++ /dev/null
@@ -1,462 +0,0 @@
-// ***************************************************************************
-// BamWriter_p.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/BamConstants.h"
-#include "api/IBamIODevice.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamWriter_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdlib>
-#include <cstring>
-using namespace std;
-
-// ctor
-BamWriterPrivate::BamWriterPrivate(void)
-    : m_isBigEndian( BamTools::SystemIsBigEndian() )
-{ }
-
-// dtor
-BamWriterPrivate::~BamWriterPrivate(void) {
-    Close();
-}
-
-// calculates minimum bin for a BAM alignment interval [begin, end)
-uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
-    --end;
-    if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);
-    if ( (begin >> 17) == (end >> 17) ) return  585 + (begin >> 17);
-    if ( (begin >> 20) == (end >> 20) ) return   73 + (begin >> 20);
-    if ( (begin >> 23) == (end >> 23) ) return    9 + (begin >> 23);
-    if ( (begin >> 26) == (end >> 26) ) return    1 + (begin >> 26);
-    return 0;
-}
-
-// closes the alignment archive
-void BamWriterPrivate::Close(void) {
-
-    // skip if file not open
-    if ( !IsOpen() ) return;
-
-    // close output stream
-    try {
-        m_stream.Close();
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-    }
-}
-
-// creates a cigar string from the supplied alignment
-void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
-
-    // initialize
-    const size_t numCigarOperations = cigarOperations.size();
-    packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
-
-    // pack the cigar data into the string
-    unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();
-
-    // iterate over cigar operations
-    vector<CigarOp>::const_iterator coIter = cigarOperations.begin();
-    vector<CigarOp>::const_iterator coEnd  = cigarOperations.end();
-    for ( ; coIter != coEnd; ++coIter ) {
-
-        // store op in packedCigar
-        uint8_t cigarOp;
-        switch ( coIter->Type ) {
-            case (Constants::BAM_CIGAR_MATCH_CHAR)    : cigarOp = Constants::BAM_CIGAR_MATCH;    break;
-            case (Constants::BAM_CIGAR_INS_CHAR)      : cigarOp = Constants::BAM_CIGAR_INS;      break;
-            case (Constants::BAM_CIGAR_DEL_CHAR)      : cigarOp = Constants::BAM_CIGAR_DEL;      break;
-            case (Constants::BAM_CIGAR_REFSKIP_CHAR)  : cigarOp = Constants::BAM_CIGAR_REFSKIP;  break;
-            case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;
-            case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;
-            case (Constants::BAM_CIGAR_PAD_CHAR)      : cigarOp = Constants::BAM_CIGAR_PAD;      break;
-            case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
-            case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
-            default:
-                const string message = string("invalid CIGAR operation type") + coIter->Type;
-                throw BamException("BamWriter::CreatePackedCigar", message);
-        }
-
-        *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
-        pPackedCigar++;
-    }
-}
-
-// encodes the supplied query sequence into 4-bit notation
-void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
-
-    // prepare the encoded query string
-    const size_t queryLength = query.size();
-    const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
-    encodedQuery.resize(encodedQueryLength);
-    char* pEncodedQuery = (char*)encodedQuery.data();
-    const char* pQuery = (const char*)query.data();
-
-    // walk through original query sequence, encoding its bases
-    unsigned char nucleotideCode;
-    bool useHighWord = true;
-    while ( *pQuery ) {
-        switch ( *pQuery ) {
-            case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
-            case (Constants::BAM_DNA_A)     : nucleotideCode = Constants::BAM_BASECODE_A;     break;
-            case (Constants::BAM_DNA_C)     : nucleotideCode = Constants::BAM_BASECODE_C;     break;
-            case (Constants::BAM_DNA_M)     : nucleotideCode = Constants::BAM_BASECODE_M;     break;
-            case (Constants::BAM_DNA_G)     : nucleotideCode = Constants::BAM_BASECODE_G;     break;
-            case (Constants::BAM_DNA_R)     : nucleotideCode = Constants::BAM_BASECODE_R;     break;
-            case (Constants::BAM_DNA_S)     : nucleotideCode = Constants::BAM_BASECODE_S;     break;
-            case (Constants::BAM_DNA_V)     : nucleotideCode = Constants::BAM_BASECODE_V;     break;
-            case (Constants::BAM_DNA_T)     : nucleotideCode = Constants::BAM_BASECODE_T;     break;
-            case (Constants::BAM_DNA_W)     : nucleotideCode = Constants::BAM_BASECODE_W;     break;
-            case (Constants::BAM_DNA_Y)     : nucleotideCode = Constants::BAM_BASECODE_Y;     break;
-            case (Constants::BAM_DNA_H)     : nucleotideCode = Constants::BAM_BASECODE_H;     break;
-            case (Constants::BAM_DNA_K)     : nucleotideCode = Constants::BAM_BASECODE_K;     break;
-            case (Constants::BAM_DNA_D)     : nucleotideCode = Constants::BAM_BASECODE_D;     break;
-            case (Constants::BAM_DNA_B)     : nucleotideCode = Constants::BAM_BASECODE_B;     break;
-            case (Constants::BAM_DNA_N)     : nucleotideCode = Constants::BAM_BASECODE_N;     break;
-            default:
-                const string message = string("invalid base: ") + *pQuery;
-                throw BamException("BamWriter::EncodeQuerySequence", message);
-        }
-
-        // pack the nucleotide code
-        if ( useHighWord ) {
-            *pEncodedQuery = nucleotideCode << 4;
-            useHighWord = false;
-        } else {
-            *pEncodedQuery |= nucleotideCode;
-            ++pEncodedQuery;
-            useHighWord = true;
-        }
-
-        // increment the query position
-        ++pQuery;
-    }
-}
-
-// returns a description of the last error that occurred
-std::string BamWriterPrivate::GetErrorString(void) const {
-    return m_errorString;
-}
-
-// returns whether BAM file is open for writing or not
-bool BamWriterPrivate::IsOpen(void) const {
-    return m_stream.IsOpen();
-}
-
-// opens the alignment archive
-bool BamWriterPrivate::Open(const string& filename,
-                            const string& samHeaderText,
-                            const RefVector& referenceSequences)
-{
-    try {
-
-        // open the BGZF file for writing
-        m_stream.Open(filename, IBamIODevice::WriteOnly);
-
-        // write BAM file 'metadata' components
-        WriteMagicNumber();
-        WriteSamHeaderText(samHeaderText);
-        WriteReferences(referenceSequences);
-
-        // return success
-        return true;
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-}
-
-// saves the alignment to the alignment archive
-bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
-
-    try {
-
-        // if BamAlignment contains only the core data and a raw char data buffer
-        // (as a result of BamReader::GetNextAlignmentCore())
-        if ( al.SupportData.HasCoreOnly )
-            WriteCoreAlignment(al);
-
-        // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
-        // (resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code)
-        else WriteAlignment(al);
-
-        // if we get here, everything OK
-        return true;
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-}
-
-void BamWriterPrivate::SetWriteCompressed(bool ok) {
-    // modifying compression is not allowed if BAM file is open
-    if ( !IsOpen() )
-        m_stream.SetWriteCompressed(ok);
-}
-
-void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
-
-    // calculate char lengths
-    const unsigned int nameLength         = al.Name.size() + 1;
-    const unsigned int numCigarOperations = al.CigarData.size();
-    const unsigned int queryLength        = al.QueryBases.size();
-    const unsigned int tagDataLength      = al.TagData.size();
-
-    // no way to tell if alignment's bin is already defined (there is no default, invalid value)
-    // so we'll go ahead calculate its bin ID before storing
-    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
-    // create our packed cigar string
-    string packedCigar;
-    CreatePackedCigar(al.CigarData, packedCigar);
-    const unsigned int packedCigarLength = packedCigar.size();
-
-    // encode the query
-    string encodedQuery;
-    EncodeQuerySequence(al.QueryBases, encodedQuery);
-    const unsigned int encodedQueryLength = encodedQuery.size();
-
-    // write the block size
-    const unsigned int dataBlockSize = nameLength +
-                                       packedCigarLength +
-                                       encodedQueryLength +
-                                       queryLength +
-                                       tagDataLength;
-    unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-    // assign the BAM core data
-    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-    buffer[0] = al.RefID;
-    buffer[1] = al.Position;
-    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
-    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
-    buffer[4] = queryLength;
-    buffer[5] = al.MateRefID;
-    buffer[6] = al.MatePosition;
-    buffer[7] = al.InsertSize;
-
-    // swap BAM core endian-ness, if necessary
-    if ( m_isBigEndian ) {
-        for ( int i = 0; i < 8; ++i )
-            BamTools::SwapEndian_32(buffer[i]);
-    }
-
-    // write the BAM core
-    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
-    // write the query name
-    m_stream.Write(al.Name.c_str(), nameLength);
-
-    // write the packed cigar
-    if ( m_isBigEndian ) {
-        char* cigarData = new char[packedCigarLength]();
-        memcpy(cigarData, packedCigar.data(), packedCigarLength);
-        if ( m_isBigEndian ) {
-            for ( size_t i = 0; i < packedCigarLength; ++i )
-                BamTools::SwapEndian_32p(&cigarData[i]);
-        }
-        m_stream.Write(cigarData, packedCigarLength);
-        delete[] cigarData; // TODO: cleanup on Write exception thrown?
-    }
-    else
-        m_stream.Write(packedCigar.data(), packedCigarLength);
-
-    // write the encoded query sequence
-    m_stream.Write(encodedQuery.data(), encodedQueryLength);
-
-    // write the base qualities
-    char* pBaseQualities = (char*)al.Qualities.data();
-    for ( size_t i = 0; i < queryLength; ++i )
-        pBaseQualities[i] -= 33; // FASTQ conversion
-    m_stream.Write(pBaseQualities, queryLength);
-
-    // write the read group tag
-    if ( m_isBigEndian ) {
-
-        char* tagData = new char[tagDataLength]();
-        memcpy(tagData, al.TagData.data(), tagDataLength);
-
-        size_t i = 0;
-        while ( i < tagDataLength ) {
-
-            i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
-            const char type = tagData[i];     // get tag type at position i
-            ++i;
-
-            switch ( type ) {
-
-                case(Constants::BAM_TAG_TYPE_ASCII) :
-                case(Constants::BAM_TAG_TYPE_INT8)  :
-                case(Constants::BAM_TAG_TYPE_UINT8) :
-                    ++i;
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_INT16)  :
-                case(Constants::BAM_TAG_TYPE_UINT16) :
-                    BamTools::SwapEndian_16p(&tagData[i]);
-                    i += sizeof(uint16_t);
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_FLOAT)  :
-                case(Constants::BAM_TAG_TYPE_INT32)  :
-                case(Constants::BAM_TAG_TYPE_UINT32) :
-                    BamTools::SwapEndian_32p(&tagData[i]);
-                    i += sizeof(uint32_t);
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_HEX) :
-                case(Constants::BAM_TAG_TYPE_STRING) :
-                    // no endian swapping necessary for hex-string/string data
-                    while ( tagData[i] )
-                        ++i;
-                    // increment one more for null terminator
-                    ++i;
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_ARRAY) :
-
-                {
-                    // read array type
-                    const char arrayType = tagData[i];
-                    ++i;
-
-                    // swap endian-ness of number of elements in place, then retrieve for loop
-                    BamTools::SwapEndian_32p(&tagData[i]);
-                    int32_t numElements;
-                    memcpy(&numElements, &tagData[i], sizeof(uint32_t));
-                    i += sizeof(uint32_t);
-
-                    // swap endian-ness of array elements
-                    for ( int j = 0; j < numElements; ++j ) {
-                        switch (arrayType) {
-                            case (Constants::BAM_TAG_TYPE_INT8)  :
-                            case (Constants::BAM_TAG_TYPE_UINT8) :
-                                // no endian-swapping necessary
-                                ++i;
-                                break;
-                            case (Constants::BAM_TAG_TYPE_INT16)  :
-                            case (Constants::BAM_TAG_TYPE_UINT16) :
-                                BamTools::SwapEndian_16p(&tagData[i]);
-                                i += sizeof(uint16_t);
-                                break;
-                            case (Constants::BAM_TAG_TYPE_FLOAT)  :
-                            case (Constants::BAM_TAG_TYPE_INT32)  :
-                            case (Constants::BAM_TAG_TYPE_UINT32) :
-                                BamTools::SwapEndian_32p(&tagData[i]);
-                                i += sizeof(uint32_t);
-                                break;
-                            default:
-                                delete[] tagData;
-                                const string message = string("invalid binary array type: ") + arrayType;
-                                throw BamException("BamWriter::SaveAlignment", message);
-                        }
-                    }
-
-                    break;
-                }
-
-                default :
-                    delete[] tagData;
-                    const string message = string("invalid tag type: ") + type;
-                    throw BamException("BamWriter::SaveAlignment", message);
-            }
-        }
-
-        m_stream.Write(tagData, tagDataLength);
-        delete[] tagData; // TODO: cleanup on Write exception thrown?
-    }
-    else
-        m_stream.Write(al.TagData.data(), tagDataLength);
-}
-
-void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
-
-    // write the block size
-    unsigned int blockSize = al.SupportData.BlockLength;
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-    // re-calculate bin (in case BamAlignment's position has been previously modified)
-    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
-    // assign the BAM core data
-    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-    buffer[0] = al.RefID;
-    buffer[1] = al.Position;
-    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
-    buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
-    buffer[4] = al.SupportData.QuerySequenceLength;
-    buffer[5] = al.MateRefID;
-    buffer[6] = al.MatePosition;
-    buffer[7] = al.InsertSize;
-
-    // swap BAM core endian-ness, if necessary
-    if ( m_isBigEndian ) {
-        for ( int i = 0; i < 8; ++i )
-            BamTools::SwapEndian_32(buffer[i]);
-    }
-
-    // write the BAM core
-    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
-    // write the raw char data
-    m_stream.Write((char*)al.SupportData.AllCharData.data(),
-                   al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
-}
-
-void BamWriterPrivate::WriteMagicNumber(void) {
-    // write BAM file 'magic number'
-    m_stream.Write(Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH);
-}
-
-void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {
-
-    // write the number of reference sequences
-    uint32_t numReferenceSequences = referenceSequences.size();
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);
-    m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);
-
-    // foreach reference sequence
-    RefVector::const_iterator rsIter = referenceSequences.begin();
-    RefVector::const_iterator rsEnd  = referenceSequences.end();
-    for ( ; rsIter != rsEnd; ++rsIter ) {
-
-        // write the reference sequence name length
-        uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceSequenceNameLen);
-        m_stream.Write((char*)&referenceSequenceNameLen, Constants::BAM_SIZEOF_INT);
-
-        // write the reference sequence name
-        m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
-
-        // write the reference sequence length
-        int32_t referenceLength = rsIter->RefLength;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);
-        m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);
-    }
-}
-
-void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {
-
-    // write the SAM header  text length
-    uint32_t samHeaderLen = samHeaderText.size();
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(samHeaderLen);
-    m_stream.Write((char*)&samHeaderLen, Constants::BAM_SIZEOF_INT);
-
-    // write the SAM header text
-    if ( samHeaderLen > 0 )
-        m_stream.Write(samHeaderText.data(), samHeaderLen);
-}
diff --git a/src/api/internal/BamWriter_p.h b/src/api/internal/BamWriter_p.h

deleted file mode 100644 (file)

index cf10941..0000000
--- a/src/api/internal/BamWriter_p.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// ***************************************************************************
-// BamWriter_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-#ifndef BAMWRITER_P_H
-#define BAMWRITER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail.  This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/internal/BgzfStream_p.h"
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class BamAlignment;
-
-namespace Internal {
-
-class BamWriterPrivate {
-
-    // ctor & dtor
-    public:
-        BamWriterPrivate(void);
-        ~BamWriterPrivate(void);
-
-    // interface methods
-    public:
-        void Close(void);
-        std::string GetErrorString(void) const;
-        bool IsOpen(void) const;
-        bool Open(const std::string& filename,
-                  const std::string& samHeaderText,
-                  const BamTools::RefVector& referenceSequences);
-        bool SaveAlignment(const BamAlignment& al);
-        void SetWriteCompressed(bool ok);
-
-    // 'internal' methods
-    public:
-        uint32_t CalculateMinimumBin(const int begin, int end) const;
-        void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
-        void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
-        void WriteAlignment(const BamAlignment& al);
-        void WriteCoreAlignment(const BamAlignment& al);
-        void WriteMagicNumber(void);
-        void WriteReferences(const BamTools::RefVector& referenceSequences);
-        void WriteSamHeaderText(const std::string& samHeaderText);
-
-    // data members
-    private:
-        BgzfStream m_stream;
-        bool m_isBigEndian;
-        std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMWRITER_P_H
diff --git a/src/api/internal/BgzfStream_p.cpp b/src/api/internal/BgzfStream_p.cpp

deleted file mode 100644 (file)

index 5891067..0000000
--- a/src/api/internal/BgzfStream_p.cpp
+++ /dev/null
@@ -1,460 +0,0 @@
-// ***************************************************************************
-// BgzfStream_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 11 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Based on BGZF routines developed at the Broad Institute.
-// Provides the basic functionality for reading & writing BGZF files
-// Replaces the old BGZF.* files to avoid clashing with other toolkits
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/BamConstants.h"
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include "zlib.h"
-
-#include <cstring>
-#include <algorithm>
-#include <iostream>
-#include <sstream>
-using namespace std;
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BgzfStream::RaiiWrapper::RaiiWrapper(void) {
-    CompressedBlock   = new char[Constants::BGZF_MAX_BLOCK_SIZE];
-    UncompressedBlock = new char[Constants::BGZF_DEFAULT_BLOCK_SIZE];
-}
-
-BgzfStream::RaiiWrapper::~RaiiWrapper(void) {
-
-    // clean up buffers
-    delete[] CompressedBlock;
-    delete[] UncompressedBlock;
-    CompressedBlock = 0;
-    UncompressedBlock = 0;
-}
-
-// ---------------------------
-// BgzfStream implementation
-// ---------------------------
-
-// constructor
-BgzfStream::BgzfStream(void)
-  : m_blockLength(0)
-  , m_blockOffset(0)
-  , m_blockAddress(0)
-  , m_isWriteCompressed(true)
-  , m_device(0)
-{ }
-
-// destructor
-BgzfStream::~BgzfStream(void) {
-    Close();
-}
-
-// checks BGZF block header
-bool BgzfStream::CheckBlockHeader(char* header) {
-    return (header[0] == Constants::GZIP_ID1 &&
-            header[1] == Constants::GZIP_ID2 &&
-            header[2] == Z_DEFLATED &&
-            (header[3] & Constants::FLG_FEXTRA) != 0 &&
-            BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
-            header[12] == Constants::BGZF_ID1 &&
-            header[13] == Constants::BGZF_ID2 &&
-            BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
-}
-
-// closes BGZF file
-void BgzfStream::Close(void) {
-
-    // skip if no device open
-    if ( m_device == 0 ) return;
-
-    // if writing to file, flush the current BGZF block,
-    // then write an empty block (as EOF marker)
-    if ( m_device->IsOpen() && (m_device->Mode() == IBamIODevice::WriteOnly) ) {
-        FlushBlock();
-        const size_t blockLength = DeflateBlock();
-        m_device->Write(Resources.CompressedBlock, blockLength);
-    }
-
-    // close device
-    m_device->Close();
-    delete m_device;
-    m_device = 0;
-
-    // reset state
-    m_blockLength = 0;
-    m_blockOffset = 0;
-    m_blockAddress = 0;
-    m_isWriteCompressed = true;
-}
-
-// compresses the current block
-size_t BgzfStream::DeflateBlock(void) {
-
-    // initialize the gzip header
-    char* buffer = Resources.CompressedBlock;
-    memset(buffer, 0, 18);
-    buffer[0]  = Constants::GZIP_ID1;
-    buffer[1]  = Constants::GZIP_ID2;
-    buffer[2]  = Constants::CM_DEFLATE;
-    buffer[3]  = Constants::FLG_FEXTRA;
-    buffer[9]  = Constants::OS_UNKNOWN;
-    buffer[10] = Constants::BGZF_XLEN;
-    buffer[12] = Constants::BGZF_ID1;
-    buffer[13] = Constants::BGZF_ID2;
-    buffer[14] = Constants::BGZF_LEN;
-
-    // set compression level
-    const int compressionLevel = ( m_isWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );
-
-    // loop to retry for blocks that do not compress enough
-    int inputLength = m_blockOffset;
-    size_t compressedLength = 0;
-    const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
-
-    while ( true ) {
-
-        // initialize zstream values
-        z_stream zs;
-        zs.zalloc    = NULL;
-        zs.zfree     = NULL;
-        zs.next_in   = (Bytef*)Resources.UncompressedBlock;
-        zs.avail_in  = inputLength;
-        zs.next_out  = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
-        zs.avail_out = bufferSize -
-                       Constants::BGZF_BLOCK_HEADER_LENGTH -
-                       Constants::BGZF_BLOCK_FOOTER_LENGTH;
-
-        // initialize the zlib compression algorithm
-        int status = deflateInit2(&zs,
-                                  compressionLevel,
-                                  Z_DEFLATED,
-                                  Constants::GZIP_WINDOW_BITS,
-                                  Constants::Z_DEFAULT_MEM_LEVEL,
-                                  Z_DEFAULT_STRATEGY);
-        if ( status != Z_OK )
-            throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
-
-        // compress the data
-        status = deflate(&zs, Z_FINISH);
-
-        // if not at stream end
-        if ( status != Z_STREAM_END ) {
-
-            deflateEnd(&zs);
-
-            // there was not enough space available in buffer
-            // try to reduce the input length & re-start loop
-            if ( status == Z_OK ) {
-                inputLength -= 1024;
-                if ( inputLength < 0 )
-                    throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
-                continue;
-            }
-
-            throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
-        }
-
-        // finalize the compression routine
-        status = deflateEnd(&zs);
-        if ( status != Z_OK )
-            throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
-
-        // update compressedLength
-        compressedLength = zs.total_out +
-                           Constants::BGZF_BLOCK_HEADER_LENGTH +
-                           Constants::BGZF_BLOCK_FOOTER_LENGTH;
-        if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
-            throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
-
-        // quit while loop
-        break;
-    }
-
-    // store the compressed length
-    BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
-
-    // store the CRC32 checksum
-    uint32_t crc = crc32(0, NULL, 0);
-    crc = crc32(crc, (Bytef*)Resources.UncompressedBlock, inputLength);
-    BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
-    BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
-
-    // ensure that we have less than a block of data left
-    int remaining = m_blockOffset - inputLength;
-    if ( remaining > 0 ) {
-        if ( remaining > inputLength )
-            throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
-        memcpy(Resources.UncompressedBlock, Resources.UncompressedBlock + inputLength, remaining);
-    }
-
-    // update block data
-    m_blockOffset = remaining;
-
-    // return result
-    return compressedLength;
-}
-
-// flushes the data in the BGZF block
-void BgzfStream::FlushBlock(void) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::FlushBlock() - attempting to flush to null device" );
-
-    // flush all of the remaining blocks
-    while ( m_blockOffset > 0 ) {
-
-        // compress the data block
-        const size_t blockLength = DeflateBlock();
-
-        // flush the data to our output device
-        const size_t numBytesWritten = m_device->Write(Resources.CompressedBlock, blockLength);
-        if ( numBytesWritten != blockLength ) {
-            stringstream s("");
-            s << "expected to write " << blockLength
-              << " bytes during flushing, but wrote " << numBytesWritten;
-            throw BamException("BgzfStream::FlushBlock", s.str());
-        }
-
-        // update block data
-        m_blockAddress += blockLength;
-    }
-}
-
-// decompresses the current block
-size_t BgzfStream::InflateBlock(const size_t& blockLength) {
-
-    // setup zlib stream object
-    z_stream zs;
-    zs.zalloc    = NULL;
-    zs.zfree     = NULL;
-    zs.next_in   = (Bytef*)Resources.CompressedBlock + 18;
-    zs.avail_in  = blockLength - 16;
-    zs.next_out  = (Bytef*)Resources.UncompressedBlock;
-    zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
-
-    // initialize
-    int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
-    if ( status != Z_OK )
-        throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
-
-    // decompress
-    status = inflate(&zs, Z_FINISH);
-    if ( status != Z_STREAM_END ) {
-        inflateEnd(&zs);
-        throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
-    }
-
-    // finalize
-    status = inflateEnd(&zs);
-    if ( status != Z_OK ) {
-        inflateEnd(&zs);
-        throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
-    }
-
-    // return result
-    return zs.total_out;
-}
-
-bool BgzfStream::IsOpen(void) const {
-    if ( m_device == 0 )
-        return false;
-    return m_device->IsOpen();
-}
-
-void BgzfStream::Open(const string& filename, const IBamIODevice::OpenMode mode) {
-
-    // close current device if necessary
-    Close();
-    BT_ASSERT_X( (m_device == 0), "BgzfStream::Open() - unable to properly close previous IO device" );
-
-    // retrieve new IO device depending on filename
-    m_device = BamDeviceFactory::CreateDevice(filename);
-    BT_ASSERT_X( m_device, "BgzfStream::Open() - unable to create IO device from filename" );
-
-    // if device fails to open
-    if ( !m_device->Open(mode) ) {
-        const string deviceError = m_device->GetErrorString();
-        const string message = string("could not open BGZF stream: \n\t") + deviceError;
-        throw BamException("BgzfStream::Open", message);
-    }
-}
-
-// reads BGZF data into a byte buffer
-size_t BgzfStream::Read(char* data, const size_t dataLength) {
-
-    if ( dataLength == 0 )
-        return 0;
-
-    // if stream not open for reading
-    BT_ASSERT_X( m_device, "BgzfStream::Read() - trying to read from null device");
-    if ( !m_device->IsOpen() || (m_device->Mode() != IBamIODevice::ReadOnly) )
-        return 0;
-
-    // read blocks as needed until desired data length is retrieved
-    char* output = data;
-    size_t numBytesRead = 0;
-    while ( numBytesRead < dataLength ) {
-
-        // determine bytes available in current block
-        int bytesAvailable = m_blockLength - m_blockOffset;
-
-        // read (and decompress) next block if needed
-        if ( bytesAvailable <= 0 ) {
-            ReadBlock();
-            bytesAvailable = m_blockLength - m_blockOffset;
-            if ( bytesAvailable <= 0 )
-                break;
-        }
-
-        // copy data from uncompressed source buffer into data destination buffer
-        const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable );
-        memcpy(output, Resources.UncompressedBlock + m_blockOffset, copyLength);
-
-        // update counters
-        m_blockOffset += copyLength;
-        output        += copyLength;
-        numBytesRead  += copyLength;
-    }
-
-    // update block data
-    if ( m_blockOffset == m_blockLength ) {
-        m_blockAddress = m_device->Tell();
-        m_blockOffset  = 0;
-        m_blockLength  = 0;
-
-    }
-
-    // return actual number of bytes read
-    return numBytesRead;
-}
-
-// reads a BGZF block
-void BgzfStream::ReadBlock(void) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::ReadBlock() - trying to read from null IO device");
-
-    // store block's starting address
-    int64_t blockAddress = m_device->Tell();
-
-    // read block header from file
-    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
-    size_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH);
-
-    // if block header empty
-    if ( numBytesRead == 0 ) {
-        m_blockLength = 0;
-        return;
-    }
-
-    // if block header invalid size
-    if ( numBytesRead != Constants::BGZF_BLOCK_HEADER_LENGTH )
-        throw BamException("BgzfStream::ReadBlock", "invalid block header size");
-
-    // validate block header contents
-    if ( !BgzfStream::CheckBlockHeader(header) )
-        throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
-
-    // copy header contents to compressed buffer
-    const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
-    memcpy(Resources.CompressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
-
-    // read remainder of block
-    const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
-    numBytesRead = m_device->Read(&Resources.CompressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining);
-    if ( numBytesRead != remaining )
-        throw BamException("BgzfStream::ReadBlock", "could not read data from block");
-
-    // decompress block data
-    numBytesRead = InflateBlock(blockLength);
-
-    // update block data
-    if ( m_blockLength != 0 )
-        m_blockOffset = 0;
-    m_blockAddress = blockAddress;
-    m_blockLength  = numBytesRead;
-}
-
-// seek to position in BGZF file
-void BgzfStream::Seek(const int64_t& position) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::Seek() - trying to seek on null IO device");
-
-    // skip if device is not open
-    if ( !IsOpen() ) return;
-
-    // determine adjusted offset & address
-    int     blockOffset  = (position & 0xFFFF);
-    int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
-
-    // attempt seek in file
-    if ( m_device->IsRandomAccess() && m_device->Seek(blockAddress) ) {
-
-        // update block data & return success
-        m_blockLength  = 0;
-        m_blockAddress = blockAddress;
-        m_blockOffset  = blockOffset;
-    }
-    else {
-        stringstream s("");
-        s << "unable to seek to position: " << position;
-        throw BamException("BgzfStream::Seek", s.str());
-    }
-}
-
-void BgzfStream::SetWriteCompressed(bool ok) {
-    m_isWriteCompressed = ok;
-}
-
-// get file position in BGZF file
-int64_t BgzfStream::Tell(void) const {
-    if ( !IsOpen() )
-        return 0;
-    return ( (m_blockAddress << 16) | (m_blockOffset & 0xFFFF) );
-}
-
-// writes the supplied data into the BGZF buffer
-size_t BgzfStream::Write(const char* data, const size_t dataLength) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::Write() - trying to write to null IO device");
-    BT_ASSERT_X( (m_device->Mode() == IBamIODevice::WriteOnly),
-                 "BgzfStream::Write() - trying to write to non-writable IO device");
-
-    // skip if file not open for writing
-    if ( !IsOpen() )
-        return 0;
-
-    // write blocks as needed til all data is written
-    size_t numBytesWritten = 0;
-    const char* input = data;
-    const size_t blockLength = Constants::BGZF_DEFAULT_BLOCK_SIZE;
-    while ( numBytesWritten < dataLength ) {
-
-        // copy data contents to uncompressed output buffer
-        unsigned int copyLength = min(blockLength - m_blockOffset, dataLength - numBytesWritten);
-        char* buffer = Resources.UncompressedBlock;
-        memcpy(buffer + m_blockOffset, input, copyLength);
-
-        // update counter
-        m_blockOffset   += copyLength;
-        input           += copyLength;
-        numBytesWritten += copyLength;
-
-        // flush (& compress) output buffer when full
-        if ( m_blockOffset == blockLength )
-            FlushBlock();
-    }
-
-    // return actual number of bytes written
-    return numBytesWritten;
-}
diff --git a/src/api/internal/BgzfStream_p.h b/src/api/internal/BgzfStream_p.h

deleted file mode 100644 (file)

index 88d7472..0000000
--- a/src/api/internal/BgzfStream_p.h
+++ /dev/null
@@ -1,97 +0,0 @@
-// ***************************************************************************
-// BgzfStream_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Based on BGZF routines developed at the Broad Institute.
-// Provides the basic functionality for reading & writing BGZF files
-// Replaces the old BGZF.* files to avoid clashing with other toolkits
-// ***************************************************************************
-
-#ifndef BGZFSTREAM_P_H
-#define BGZFSTREAM_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/api_global.h"
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BgzfStream {
-
-    // constructor & destructor
-    public:
-        BgzfStream(void);
-        ~BgzfStream(void);
-
-    // main interface methods
-    public:
-        // closes BGZF file
-        void Close(void);
-        // returns true if BgzfStream open for IO
-        bool IsOpen(void) const;
-        // opens the BGZF file
-        void Open(const std::string& filename, const IBamIODevice::OpenMode mode);
-        // reads BGZF data into a byte buffer
-        size_t Read(char* data, const size_t dataLength);
-        // seek to position in BGZF file
-        void Seek(const int64_t& position);
-        // sets IO device (closes previous, if any, but does not attempt to open)
-        void SetIODevice(IBamIODevice* device);
-        // enable/disable compressed output
-        void SetWriteCompressed(bool ok);
-        // get file position in BGZF file
-        int64_t Tell(void) const;
-        // writes the supplied data into the BGZF buffer
-        size_t Write(const char* data, const size_t dataLength);
-
-    // internal methods
-    private:
-        // compresses the current block
-        size_t DeflateBlock(void);
-        // flushes the data in the BGZF block
-        void FlushBlock(void);
-        // de-compresses the current block
-        size_t InflateBlock(const size_t& blockLength);
-        // reads a BGZF block
-        void ReadBlock(void);
-
-    // static 'utility' methods
-    public:
-        // checks BGZF block header
-        static bool CheckBlockHeader(char* header);
-
-    // data members
-    public:
-        unsigned int m_blockLength;
-        unsigned int m_blockOffset;
-        uint64_t     m_blockAddress;
-
-        bool m_isWriteCompressed;
-        IBamIODevice* m_device;
-
-        struct RaiiWrapper {
-            RaiiWrapper(void);
-            ~RaiiWrapper(void);
-            char* UncompressedBlock;
-            char* CompressedBlock;
-        };
-        RaiiWrapper Resources;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BGZFSTREAM_P_H
diff --git a/src/api/internal/CMakeLists.txt b/src/api/internal/CMakeLists.txt

new file mode 100644 (file)

index 0000000..1e7b8dd
--- /dev/null
+++ b/src/api/internal/CMakeLists.txt
@@ -0,0 +1,25 @@
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal
+# ==========================
+
+set ( InternalDir "internal" )
+
+add_subdirectory ( bam )
+add_subdirectory ( index )
+add_subdirectory ( io )
+add_subdirectory ( sam )
+add_subdirectory ( utils )
+
+set ( InternalSources
+        ${InternalBamSources}
+        ${InternalIndexSources}
+        ${InternalIOSources}
+        ${InternalSamSources}
+        ${InternalUtilsSources}
+
+        PARENT_SCOPE # <-- leave this last
+    )
+
diff --git a/src/api/internal/ILocalIODevice_p.cpp b/src/api/internal/ILocalIODevice_p.cpp

deleted file mode 100644 (file)

index 8730a91..0000000
--- a/src/api/internal/ILocalIODevice_p.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// ILocalIODevice_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides shared behavior for files & pipes
-// ***************************************************************************
-
-#include "api/internal/ILocalIODevice_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-using namespace std;
-
-ILocalIODevice::ILocalIODevice(void)
-    : IBamIODevice()
-    , m_stream(0)
-{ }
-
-ILocalIODevice::~ILocalIODevice(void) {
-    Close();
-}
-
-void ILocalIODevice::Close(void) {
-
-    // skip if not open
-    if ( !IsOpen() )
-        return;
-
-    // flush & close FILE*
-    fflush(m_stream);
-    fclose(m_stream);
-    m_stream = 0;
-
-    // reset other device state
-    m_mode = IBamIODevice::NotOpen;
-}
-
-size_t ILocalIODevice::Read(char* data, const unsigned int numBytes) {
-    BT_ASSERT_X( m_stream, "ILocalIODevice::Read: trying to read from null stream" );
-    BT_ASSERT_X( (m_mode == IBamIODevice::ReadOnly), "ILocalIODevice::Read: device not in read-only mode");
-    return fread(data, sizeof(char), numBytes, m_stream);
-}
-
-int64_t ILocalIODevice::Tell(void) const {
-    BT_ASSERT_X( m_stream, "ILocalIODevice::Tell: trying to get file position fromnull stream" );
-    return ftell64(m_stream);
-}
-
-size_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) {
-    BT_ASSERT_X( m_stream, "ILocalIODevice::Write: tryint to write to null stream" );
-    BT_ASSERT_X( (m_mode == IBamIODevice::WriteOnly), "ILocalIODevice::Write: device not in write-only mode" );
-    return fwrite(data, sizeof(char), numBytes, m_stream);
-}
diff --git a/src/api/internal/ILocalIODevice_p.h b/src/api/internal/ILocalIODevice_p.h

deleted file mode 100644 (file)

index a71f378..0000000
--- a/src/api/internal/ILocalIODevice_p.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// ***************************************************************************
-// ILocalIODevice_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides shared behavior for files & pipes
-// ***************************************************************************
-
-#ifndef ILOCALIODEVICE_P_H
-#define ILOCALIODEVICE_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-
-namespace BamTools {
-namespace Internal {
-
-class ILocalIODevice : public IBamIODevice {
-
-    // ctor & dtor
-    public:
-        ILocalIODevice(void);
-        virtual ~ILocalIODevice(void);
-
-    // IBamIODevice implementation
-    public:
-        virtual void Close(void);
-        virtual size_t Read(char* data, const unsigned int numBytes);
-        virtual int64_t Tell(void) const;
-        virtual size_t Write(const char* data, const unsigned int numBytes);
-
-    // data members
-    protected:
-        FILE* m_stream;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // ILOCALIODEVICE_P_H
diff --git a/src/api/internal/IRemoteIODevice_p.cpp b/src/api/internal/IRemoteIODevice_p.cpp

deleted file mode 100644 (file)

index e69de29..0000000
diff --git a/src/api/internal/IRemoteIODevice_p.h b/src/api/internal/IRemoteIODevice_p.h

deleted file mode 100644 (file)

index e69de29..0000000
diff --git a/src/api/internal/SamFormatParser_p.cpp b/src/api/internal/SamFormatParser_p.cpp

deleted file mode 100644 (file)

index 195fdcd..0000000
--- a/src/api/internal/SamFormatParser_p.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-// ***************************************************************************
-// SamFormatParser.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for parsing SAM header text into SamHeader object
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/SamFormatParser_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-SamFormatParser::SamFormatParser(SamHeader& header)
-    : m_header(header)
-{ }
-
-SamFormatParser::~SamFormatParser(void) { }
-
-void SamFormatParser::Parse(const string& headerText) {
-
-    // clear header's prior contents
-    m_header.Clear();
-
-    // empty header is OK, but skip processing
-    if ( headerText.empty() )
-        return;
-
-    // other wise parse SAM lines
-    istringstream headerStream(headerText);
-    string headerLine("");
-    while ( getline(headerStream, headerLine) )
-         ParseSamLine(headerLine);
-}
-
-void SamFormatParser::ParseSamLine(const string& line) {
-
-    // skip if line is not long enough to contain true values
-    if ( line.length() < 5 ) return;
-
-    // determine token at beginning of line
-    const string firstToken = line.substr(0,3);
-    string restOfLine = line.substr(4);
-    if      ( firstToken == Constants::SAM_HD_BEGIN_TOKEN) ParseHDLine(restOfLine);
-    else if ( firstToken == Constants::SAM_SQ_BEGIN_TOKEN) ParseSQLine(restOfLine);
-    else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);
-    else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);
-    else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
-    else {
-        const string message = string("unknown token: ") + firstToken;
-        throw BamException("SamFormatParser::ParseSamLine", message);
-    }
-}
-
-void SamFormatParser::ParseHDLine(const string& line) {
-
-    // split HD lines into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set header contents
-        if      ( tokenTag == Constants::SAM_HD_VERSION_TAG    ) m_header.Version    = tokenValue;
-        else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG  ) m_header.SortOrder  = tokenValue;
-        else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
-        else {
-            const string message = string("unknown HD tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParseHDLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !m_header.HasVersion() )
-        throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
-}
-
-void SamFormatParser::ParseSQLine(const string& line) {
-
-    SamSequence seq;
-
-    // split SQ line into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set sequence contents
-        if      ( tokenTag == Constants::SAM_SQ_NAME_TAG       ) seq.Name = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG     ) seq.Length = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG   ) seq.Checksum = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG    ) seq.Species = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_URI_TAG        ) seq.URI = tokenValue;
-        else {
-            const string message = string("unknown SQ tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParseSQLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !seq.HasName() )
-        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
-    if ( !seq.HasLength() )
-        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
-
-    // store SAM sequence entry
-    m_header.Sequences.Add(seq);
-}
-
-void SamFormatParser::ParseRGLine(const string& line) {
-
-    SamReadGroup rg;
-
-    // split string into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get token tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set read group contents
-        if      ( tokenTag == Constants::SAM_RG_ID_TAG                  ) rg.ID = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG         ) rg.Description = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG           ) rg.FlowOrder = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG         ) rg.KeySequence = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG             ) rg.Library = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG        ) rg.PlatformUnit = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG      ) rg.ProductionDate = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG             ) rg.Program = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG              ) rg.Sample = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG           ) rg.SequencingCenter = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG       ) rg.SequencingTechnology = tokenValue;
-        else {
-            const string message = string("unknown RG tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParseRGLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !rg.HasID() )
-        throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
-
-    // store SAM read group entry
-    m_header.ReadGroups.Add(rg);
-}
-
-void SamFormatParser::ParsePGLine(const string& line) {
-
-    SamProgram pg;
-
-    // split string into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get token tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set program record contents
-        if      ( tokenTag == Constants::SAM_PG_ID_TAG              ) pg.ID = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_NAME_TAG            ) pg.Name = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG     ) pg.CommandLine = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_VERSION_TAG         ) pg.Version = tokenValue;
-        else {
-            const string message = string("unknown PG tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParsePGLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !pg.HasID() )
-        throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
-
-    // store SAM program entry
-    m_header.Programs.Add(pg);
-}
-
-void SamFormatParser::ParseCOLine(const string& line) {
-    // simply add line to comments list
-    m_header.Comments.push_back(line);
-}
-
-const vector<string> SamFormatParser::Split(const string& line, const char delim) {
-    vector<string> tokens;
-    stringstream lineStream(line);
-    string token;
-    while ( getline(lineStream, token, delim) )
-        tokens.push_back(token);
-    return tokens;
-}
diff --git a/src/api/internal/SamFormatParser_p.h b/src/api/internal/SamFormatParser_p.h

deleted file mode 100644 (file)

index cf6d54c..0000000
--- a/src/api/internal/SamFormatParser_p.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// ***************************************************************************
-// SamFormatParser.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for parsing SAM header text into SamHeader object
-// ***************************************************************************
-
-#ifndef SAM_FORMAT_PARSER_H
-#define SAM_FORMAT_PARSER_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class SamHeader;
-
-namespace Internal {
-
-class SamFormatParser {
-
-    // ctor & dtor
-    public:
-        SamFormatParser(BamTools::SamHeader& header);
-        ~SamFormatParser(void);
-
-    // parse text & populate header data
-    public:
-        void Parse(const std::string& headerText);
-
-    // internal methods
-    private:
-        void ParseSamLine(const std::string& line);
-        void ParseHDLine(const std::string& line);
-        void ParseSQLine(const std::string& line);
-        void ParseRGLine(const std::string& line);
-        void ParsePGLine(const std::string& line);
-        void ParseCOLine(const std::string& line);
-        const std::vector<std::string> Split(const std::string& line, const char delim);
-
-    // data members
-    private:
-        SamHeader& m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_FORMAT_PARSER_H
diff --git a/src/api/internal/SamFormatPrinter_p.cpp b/src/api/internal/SamFormatPrinter_p.cpp

deleted file mode 100644 (file)

index f9a118e..0000000
--- a/src/api/internal/SamFormatPrinter_p.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-// ***************************************************************************
-// SamFormatPrinter.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for printing formatted SAM header to string
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/SamFormatPrinter_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// ------------------------
-
-static inline
-const string FormatTag(const string& tag, const string& value) {
-    return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value);
-}
-
-// ---------------------------------
-// SamFormatPrinter implementation
-// ---------------------------------
-
-SamFormatPrinter::SamFormatPrinter(const SamHeader& header)
-    : m_header(header)
-{ }
-
-SamFormatPrinter::~SamFormatPrinter(void) { }
-
-const string SamFormatPrinter::ToString(void) const {
-
-    // clear out stream
-    stringstream out("");
-
-    // generate formatted header text
-    PrintHD(out);
-    PrintSQ(out);
-    PrintRG(out);
-    PrintPG(out);
-    PrintCO(out);
-
-    // return result
-    return out.str();
-}
-
-void SamFormatPrinter::PrintHD(std::stringstream& out) const {
-
-    // if header has @HD data
-    if ( m_header.HasVersion() ) {
-
-        // @HD VN:<Version>
-        out << Constants::SAM_HD_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version);
-
-        // SO:<SortOrder>
-        if ( m_header.HasSortOrder() )
-            out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder);
-
-        // GO:<GroupOrder>
-        if ( m_header.HasGroupOrder() )
-            out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintSQ(std::stringstream& out) const {
-
-    // iterate over sequence entries
-    SamSequenceConstIterator seqIter = m_header.Sequences.ConstBegin();
-    SamSequenceConstIterator seqEnd  = m_header.Sequences.ConstEnd();
-    for ( ; seqIter != seqEnd; ++seqIter ) {
-        const SamSequence& seq = (*seqIter);
-
-        // @SQ SN:<Name> LN:<Length>
-        out << Constants::SAM_SQ_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_SQ_NAME_TAG, seq.Name)
-            << FormatTag(Constants::SAM_SQ_LENGTH_TAG, seq.Length);
-
-        // AS:<AssemblyID>
-        if ( seq.HasAssemblyID() )
-            out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, seq.AssemblyID);
-
-        // M5:<Checksum>
-        if ( seq.HasChecksum() )
-            out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, seq.Checksum);
-
-        // SP:<Species>
-        if ( seq.HasSpecies() )
-            out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, seq.Species);
-
-        // UR:<URI>
-        if ( seq.HasURI() )
-            out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintRG(std::stringstream& out) const {
-
-    // iterate over read group entries
-    SamReadGroupConstIterator rgIter = m_header.ReadGroups.ConstBegin();
-    SamReadGroupConstIterator rgEnd  = m_header.ReadGroups.ConstEnd();
-    for ( ; rgIter != rgEnd; ++rgIter ) {
-        const SamReadGroup& rg = (*rgIter);
-
-        // @RG ID:<ID>
-        out << Constants::SAM_RG_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID);
-
-        // CN:<SequencingCenter>
-        if ( rg.HasSequencingCenter() )
-            out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter);
-
-        // DS:<Description>
-        if ( rg.HasDescription() )
-            out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, rg.Description);
-
-        // DT:<ProductionDate>
-        if ( rg.HasProductionDate() )
-            out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate);
-
-        // FO:<FlowOrder>
-        if ( rg.HasFlowOrder() )
-            out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, rg.FlowOrder);
-
-        // KS:<KeySequence>
-        if ( rg.HasKeySequence() )
-            out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, rg.KeySequence);
-
-        // LB:<Library>
-        if ( rg.HasLibrary() )
-            out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library);
-
-        // PG:<Program>
-        if ( rg.HasProgram() )
-            out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, rg.Program);
-
-        // PI:<PredictedInsertSize>
-        if ( rg.HasPredictedInsertSize() )
-            out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, rg.PredictedInsertSize);
-
-        // PL:<SequencingTechnology>
-        if ( rg.HasSequencingTechnology() )
-            out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, rg.SequencingTechnology);
-
-        // PU:<PlatformUnit>
-        if ( rg.HasPlatformUnit() )
-            out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit);
-
-        // SM:<Sample>
-        if ( rg.HasSample() )
-            out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintPG(std::stringstream& out) const {
-
-    // iterate over program record entries
-    SamProgramConstIterator pgIter = m_header.Programs.ConstBegin();
-    SamProgramConstIterator pgEnd  = m_header.Programs.ConstEnd();
-    for ( ; pgIter != pgEnd; ++pgIter ) {
-        const SamProgram& pg = (*pgIter);
-
-        // @PG ID:<ID>
-        out << Constants::SAM_PG_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_PG_ID_TAG, pg.ID);
-
-        // PN:<Name>
-        if ( pg.HasName() )
-            out << FormatTag(Constants::SAM_PG_NAME_TAG, pg.Name);
-
-        // CL:<CommandLine>
-        if ( pg.HasCommandLine() )
-            out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, pg.CommandLine);
-
-        // PP:<PreviousProgramID>
-        if ( pg.HasPreviousProgramID() )
-            out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, pg.PreviousProgramID);
-
-        // VN:<Version>
-        if ( pg.HasVersion() )
-            out << FormatTag(Constants::SAM_PG_VERSION_TAG, pg.Version);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintCO(std::stringstream& out) const {
-
-    // iterate over comments
-    vector<string>::const_iterator commentIter = m_header.Comments.begin();
-    vector<string>::const_iterator commentEnd  = m_header.Comments.end();
-    for ( ; commentIter != commentEnd; ++commentIter ) {
-
-        // @CO <Comment>
-        out << Constants::SAM_CO_BEGIN_TOKEN
-            << Constants::SAM_TAB
-            << (*commentIter)
-            << endl;
-    }
-}
diff --git a/src/api/internal/SamFormatPrinter_p.h b/src/api/internal/SamFormatPrinter_p.h

deleted file mode 100644 (file)

index ea29181..0000000
--- a/src/api/internal/SamFormatPrinter_p.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// ***************************************************************************
-// SamFormatPrinter.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for printing formatted SAM header to string
-// ***************************************************************************
-
-#ifndef SAM_FORMAT_PRINTER_H
-#define SAM_FORMAT_PRINTER_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <sstream>
-#include <string>
-
-namespace BamTools {
-
-class SamHeader;
-
-namespace Internal {
-
-class SamFormatPrinter {
-
-    // ctor & dtor
-    public:
-        SamFormatPrinter(const BamTools::SamHeader& header);
-        ~SamFormatPrinter(void);
-
-    // generates SAM-formatted string from header data
-    public:
-        const std::string ToString(void) const;
-
-    // internal methods
-    private:
-        void PrintHD(std::stringstream& out) const;
-        void PrintSQ(std::stringstream& out) const;
-        void PrintRG(std::stringstream& out) const;
-        void PrintPG(std::stringstream& out) const;
-        void PrintCO(std::stringstream& out) const;
-
-    // data members
-    private:
-        const SamHeader& m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_FORMAT_PRINTER_H
diff --git a/src/api/internal/SamHeaderValidator_p.cpp b/src/api/internal/SamHeaderValidator_p.cpp

deleted file mode 100644 (file)

index c76fff9..0000000
--- a/src/api/internal/SamHeaderValidator_p.cpp
+++ /dev/null
@@ -1,524 +0,0 @@
-// ***************************************************************************
-// SamHeaderValidator.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for validating SamHeader data
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/SamHeaderValidator_p.h"
-#include "api/internal/SamHeaderVersion_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cctype>
-#include <set>
-#include <sstream>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// -------------------------
-
-static
-bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
-
-    // can omit checking chars if lengths not equal
-    const int lhsLength = lhs.length();
-    const int rhsLength = rhs.length();
-    if ( lhsLength != rhsLength )
-        return false;
-
-    // do *basic* toupper checks on each string char's
-    for ( int i = 0; i < lhsLength; ++i ) {
-        if ( toupper( (int)lhs.at(i)) != toupper( (int)rhs.at(i)) )
-            return false;
-    }
-
-    // otherwise OK
-    return true;
-}
-
-// ------------------------------------------------------------------------
-// Allow validation rules to vary, as needed, between SAM header versions
-//
-// use SAM_VERSION_X_Y to tag important changes
-//
-// Together, they will allow for comparisons like:
-// if ( m_version < SAM_VERSION_2_0 ) {
-//     // use some older rule
-// else
-//     // use rule introduced with version 2.0
-
-static const SamHeaderVersion SAM_VERSION_1_0 = SamHeaderVersion(1,0);
-static const SamHeaderVersion SAM_VERSION_1_1 = SamHeaderVersion(1,1);
-static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);
-static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);
-static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);
-
-// TODO: This functionality is currently unused.
-//       Make validation "version-aware."
-//
-// ------------------------------------------------------------------------
-
-const string SamHeaderValidator::ERROR_PREFIX = "ERROR: ";
-const string SamHeaderValidator::WARN_PREFIX  = "WARNING: ";
-const string SamHeaderValidator::NEWLINE      = "\n";
-
-SamHeaderValidator::SamHeaderValidator(const SamHeader& header)
-    : m_header(header)
-{ }
-
-SamHeaderValidator::~SamHeaderValidator(void) { }
-
-void SamHeaderValidator::AddError(const string& message) {
-    m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::AddWarning(const string& message) {
-    m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
-
-    // skip if no error messages
-    if ( m_errorMessages.empty() )
-        return;
-
-    // print error header line
-    stream << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
-
-    // print each error message
-    vector<string>::const_iterator errorIter = m_errorMessages.begin();
-    vector<string>::const_iterator errorEnd  = m_errorMessages.end();
-    for ( ; errorIter != errorEnd; ++errorIter )
-        stream << (*errorIter);
-}
-
-void SamHeaderValidator::PrintMessages(ostream& stream) {
-    PrintErrorMessages(stream);
-    PrintWarningMessages(stream);
-}
-
-void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
-
-    // skip if no warning messages
-    if ( m_warningMessages.empty() )
-        return;
-
-    // print warning header line
-    stream << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;
-
-    // print each warning message
-    vector<string>::const_iterator warnIter = m_warningMessages.begin();
-    vector<string>::const_iterator warnEnd  = m_warningMessages.end();
-    for ( ; warnIter != warnEnd; ++warnIter )
-        stream << (*warnIter);
-}
-
-// entry point for validation
-bool SamHeaderValidator::Validate(void) {
-    bool isValid = true;
-    isValid &= ValidateMetadata();
-    isValid &= ValidateSequenceDictionary();
-    isValid &= ValidateReadGroupDictionary();
-    isValid &= ValidateProgramChain();
-    return isValid;
-}
-
-// check all SAM header 'metadata'
-bool SamHeaderValidator::ValidateMetadata(void) {
-    bool isValid = true;
-    isValid &= ValidateVersion();
-    isValid &= ValidateSortOrder();
-    isValid &= ValidateGroupOrder();
-    return isValid;
-}
-
-// check SAM header version tag
-bool SamHeaderValidator::ValidateVersion(void) {
-
-    const string& version = m_header.Version;
-
-    // warn if version not present
-    if ( version.empty() ) {
-        AddWarning("Version (VN) missing. Not required, but strongly recommended");
-        return true;
-    }
-
-    // invalid if version does not contain a period
-    const size_t periodFound = version.find(Constants::SAM_PERIOD);
-    if ( periodFound == string::npos ) {
-        AddError("Invalid version (VN) format: " + version);
-        return false;
-    }
-
-    // invalid if major version is empty or contains non-digits
-    const string majorVersion = version.substr(0, periodFound);
-    if ( majorVersion.empty() || !ContainsOnlyDigits(majorVersion) ) {
-        AddError("Invalid version (VN) format: " + version);
-        return false;
-    }
-
-    // invalid if major version is empty or contains non-digits
-    const string minorVersion = version.substr(periodFound + 1);
-    if ( minorVersion.empty() || !ContainsOnlyDigits(minorVersion) ) {
-        AddError("Invalid version (VN) format: " + version);
-        return false;
-    }
-
-    // TODO: check if version is not just syntactically OK,
-    // but is also a valid SAM version ( 1.0 .. CURRENT )
-
-    // all checked out this far, then version is OK
-    return true;
-}
-
-// assumes non-empty input string
-bool SamHeaderValidator::ContainsOnlyDigits(const string& s) {
-    const size_t nonDigitPosition = s.find_first_not_of(Constants::SAM_DIGITS);
-    return ( nonDigitPosition == string::npos ) ;
-}
-
-// validate SAM header sort order tag
-bool SamHeaderValidator::ValidateSortOrder(void) {
-
-    const string& sortOrder = m_header.SortOrder;
-
-    // warn if sort order not present
-    if ( sortOrder.empty() ) {
-        AddWarning("Sort order (SO) missing. Not required, but strongly recommended");
-        return true;
-    }
-
-    // if sort order is valid keyword
-    if ( sortOrder == Constants::SAM_HD_SORTORDER_COORDINATE ||
-         sortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME  ||
-         sortOrder == Constants::SAM_HD_SORTORDER_UNSORTED
-       )
-    {
-        return true;
-    }
-
-    // otherwise
-    AddError("Invalid sort order (SO): " + sortOrder);
-    return false;
-}
-
-// validate SAM header group order tag
-bool SamHeaderValidator::ValidateGroupOrder(void) {
-
-    const string& groupOrder = m_header.GroupOrder;
-
-    // if no group order, no problem, just return OK
-    if ( groupOrder.empty() )
-        return true;
-
-    // if group order is valid keyword
-    if ( groupOrder == Constants::SAM_HD_GROUPORDER_NONE  ||
-         groupOrder == Constants::SAM_HD_GROUPORDER_QUERY ||
-         groupOrder == Constants::SAM_HD_GROUPORDER_REFERENCE
-       )
-    {
-        return true;
-    }
-
-    // otherwise
-    AddError("Invalid group order (GO): " + groupOrder);
-    return false;
-}
-
-// validate SAM header sequence dictionary
-bool SamHeaderValidator::ValidateSequenceDictionary(void) {
-
-    bool isValid = true;
-
-    // check for unique sequence names
-    isValid &= ContainsUniqueSequenceNames();
-
-    // iterate over sequences
-    const SamSequenceDictionary& sequences = m_header.Sequences;
-    SamSequenceConstIterator seqIter = sequences.ConstBegin();
-    SamSequenceConstIterator seqEnd  = sequences.ConstEnd();
-    for ( ; seqIter != seqEnd; ++seqIter ) {
-        const SamSequence& seq = (*seqIter);
-        isValid &= ValidateSequence(seq);
-    }
-
-    // return validation state
-    return isValid;
-}
-
-// make sure all SQ names are unique
-bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
-
-    bool isValid = true;
-    set<string> sequenceNames;
-    set<string>::iterator nameIter;
-
-    // iterate over sequences
-    const SamSequenceDictionary& sequences = m_header.Sequences;
-    SamSequenceConstIterator seqIter = sequences.ConstBegin();
-    SamSequenceConstIterator seqEnd  = sequences.ConstEnd();
-    for ( ; seqIter != seqEnd; ++seqIter ) {
-        const SamSequence& seq = (*seqIter);
-
-        // lookup sequence name
-        const string& name = seq.Name;
-        nameIter = sequenceNames.find(name);
-
-        // error if found (duplicate entry)
-        if ( nameIter != sequenceNames.end() ) {
-            AddError("Sequence name (SN): " + name + " is not unique");
-            isValid = false;
-        }
-
-        // otherwise ok, store name
-        sequenceNames.insert(name);
-    }
-
-    // return validation state
-    return isValid;
-}
-
-// validate SAM header sequence entry
-bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
-    bool isValid = true;
-    isValid &= CheckNameFormat(seq.Name);
-    isValid &= CheckLengthInRange(seq.Length);
-    return isValid;
-}
-
-// check sequence name is valid format
-bool SamHeaderValidator::CheckNameFormat(const string& name) {
-
-    // invalid if name is empty
-    if ( name.empty() ) {
-        AddError("Sequence entry (@SQ) is missing SN tag");
-        return false;
-    }
-
-    // invalid if first character is a reserved char
-    const char firstChar = name.at(0);
-    if ( firstChar == Constants::SAM_EQUAL || firstChar == Constants::SAM_STAR ) {
-        AddError("Invalid sequence name (SN): " + name);
-        return false;
-    }
-    // otherwise OK
-    return true;
-}
-
-// check that sequence length is within accepted range
-bool SamHeaderValidator::CheckLengthInRange(const string& length) {
-
-    // invalid if empty
-    if ( length.empty() ) {
-        AddError("Sequence entry (@SQ) is missing LN tag");
-        return false;
-    }
-
-    // convert string length to numeric
-    stringstream lengthStream(length);
-    unsigned int sequenceLength;
-    lengthStream >> sequenceLength;
-
-    // invalid if length outside accepted range
-    if ( sequenceLength < Constants::SAM_SQ_LENGTH_MIN || sequenceLength > Constants::SAM_SQ_LENGTH_MAX ) {
-        AddError("Sequence length (LN): " + length + " out of range");
-        return false;
-    }
-
-    // otherwise OK
-    return true;
-}
-
-// validate SAM header read group dictionary
-bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
-
-    bool isValid = true;
-
-    // check for unique read group IDs & platform units
-    isValid &= ContainsUniqueIDsAndPlatformUnits();
-
-    // iterate over read groups
-    const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
-    SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
-    SamReadGroupConstIterator rgEnd  = readGroups.ConstEnd();
-    for ( ; rgIter != rgEnd; ++rgIter ) {
-        const SamReadGroup& rg = (*rgIter);
-        isValid &= ValidateReadGroup(rg);
-    }
-
-    // return validation state
-    return isValid;
-}
-
-// make sure RG IDs and platform units are unique
-bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
-
-    bool isValid = true;
-    set<string> readGroupIds;
-    set<string> platformUnits;
-    set<string>::iterator idIter;
-    set<string>::iterator puIter;
-
-    // iterate over sequences
-    const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
-    SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
-    SamReadGroupConstIterator rgEnd  = readGroups.ConstEnd();
-    for ( ; rgIter != rgEnd; ++rgIter ) {
-        const SamReadGroup& rg = (*rgIter);
-
-        // --------------------------------
-        // check for unique ID
-
-        // lookup read group ID
-        const string& id = rg.ID;
-        idIter = readGroupIds.find(id);
-
-        // error if found (duplicate entry)
-        if ( idIter != readGroupIds.end() ) {
-            AddError("Read group ID (ID): " + id + " is not unique");
-            isValid = false;
-        }
-
-        // otherwise ok, store id
-        readGroupIds.insert(id);
-
-        // --------------------------------
-        // check for unique platform unit
-
-        // lookup platform unit
-        const string& pu = rg.PlatformUnit;
-        puIter = platformUnits.find(pu);
-
-        // error if found (duplicate entry)
-        if ( puIter != platformUnits.end() ) {
-            AddError("Platform unit (PU): " + pu + " is not unique");
-            isValid = false;
-        }
-
-        // otherwise ok, store platform unit
-        platformUnits.insert(pu);
-    }
-
-    // return validation state
-    return isValid;
-}
-
-// validate SAM header read group entry
-bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
-    bool isValid = true;
-    isValid &= CheckReadGroupID(rg.ID);
-    isValid &= CheckSequencingTechnology(rg.SequencingTechnology);
-    return isValid;
-}
-
-// make sure RG ID exists
-bool SamHeaderValidator::CheckReadGroupID(const string& id) {
-
-    // invalid if empty
-    if ( id.empty() ) {
-        AddError("Read group entry (@RG) is missing ID tag");
-        return false;
-    }
-
-    // otherwise OK
-    return true;
-}
-
-// make sure RG sequencing tech is one of the accepted keywords
-bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
-
-    // if no technology provided, no problem, just return OK
-    if ( technology.empty() )
-        return true;
-
-    // if technology is valid keyword
-    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_CAPILLARY)  ||
-         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS)    ||
-         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA)   ||
-         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) ||
-         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454)      ||
-         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO)     ||
-         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID)
-       )
-    {
-        return true;
-    }
-
-    // otherwise
-    AddError("Invalid read group sequencing platform (PL): " + technology);
-    return false;
-}
-
-// validate the SAM header "program chain"
-bool SamHeaderValidator::ValidateProgramChain(void) {
-    bool isValid = true;
-    isValid &= ContainsUniqueProgramIds();
-    isValid &= ValidatePreviousProgramIds();
-    return isValid;
-}
-
-// make sure all PG IDs are unique
-bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
-
-    bool isValid = true;
-    set<string> programIds;
-    set<string>::iterator pgIdIter;
-
-    // iterate over program records
-    const SamProgramChain& programs = m_header.Programs;
-    SamProgramConstIterator pgIter = programs.ConstBegin();
-    SamProgramConstIterator pgEnd  = programs.ConstEnd();
-    for ( ; pgIter != pgEnd; ++pgIter ) {
-        const SamProgram& pg = (*pgIter);
-
-        // lookup program ID
-        const string& pgId = pg.ID;
-        pgIdIter = programIds.find(pgId);
-
-        // error if found (duplicate entry)
-        if ( pgIdIter != programIds.end() ) {
-            AddError("Program ID (ID): " + pgId + " is not unique");
-            isValid = false;
-        }
-
-        // otherwise ok, store ID
-        programIds.insert(pgId);
-    }
-
-    // return validation state
-    return isValid;
-}
-
-// make sure that any PP tags present point to existing @PG IDs
-bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
-
-    bool isValid = true;
-
-    // iterate over program records
-    const SamProgramChain& programs = m_header.Programs;
-    SamProgramConstIterator pgIter = programs.ConstBegin();
-    SamProgramConstIterator pgEnd  = programs.ConstEnd();
-    for ( ; pgIter != pgEnd; ++pgIter ) {
-        const SamProgram& pg = (*pgIter);
-
-        // ignore record for validation if PreviousProgramID is empty
-        const string& ppId = pg.PreviousProgramID;
-        if ( ppId.empty() )
-            continue;
-
-        // see if program "chain" contains an entry for ppId
-        if ( !programs.Contains(ppId) ) {
-            AddError("PreviousProgramID (PP): " + ppId + " is not a known ID");
-            isValid = false;
-        }
-    }
-
-    // return validation state
-    return isValid;
-}
diff --git a/src/api/internal/SamHeaderValidator_p.h b/src/api/internal/SamHeaderValidator_p.h

deleted file mode 100644 (file)

index 7d0c60a..0000000
--- a/src/api/internal/SamHeaderValidator_p.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// ***************************************************************************
-// SamHeaderValidator.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for validating SamHeader data
-// ***************************************************************************
-
-#ifndef SAM_HEADER_VALIDATOR_P_H
-#define SAM_HEADER_VALIDATOR_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class SamHeader;
-class SamReadGroup;
-class SamSequence;
-
-namespace Internal {
-
-class SamHeaderValidator {
-
-    // ctor & dtor
-    public:
-        SamHeaderValidator(const SamHeader& header);
-        ~SamHeaderValidator(void);
-
-    // SamHeaderValidator interface
-    public:
-
-        // prints error & warning messages
-        void PrintMessages(std::ostream& stream);
-
-        // validates SamHeader data, returns true/false accordingly
-        bool Validate(void);
-
-    // internal methods
-    private:
-
-        // validate header metadata
-        bool ValidateMetadata(void);
-        bool ValidateVersion(void);
-        bool ContainsOnlyDigits(const std::string& s);
-        bool ValidateSortOrder(void);
-        bool ValidateGroupOrder(void);
-
-        // validate sequence dictionary
-        bool ValidateSequenceDictionary(void);
-        bool ContainsUniqueSequenceNames(void);
-        bool CheckNameFormat(const std::string& name);
-        bool ValidateSequence(const SamSequence& seq);
-        bool CheckLengthInRange(const std::string& length);
-
-        // validate read group dictionary
-        bool ValidateReadGroupDictionary(void);
-        bool ContainsUniqueIDsAndPlatformUnits(void);
-        bool ValidateReadGroup(const SamReadGroup& rg);
-        bool CheckReadGroupID(const std::string& id);
-        bool CheckSequencingTechnology(const std::string& technology);
-
-        // validate program data
-        bool ValidateProgramChain(void);
-        bool ContainsUniqueProgramIds(void);
-        bool ValidatePreviousProgramIds(void);
-
-        // error reporting
-        void AddError(const std::string& message);
-        void AddWarning(const std::string& message);
-        void PrintErrorMessages(std::ostream& stream);
-        void PrintWarningMessages(std::ostream& stream);
-
-    // data members
-    private:
-
-        // SamHeader being validated
-        const SamHeader& m_header;
-
-        // error reporting helpers
-        static const std::string ERROR_PREFIX;
-        static const std::string WARN_PREFIX;
-        static const std::string NEWLINE;
-
-        // error reporting messages
-        std::vector<std::string> m_errorMessages;
-        std::vector<std::string> m_warningMessages;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_HEADER_VALIDATOR_P_H
diff --git a/src/api/internal/SamHeaderVersion_p.h b/src/api/internal/SamHeaderVersion_p.h

deleted file mode 100644 (file)

index 4f85df0..0000000
--- a/src/api/internal/SamHeaderVersion_p.h
+++ /dev/null
@@ -1,134 +0,0 @@
-// ***************************************************************************
-// SamHeaderVersion.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for comparing SAM header versions
-// *************************************************************************
-
-#ifndef SAM_HEADERVERSION_P_H
-#define SAM_HEADERVERSION_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamConstants.h"
-#include <sstream>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class SamHeaderVersion {
-
-    // ctors & dtor
-    public:
-        SamHeaderVersion(void)
-            : m_majorVersion(0)
-            , m_minorVersion(0)
-        { }
-
-        explicit SamHeaderVersion(const std::string& version)
-            : m_majorVersion(0)
-            , m_minorVersion(0)
-        {
-            SetVersion(version);
-        }
-
-        SamHeaderVersion(const unsigned int& major, const unsigned int& minor)
-            : m_majorVersion(major)
-            , m_minorVersion(minor)
-        { }
-
-        ~SamHeaderVersion(void) {
-            m_majorVersion = 0;
-            m_minorVersion = 0;
-        }
-    
-    // acess data
-    public:
-        unsigned int MajorVersion(void) const { return m_majorVersion; }
-        unsigned int MinorVersion(void) const { return m_minorVersion; }
-
-        void SetVersion(const std::string& version);
-        std::string ToString(void) const;
-
-    // data members
-    private:
-        unsigned int m_majorVersion;
-        unsigned int m_minorVersion;
-};
-
-inline
-void SamHeaderVersion::SetVersion(const std::string& version) {
-
-    // do nothing if version is empty
-    if ( !version.empty() ) {
-
-        std::stringstream versionStream("");
-
-        // do nothing if period not found
-        const size_t periodFound = version.find(Constants::SAM_PERIOD);
-        if ( periodFound != std::string::npos ) {
-
-            // store major version if non-empty and contains only digits
-            const std::string& majorVersion = version.substr(0, periodFound);
-            versionStream.str(majorVersion);
-            if ( !majorVersion.empty() ) {
-                const size_t nonDigitFound = majorVersion.find_first_not_of(Constants::SAM_DIGITS);
-                if ( nonDigitFound == std::string::npos )
-                    versionStream >> m_majorVersion;
-            }
-
-            // store minor version if non-empty and contains only digits
-            const std::string& minorVersion = version.substr(periodFound + 1);
-            versionStream.str(minorVersion);
-            if ( !minorVersion.empty() ) {
-                const size_t nonDigitFound = minorVersion.find_first_not_of(Constants::SAM_DIGITS);
-                if ( nonDigitFound == std::string::npos )
-                    versionStream >> m_minorVersion;
-            }
-        }
-    }
-}
-
-// -----------------------------------------------------
-// printing
-
-inline std::string SamHeaderVersion::ToString(void) const {
-    std::stringstream version;
-    version << m_majorVersion << Constants::SAM_PERIOD << m_minorVersion;
-    return version.str();
-}
-
-// -----------------------------------------------------
-// comparison operators
-
-inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
-    return (lhs.MajorVersion() == rhs.MajorVersion()) &&
-           (lhs.MinorVersion() == rhs.MinorVersion());
-}
-
-inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
-    if ( lhs.MajorVersion() == rhs.MajorVersion() )
-        return lhs.MinorVersion() < rhs.MinorVersion();
-    else 
-        return lhs.MajorVersion() < rhs.MajorVersion();
-}
-
-inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return rhs < lhs;  }
-inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs>rhs); }
-inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs<rhs); }
-
-} // namespace Internal 
-} // namespace BamTools
-
-#endif // SAM_HEADERVERSION_P_H
diff --git a/src/api/internal/bam/BamHeader_p.cpp b/src/api/internal/bam/BamHeader_p.cpp

new file mode 100644 (file)

index 0000000..02c0a25
--- /dev/null
+++ b/src/api/internal/bam/BamHeader_p.cpp
@@ -0,0 +1,120 @@
+// ***************************************************************************
+// BamHeader_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/BamConstants.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+// returns true if the first BAM_HEADER_MAGIC_LENGTH characters of @buffer
+// compare equal (via strncmp) to the BAM magic number
+static inline
+bool isValidMagicNumber(const char* buffer) {
+    const int comparison = strncmp(buffer,
+                                   Constants::BAM_HEADER_MAGIC,
+                                   Constants::BAM_HEADER_MAGIC_LENGTH);
+    return ( comparison == 0 );
+}
+
+// --------------------------
+// BamHeader implementation
+// --------------------------
+
+// ctor - no explicit setup, m_header is default-constructed
+BamHeader::BamHeader(void)
+{ }
+
+// dtor - no explicit cleanup is performed here
+BamHeader::~BamHeader(void)
+{ }
+
+// reads magic number from BGZF stream
+// returns nothing - throws BamException if the magic number could not be
+// read in full, or if the bytes read do not match the BAM magic number
+void BamHeader::CheckMagicNumber(BgzfStream* stream) {
+
+    // try to read magic number
+    // N.B. - expected byte count is kept as size_t, so the comparison below
+    //        never mixes signed & unsigned values
+    const size_t magicLength = static_cast<size_t>(Constants::BAM_HEADER_MAGIC_LENGTH);
+    char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
+    const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
+    if ( numBytesRead != magicLength )
+        throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
+
+    // validate magic number
+    if ( !isValidMagicNumber(buffer) )
+        throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
+}
+
+// resets the stored SamHeader by forwarding to SamHeader::Clear()
+void BamHeader::Clear(void)
+{
+    m_header.Clear();
+}
+
+// reports whether the stored SamHeader considers itself valid
+bool BamHeader::IsValid(void) const
+{
+    const bool headerIsValid = m_header.IsValid();
+    return headerIsValid;
+}
+
+// load BAM header from BGZF stream, in 3 steps:
+//   1) magic number, 2) SAM header text length, 3) SAM header text
+// any exception thrown by one of these steps propagates to the caller
+void BamHeader::Load(BgzfStream* stream) {
+
+    // step 1 - magic number
+    CheckMagicNumber(stream);
+
+    // step 2 - length of header text
+    uint32_t textLength = 0;
+    ReadHeaderLength(stream, textLength);
+
+    // step 3 - header text itself
+    ReadHeaderText(stream, textLength);
+}
+
+// reads SAM header text length from BGZF stream, stores it in @length
+// throws BamException if a full uint32_t could not be read
+void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
+
+    // fetch the raw bytes holding the length field
+    char lengthBytes[sizeof(uint32_t)];
+    const size_t bytesObtained = stream->Read(lengthBytes, sizeof(uint32_t));
+    if ( bytesObtained != sizeof(uint32_t) )
+        throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
+
+    // unpack bytes into an integer, swapping byte order on big-endian systems
+    length = BamTools::UnpackUnsignedInt(lengthBytes);
+    if ( BamTools::SystemIsBigEndian() ) {
+        BamTools::SwapEndian_32(length);
+    }
+}
+
+// reads SAM header text from BGZF stream, stores in SamHeader object
+// throws BamException if fewer than @length bytes could be read
+void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
+
+    // allocate zero-filled buffer, with room for a null terminator
+    // N.B. - the size is computed as size_t, so (length + 1) is not evaluated
+    //        in 32-bit arithmetic (where it could wrap to 0), and RaiiBuffer
+    //        releases the memory on every exit path, including exceptions
+    RaiiBuffer headerText( static_cast<size_t>(length) + 1 );
+
+    // read header text
+    const size_t bytesRead = stream->Read(headerText.Buffer, length);
+    if ( bytesRead != length )
+        throw BamException("BamHeader::ReadHeaderText", "could not read header text");
+
+    // text was read OK, store it
+    // (text is taken up to the first null character, as before)
+    m_header.SetHeaderText( string(headerText.Buffer) );
+}
+
+// hands back the stored SamHeader by value (caller receives its own copy)
+SamHeader BamHeader::ToSamHeader(void) const
+{
+    return m_header;
+}
+
+// returns the string produced by the stored SamHeader's ToString()
+string BamHeader::ToString(void) const
+{
+    const string headerText = m_header.ToString();
+    return headerText;
+}
diff --git a/src/api/internal/bam/BamHeader_p.h b/src/api/internal/bam/BamHeader_p.h

new file mode 100644 (file)

index 0000000..499ad96
--- /dev/null
+++ b/src/api/internal/bam/BamHeader_p.h
@@ -0,0 +1,69 @@
+// ***************************************************************************
+// BamHeader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#ifndef BAMHEADER_P_H
+#define BAMHEADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamHeader.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BgzfStream;
+
+// wraps a SamHeader and knows how to load it from a BGZF stream
+class BamHeader {
+
+    // ctor & dtor
+    public:
+        BamHeader();
+        ~BamHeader();
+
+    // BamHeader interface
+    public:
+
+        // clear SamHeader data
+        void Clear();
+
+        // return true if SamHeader data is valid
+        bool IsValid() const;
+
+        // load BAM header ('magic number' and SAM header text) from BGZF stream
+        // (no return value - see implementation for error reporting)
+        void Load(BgzfStream* stream);
+
+        // returns (editable) copy of SamHeader data object
+        SamHeader ToSamHeader() const;
+
+        // returns SAM-formatted string of header data
+        std::string ToString() const;
+
+    // internal methods
+    private:
+
+        // reads magic number from BGZF stream
+        void CheckMagicNumber(BgzfStream* stream);
+
+        // reads SAM header length from BGZF stream, stores it in @length
+        void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
+
+        // reads SAM header text from BGZF stream, stores in SamHeader object
+        void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
+
+    // data members
+    private:
+        SamHeader m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMHEADER_P_H
diff --git a/src/api/internal/bam/BamMultiMerger_p.h b/src/api/internal/bam/BamMultiMerger_p.h

new file mode 100644 (file)

index 0000000..3000097
--- /dev/null
+++ b/src/api/internal/bam/BamMultiMerger_p.h
@@ -0,0 +1,266 @@
+// ***************************************************************************
+// BamMultiMerger_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides merging functionality for BamMultiReader.  At this point, supports
+// sorting results by (refId, position) or by read name.
+// ***************************************************************************
+
+#ifndef BAMMULTIMERGER_P_H
+#define BAMMULTIMERGER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAlignment.h"
+#include "api/BamReader.h"
+#include "api/algorithms/Sort.h"
+#include <deque>
+#include <functional>
+#include <set>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// bundles a BamReader pointer with a BamAlignment pointer
+// N.B. - MergeItem never deletes either pointer (see empty dtor)
+struct MergeItem {
+
+    // data members
+    BamReader*    Reader;
+    BamAlignment* Alignment;
+
+    // default/value ctor - both pointers default to null
+    MergeItem(BamReader* reader = 0, BamAlignment* alignment = 0)
+        : Reader(reader), Alignment(alignment)
+    { }
+
+    // copy ctor - shallow copy, pointers are shared with @other
+    MergeItem(const MergeItem& other)
+        : Reader(other.Reader), Alignment(other.Alignment)
+    { }
+
+    // dtor - intentionally empty
+    ~MergeItem(void) { }
+};
+
+// adapts an alignment comparator (Compare) so that it can order MergeItems,
+// by comparing the BamAlignments that the two items point to
+// N.B. - both items must carry non-null Alignment pointers
+template<typename Compare>
+struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool> {
+
+    public:
+        MergeItemSorter(const Compare& comp = Compare())
+            : m_comp(comp)
+        { }
+
+        // const-qualified: ordered containers (e.g. std::multiset) invoke
+        // their comparison object through a const reference
+        bool operator()(const MergeItem& lhs, const MergeItem& rhs) const {
+            const BamAlignment& l = *lhs.Alignment;
+            const BamAlignment& r = *rhs.Alignment;
+            return m_comp(l,r);
+        }
+
+    private:
+        // mutable, so that Compare types whose operator() is not itself
+        // const-qualified can still be called from the const operator() above
+        mutable Compare m_comp;
+};
+
+// abstract interface - lets client code work polymorphically with whichever
+// concrete merger implementation is in use
+class IMultiMerger {
+
+    // ctor & dtor
+    public:
+        IMultiMerger(void) { }
+        virtual ~IMultiMerger(void) { }
+
+    // merger interface (every method is pure virtual)
+    public:
+        virtual void Add(MergeItem item) = 0;
+        virtual void Clear(void) = 0;
+        virtual const MergeItem& First(void) const = 0;
+        virtual bool IsEmpty(void) const = 0;
+        virtual void Remove(BamReader* reader) = 0;
+        virtual int Size(void) const = 0;
+        virtual MergeItem TakeFirst(void) = 0;
+};
+
+// general merger - stores MergeItems in a std::multiset that is ordered
+// by MergeItemSorter<Compare>
+template<typename Compare>
+class MultiMerger : public IMultiMerger {
+
+    // public type definitions
+    public:
+        typedef Compare                      CompareType;
+        typedef MergeItemSorter<CompareType> MergeType;
+
+    // ctor & dtor
+    public:
+        explicit MultiMerger(const Compare& comp = Compare())
+            : IMultiMerger()
+            , m_data( MergeType(comp) )
+        { }
+
+        ~MultiMerger(void) { }
+
+    // IMultiMerger implementation
+    public:
+        void Add(MergeItem item);
+        void Clear(void);
+        const MergeItem& First(void) const;
+        bool IsEmpty(void) const;
+        void Remove(BamReader* reader);
+        int Size(void) const;
+        MergeItem TakeFirst(void);
+
+    // internal type definitions
+    private:
+        typedef MergeItem                              ValueType;
+        typedef std::multiset<ValueType, MergeType>    ContainerType;
+        typedef typename ContainerType::iterator       DataIterator;
+        typedef typename ContainerType::const_iterator DataConstIterator;
+
+    // data members
+    private:
+        ContainerType m_data;
+};
+
+// inserts @item into the ordered container, first building the alignment's
+// character data if the comparator says it needs it
+template <typename Compare>
+inline void MultiMerger<Compare>::Add(MergeItem item) {
+
+    // N.B. - any future custom Compare type must provide a static
+    //        UsesCharData() method (see algorithms/Sort.h)
+    const bool comparatorUsesCharData = CompareType::UsesCharData();
+    if ( comparatorUsesCharData ) {
+        item.Alignment->BuildCharData();
+    }
+
+    m_data.insert(item);
+}
+
+// drops all stored items (pointers held by the items are not deleted here)
+template <typename Compare>
+inline void MultiMerger<Compare>::Clear(void)
+{
+    m_data.clear();
+}
+
+// returns a reference to the first item in sort order
+// N.B. - dereferences begin(), so the merger must not be empty
+template <typename Compare>
+inline const MergeItem& MultiMerger<Compare>::First(void) const {
+    return *m_data.begin();
+}
+
+// returns true if no items are stored
+template <typename Compare>
+inline bool MultiMerger<Compare>::IsEmpty(void) const
+{
+    return m_data.empty();
+}
+// removes the first stored item whose reader has the same filename as @reader
+// (does nothing if @reader is null or no stored item matches)
+template <typename Compare>
+inline void MultiMerger<Compare>::Remove(BamReader* reader) {
+
+    if ( reader == 0 ) return;
+    const std::string& filenameToRemove = reader->GetFilename();
+
+    // walk the stored items, skipping entries that have no reader
+    DataIterator dataIter = m_data.begin();
+    while ( dataIter != m_data.end() ) {
+        const BamReader* itemReader = dataIter->Reader;
+
+        // erase on first match & stop (iterator is invalid afterwards)
+        if ( itemReader != 0 && itemReader->GetFilename() == filenameToRemove ) {
+            m_data.erase(dataIter);
+            return;
+        }
+        ++dataIter;
+    }
+}
+// returns the number of stored items (container size, converted to int)
+template <typename Compare>
+inline int MultiMerger<Compare>::Size(void) const
+{
+    return m_data.size();
+}
+
+// removes the first item in sort order from the container & returns a copy
+// N.B. - dereferences begin(), so the merger must not be empty
+template <typename Compare>
+inline MergeItem MultiMerger<Compare>::TakeFirst(void) {
+
+    // copy the item out before erasing its container entry
+    DataIterator head = m_data.begin();
+    MergeItem taken(*head);
+    m_data.erase(head);
+    return taken;
+}
+
+// unsorted "merger" - specialization that keeps items in a std::deque,
+// i.e. in the order in which they were added
+template<>
+class MultiMerger<Algorithms::Sort::Unsorted> : public IMultiMerger {
+
+    // ctor & dtor
+    public:
+        // the comparator argument is accepted for interface parity with the
+        // general template, but is not used
+        explicit MultiMerger(const Algorithms::Sort::Unsorted& /*comp*/ = Algorithms::Sort::Unsorted())
+            : IMultiMerger()
+        { }
+
+        ~MultiMerger(void) { }
+
+    // IMultiMerger implementation
+    public:
+        void Add(MergeItem item);
+        void Clear(void);
+        const MergeItem& First(void) const;
+        bool IsEmpty(void) const;
+        void Remove(BamReader* reader);
+        int Size(void) const;
+        MergeItem TakeFirst(void);
+
+    // internal type definitions
+    private:
+        typedef MergeItem                     ValueType;
+        typedef std::deque<ValueType>         ContainerType;
+        typedef ContainerType::iterator       DataIterator;
+        typedef ContainerType::const_iterator DataConstIterator;
+
+    // data members
+    private:
+        ContainerType m_data;
+};
+
+// appends @item to the back of the queue
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Add(MergeItem item)
+{
+    m_data.push_back(item);
+}
+
+// drops all stored items (pointers held by the items are not deleted here)
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Clear(void)
+{
+    m_data.clear();
+}
+
+// returns a reference to the item at the front of the queue
+// N.B. - calls front(), so the merger must not be empty
+inline
+const MergeItem& MultiMerger<Algorithms::Sort::Unsorted>::First(void) const
+{
+    return m_data.front();
+}
+
+// returns true if no items are stored
+inline
+bool MultiMerger<Algorithms::Sort::Unsorted>::IsEmpty(void) const
+{
+    return m_data.empty();
+}
+
+// removes the first stored item whose reader has the same filename as @reader
+// (does nothing if @reader is null or no stored item matches)
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Remove(BamReader* reader) {
+
+    if ( reader == 0 ) return;
+    const std::string filenameToRemove = reader->GetFilename();
+
+    // walk the stored items, skipping entries that have no reader
+    DataIterator dataIter = m_data.begin();
+    while ( dataIter != m_data.end() ) {
+        const BamReader* itemReader = dataIter->Reader;
+
+        // erase on first match & stop (iterator is invalid afterwards)
+        if ( itemReader != 0 && itemReader->GetFilename() == filenameToRemove ) {
+            m_data.erase(dataIter);
+            return;
+        }
+        ++dataIter;
+    }
+}
+
+// returns the number of stored items (container size, converted to int)
+inline
+int MultiMerger<Algorithms::Sort::Unsorted>::Size(void) const
+{
+    return m_data.size();
+}
+
+// removes the item at the front of the queue & returns a copy of it
+// N.B. - calls front(), so the merger must not be empty
+inline
+MergeItem MultiMerger<Algorithms::Sort::Unsorted>::TakeFirst(void) {
+
+    // copy the item out before popping its queue entry
+    MergeItem taken( m_data.front() );
+    m_data.pop_front();
+    return taken;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIMERGER_P_H
diff --git a/src/api/internal/bam/BamMultiReader_p.cpp b/src/api/internal/bam/BamMultiReader_p.cpp

new file mode 100644 (file)

index 0000000..d3f2b15
--- /dev/null
+++ b/src/api/internal/bam/BamMultiReader_p.cpp
@@ -0,0 +1,799 @@
+// ***************************************************************************
+// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/BamMultiReader.h"
+#include "api/SamConstants.h"
+#include "api/algorithms/Sort.h"
+#include "api/internal/bam/BamMultiReader_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+using namespace std;
+
+// ctor - alignment cache pointer starts out null
+BamMultiReaderPrivate::BamMultiReaderPrivate(void)
+    : m_alignmentCache(0)
+{
+}
+
+// dtor - closes all BAM files (return value of Close() is ignored)
+BamMultiReaderPrivate::~BamMultiReaderPrivate(void)
+{
+    Close();
+}
+
+// close all BAM files
+// returns true if all files closed OK; otherwise stores an error message
+// (via SetErrorString) and returns false
+bool BamMultiReaderPrivate::Close(void) {
+
+    m_errorString.clear();
+
+    // attempt to close every currently-listed file
+    const bool allClosed = CloseFiles( Filenames() );
+    if ( !allClosed ) {
+
+        // copy per-file errors before the error string is overwritten
+        const string closeErrors = m_errorString;
+        const string message = string("error encountered while closing all files: \n\t") + closeErrors;
+        SetErrorString("BamMultiReader::Close", message);
+    }
+
+    return allClosed;
+}
+
+// close requested BAM file
+// returns true if the file closed OK; otherwise stores an error message
+// (via SetErrorString) and returns false
+bool BamMultiReaderPrivate::CloseFile(const string& filename) {
+
+    m_errorString.clear();
+
+    // delegate to CloseFiles(), using a single-entry filename list
+    const vector<string> singleFile(1, filename);
+    const bool closedOk = CloseFiles(singleFile);
+    if ( !closedOk ) {
+
+        // copy reader's error before the error string is overwritten
+        const string closeError = m_errorString;
+        const string message = string("error while closing file: ") + filename + "\n" + closeError;
+        SetErrorString("BamMultiReader::CloseFile", message);
+    }
+
+    return closedOk;
+}
+
+// close requested BAM files
+// for each non-empty filename, the first reader with a matching filename is
+// removed from the alignment cache, closed & deleted (along with its
+// alignment), and dropped from the reader list
+// returns true if every matching reader closed without error; otherwise the
+// readers' error messages are collected (tab-indented, one per line) in
+// m_errorString and false is returned
+bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over filenames
+    vector<string>::const_iterator filesIter = filenames.begin();
+    vector<string>::const_iterator filesEnd  = filenames.end();
+    for ( ; filesIter != filesEnd; ++filesIter ) {
+        const string& filename = (*filesIter);
+        if ( filename.empty() ) continue;
+
+        // iterate over readers
+        vector<MergeItem>::iterator readerIter = m_readers.begin();
+        vector<MergeItem>::iterator readerEnd  = m_readers.end();
+        for ( ; readerIter != readerEnd; ++readerIter ) {
+            MergeItem& item = (*readerIter);
+            BamReader* reader = item.Reader;
+            if ( reader == 0 ) continue;
+
+            // skip readers that don't match requested filename
+            if ( reader->GetFilename() != filename ) continue;
+
+            // remove reader's entry from alignment cache
+            // N.B. - cache pointer starts out null (see ctor), so guard
+            //        against closing a file while no cache exists
+            if ( m_alignmentCache )
+                m_alignmentCache->Remove(reader);
+
+            // close reader, saving its error message on failure
+            if ( !reader->Close() ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+
+            // clean up reader & its alignment entry
+            delete reader;
+            delete item.Alignment;
+
+            // remove reader from reader list
+            m_readers.erase(readerIter);
+
+            // on match, just go on to next filename
+            // (no need to keep looking and item iterator is invalid now anyway)
+            break;
+        }
+    }
+
+    // make sure alignment cache is cleaned up if all readers closed
+    if ( m_readers.empty() && m_alignmentCache ) {
+        m_alignmentCache->Clear();
+        delete m_alignmentCache;
+        m_alignmentCache = 0;
+    }
+
+    // return whether all readers closed OK
+    return !errorsEncountered;
+}
+
+// creates index files (of the requested @type) for BAM files that don't have them
+// returns true if no reader reported a failure; otherwise stores the collected
+// error messages (via SetErrorString) and returns false
+bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
+
+    m_errorString.clear();
+    bool anyFailed = false;
+
+    // visit each reader, skipping null entries & readers that have an index
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        BamReader* reader = m_readers[i].Reader;
+        if ( reader == 0 ) continue;
+        if ( reader->HasIndex() ) continue;
+
+        // try to build the missing index, saving error message on failure
+        if ( !reader->CreateIndex(type) ) {
+            m_errorString += '\t';
+            m_errorString += reader->GetErrorString();
+            m_errorString += '\n';
+            anyFailed = true;
+        }
+    }
+
+    // all OK
+    if ( !anyFailed )
+        return true;
+
+    // otherwise, report collected errors
+    const string failures = m_errorString;
+    const string message = string("error while creating index files: ") + "\n" + failures;
+    SetErrorString("BamMultiReader::CreateIndexes", message);
+    return false;
+}
+
+// allocates (with new) a merger matching the sort order found in the merged
+// header, and returns it to the caller
+IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
+
+    // fetch SamHeader
+    const SamHeader header = GetHeader();
+
+    IMultiMerger* merger = 0;
+
+    // BAM files sorted by position
+    if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
+        merger = new MultiMerger<Algorithms::Sort::ByPosition>();
+
+    // BAM files sorted by read name
+    else if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
+        merger = new MultiMerger<Algorithms::Sort::ByName>();
+
+    // any other sort order value, fall back to the unsorted merger
+    else
+        merger = new MultiMerger<Algorithms::Sort::Unsorted>();
+
+    return merger;
+}
+
+// returns the non-empty filenames of all (non-null) readers, in reader order
+const vector<string> BamMultiReaderPrivate::Filenames(void) const {
+
+    // result can hold at most one name per reader
+    vector<string> names;
+    names.reserve( m_readers.size() );
+
+    for ( size_t i = 0; i < m_readers.size(); ++i ) {
+
+        // skip entries without a reader
+        const BamReader* reader = m_readers[i].Reader;
+        if ( reader == 0 ) continue;
+
+        // skip readers that report an empty filename
+        const string& name = reader->GetFilename();
+        if ( name.empty() ) continue;
+
+        names.push_back(name);
+    }
+
+    return names;
+}
+
+// returns a copy of the current error string
+string BamMultiReaderPrivate::GetErrorString(void) const
+{
+    return m_errorString;
+}
+
+// returns a SamHeader constructed from the text given by GetHeaderText()
+SamHeader BamMultiReaderPrivate::GetHeader(void) const
+{
+    return SamHeader( GetHeaderText() );
+}
+
+// makes a virtual, unified header for all the bam files in the multireader
+// returns an empty string if there are no readers, or if the first reader
+// entry is null
+string BamMultiReaderPrivate::GetHeaderText(void) const {
+
+    // N.B. - right now, simply copies all header data from first BAM,
+    //        and then appends RG's from other BAM files
+    // TODO: make this more intelligent wrt other header lines/fields
+
+    // nothing to do if no readers open
+    if ( m_readers.empty() ) return string();
+
+    // start from the first reader's header
+    const BamReader* firstReader = m_readers.front().Reader;
+    if ( firstReader == 0 ) return string();
+    SamHeader mergedHeader = firstReader->GetHeader();
+
+    // visit the remaining readers (everything after the first)
+    vector<MergeItem>::const_iterator itemIter = m_readers.begin() + 1;
+    vector<MergeItem>::const_iterator itemEnd  = m_readers.end();
+    for ( ; itemIter != itemEnd; ++itemIter ) {
+        const BamReader* currentReader = itemIter->Reader;
+        if ( currentReader == 0 ) continue;
+
+        // add this reader's RG entries to the merged header
+        // N.B. - SamReadGroupDictionary handles duplicate-checking
+        const SamHeader currentHeader = currentReader->GetHeader();
+        mergedHeader.ReadGroups.Add(currentHeader.ReadGroups);
+
+        // TODO: merge anything else??
+    }
+
+    // return stringified header
+    return mergedHeader.ToString();
+}
+
+// get next alignment among all files
+// (forwards to PopNextCachedAlignment(), passing 'true' as the second argument)
+bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al)
+{
+    const bool secondArgument = true;
+    return PopNextCachedAlignment(al, secondArgument);
+}
+
+// get next alignment among all files without parsing character data from alignments
+// (forwards to PopNextCachedAlignment(), passing 'false' as the second argument)
+bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al)
+{
+    const bool secondArgument = false;
+    return PopNextCachedAlignment(al, secondArgument);
+}
+
+// ---------------------------------------------------------------------------------------
+//
+// NB: The following GetReferenceX() functions assume that we have identical
+// references for all BAM files.  We enforce this by invoking the
+// ValidateReaders() method to verify that our reference data is the same
+// across all files on Open - so we will not encounter a situation in which
+// there is a mismatch and we are still live.
+//
+// ---------------------------------------------------------------------------------------
+
+// returns the number of reference sequences, as reported by the first reader
+// (0 if there are no readers, or if the first reader entry is null)
+int BamMultiReaderPrivate::GetReferenceCount(void) const {
+
+    // empty multireader
+    if ( m_readers.empty() ) return 0;
+
+    // first entry has no reader
+    const BamReader* firstReader = m_readers.front().Reader;
+    if ( firstReader == 0 ) return 0;
+
+    return firstReader->GetReferenceCount();
+}
+
+// returns vector of reference objects, as reported by the first reader
+// (empty RefVector if there are no readers, or if the first reader entry is null)
+const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
+
+    // empty multireader
+    if ( m_readers.empty() ) return RefVector();
+
+    // first entry has no reader
+    const BamReader* firstReader = m_readers.front().Reader;
+    if ( firstReader == 0 ) return RefVector();
+
+    return firstReader->GetReferenceData();
+}
+
+// returns refID for reference name, as reported by the first reader
+// (-1 if there are no readers, or if the first reader entry is null)
+int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
+
+    // empty multireader
+    if ( m_readers.empty() ) return -1;
+
+    // first entry has no reader
+    const BamReader* firstReader = m_readers.front().Reader;
+    if ( firstReader == 0 ) return -1;
+
+    return firstReader->GetReferenceID(refName);
+}
+// ---------------------------------------------------------------------------------------
+
+// returns true if all readers have index data available
+// this is useful to indicate whether Jump() or SetRegion() are possible
+// (always false for an empty multireader; null reader entries are skipped)
+bool BamMultiReaderPrivate::HasIndexes(void) const {
+
+    // empty multireader
+    if ( m_readers.empty() ) return false;
+
+    // query every reader, remembering whether any lacks index data
+    bool allHaveIndex = true;
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        const BamReader* reader = m_readers[i].Reader;
+        if ( reader == 0 ) continue;
+
+        if ( !reader->HasIndex() )
+            allHaveIndex = false;
+    }
+
+    return allHaveIndex;
+}
+
+// returns true if multireader has at least one open reader
+// (null reader entries are skipped)
+bool BamMultiReaderPrivate::HasOpenReaders(void) {
+
+    // stop at the first reader that reports being open
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        const BamReader* reader = m_readers[i].Reader;
+        if ( reader != 0 && reader->IsOpen() )
+            return true;
+    }
+
+    // no readers open
+    return false;
+}
+
+// performs random-access jump using (refID, position) as a left-bound
+// returns the status of the alignment cache update that follows the jumps
+bool BamMultiReaderPrivate::Jump(int refID, int position) {
+
+    // NB: The result of each reader's Jump() is deliberately ignored.
+    // While it may make sense to track the readers in which we can
+    // successfully Jump, in practice a failed Jump means "no alignments
+    // here."  So we accept the failure, update the alignment cache,
+    // and continue.
+
+    // jump in each (non-null) BamReader to position of interest
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        BamReader* reader = m_readers[i].Reader;
+        if ( reader != 0 )
+            reader->Jump(refID, position);
+    }
+
+    // refresh cache from the readers' new positions
+    return UpdateAlignmentCache();
+}
+
+// locate (& load) index files for BAM readers that don't already have one loaded
+bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // if reader has no index, try to locate one
+        if ( !reader->HasIndex() ) {
+            if ( !reader->LocateIndex(preferredType) ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
+    }
+
+    // check for errors encountered before returning success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("error while locating index files: ") + "\n" + currentError;
+        SetErrorString("BamMultiReader::LocatingIndexes", message);
+        return false;
+    } else
+        return true;
+}
+
+// opens BAM files
+bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
+
+    m_errorString.clear();
+
+    // put all current readers back at beginning (refreshes alignment cache)
+    if ( !Rewind() ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to rewind existing readers: \n\t") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // iterate over filenames
+    bool errorsEncountered = false;
+    vector<string>::const_iterator filenameIter = filenames.begin();
+    vector<string>::const_iterator filenameEnd  = filenames.end();
+    for ( ; filenameIter != filenameEnd; ++filenameIter ) {
+        const string& filename = (*filenameIter);
+        if ( filename.empty() ) continue;
+
+        // attempt to open BamReader
+        BamReader* reader = new BamReader;
+        const bool readerOpened = reader->Open(filename);
+
+        // if opened OK, store it
+        if ( readerOpened )
+            m_readers.push_back( MergeItem(reader, new BamAlignment) );
+
+        // otherwise store error & clean up invalid reader
+        else {
+            m_errorString.append(1, '\t');
+            m_errorString += string("unable to open file: ") + filename;
+            m_errorString.append(1, '\n');
+            errorsEncountered = true;
+
+            delete reader;
+            reader = 0;
+        }
+    }
+
+    // check for errors while opening
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to open all files: \t\n") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // check for BAM file consistency
+    if ( !ValidateReaders() ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to open inconsistent files: \t\n") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // update alignment cache
+    return UpdateAlignmentCache();
+}
+
+bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
+    vector<string> filenames(1, filename);
+    if ( Open(filenames) )
+        return true;
+    else {
+        const string currentError = m_errorString;
+        const string message = string("could not open file: ") + filename + "\n\t" + currentError;
+        SetErrorString("BamMultiReader::OpenFile", message);
+        return false;
+    }
+}
+
+bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
+
+    // TODO: This needs to be cleaner - should not assume same order.
+    //       And either way, shouldn't start at first reader.  Should start at
+    //       first reader without an index?
+
+    // make sure same number of index filenames as readers
+    if ( m_readers.size() != indexFilenames.size() ) {
+        const string message("size of index file list does not match current BAM file count");
+        SetErrorString("BamMultiReader::OpenIndexes", message);
+        return false;
+    }
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over BamReaders
+    vector<string>::const_iterator indexFilenameIter = indexFilenames.begin();
+    vector<string>::const_iterator indexFilenameEnd  = indexFilenames.end();
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+
+        // open index filename on reader
+        if ( reader ) {
+            const string& indexFilename = (*indexFilenameIter);
+            if ( !reader->OpenIndex(indexFilename) ) {
+                m_errorString.append(1, '\t');
+                m_errorString += reader->GetErrorString();
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
+
+        // increment filename iterator, skip if no more index files to open
+        if ( ++indexFilenameIter == indexFilenameEnd )
+            break;
+    }
+
+    // return success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("could not open all index files: \n\t") + currentError;
+        SetErrorString("BamMultiReader::OpenIndexes", message);
+        return false;
+    } else
+        return true;
+}
+
+bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
+
+    // skip if no alignments available
+    if ( m_alignmentCache == 0 || m_alignmentCache->IsEmpty() )
+        return false;
+
+    // pop next merge item entry from cache
+    MergeItem item = m_alignmentCache->TakeFirst();
+    BamReader* reader = item.Reader;
+    BamAlignment* alignment = item.Alignment;
+    if ( reader == 0 || alignment == 0 )
+        return false;
+
+    // set char data if requested
+    if ( needCharData ) {
+        alignment->BuildCharData();
+        alignment->Filename = reader->GetFilename();
+    }
+
+    // store cached alignment into destination parameter (by copy)
+    al = *alignment;
+
+    // load next alignment from reader & store in cache
+    SaveNextAlignment(reader, alignment);
+    return true;
+}
+
+// returns BAM file pointers to beginning of alignment data & resets alignment cache
+bool BamMultiReaderPrivate::Rewind(void) {
+
+    // skip if no readers open
+    if ( m_readers.empty() )
+        return true;
+
+    // attempt to rewind files
+    if ( !RewindReaders() ) {
+        const string currentError = m_errorString;
+        const string message = string("could not rewind readers: \n\t") + currentError;
+        SetErrorString("BamMultiReader::Rewind", message);
+        return false;
+    }
+
+    // return status of cache update
+    return UpdateAlignmentCache();
+}
+
+// returns BAM file pointers to beginning of alignment data
+bool BamMultiReaderPrivate::RewindReaders(void) {
+
+    m_errorString.clear();
+    bool errorsEncountered = false;
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // attempt rewind on BamReader
+        if ( !reader->Rewind() ) {
+            m_errorString.append(1, '\t');
+            m_errorString.append( reader->GetErrorString() );
+            m_errorString.append(1, '\n');
+            errorsEncountered = true;
+        }
+    }
+
+    return !errorsEncountered;
+}
+
+// reads the next 'core' alignment from reader into alignment and, if the read
+// succeeded, adds the (reader, alignment) pair to the alignment cache
+void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
+
+    // N.B. - only core data is read here; the alignment's char data is built lazily:
+    //        automatically by alignment cache to maintain its sorting OR
+    //        on demand from a later client call to GetNextAlignment()
+
+    const bool gotAlignment = reader->GetNextAlignmentCore(*alignment);
+    if ( !gotAlignment )
+        return;
+
+    m_alignmentCache->Add( MergeItem(reader, alignment) );
+}
+
+void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
+    static const string SEPARATOR = ": ";
+    m_errorString = where + SEPARATOR + what;
+}
+
+bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
+
+    // NB: While it may make sense to track readers in which we can
+    // successfully SetRegion, In practice a failure of SetRegion means "no
+    // alignments here."  It makes sense to simply accept the failure,
+    // UpdateAlignments(), and continue.
+
+    // iterate over alignments
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // set region of interest
+        reader->SetRegion(region);
+    }
+
+    // return status of cache update
+    return UpdateAlignmentCache();
+}
+
+// updates our alignment cache
+bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
+
+    // create alignment cache if not created yet
+    if ( m_alignmentCache == 0 ) {
+        m_alignmentCache = CreateAlignmentCache();
+        if ( m_alignmentCache == 0 ) {
+            SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
+            return false;
+        }
+    }
+
+    // clear any prior cache data
+    m_alignmentCache->Clear();
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        BamAlignment* alignment = item.Alignment;
+        if ( reader == 0 || alignment == 0 ) continue;
+
+        // save next alignment from each reader in cache
+        SaveNextAlignment(reader, alignment);
+    }
+
+    // if we get here, ok
+    return true;
+}
+
+// ValidateReaders checks that all the readers point to BAM files representing
+// alignments against the same set of reference sequences, and that the
+// sequences are identically ordered.  If these checks fail the operation of
+// the multireader is undefined, so we force program exit.
+bool BamMultiReaderPrivate::ValidateReaders(void) const {
+
+    m_errorString.clear();
+
+    // skip if 0 or 1 readers opened
+    if ( m_readers.empty() || (m_readers.size() == 1) )
+        return true;
+
+    // retrieve first reader
+    const MergeItem& firstItem = m_readers.front();
+    const BamReader* firstReader = firstItem.Reader;
+    if ( firstReader == 0 ) return false;
+
+    // retrieve first reader's header data
+    const SamHeader& firstReaderHeader = firstReader->GetHeader();
+    const string& firstReaderSortOrder = firstReaderHeader.SortOrder;
+
+    // retrieve first reader's reference data
+    const RefVector& firstReaderRefData = firstReader->GetReferenceData();
+    const int firstReaderRefCount = firstReader->GetReferenceCount();
+    const int firstReaderRefSize = firstReaderRefData.size();
+
+    // iterate over all readers
+    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
+    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        const MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // get current reader's header data
+        const SamHeader& currentReaderHeader = reader->GetHeader();
+        const string& currentReaderSortOrder = currentReaderHeader.SortOrder;
+
+        // check compatible sort order
+        if ( currentReaderSortOrder != firstReaderSortOrder ) {
+            const string message = string("mismatched sort order in ") + reader->GetFilename() +
+                                   ", expected " + firstReaderSortOrder +
+                                   ", but found " + currentReaderSortOrder;
+            SetErrorString("BamMultiReader::ValidateReaders", message);
+            return false;
+        }
+
+        // get current reader's reference data
+        const RefVector currentReaderRefData = reader->GetReferenceData();
+        const int currentReaderRefCount = reader->GetReferenceCount();
+        const int currentReaderRefSize  = currentReaderRefData.size();
+
+        // init reference data iterators
+        RefVector::const_iterator firstRefIter   = firstReaderRefData.begin();
+        RefVector::const_iterator firstRefEnd    = firstReaderRefData.end();
+        RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
+
+        // compare reference counts from BamReader ( & container size, in case of BR error)
+        if ( (currentReaderRefCount != firstReaderRefCount) ||
+             (firstReaderRefSize    != currentReaderRefSize) )
+        {
+            stringstream s("");
+            s << "mismatched reference count in " << reader->GetFilename()
+              << ", expected " << firstReaderRefCount
+              << ", but found " << currentReaderRefCount;
+            SetErrorString("BamMultiReader::ValidateReaders", s.str());
+            return false;
+        }
+
+        // this will be ok; we just checked above that we have identically-sized sets of references
+        // here we simply check if they are all, in fact, equal in content
+        while ( firstRefIter != firstRefEnd ) {
+            const RefData& firstRef   = (*firstRefIter);
+            const RefData& currentRef = (*currentRefIter);
+
+            // compare reference name & length
+            if ( (firstRef.RefName   != currentRef.RefName) ||
+                 (firstRef.RefLength != currentRef.RefLength) )
+            {
+                stringstream s("");
+                s << "mismatched references found in" << reader->GetFilename()
+                  << "expected: " << endl;
+
+                // print first reader's reference data
+                RefVector::const_iterator refIter = firstReaderRefData.begin();
+                RefVector::const_iterator refEnd  = firstReaderRefData.end();
+                for ( ; refIter != refEnd; ++refIter ) {
+                    const RefData& entry = (*refIter);
+                    stringstream s("");
+                    s << entry.RefName << " " << endl;
+                }
+
+                s << "but found: " << endl;
+
+                // print current reader's reference data
+                refIter = currentReaderRefData.begin();
+                refEnd  = currentReaderRefData.end();
+                for ( ; refIter != refEnd; ++refIter ) {
+                    const RefData& entry = (*refIter);
+                    s << entry.RefName << " " << entry.RefLength << endl;
+                }
+
+                SetErrorString("BamMultiReader::ValidateReaders", s.str());
+                return false;
+            }
+
+            // update iterators
+            ++firstRefIter;
+            ++currentRefIter;
+        }
+    }
+
+    // if we get here, everything checks out
+    return true;
+}
diff --git a/src/api/internal/bam/BamMultiReader_p.h b/src/api/internal/bam/BamMultiReader_p.h

new file mode 100644 (file)

index 0000000..9d7c39a
--- /dev/null
+++ b/src/api/internal/bam/BamMultiReader_p.h
@@ -0,0 +1,99 @@
+// ***************************************************************************
+// BamMultiReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#ifndef BAMMULTIREADER_P_H
+#define BAMMULTIREADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamHeader.h"
+#include "api/BamMultiReader.h"
+#include "api/internal/bam/BamMultiMerger_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// Private implementation class behind BamMultiReader.
+//
+// Holds one MergeItem per open BAM file (m_readers) plus an alignment cache
+// (m_alignmentCache) that is refilled from those readers.  Most operations
+// return bool for success/failure and leave a description of the failure in
+// m_errorString.
+class BamMultiReaderPrivate {
+
+    // typedefs
+    public:
+        typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
+
+    // constructor / destructor
+    public:
+        BamMultiReaderPrivate(void);
+        ~BamMultiReaderPrivate(void);
+
+    // public interface
+    public:
+
+        // file operations
+        bool Close(void);
+        bool CloseFile(const std::string& filename);
+        const std::vector<std::string> Filenames(void) const;
+        bool Jump(int refID, int position = 0);                 // jumps in every reader, then refreshes the cache
+        bool Open(const std::vector<std::string>& filenames);   // adds a reader per file; validates the resulting set
+        bool OpenFile(const std::string& filename);             // single-file wrapper around Open()
+        bool Rewind(void);                                      // rewinds every reader, then refreshes the cache
+        bool SetRegion(const BamRegion& region);                // sets region on every reader, then refreshes the cache
+
+        // access alignment data
+        bool GetNextAlignment(BamAlignment& al);
+        bool GetNextAlignmentCore(BamAlignment& al);
+        bool HasOpenReaders(void);                              // true if any reader reports IsOpen()
+
+        // access auxiliary data
+        SamHeader GetHeader(void) const;
+        std::string GetHeaderText(void) const;
+        int GetReferenceCount(void) const;
+        const BamTools::RefVector GetReferenceData(void) const;
+        int GetReferenceID(const std::string& refName) const;
+
+        // BAM index operations
+        bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);
+        bool HasIndexes(void) const;                            // false if no readers; else true only if all have an index
+        bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
+        bool OpenIndexes(const std::vector<std::string>& indexFilenames);  // filenames are matched to readers by position
+
+        // error handling
+        std::string GetErrorString(void) const;
+
+    // 'internal' methods
+    // (declared public, but intended as implementation helpers)
+    public:
+
+        bool CloseFiles(const std::vector<std::string>& filenames);
+        IMultiMerger* CreateAlignmentCache(void) const;
+        bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
+        bool RewindReaders(void);
+        void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
+        void SetErrorString(const std::string& where, const std::string& what) const; // const: only touches the mutable m_errorString
+        bool UpdateAlignmentCache(void);
+        bool ValidateReaders(void) const;
+
+    // data members
+    public:
+        std::vector<MergeItem> m_readers;       // one entry (reader + alignment) per opened BAM file
+        IMultiMerger* m_alignmentCache;         // created on demand by UpdateAlignmentCache()
+        mutable std::string m_errorString;      // mutable so that const methods can report errors
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIREADER_P_H
diff --git a/src/api/internal/bam/BamRandomAccessController_p.cpp b/src/api/internal/bam/BamRandomAccessController_p.cpp

new file mode 100644 (file)

index 0000000..848fafd
--- /dev/null
+++ b/src/api/internal/bam/BamRandomAccessController_p.cpp
@@ -0,0 +1,289 @@
+// ***************************************************************************
+// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Manages random access operations in a BAM file
+// **************************************************************************
+
+#include "api/BamIndex.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamIndexFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cassert>
+#include <sstream>
+using namespace std;
+
+// starts out with no index loaded; the 'has alignments in region' flag
+// defaults to true (the same value ClearRegion() restores)
+BamRandomAccessController::BamRandomAccessController(void)
+    : m_index(0)
+    , m_hasAlignmentsInRegion(true)
+{ }
+
+// destructor delegates to Close(), which releases the index (if any) & clears the region
+BamRandomAccessController::~BamRandomAccessController(void) {
+    Close();
+}
+
+void BamRandomAccessController::AdjustRegion(const int& referenceCount) {
+
+    // skip if no index available
+    if ( m_index == 0 )
+        return;
+
+    // see if any references in region have alignments
+    m_hasAlignmentsInRegion = false;
+    int currentId = m_region.LeftRefID;
+    const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? m_region.RightRefID : referenceCount - 1 );
+    while ( currentId <= rightBoundRefId ) {
+        m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);
+        if ( m_hasAlignmentsInRegion ) break;
+        ++currentId;
+    }
+
+    // if no data found on any reference in region
+    if ( !m_hasAlignmentsInRegion )
+        return;
+
+    // if left bound of desired region had no data, use first reference that had data
+    // otherwise, leave requested region as-is
+    if ( currentId != m_region.LeftRefID ) {
+        m_region.LeftRefID = currentId;
+        m_region.LeftPosition = 0;
+    }
+}
+
+// returns alignments' "RegionState": { Before|Overlaps|After } current region
+BamRandomAccessController::RegionState
+BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {
+
+    // if region has no left bound at all
+    if ( !m_region.isLeftBoundSpecified() )
+        return OverlapsRegion;
+
+    // handle unmapped reads - return AFTER region to halt processing
+    if ( alignment.RefID == -1 )
+        return AfterRegion;
+
+    // if alignment is on any reference before left bound reference
+    if ( alignment.RefID < m_region.LeftRefID )
+        return BeforeRegion;
+
+    // if alignment is on left bound reference
+    else if ( alignment.RefID == m_region.LeftRefID ) {
+
+        // if alignment starts at or after left bound position
+        if ( alignment.Position >= m_region.LeftPosition) {
+
+            if ( m_region.isRightBoundSpecified() &&             // right bound is specified AND
+                 m_region.LeftRefID == m_region.RightRefID &&    // left & right bounds on same reference AND
+                 alignment.Position >= m_region.RightPosition )  // alignment starts on or after right bound position
+                return AfterRegion;
+
+            // otherwise, alignment overlaps region
+            else return OverlapsRegion;
+        }
+
+        // alignment starts before left bound position
+        else {
+
+            // if alignment overlaps left bound position
+            if ( alignment.GetEndPosition() > m_region.LeftPosition )
+                return OverlapsRegion;
+            else
+                return BeforeRegion;
+        }
+    }
+
+    // otherwise alignment is on a reference after left bound reference
+    else {
+
+        // if region has a right bound
+        if ( m_region.isRightBoundSpecified() ) {
+
+            // alignment is on any reference between boundaries
+            if ( alignment.RefID < m_region.RightRefID )
+                return OverlapsRegion;
+
+            // alignment is on any reference after right boundary
+            else if ( alignment.RefID > m_region.RightRefID )
+                return AfterRegion;
+
+            // alignment is on right bound reference
+            else {
+
+                // if alignment starts before right bound position
+                if ( alignment.Position < m_region.RightPosition )
+                    return OverlapsRegion;
+                else
+                    return AfterRegion;
+            }
+        }
+
+        // otherwise, alignment starts after left bound and there is no right bound given
+        else return OverlapsRegion;
+    }
+}
+
+// releases the current index (if any) and resets region state
+void BamRandomAccessController::Close(void) {
+    ClearIndex();
+    ClearRegion();
+}
+
+// destroys the current index object (if any) and leaves the controller without an index
+void BamRandomAccessController::ClearIndex(void) {
+
+    // deleting a null pointer is a no-op, so no explicit check is needed
+    delete m_index;
+    m_index = 0;
+}
+
+// clears the stored region & restores the 'has alignments' flag to its default (true)
+void BamRandomAccessController::ClearRegion(void) {
+    m_region.clear();
+    m_hasAlignmentsInRegion = true;
+}
+
+bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
+                                            const BamIndex::IndexType& type)
+{
+    // skip if reader is invalid
+    assert(reader);
+    if ( !reader->IsOpen() ) {
+        SetErrorString("BamRandomAccessController::CreateIndex",
+                       "cannot create index for unopened reader");
+        return false;
+    }
+
+    // create new index of requested type
+    BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
+    if ( newIndex == 0 ) {
+        stringstream s("");
+        s << "could not create index of type: " << type;
+        SetErrorString("BamRandomAccessController::CreateIndex", s.str());
+        return false;
+    }
+
+    // attempt to build index from current BamReader file
+    if ( !newIndex->Create() ) {
+        const string indexError = newIndex->GetErrorString();
+        const string message = "could not create index: \n\t" + indexError;
+        SetErrorString("BamRandomAccessController::CreateIndex", message);
+        return false;
+    }
+
+    // save new index & return success
+    SetIndex(newIndex);
+    return true;
+}
+
+// returns a copy of the most recently stored error message
+string BamRandomAccessController::GetErrorString(void) const {
+    return m_errorString;
+}
+
+// returns true if an index object is currently set
+bool BamRandomAccessController::HasIndex(void) const {
+    return ( m_index != 0 );
+}
+
+// returns true if the stored region is not null
+bool BamRandomAccessController::HasRegion(void) const  {
+    return ( !m_region.isNull() );
+}
+
+// asks the current index whether the given reference has any alignments
+// returns false if no index is available (instead of dereferencing a null index)
+bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {
+
+    // skip if no index available
+    if ( m_index == 0 )
+        return false;
+
+    return m_index->HasAlignments(refId);
+}
+
+bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,
+                                            const BamIndex::IndexType& preferredType)
+{
+    // look up index filename, deferring to preferredType if possible
+    assert(reader);
+    const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
+
+    // if no index file found (of any type)
+    if ( indexFilename.empty() ) {
+        const string message = string("could not find index file for:") + reader->Filename();
+        SetErrorString("BamRandomAccessController::LocateIndex", message);
+        return false;
+    }
+
+    // otherwise open & use index file that was found
+    return OpenIndex(indexFilename, reader);
+}
+
+bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {
+
+    // attempt create new index of type based on filename
+    BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
+    if ( index == 0 ) {
+        const string message = string("could not open index file: ") + indexFilename;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+
+    // attempt to load data from index file
+    if ( !index->Load(indexFilename) ) {
+        const string indexError = index->GetErrorString();
+        const string message = string("could not load index data from file: ") + indexFilename +
+                               "\n\t" + indexError;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+
+    // save new index & return success
+    SetIndex(index);
+    return true;
+}
+
+// returns the 'has alignments in region' flag
+// (set by AdjustRegion(), may be modified by the index during SetRegion(), reset to true by ClearRegion())
+bool BamRandomAccessController::RegionHasAlignments(void) const {
+    return m_hasAlignmentsInRegion;
+}
+
+// stores an error message of the form "<where>: <what>"
+void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
+    m_errorString = where + ": " + what;
+}
+
+// replaces the current index with the given one
+// any previously held index is destroyed first; the new pointer is later deleted by ClearIndex()
+void BamRandomAccessController::SetIndex(BamIndex* index) {
+
+    // ClearIndex() is a no-op when no index is set
+    ClearIndex();
+    m_index = index;
+}
+
+bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
+
+    // store region
+    m_region = region;
+
+    // cannot jump when no index is available
+    if ( !HasIndex() ) {
+        SetErrorString("BamRandomAccessController", "cannot jump if no index data available");
+        return false;
+    }
+
+    // adjust region as necessary to reflect where data actually begins
+    AdjustRegion(referenceCount);
+
+    // if no data present, return true
+    //   * Not an error, but future attempts to access alignments in this region will not return data
+    //     Returning true is useful in a BamMultiReader setting where some BAM files may
+    //     lack alignments in regions where other files still have data available.
+    if ( !m_hasAlignmentsInRegion )
+        return true;
+
+    // return success/failure of jump to specified region,
+    //
+    //  * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag
+    //    This covers 'corner case' where a region is requested that lies beyond the last
+    //    alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
+    //    will not return data. BamMultiReader will still be able to successfully pull alignments
+    //    from a region from other files even if this one has no data.
+    if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
+        const string indexError = m_index->GetErrorString();
+        const string message = string("could not set region\n\t") + indexError;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+    else
+        return true;
+}
diff --git a/src/api/internal/bam/BamRandomAccessController_p.h b/src/api/internal/bam/BamRandomAccessController_p.h

new file mode 100644 (file)

index 0000000..9262a61
--- /dev/null
+++ b/src/api/internal/bam/BamRandomAccessController_p.h
@@ -0,0 +1,94 @@
+// ***************************************************************************
+// BamRandomAccessController_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Manages random access operations in a BAM file
+// ***************************************************************************
+
+#ifndef BAMRACONTROLLER_P_H
+#define BAMRACONTROLLER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+class BamReaderPrivate;
+
+// Manages random access operations in a BAM file: holds the (optional) index,
+// the currently requested region, and the last error message.
+class BamRandomAccessController {
+
+    // enums
+    // position of an alignment relative to the current region
+    public: enum RegionState { BeforeRegion = 0
+                             , OverlapsRegion
+                             , AfterRegion
+                             };
+
+    // ctor & dtor
+    public:
+        BamRandomAccessController(void);
+        ~BamRandomAccessController(void);
+
+    // BamRandomAccessController interface
+    public:
+
+        // index methods
+        void ClearIndex(void);
+        bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);
+        bool HasIndex(void) const;
+        bool IndexHasAlignmentsForReference(const int& refId);
+        bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType);
+        bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader);
+        // takes over 'index' (see m_index below)
+        void SetIndex(BamIndex* index);
+
+        // region methods
+        void ClearRegion(void);
+        bool HasRegion(void) const;
+        RegionState AlignmentState(const BamAlignment& alignment) const;
+        bool RegionHasAlignments(void) const;
+        bool SetRegion(const BamRegion& region, const int& referenceCount);
+
+        // general methods
+        void Close(void);
+        std::string GetErrorString(void) const;
+
+    // internal methods
+    private:
+        // adjusts requested region if necessary (depending on where data actually begins)
+        void AdjustRegion(const int& referenceCount);
+        // error-string handling
+        void SetErrorString(const std::string& where, const std::string& what);
+
+    // data members
+    private:
+
+        // index data
+        BamIndex* m_index;  // owns the index, not a copy - responsible for deleting
+
+        // region data
+        BamRegion m_region;
+        bool m_hasAlignmentsInRegion;  // may be modified by the index during a jump
+
+        // general data
+        std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMRACONTROLLER_P_H
diff --git a/src/api/internal/bam/BamReader_p.cpp b/src/api/internal/bam/BamReader_p.cpp

new file mode 100644 (file)

index 0000000..6904da7
--- /dev/null
+++ b/src/api/internal/bam/BamReader_p.cpp
@@ -0,0 +1,469 @@
+// ***************************************************************************
+// BamReader_p.cpp (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 14 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#include "api/BamConstants.h"
+#include "api/BamReader.h"
+#include "api/IBamIODevice.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <iterator>
+#include <vector>
+using namespace std;
+
+// constructor
+//   records the parent BamReader and caches the host system's endianness
+BamReaderPrivate::BamReaderPrivate(BamReader* parent)
+    : m_alignmentsBeginOffset(0)
+    , m_isBigEndian( BamTools::SystemIsBigEndian() )
+    , m_parent(parent)
+{ }
+
+// destructor
+//   closes the reader; the success/failure result of Close() is ignored here
+BamReaderPrivate::~BamReaderPrivate(void) {
+    Close();
+}
+
+// closes the BAM file
+bool BamReaderPrivate::Close(void) {
+
+    // clear BAM metadata
+    m_references.clear();
+    m_header.Clear();
+
+    // clear filename
+    m_filename.clear();
+
+    // close random access controller
+    m_randomAccessController.Close();
+
+    // if stream is open, attempt close
+    if ( IsOpen() ) {
+        try {
+            m_stream.Close();
+        } catch ( BamException& e ) {
+            const string streamError = e.what();
+            const string message = string("encountered error closing BAM file: \n\t") + streamError;
+            SetErrorString("BamReader::Close", message);
+            return false;
+        }
+    }
+
+    // return success
+    return true;
+}
+
+// creates an index file of requested type on current BAM file
+bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
+
+    // skip if BAM file not open
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
+        return false;
+    }
+
+    // attempt to create index
+    if ( m_randomAccessController.CreateIndex(this, type) )
+        return true;
+    else {
+        const string bracError = m_randomAccessController.GetErrorString();
+        const string message = string("could not create index: \n\t") + bracError;
+        SetErrorString("BamReader::CreateIndex", message);
+        return false;
+    }
+}
+
+// return path & filename of current BAM file
+// (returns a copy of m_filename, which Open() sets on success and Close() clears)
+const string BamReaderPrivate::Filename(void) const {
+    return m_filename;
+}
+
+// returns the message stored by the most recent SetErrorString() call
+string BamReaderPrivate::GetErrorString(void) const {
+    return m_errorString;
+}
+
+// return header data as std::string (as produced by m_header.ToString())
+string BamReaderPrivate::GetHeaderText(void) const {
+    return m_header.ToString();
+}
+
+// return header data as SamHeader object (as produced by m_header.ToSamHeader())
+SamHeader BamReaderPrivate::GetSamHeader(void) const {
+    return m_header.ToSamHeader();
+}
+
+// get next alignment (with character data fully parsed)
+bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
+
+    // if valid alignment found
+    if ( GetNextAlignmentCore(alignment) ) {
+
+        // store alignment's "source" filename
+        alignment.Filename = m_filename;
+
+        // return success/failure of parsing char data
+        if ( alignment.BuildCharData() )
+            return true;
+        else {
+            const string alError = alignment.GetErrorString();
+            const string message = string("could not populate alignment data: \n\t") + alError;
+            SetErrorString("BamReader::GetNextAlignment", message);
+            return false;
+        }
+    }
+
+    // no valid alignment found
+    return false;
+}
+
+// retrieves next available alignment core data (returns success/fail)
+// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)
+//    these can be accessed, if necessary, from the supportData
+// useful for operations requiring ONLY positional or other alignment-related information
+bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
+
+    // skip if stream not opened
+    if ( !m_stream.IsOpen() )
+        return false;
+
+    try {
+
+        // skip if region is set but has no alignments
+        if ( m_randomAccessController.HasRegion() &&
+             !m_randomAccessController.RegionHasAlignments() )
+        {
+            return false;
+        }
+
+        // if can't read next alignment
+        if ( !LoadNextAlignment(alignment) )
+            return false;
+
+        // check alignment's region-overlap state
+        BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
+
+        // if alignment starts after region, no need to keep reading
+        if ( state == BamRandomAccessController::AfterRegion )
+            return false;
+
+        // read until overlap is found
+        while ( state != BamRandomAccessController::OverlapsRegion ) {
+
+            // if can't read next alignment
+            if ( !LoadNextAlignment(alignment) )
+                return false;
+
+            // check alignment's region-overlap state
+            state = m_randomAccessController.AlignmentState(alignment);
+
+            // if alignment starts after region, no need to keep reading
+            if ( state == BamRandomAccessController::AfterRegion )
+                return false;
+        }
+
+        // if we get here, we found the next 'valid' alignment
+        // (e.g. overlaps current region if one was set, simply the next alignment if not)
+        alignment.SupportData.HasCoreOnly = true;
+        return true;
+
+    } catch ( BamException& e ) {
+        const string streamError = e.what();
+        const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
+        SetErrorString("BamReader::GetNextAlignmentCore", message);
+        return false;
+    }
+}
+
+// returns the number of entries currently held in m_references
+int BamReaderPrivate::GetReferenceCount(void) const {
+    return m_references.size();
+}
+
+// returns a reference to the reader's own reference data (no copy is made)
+const RefVector& BamReaderPrivate::GetReferenceData(void) const {
+    return m_references;
+}
+
+// returns RefID for given RefName (returns References.size() if not found)
+int BamReaderPrivate::GetReferenceID(const string& refName) const {
+
+    // retrieve names from reference data
+    vector<string> refNames;
+    RefVector::const_iterator refIter = m_references.begin();
+    RefVector::const_iterator refEnd  = m_references.end();
+    for ( ; refIter != refEnd; ++refIter)
+        refNames.push_back( (*refIter).RefName );
+
+    // return 'index-of' refName (or -1 if not found)
+    int index = distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));
+    if ( index == (int)m_references.size() ) return -1;
+    else return index;
+}
+
+// returns whether the random access controller reports having an index
+bool BamReaderPrivate::HasIndex(void) const {
+    return m_randomAccessController.HasIndex();
+}
+
+// returns whether the underlying stream is open
+bool BamReaderPrivate::IsOpen(void) const {
+    return m_stream.IsOpen();
+}
+
+// load BAM header data from the reader's stream into m_header
+// (no exceptions are caught here; Open() wraps this call in its try block)
+void BamReaderPrivate::LoadHeaderData(void) {
+    m_header.Load(&m_stream);
+}
+
+// populates BamAlignment with alignment data under file pointer, returns success/fail
+bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
+
+    // read in the 'block length' value, make sure it's not zero
+    char buffer[sizeof(uint32_t)];
+    m_stream.Read(buffer, sizeof(uint32_t));
+    alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
+    if ( alignment.SupportData.BlockLength == 0 )
+        return false;
+
+    // read in core alignment data, make sure the right size of data was read
+    char x[Constants::BAM_CORE_SIZE];
+    if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
+        return false;
+
+    // swap core endian-ness if necessary
+    if ( m_isBigEndian ) {
+        for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
+            BamTools::SwapEndian_32p(&x[i]);
+    }
+
+    // set BamAlignment 'core' and 'support' data
+    alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
+    alignment.Position = BamTools::UnpackSignedInt(&x[4]);
+
+    unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
+    alignment.Bin        = tempValue >> 16;
+    alignment.MapQuality = tempValue >> 8 & 0xff;
+    alignment.SupportData.QueryNameLength = tempValue & 0xff;
+
+    tempValue = BamTools::UnpackUnsignedInt(&x[12]);
+    alignment.AlignmentFlag = tempValue >> 16;
+    alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
+
+    alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
+    alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
+    alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
+    alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);
+
+    // set BamAlignment length
+    alignment.Length = alignment.SupportData.QuerySequenceLength;
+
+    // read in character data - make sure proper data size was read
+    bool readCharDataOK = false;
+    const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
+    RaiiBuffer allCharData(dataLength);
+
+    if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) {
+
+        // store 'allCharData' in supportData structure
+        alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
+
+        // set success flag
+        readCharDataOK = true;
+
+        // save CIGAR ops
+        // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
+        // even when GetNextAlignmentCore() is called
+        const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
+        uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
+        CigarOp op;
+        alignment.CigarData.clear();
+        alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
+        for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
+
+            // swap endian-ness if necessary
+            if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);
+
+            // build CigarOp structure
+            op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
+            op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];
+
+            // save CigarOp
+            alignment.CigarData.push_back(op);
+        }
+    }
+
+    // return success/failure
+    return readCharDataOK;
+}
+
+// loads reference data from BAM file
+bool BamReaderPrivate::LoadReferenceData(void) {
+
+    // get number of reference sequences
+    char buffer[sizeof(uint32_t)];
+    m_stream.Read(buffer, sizeof(uint32_t));
+    uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
+    m_references.reserve((int)numberRefSeqs);
+
+    // iterate over all references in header
+    for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {
+
+        // get length of reference name
+        m_stream.Read(buffer, sizeof(uint32_t));
+        uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
+        RaiiBuffer refName(refNameLength);
+
+        // get reference name and reference sequence length
+        m_stream.Read(refName.Buffer, refNameLength);
+        m_stream.Read(buffer, sizeof(int32_t));
+        int32_t refLength = BamTools::UnpackSignedInt(buffer);
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
+
+        // store data for reference
+        RefData aReference;
+        aReference.RefName   = (string)((const char*)refName.Buffer);
+        aReference.RefLength = refLength;
+        m_references.push_back(aReference);
+    }
+
+    // return success
+    return true;
+}
+
+// asks the random access controller to locate an index of the preferred type
+// returns false (with the error string set) if the controller reports failure
+bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
+
+    // done if the controller succeeds
+    if ( m_randomAccessController.LocateIndex(this, preferredType) )
+        return true;
+
+    // otherwise pass along the controller's error
+    const string message = string("could not locate index: \n\t") +
+                           m_randomAccessController.GetErrorString();
+    SetErrorString("BamReader::LocateIndex", message);
+    return false;
+}
+
+// opens BAM file (and index)
+bool BamReaderPrivate::Open(const string& filename) {
+
+    try {
+
+        // make sure we're starting with fresh state
+        Close();
+
+        // open BgzfStream
+        m_stream.Open(filename, IBamIODevice::ReadOnly);
+
+        // load BAM metadata
+        LoadHeaderData();
+        LoadReferenceData();
+
+        // store filename & offset of first alignment
+        m_filename = filename;
+        m_alignmentsBeginOffset = m_stream.Tell();
+
+        // return success
+        return true;
+
+    } catch ( BamException& e ) {
+        const string error = e.what();
+        const string message = string("could not open file: ") + filename +
+                               "\n\t" + error;
+        SetErrorString("BamReader::Open", message);
+        return false;
+    }
+}
+
+// asks the random access controller to open the named index file
+// returns false (with the error string set) if the controller reports failure
+bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
+
+    // done if the controller succeeds
+    if ( m_randomAccessController.OpenIndex(indexFilename, this) )
+        return true;
+
+    // otherwise pass along the controller's error
+    const string message = string("could not open index: \n\t") +
+                           m_randomAccessController.GetErrorString();
+    SetErrorString("BamReader::OpenIndex", message);
+    return false;
+}
+
+// returns BAM file pointer to beginning of alignment data
+// any current region is cleared first, whether or not the seek then succeeds
+bool BamReaderPrivate::Rewind(void) {
+
+    // forget the current region
+    m_randomAccessController.ClearRegion();
+
+    // seek back to the first alignment
+    if ( Seek(m_alignmentsBeginOffset) )
+        return true;
+
+    // seek failed - wrap the error that Seek() recorded
+    const string message = string("could not rewind: \n\t") + m_errorString;
+    SetErrorString("BamReader::Rewind", message);
+    return false;
+}
+
+bool BamReaderPrivate::Seek(const int64_t& position) {
+
+    // skip if BAM file not open
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
+        return false;
+    }
+
+    try {
+        m_stream.Seek(position);
+        return true;
+    }
+    catch ( BamException& e ) {
+        const string streamError = e.what();
+        const string message = string("could not seek in BAM file: \n\t") + streamError;
+        SetErrorString("BamReader::Seek", message);
+        return false;
+    }
+}
+
+void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
+    static const string SEPARATOR = ": ";
+    m_errorString = where + SEPARATOR + what;
+}
+
+// hands the given index over to the random access controller
+void BamReaderPrivate::SetIndex(BamIndex* index) {
+    m_randomAccessController.SetIndex(index);
+}
+
+// sets current region & attempts to jump to it
+// returns success/failure (on failure the error string is set)
+bool BamReaderPrivate::SetRegion(const BamRegion& region) {
+
+    // done if the random access controller succeeds
+    if ( m_randomAccessController.SetRegion(region, m_references.size()) )
+        return true;
+
+    // otherwise pass along the controller's error
+    const string message = string("could not set region: \n\t") +
+                           m_randomAccessController.GetErrorString();
+    SetErrorString("BamReader::SetRegion", message);
+    return false;
+}
+
+// returns the stream's current position, as reported by m_stream.Tell()
+int64_t BamReaderPrivate::Tell(void) const {
+    return m_stream.Tell();
+}
diff --git a/src/api/internal/bam/BamReader_p.h b/src/api/internal/bam/BamReader_p.h

new file mode 100644 (file)

index 0000000..e8db646
--- /dev/null
+++ b/src/api/internal/bam/BamReader_p.h
@@ -0,0 +1,118 @@
+// ***************************************************************************
+// BamReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#ifndef BAMREADER_P_H
+#define BAMREADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAlignment.h"
+#include "api/BamIndex.h"
+#include "api/BamReader.h"
+#include "api/SamHeader.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Provides the basic functionality for reading BAM files.
+// Combines a BAM header, a random access controller and a BGZF stream, and
+// records the most recent error message for retrieval via GetErrorString().
+class BamReaderPrivate {
+
+    // ctor & dtor
+    public:
+        BamReaderPrivate(BamReader* parent);
+        ~BamReaderPrivate(void);
+
+    // BamReader interface
+    public:
+
+        // file operations
+        bool Close(void);
+        const std::string Filename(void) const;
+        bool IsOpen(void) const;
+        bool Open(const std::string& filename);
+        bool Rewind(void);
+        bool SetRegion(const BamRegion& region);
+
+        // access alignment data
+        bool GetNextAlignment(BamAlignment& alignment);
+        bool GetNextAlignmentCore(BamAlignment& alignment);
+
+        // access auxiliary data
+        std::string GetHeaderText(void) const;
+        SamHeader GetSamHeader(void) const;
+        int GetReferenceCount(void) const;
+        const RefVector& GetReferenceData(void) const;
+        int GetReferenceID(const std::string& refName) const;
+
+        // index operations
+        bool CreateIndex(const BamIndex::IndexType& type);
+        bool HasIndex(void) const;
+        bool LocateIndex(const BamIndex::IndexType& preferredType);
+        bool OpenIndex(const std::string& indexFilename);
+        void SetIndex(BamIndex* index);
+
+        // error handling
+        std::string GetErrorString(void) const;
+        void SetErrorString(const std::string& where, const std::string& what);
+
+    // internal methods, but available as a BamReaderPrivate 'interface'
+    //
+    // these methods should only be used by BamTools::Internal classes
+    // (currently only used by the BamIndex subclasses)
+    public:
+        // retrieves header text from BAM file
+        void LoadHeaderData(void);
+        // retrieves BAM alignment under file pointer
+        // (does no overlap checking or character data parsing)
+        bool LoadNextAlignment(BamAlignment& alignment);
+        // builds reference data structure from BAM file
+        bool LoadReferenceData(void);
+        // seek reader to file position
+        bool Seek(const int64_t& position);
+        // return reader's file position
+        int64_t Tell(void) const;
+
+    // data members
+    // (declared public; declaration order is also their initialization order)
+    public:
+
+        // general BAM file data
+        int64_t     m_alignmentsBeginOffset;  // stream position of the first alignment
+        std::string m_filename;
+        RefVector   m_references;
+
+        // system data
+        bool m_isBigEndian;
+
+        // parent BamReader
+        BamReader* m_parent;
+
+        // BamReaderPrivate components
+        BamHeader m_header;
+        BamRandomAccessController m_randomAccessController;
+        BgzfStream m_stream;
+
+        // error handling
+        std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMREADER_P_H
diff --git a/src/api/internal/bam/BamWriter_p.cpp b/src/api/internal/bam/BamWriter_p.cpp

new file mode 100644 (file)

index 0000000..ba4989f
--- /dev/null
+++ b/src/api/internal/bam/BamWriter_p.cpp
@@ -0,0 +1,462 @@
+// ***************************************************************************
+// BamWriter_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/BamConstants.h"
+#include "api/IBamIODevice.h"
+#include "api/internal/bam/BamWriter_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// ctor
+//   caches the host system's endianness for later byte-swapping decisions
+BamWriterPrivate::BamWriterPrivate(void)
+    : m_isBigEndian( BamTools::SystemIsBigEndian() )
+{ }
+
+// dtor
+//   closes the output stream if it is still open (see Close())
+BamWriterPrivate::~BamWriterPrivate(void) {
+    Close();
+}
+
+// calculates minimum bin for a BAM alignment interval [begin, end)
+uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
+    --end;
+    if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);
+    if ( (begin >> 17) == (end >> 17) ) return  585 + (begin >> 17);
+    if ( (begin >> 20) == (end >> 20) ) return   73 + (begin >> 20);
+    if ( (begin >> 23) == (end >> 23) ) return    9 + (begin >> 23);
+    if ( (begin >> 26) == (end >> 26) ) return    1 + (begin >> 26);
+    return 0;
+}
+
+// closes the alignment archive
+// does nothing if the file is not open; if closing throws, the exception's text
+// is stored as the error string
+void BamWriterPrivate::Close(void) {
+
+    // only an open file needs closing
+    if ( IsOpen() ) {
+        try {
+            m_stream.Close();
+        } catch ( BamException& e ) {
+            m_errorString = e.what();
+        }
+    }
+}
+
+// creates a cigar string from the supplied alignment
+void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
+
+    // initialize
+    const size_t numCigarOperations = cigarOperations.size();
+    packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
+
+    // pack the cigar data into the string
+    unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();
+
+    // iterate over cigar operations
+    vector<CigarOp>::const_iterator coIter = cigarOperations.begin();
+    vector<CigarOp>::const_iterator coEnd  = cigarOperations.end();
+    for ( ; coIter != coEnd; ++coIter ) {
+
+        // store op in packedCigar
+        uint8_t cigarOp;
+        switch ( coIter->Type ) {
+            case (Constants::BAM_CIGAR_MATCH_CHAR)    : cigarOp = Constants::BAM_CIGAR_MATCH;    break;
+            case (Constants::BAM_CIGAR_INS_CHAR)      : cigarOp = Constants::BAM_CIGAR_INS;      break;
+            case (Constants::BAM_CIGAR_DEL_CHAR)      : cigarOp = Constants::BAM_CIGAR_DEL;      break;
+            case (Constants::BAM_CIGAR_REFSKIP_CHAR)  : cigarOp = Constants::BAM_CIGAR_REFSKIP;  break;
+            case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;
+            case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;
+            case (Constants::BAM_CIGAR_PAD_CHAR)      : cigarOp = Constants::BAM_CIGAR_PAD;      break;
+            case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
+            case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
+            default:
+                const string message = string("invalid CIGAR operation type") + coIter->Type;
+                throw BamException("BamWriter::CreatePackedCigar", message);
+        }
+
+        *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
+        pPackedCigar++;
+    }
+}
+
+// encodes the supplied query sequence into 4-bit notation
+void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
+
+    // prepare the encoded query string
+    const size_t queryLength = query.size();
+    const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
+    encodedQuery.resize(encodedQueryLength);
+    char* pEncodedQuery = (char*)encodedQuery.data();
+    const char* pQuery = (const char*)query.data();
+
+    // walk through original query sequence, encoding its bases
+    unsigned char nucleotideCode;
+    bool useHighWord = true;
+    while ( *pQuery ) {
+        switch ( *pQuery ) {
+            case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
+            case (Constants::BAM_DNA_A)     : nucleotideCode = Constants::BAM_BASECODE_A;     break;
+            case (Constants::BAM_DNA_C)     : nucleotideCode = Constants::BAM_BASECODE_C;     break;
+            case (Constants::BAM_DNA_M)     : nucleotideCode = Constants::BAM_BASECODE_M;     break;
+            case (Constants::BAM_DNA_G)     : nucleotideCode = Constants::BAM_BASECODE_G;     break;
+            case (Constants::BAM_DNA_R)     : nucleotideCode = Constants::BAM_BASECODE_R;     break;
+            case (Constants::BAM_DNA_S)     : nucleotideCode = Constants::BAM_BASECODE_S;     break;
+            case (Constants::BAM_DNA_V)     : nucleotideCode = Constants::BAM_BASECODE_V;     break;
+            case (Constants::BAM_DNA_T)     : nucleotideCode = Constants::BAM_BASECODE_T;     break;
+            case (Constants::BAM_DNA_W)     : nucleotideCode = Constants::BAM_BASECODE_W;     break;
+            case (Constants::BAM_DNA_Y)     : nucleotideCode = Constants::BAM_BASECODE_Y;     break;
+            case (Constants::BAM_DNA_H)     : nucleotideCode = Constants::BAM_BASECODE_H;     break;
+            case (Constants::BAM_DNA_K)     : nucleotideCode = Constants::BAM_BASECODE_K;     break;
+            case (Constants::BAM_DNA_D)     : nucleotideCode = Constants::BAM_BASECODE_D;     break;
+            case (Constants::BAM_DNA_B)     : nucleotideCode = Constants::BAM_BASECODE_B;     break;
+            case (Constants::BAM_DNA_N)     : nucleotideCode = Constants::BAM_BASECODE_N;     break;
+            default:
+                const string message = string("invalid base: ") + *pQuery;
+                throw BamException("BamWriter::EncodeQuerySequence", message);
+        }
+
+        // pack the nucleotide code
+        if ( useHighWord ) {
+            *pEncodedQuery = nucleotideCode << 4;
+            useHighWord = false;
+        } else {
+            *pEncodedQuery |= nucleotideCode;
+            ++pEncodedQuery;
+            useHighWord = true;
+        }
+
+        // increment the query position
+        ++pQuery;
+    }
+}
+
+// returns a description of the last error that occurred
+// (a copy of m_errorString, which Close(), Open() & SaveAlignment() set on failure)
+std::string BamWriterPrivate::GetErrorString(void) const {
+    return m_errorString;
+}
+
+// returns whether BAM file is open for writing or not
+// (simply reports the state of the underlying stream)
+bool BamWriterPrivate::IsOpen(void) const {
+    return m_stream.IsOpen();
+}
+
+// opens the alignment archive
+// opens the stream for writing, then writes the magic number, the SAM header text
+// and the reference sequences, in that order
+// returns false (storing the exception's text as the error string) if any step
+// throws a BamException
+bool BamWriterPrivate::Open(const string& filename,
+                            const string& samHeaderText,
+                            const RefVector& referenceSequences)
+{
+    try {
+
+        // open the BGZF file for writing
+        m_stream.Open(filename, IBamIODevice::WriteOnly);
+
+        // write BAM file 'metadata' components
+        WriteMagicNumber();
+        WriteSamHeaderText(samHeaderText);
+        WriteReferences(referenceSequences);
+
+        // return success
+        return true;
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+}
+
+// saves the alignment to the alignment archive
+bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
+
+    try {
+
+        // if BamAlignment contains only the core data and a raw char data buffer
+        // (as a result of BamReader::GetNextAlignmentCore())
+        if ( al.SupportData.HasCoreOnly )
+            WriteCoreAlignment(al);
+
+        // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
+        // (resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code)
+        else WriteAlignment(al);
+
+        // if we get here, everything OK
+        return true;
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+}
+
+// Forwards the compression setting to the BGZF stream.
+// The request is silently dropped while a BAM file is open.
+void BamWriterPrivate::SetWriteCompressed(bool ok) {
+    if ( IsOpen() )
+        return;
+    m_stream.SetWriteCompressed(ok);
+}
+
+void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
+
+    // calculate char lengths
+    const unsigned int nameLength         = al.Name.size() + 1;
+    const unsigned int numCigarOperations = al.CigarData.size();
+    const unsigned int queryLength        = al.QueryBases.size();
+    const unsigned int tagDataLength      = al.TagData.size();
+
+    // no way to tell if alignment's bin is already defined (there is no default, invalid value)
+    // so we'll go ahead calculate its bin ID before storing
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // create our packed cigar string
+    string packedCigar;
+    CreatePackedCigar(al.CigarData, packedCigar);
+    const unsigned int packedCigarLength = packedCigar.size();
+
+    // encode the query
+    string encodedQuery;
+    EncodeQuerySequence(al.QueryBases, encodedQuery);
+    const unsigned int encodedQueryLength = encodedQuery.size();
+
+    // write the block size
+    const unsigned int dataBlockSize = nameLength +
+                                       packedCigarLength +
+                                       encodedQueryLength +
+                                       queryLength +
+                                       tagDataLength;
+    unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+    buffer[4] = queryLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < 8; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
+    }
+
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+
+    // write the query name
+    m_stream.Write(al.Name.c_str(), nameLength);
+
+    // write the packed cigar
+    if ( m_isBigEndian ) {
+        char* cigarData = new char[packedCigarLength]();
+        memcpy(cigarData, packedCigar.data(), packedCigarLength);
+        if ( m_isBigEndian ) {
+            for ( size_t i = 0; i < packedCigarLength; ++i )
+                BamTools::SwapEndian_32p(&cigarData[i]);
+        }
+        m_stream.Write(cigarData, packedCigarLength);
+        delete[] cigarData; // TODO: cleanup on Write exception thrown?
+    }
+    else
+        m_stream.Write(packedCigar.data(), packedCigarLength);
+
+    // write the encoded query sequence
+    m_stream.Write(encodedQuery.data(), encodedQueryLength);
+
+    // write the base qualities
+    char* pBaseQualities = (char*)al.Qualities.data();
+    for ( size_t i = 0; i < queryLength; ++i )
+        pBaseQualities[i] -= 33; // FASTQ conversion
+    m_stream.Write(pBaseQualities, queryLength);
+
+    // write the read group tag
+    if ( m_isBigEndian ) {
+
+        char* tagData = new char[tagDataLength]();
+        memcpy(tagData, al.TagData.data(), tagDataLength);
+
+        size_t i = 0;
+        while ( i < tagDataLength ) {
+
+            i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
+            const char type = tagData[i];     // get tag type at position i
+            ++i;
+
+            switch ( type ) {
+
+                case(Constants::BAM_TAG_TYPE_ASCII) :
+                case(Constants::BAM_TAG_TYPE_INT8)  :
+                case(Constants::BAM_TAG_TYPE_UINT8) :
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_INT16)  :
+                case(Constants::BAM_TAG_TYPE_UINT16) :
+                    BamTools::SwapEndian_16p(&tagData[i]);
+                    i += sizeof(uint16_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_FLOAT)  :
+                case(Constants::BAM_TAG_TYPE_INT32)  :
+                case(Constants::BAM_TAG_TYPE_UINT32) :
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    i += sizeof(uint32_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_HEX) :
+                case(Constants::BAM_TAG_TYPE_STRING) :
+                    // no endian swapping necessary for hex-string/string data
+                    while ( tagData[i] )
+                        ++i;
+                    // increment one more for null terminator
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+                {
+                    // read array type
+                    const char arrayType = tagData[i];
+                    ++i;
+
+                    // swap endian-ness of number of elements in place, then retrieve for loop
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    int32_t numElements;
+                    memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+                    i += sizeof(uint32_t);
+
+                    // swap endian-ness of array elements
+                    for ( int j = 0; j < numElements; ++j ) {
+                        switch (arrayType) {
+                            case (Constants::BAM_TAG_TYPE_INT8)  :
+                            case (Constants::BAM_TAG_TYPE_UINT8) :
+                                // no endian-swapping necessary
+                                ++i;
+                                break;
+                            case (Constants::BAM_TAG_TYPE_INT16)  :
+                            case (Constants::BAM_TAG_TYPE_UINT16) :
+                                BamTools::SwapEndian_16p(&tagData[i]);
+                                i += sizeof(uint16_t);
+                                break;
+                            case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                            case (Constants::BAM_TAG_TYPE_INT32)  :
+                            case (Constants::BAM_TAG_TYPE_UINT32) :
+                                BamTools::SwapEndian_32p(&tagData[i]);
+                                i += sizeof(uint32_t);
+                                break;
+                            default:
+                                delete[] tagData;
+                                const string message = string("invalid binary array type: ") + arrayType;
+                                throw BamException("BamWriter::SaveAlignment", message);
+                        }
+                    }
+
+                    break;
+                }
+
+                default :
+                    delete[] tagData;
+                    const string message = string("invalid tag type: ") + type;
+                    throw BamException("BamWriter::SaveAlignment", message);
+            }
+        }
+
+        m_stream.Write(tagData, tagDataLength);
+        delete[] tagData; // TODO: cleanup on Write exception thrown?
+    }
+    else
+        m_stream.Write(al.TagData.data(), tagDataLength);
+}
+
+void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
+
+    // write the block size
+    unsigned int blockSize = al.SupportData.BlockLength;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // re-calculate bin (in case BamAlignment's position has been previously modified)
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+    buffer[4] = al.SupportData.QuerySequenceLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < 8; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
+    }
+
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+
+    // write the raw char data
+    m_stream.Write((char*)al.SupportData.AllCharData.data(),
+                   al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
+}
+
+// Emits the BAM 'magic number' (Constants::BAM_HEADER_MAGIC) to the stream.
+void BamWriterPrivate::WriteMagicNumber(void)
+{
+    m_stream.Write(Constants::BAM_HEADER_MAGIC,
+                   Constants::BAM_HEADER_MAGIC_LENGTH);
+}
+
+void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {
+
+    // write the number of reference sequences
+    uint32_t numReferenceSequences = referenceSequences.size();
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);
+    m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);
+
+    // foreach reference sequence
+    RefVector::const_iterator rsIter = referenceSequences.begin();
+    RefVector::const_iterator rsEnd  = referenceSequences.end();
+    for ( ; rsIter != rsEnd; ++rsIter ) {
+
+        // write the reference sequence name length
+        uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceSequenceNameLen);
+        m_stream.Write((char*)&referenceSequenceNameLen, Constants::BAM_SIZEOF_INT);
+
+        // write the reference sequence name
+        m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
+
+        // write the reference sequence length
+        int32_t referenceLength = rsIter->RefLength;
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);
+        m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);
+    }
+}
+
+// Writes the SAM header section: the text length as a 32-bit integer, followed
+// by the text itself (no text bytes are written when the header is empty).
+//
+// On big-endian hosts only the copy of the length that goes to disk is
+// byte-swapped; the host-order length is kept for the emptiness test and for the
+// number of text bytes to write.
+void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {
+
+    // header text length in host byte order
+    const uint32_t samHeaderLen = samHeaderText.size();
+
+    // write the SAM header text length (swapped copy on big-endian hosts)
+    uint32_t encodedHeaderLen = samHeaderLen;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(encodedHeaderLen);
+    m_stream.Write((char*)&encodedHeaderLen, Constants::BAM_SIZEOF_INT);
+
+    // write the SAM header text
+    if ( samHeaderLen > 0 )
+        m_stream.Write(samHeaderText.data(), samHeaderLen);
+}
diff --git a/src/api/internal/bam/BamWriter_p.h b/src/api/internal/bam/BamWriter_p.h

new file mode 100644 (file)

index 0000000..d5bbe8d
--- /dev/null
+++ b/src/api/internal/bam/BamWriter_p.h
@@ -0,0 +1,73 @@
+// ***************************************************************************
+// BamWriter_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_P_H
+#define BAMWRITER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+class BamWriterPrivate {
+
+    // ctor & dtor
+    public:
+        BamWriterPrivate(void);
+        ~BamWriterPrivate(void);
+
+    // interface methods (Open & SaveAlignment report failure by returning false; details via GetErrorString)
+    public:
+        void Close(void);
+        std::string GetErrorString(void) const;  // message stored when Open/SaveAlignment last caught a BamException
+        bool IsOpen(void) const;                 // state of the underlying BGZF stream
+        bool Open(const std::string& filename,   // opens stream, then writes magic number, SAM header text & references
+                  const std::string& samHeaderText,
+                  const BamTools::RefVector& referenceSequences);
+        bool SaveAlignment(const BamAlignment& al);  // 'core only' alignments -> WriteCoreAlignment, all others -> WriteAlignment
+        void SetWriteCompressed(bool ok);            // has no effect while a BAM file is open
+
+    // 'internal' methods (declared public, but called from the interface methods above)
+    public:
+        uint32_t CalculateMinimumBin(const int begin, int end) const;  // used by the Write*Alignment methods to (re)compute an alignment's bin
+        void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);  // result returned through @packedCigar
+        void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);  // result returned through @encodedQuery
+        void WriteAlignment(const BamAlignment& al);      // alignment with standard fields populated
+        void WriteCoreAlignment(const BamAlignment& al);  // alignment with core data + raw char data only
+        void WriteMagicNumber(void);
+        void WriteReferences(const BamTools::RefVector& referenceSequences);
+        void WriteSamHeaderText(const std::string& samHeaderText);
+
+    // data members
+    private:
+        BgzfStream m_stream;        // output stream that all Write* methods write to
+        bool m_isBigEndian;         // when true, multi-byte fields are byte-swapped before writing
+        std::string m_errorString;  // last error message (see GetErrorString)
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMWRITER_P_H
diff --git a/src/api/internal/bam/CMakeLists.txt b/src/api/internal/bam/CMakeLists.txt

new file mode 100644 (file)

index 0000000..64d8534
--- /dev/null
+++ b/src/api/internal/bam/CMakeLists.txt
@@ -0,0 +1,19 @@
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/bam
+# ==========================
+
+set ( InternalBamDir "${InternalDir}/bam" ) # NOTE(review): InternalDir is not set in this file - presumably defined by the including CMakeLists; confirm
+
+set ( InternalBamSources
+        ${InternalBamDir}/BamHeader_p.cpp
+        ${InternalBamDir}/BamMultiReader_p.cpp
+        ${InternalBamDir}/BamRandomAccessController_p.cpp
+        ${InternalBamDir}/BamReader_p.cpp
+        ${InternalBamDir}/BamWriter_p.cpp
+
+        PARENT_SCOPE # <-- leave this last (it is the option that sets InternalBamSources in the parent scope, not a source file)
+)
+
diff --git a/src/api/internal/index/BamIndexFactory_p.cpp b/src/api/internal/index/BamIndexFactory_p.cpp

new file mode 100644 (file)

index 0000000..ab7751f
--- /dev/null
+++ b/src/api/internal/index/BamIndexFactory_p.cpp
@@ -0,0 +1,107 @@
+// ***************************************************************************
+// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#include "api/internal/index/BamIndexFactory_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+// generates index filename from BAM filename (depending on requested type)
+// if type is unknown, returns empty string
+const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
+                                                  const BamIndex::IndexType& type)
+{
+    switch ( type ) {
+        case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
+        case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
+        default :
+            return string();
+    }
+}
+
+// creates a new BamIndex object, depending on extension of @indexFilename
+BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) {
+
+    // get file extension from index filename, including dot (".EXT")
+    // if can't get file extension, return null index
+    const string extension = FileExtension(indexFilename);
+    if ( extension.empty() )
+        return 0;
+
+    // create index based on extension
+    if      ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader);
+    else if ( extension == BamToolsIndex::Extension()    ) return new BamToolsIndex(reader);
+    else
+        return 0;
+}
+
+// creates a new BamIndex, object of requested @type
+BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
+                                             BamReaderPrivate* reader)
+{
+    switch ( type ) {
+        case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader);
+        case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader);
+        default :
+            return 0;
+    }
+}
+
+// Extracts the extension of @filename, starting at (and including) its last '.'.
+// Returns an empty string when @filename is 4 characters or shorter (too short to
+// hold a path plus an extension) or when it contains no '.' at all.
+const string BamIndexFactory::FileExtension(const string& filename) {
+
+    // length test also covers the empty filename
+    if ( filename.length() <= 4 )
+        return string();
+
+    // position of the last dot, if any
+    const size_t dotPosition = filename.rfind('.');
+
+    return ( dotPosition == string::npos ) ? string()
+                                           : filename.substr(dotPosition);
+}
+
+// returns name of existing index file that corresponds to @bamFilename
+// will defer to @preferredType if possible, if not will attempt to load any supported type
+// returns empty string if not found
+const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
+                                                const BamIndex::IndexType& preferredType)
+{
+    // skip if BAM filename provided is empty
+    if ( bamFilename.empty() )
+        return string();
+
+    // try to find index of preferred type first
+    // return index filename if found
+    string indexFilename = CreateIndexFilename(bamFilename, preferredType);
+    if ( !indexFilename.empty() )
+        return indexFilename;
+
+    // couldn't find preferred type, try the other supported types
+    // return index filename if found
+    if ( preferredType != BamIndex::STANDARD ) {
+        indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD);
+        if ( !indexFilename.empty() )
+            return indexFilename;
+    }
+    if ( preferredType != BamIndex::BAMTOOLS ) {
+        indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS);
+        if ( !indexFilename.empty() )
+            return indexFilename;
+    }
+
+    // otherwise couldn't find any index matching this filename
+    return string();
+}
diff --git a/src/api/internal/index/BamIndexFactory_p.h b/src/api/internal/index/BamIndexFactory_p.h

new file mode 100644 (file)

index 0000000..4e4f1cf
--- /dev/null
+++ b/src/api/internal/index/BamIndexFactory_p.h
@@ -0,0 +1,49 @@
+// ***************************************************************************
+// BamIndexFactory_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#ifndef BAMINDEX_FACTORY_P_H
+#define BAMINDEX_FACTORY_P_H
+
+#include "api/BamIndex.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BamIndexFactory {
+
+    // static interface methods
+    public:
+        // creates a new BamIndex object, depending on extension of @indexFilename (null if extension is missing/unsupported)
+        static BamIndex* CreateIndexFromFilename(const std::string& indexFilename,
+                                                 BamReaderPrivate* reader);
+        // creates a new BamIndex object, of requested @type (null if @type is unknown)
+        static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type,
+                                           BamReaderPrivate* reader);
+        // returns index filename that corresponds to @bamFilename (generated name; the implementation does not check that the file exists)
+        // will defer to @preferredType if possible
+        // if no filename can be generated for @preferredType, falls back to the other supported index types
+        // returns empty string if @bamFilename is empty or no filename (of any type) can be generated
+        static const std::string FindIndexFilename(const std::string& bamFilename,
+                                                   const BamIndex::IndexType& preferredType);
+
+    // internal methods
+    public:
+        // generates index filename from BAM filename (depending on requested type)
+        // if type is unknown, returns empty string
+        static const std::string CreateIndexFilename(const std::string& bamFilename,
+                                                     const BamIndex::IndexType& type);
+        // retrieves file extension (including '.'), or empty string if filename is too short or has no '.'
+        static const std::string FileExtension(const std::string& filename);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMINDEX_FACTORY_P_H
diff --git a/src/api/internal/index/BamStandardIndex_p.cpp b/src/api/internal/index/BamStandardIndex_p.cpp

new file mode 100644 (file)

index 0000000..dcdec8d
--- /dev/null
+++ b/src/api/internal/index/BamStandardIndex_p.cpp
@@ -0,0 +1,965 @@
+// ***************************************************************************
+// BamStandardIndex_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+#include <sstream>
+using namespace std;
+
+// -----------------------------------
+// static BamStandardIndex constants
+// -----------------------------------
+
+const int BamStandardIndex::MAX_BIN               = 37450;  // =(8^6-1)/7+1
+const int BamStandardIndex::BAM_LIDX_SHIFT        = 14;  // right-shift applied to a position to get its linear offset index (see CalculateMinOffset)
+const string BamStandardIndex::BAI_EXTENSION      = ".bai";  // index file extension, including the dot
+const char* const BamStandardIndex::BAI_MAGIC     = "BAI\1";  // first 4 bytes expected in an index file (see CheckMagicNumber)
+const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;  // one chunk = 64-bit start offset + 64-bit stop offset
+const int BamStandardIndex::SIZEOF_BINCORE        = sizeof(uint32_t) + sizeof(int32_t);  // presumably bin ID + number of alignment chunks - confirm against ReadBinIntoBuffer
+const int BamStandardIndex::SIZEOF_LINEAROFFSET   = sizeof(uint64_t);  // one linear offset entry
+
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+// Starts out holding neither a device nor an I/O buffer (both pointers null).
+BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
+    : Device(0), Buffer(0)
+{
+}
+
+// Releases whatever is still held: closes and deletes the device (if any),
+// frees the I/O buffer (if any), and nulls both pointers.
+BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+    if ( Device != 0 ) {
+        Device->Close();
+        delete Device;
+    }
+    Device = 0;
+
+    // delete[] on a null pointer does nothing, so no separate null test is needed
+    delete[] Buffer;
+    Buffer = 0;
+}
+
+// ---------------------------------
+// BamStandardIndex implementation
+// ---------------------------------
+
+// Constructs an index bound to @reader, with an empty I/O buffer length,
+// and records whether the host system is big-endian.
+BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
+    : BamIndex(reader), m_bufferLength(0)
+{
+    m_isBigEndian = BamTools::SystemIsBigEndian();
+}
+
+// Closes the index file and frees the I/O buffer on destruction (see CloseFile).
+BamStandardIndex::~BamStandardIndex(void)
+{
+    CloseFile();
+}
+
+void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
+
+    // retrieve references from reader
+    const RefVector& references = m_reader->GetReferenceData();
+
+    // LeftPosition cannot be greater than or equal to reference length
+    if ( region.LeftPosition >= references.at(region.LeftRefID).RefLength )
+        throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
+
+    // set region 'begin'
+    begin = (unsigned int)region.LeftPosition;
+
+    // if right bound specified AND left&right bounds are on same reference
+    // OK to use right bound position as region 'end'
+    if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )
+        end = (unsigned int)region.RightPosition;
+
+    // otherwise, set region 'end' to last reference base
+    else end = (unsigned int)references.at(region.LeftRefID).RefLength;
+}
+
+// [begin, end)
+void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
+                                              const uint32_t& end,
+                                              set<uint16_t>& candidateBins)
+{
+    // initialize list, bin '0' is always a valid bin
+    candidateBins.insert(0);
+
+    // get rest of bins that contain this region
+    unsigned int k;
+    for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { candidateBins.insert(k); }
+    for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { candidateBins.insert(k); }
+    for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { candidateBins.insert(k); }
+    for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { candidateBins.insert(k); }
+    for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
+}
+
+void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+                                                 const uint64_t& minOffset,
+                                                 set<uint16_t>& candidateBins,
+                                                 vector<int64_t>& offsets)
+{
+    // seek to first bin
+    Seek(refSummary.FirstBinFilePosition, SEEK_SET);
+
+    // iterate over reference bins
+    uint32_t binId;
+    int32_t numAlignmentChunks;
+    set<uint16_t>::iterator candidateBinIter;
+    for ( int i = 0; i < refSummary.NumBins; ++i ) {
+
+        // read bin contents (if successful, alignment chunks are now in m_buffer)
+        ReadBinIntoBuffer(binId, numAlignmentChunks);
+
+        // see if bin is a 'candidate bin'
+        candidateBinIter = candidateBins.find(binId);
+
+        // if not, move on to next bin
+        if ( candidateBinIter == candidateBins.end() )
+            continue;
+
+        // otherwise, check bin's contents against for overlap
+        else {
+
+            size_t offset = 0;
+            uint64_t chunkStart;
+            uint64_t chunkStop;
+
+            // iterate over alignment chunks
+            for ( int j = 0; j < numAlignmentChunks; ++j ) {
+
+                // read chunk start & stop from buffer
+                memcpy((char*)&chunkStart, m_resources.Buffer+offset, sizeof(uint64_t));
+                offset += sizeof(uint64_t);
+                memcpy((char*)&chunkStop, m_resources.Buffer+offset, sizeof(uint64_t));
+                offset += sizeof(uint64_t);
+
+                // swap endian-ness if necessary
+                if ( m_isBigEndian ) {
+                    SwapEndian_64(chunkStart);
+                    SwapEndian_64(chunkStop);
+                }
+
+                // store alignment chunk's start offset
+                // if its stop offset is larger than our 'minOffset'
+                if ( chunkStop >= minOffset )
+                    offsets.push_back(chunkStart);
+            }
+
+            // 'pop' bin ID from candidate bins set
+            candidateBins.erase(candidateBinIter);
+
+            // quit if no more candidates
+            if ( candidateBins.empty() )
+                break;
+        }
+    }
+}
+
+uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
+                                              const uint32_t& begin)
+{
+    // if no linear offsets exist, return 0
+    if ( refSummary.NumLinearOffsets == 0 )
+        return 0;
+
+    // if 'begin' starts beyond last linear offset, use the last linear offset as minimum
+    // else use the offset corresponding to the requested start position
+    const int shiftedBegin = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
+    if ( shiftedBegin >= refSummary.NumLinearOffsets )
+        return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
+    else
+        return LookupLinearOffset( refSummary, shiftedBegin );
+}
+
+void BamStandardIndex::CheckBufferSize(char*& buffer,
+                                       unsigned int& bufferLength,
+                                       const unsigned int& requestedBytes)
+{
+    try {
+        if ( requestedBytes > bufferLength ) {
+            bufferLength = requestedBytes + 10;
+            delete[] buffer;
+            buffer = new char[bufferLength];
+        }
+    } catch ( std::bad_alloc&  ) {
+        stringstream s("");
+        s << "out of memory when allocating " << requestedBytes << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
+    }
+}
+
+void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
+                                       unsigned int& bufferLength,
+                                       const unsigned int& requestedBytes)
+{
+    try {
+        if ( requestedBytes > bufferLength ) {
+            bufferLength = requestedBytes + 10;
+            delete[] buffer;
+            buffer = new unsigned char[bufferLength];
+        }
+    } catch ( std::bad_alloc& ) {
+        stringstream s("");
+        s << "out of memory when allocating " << requestedBytes << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
+    }
+}
+
+// Reads the first 4 bytes from the index device and verifies that they match
+// the BAI 'magic number'.
+// Throws BamException if fewer than 4 bytes could be read, or if they do not match.
+void BamStandardIndex::CheckMagicNumber(void) {
+
+    char magic[4];
+
+    // a short read means this cannot be a BAI file
+    const int64_t numBytesRead = m_resources.Device->Read(magic, sizeof(magic));
+    if ( numBytesRead != 4 )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
+
+    // compare against the expected signature
+    const bool magicMatches = ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) == 0 );
+    if ( !magicMatches )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
+}
+
+// Resets @refEntry for reuse: empties its bins and linear offsets
+// and sets its ID to -1.
+void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry)
+{
+    refEntry.Bins.clear();
+    refEntry.LinearOffsets.clear();
+    refEntry.ID = -1;
+}
+
+// Shuts down index file access and drops all state derived from the file:
+// closes & deletes the device (when open), clears the index file summary,
+// and frees the I/O buffer.
+void BamStandardIndex::CloseFile(void) {
+
+    // device: close and release it, if it is open
+    if ( IsDeviceOpen() ) {
+        m_resources.Device->Close();
+        delete m_resources.Device;
+        m_resources.Device = 0;
+    }
+
+    // summary data no longer describes an open file
+    m_indexFileSummary.clear();
+
+    // I/O buffer: free it and reset the recorded length to match
+    delete[] m_resources.Buffer;
+    m_resources.Buffer = 0;
+    m_bufferLength     = 0;
+}
+
+// builds index from associated BAM file & writes out to index file
+bool BamStandardIndex::Create(void) {
+
+    // skip if BamReader is invalid or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
+        return false;
+    }
+
+    try {
+
+        // open new index file (read & write)
+        string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, IBamIODevice::ReadWrite);
+
+        // initialize BaiFileSummary with number of references
+        const int& numReferences = m_reader->GetReferenceCount();
+        ReserveForSummary(numReferences);
+
+        // initialize output file
+        WriteHeader();
+
+        // set up bin, ID, offset, & coordinate markers
+        const uint32_t defaultValue = 0xffffffffu;
+        uint32_t currentBin    = defaultValue;
+        uint32_t lastBin       = defaultValue;
+        int32_t  currentRefID  = defaultValue;
+        int32_t  lastRefID     = defaultValue;
+        uint64_t currentOffset = (uint64_t)m_reader->Tell();
+        uint64_t lastOffset    = currentOffset;
+        int32_t  lastPosition  = defaultValue;
+
+        // iterate through alignments in BAM file
+        BamAlignment al;
+        BaiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
+
+            // changed to new reference
+            if ( lastRefID != al.RefID ) {
+
+                // if not first reference, save previous reference data
+                if ( lastRefID != (int32_t)defaultValue ) {
+
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+                    WriteReferenceEntry(refEntry);
+                    ClearReferenceEntry(refEntry);
+
+                    // write any empty references between (but *NOT* including) lastRefID & al.RefID
+                    for ( int i = lastRefID+1; i < al.RefID; ++i ) {
+                        BaiReferenceEntry emptyEntry(i);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+
+                    // update bin markers
+                    currentOffset = lastOffset;
+                    currentBin    = al.Bin;
+                    lastBin       = al.Bin;
+                    currentRefID  = al.RefID;
+                }
+
+                // otherwise, this is first pass
+                // be sure to write any empty references up to (but *NOT* including) current RefID
+                else {
+                    for ( int i = 0; i < al.RefID; ++i ) {
+                        BaiReferenceEntry emptyEntry(i);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+                }
+
+                // update reference markers
+                refEntry.ID = al.RefID;
+                lastRefID   = al.RefID;
+                lastBin     = defaultValue;
+            }
+
+            // if lastPosition greater than current alignment position - file not sorted properly
+            else if ( lastPosition > al.Position ) {
+                stringstream s("");
+                s << "BAM file is not properly sorted by coordinate" << endl
+                  << "Current alignment position: " << al.Position
+                  << " < previous alignment position: " << lastPosition
+                  << " on reference ID: " << al.RefID << endl;
+                SetErrorString("BamStandardIndex::Create", s.str());
+                return false;
+            }
+
+            // if alignment's ref ID is valid & its bin is not a 'leaf'
+            if ( (al.RefID >= 0) && (al.Bin < 4681) )
+                SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
+
+            // changed to new BAI bin
+            if ( al.Bin != lastBin ) {
+
+                // if not first bin on reference, save previous bin data
+                if ( currentBin != defaultValue )
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+
+                // update markers
+                currentOffset = lastOffset;
+                currentBin    = al.Bin;
+                lastBin       = al.Bin;
+                currentRefID  = al.RefID;
+
+                // if invalid RefID, break out
+                if ( currentRefID < 0 )
+                    break;
+            }
+
+            // make sure that current file pointer is beyond lastOffset
+            if ( m_reader->Tell() <= (int64_t)lastOffset ) {
+                SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
+                return false;
+            }
+
+            // update lastOffset & lastPosition
+            lastOffset   = m_reader->Tell();
+            lastPosition = al.Position;
+        }
+
+        // after finishing alignments, if any data was read, check:
+        if ( currentRefID >= 0 ) {
+
+            // store last alignment chunk to its bin, then write last reference entry with data
+            SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+            WriteReferenceEntry(refEntry);
+
+            // then write any empty references remaining at end of file
+            for ( int i = currentRefID+1; i < numReferences; ++i ) {
+                BaiReferenceEntry emptyEntry(i);
+                WriteReferenceEntry(emptyEntry);
+            }
+        }
+
+    } catch ( BamException& e) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// returns the file extension used by this index format
+const string BamStandardIndex::Extension(void) {
+    const string extension(BamStandardIndex::BAI_EXTENSION);
+    return extension;
+}
+
+void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+
+    // cannot calculate offsets if unknown/invalid reference ID requested
+    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
+        throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
+
+    // retrieve index summary for left bound reference
+    const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
+
+    // set up region boundaries based on actual BamReader data
+    uint32_t begin;
+    uint32_t end;
+    AdjustRegion(region, begin, end);
+
+    // retrieve all candidate bin IDs for region
+    set<uint16_t> candidateBins;
+    CalculateCandidateBins(begin, end, candidateBins);
+
+    // use reference's linear offsets to calculate the minimum offset
+    // that must be considered to find overlap
+    const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
+
+    // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
+    // no data should not be error, just bail
+    vector<int64_t> offsets;
+    CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
+    if ( offsets.empty() )
+        return;
+    
+    // ensure that offsets are sorted before processing
+    sort( offsets.begin(), offsets.end() );
+
+    // binary search for an overlapping block (may not be first one though)
+    BamAlignment al;
+    typedef vector<int64_t>::const_iterator OffsetConstIterator;
+    OffsetConstIterator offsetFirst = offsets.begin();
+    OffsetConstIterator offsetIter  = offsetFirst;
+    OffsetConstIterator offsetLast  = offsets.end();
+    iterator_traits<OffsetConstIterator>::difference_type count = distance(offsetFirst, offsetLast);
+    iterator_traits<OffsetConstIterator>::difference_type step;
+    while ( count > 0 ) {
+        offsetIter = offsetFirst;
+        step = count/2;
+        advance(offsetIter, step);
+
+        // attempt seek to candidate offset
+        const int64_t& candidateOffset = (*offsetIter);
+        if ( !m_reader->Seek(candidateOffset) ) {
+            const string readerError = m_reader->GetErrorString();
+            const string message = "could not seek in BAM file: \n\t" + readerError;
+            throw BamException("BamToolsIndex::GetOffset", message);
+        }
+
+        // load first available alignment, setting flag to true if data exists
+        *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
+
+        // check alignment against region
+        if ( al.GetEndPosition() <= region.LeftPosition ) {
+            offsetFirst = ++offsetIter;
+            count -= step+1;
+        } else count = step;
+    }
+
+    // step back to the offset before the 'current offset' (to make sure we cover overlaps)
+    if ( offsetIter != offsets.begin() )
+        --offsetIter;
+    offset = (*offsetIter);
+}
+
+// returns whether reference has alignments or not
+bool BamStandardIndex::HasAlignments(const int& referenceID) const {
+
+    // an ID outside the index summary has no index data, hence no alignments
+    const int numReferences = (int)m_indexFileSummary.size();
+    const bool isKnownReference = ( referenceID >= 0 && referenceID < numReferences );
+    if ( !isKnownReference )
+        return false;
+
+    // reference has alignments if the index recorded at least one bin for it
+    return ( m_indexFileSummary.at(referenceID).NumBins > 0 );
+}
+
+// returns true only if an I/O device exists and reports itself as open
+bool BamStandardIndex::IsDeviceOpen(void) const {
+    const bool hasDevice = ( m_resources.Device != 0 );
+    return ( hasDevice ? m_resources.Device->IsOpen() : false );
+}
+
+// attempts to use index data to jump to @region, returns success/fail
+// a "successful" jump indicates no error, but not whether this region has data
+//   * thus, the method sets a flag to indicate whether there are alignments
+//     available after the jump position
+bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // clear out flag
+    *hasAlignmentsInRegion = false;
+
+    // skip if invalid reader or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
+        return false;
+    }
+
+    // calculate nearest offset to jump to
+    int64_t offset;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // if region has alignments, return success/fail of seeking there
+    if ( *hasAlignmentsInRegion )
+        return m_reader->Seek(offset);
+
+    // otherwise, simply return true (but hasAlignmentsInRegion flag has been set to false)
+    // (this is OK, BamReader will check this flag before trying to load data)
+    return true;
+}
+
+// loads existing data from file into memory
+bool BamStandardIndex::Load(const std::string& filename) {
+
+    try {
+
+        // attempt to open file (read-only)
+        OpenFile(filename, IBamIODevice::ReadOnly);
+
+        // validate format
+        CheckMagicNumber();
+
+        // load in-memory summary of index data
+        SummarizeIndexFile();
+
+        // return success
+        return true;
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+}
+
+uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
+
+    // attempt seek to proper index file position
+    const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
+                                             index*BamStandardIndex::SIZEOF_LINEAROFFSET;
+    Seek(linearOffsetFilePosition, SEEK_SET);
+
+    // read linear offset from BAI file
+    uint64_t linearOffset;
+    ReadLinearOffset(linearOffset);
+    return linearOffset;
+}
+
+void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+
+    // skip if chunks are empty, nothing to merge
+    if ( chunks.empty() )
+        return;
+
+    // set up merged alignment chunk container
+    BaiAlignmentChunkVector mergedChunks;
+    mergedChunks.push_back( chunks[0] );
+
+    // iterate over chunks
+    int i = 0;
+    BaiAlignmentChunkVector::iterator chunkIter = chunks.begin();
+    BaiAlignmentChunkVector::iterator chunkEnd  = chunks.end();
+    for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
+
+        // get 'currentMergeChunk' based on numeric index
+        BaiAlignmentChunk& currentMergeChunk = mergedChunks[i];
+
+        // get sourceChunk based on source vector iterator
+        BaiAlignmentChunk& sourceChunk = (*chunkIter);
+
+        // if currentMergeChunk ends where sourceChunk starts, then merge the two
+        if ( currentMergeChunk.Stop>>16 == sourceChunk.Start>>16 )
+            currentMergeChunk.Stop = sourceChunk.Stop;
+
+        // otherwise
+        else {
+            // append sourceChunk after currentMergeChunk
+            mergedChunks.push_back(sourceChunk);
+
+            // update i, so the next iteration will consider the
+            // recently-appended sourceChunk as new mergeChunk candidate
+            ++i;
+        }
+    }
+
+    // saved newly-merged chunks into (parameter) chunks
+    chunks = mergedChunks;
+}
+
+void BamStandardIndex::OpenFile(const std::string& filename, IBamIODevice::OpenMode mode) {
+
+    // make sure any previous index file is closed
+    CloseFile();
+
+    m_resources.Device = BamDeviceFactory::CreateDevice(filename);
+    if ( m_resources.Device == 0 ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamStandardIndex::OpenFile", message);
+    }
+
+    // attempt to open file
+    m_resources.Device->Open(mode);
+    if ( !IsDeviceOpen() ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamStandardIndex::OpenFile", message);
+    }
+}
+
+// reads a bin ID from the current index file position into @binId
+// throws BamException if the full value could not be read
+void BamStandardIndex::ReadBinID(uint32_t& binId) {
+
+    // pull raw bytes from index file
+    char* rawBytes = (char*)&binId;
+    const int64_t bytesRead = m_resources.Device->Read(rawBytes, sizeof(binId));
+
+    // swap byte order when flagged as big-endian
+    if ( m_isBigEndian )
+        SwapEndian_32(binId);
+
+    // a short read leaves the value unusable
+    if ( bytesRead != sizeof(binId) )
+        throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
+}
+
+// reads the next bin from the index file: its ID & chunk count are stored in
+// @binId & @numAlignmentChunks, its alignment chunk data is read into the I/O buffer
+// throws BamException on read failure, or if the stored chunk count is negative
+void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
+
+    // read bin header
+    ReadBinID(binId);
+    ReadNumAlignmentChunks(numAlignmentChunks);
+
+    // a negative count can only come from a damaged index file; reject it here,
+    // otherwise it would wrap around to a huge unsigned byte count below
+    if ( numAlignmentChunks < 0 )
+        throw BamException("BamStandardIndex::ReadBinIntoBuffer", "invalid BAI alignment chunk count");
+
+    // read bin contents
+    const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
+    ReadIntoBuffer(bytesRequested);
+}
+
+void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
+
+    // ensure that our buffer is big enough for request
+    BamStandardIndex::CheckBufferSize(m_resources.Buffer, m_bufferLength, bytesRequested);
+
+    // read from BAI file stream
+    const int64_t bytesRead = m_resources.Device->Read(m_resources.Buffer, bytesRequested);
+    if ( bytesRead != (int64_t)bytesRequested ) {
+        stringstream s("");
+        s << "expected to read: " << bytesRequested << " bytes, "
+          << "but instead read: " << bytesRead;
+        throw BamException("BamStandardIndex::ReadIntoBuffer", s.str());
+    }
+}
+
+// reads one linear offset from the current index file position into @linearOffset
+// throws BamException if the full value could not be read
+void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
+
+    // pull raw bytes from index file
+    char* rawBytes = (char*)&linearOffset;
+    const int64_t bytesRead = m_resources.Device->Read(rawBytes, sizeof(linearOffset));
+
+    // swap byte order when flagged as big-endian
+    if ( m_isBigEndian )
+        SwapEndian_64(linearOffset);
+
+    // a short read leaves the value unusable
+    if ( bytesRead != sizeof(linearOffset) )
+        throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
+}
+
+// reads a bin's alignment chunk count from the current index file position
+// throws BamException if the full value could not be read
+void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
+
+    // pull raw bytes from index file
+    char* rawBytes = (char*)&numAlignmentChunks;
+    const int64_t bytesRead = m_resources.Device->Read(rawBytes, sizeof(numAlignmentChunks));
+
+    // swap byte order when flagged as big-endian
+    if ( m_isBigEndian )
+        SwapEndian_32(numAlignmentChunks);
+
+    // a short read leaves the value unusable
+    if ( bytesRead != sizeof(numAlignmentChunks) )
+        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
+}
+
+// reads a reference's bin count from the current index file position
+// throws BamException if the full value could not be read
+void BamStandardIndex::ReadNumBins(int& numBins) {
+
+    // pull raw bytes from index file
+    char* rawBytes = (char*)&numBins;
+    const int64_t bytesRead = m_resources.Device->Read(rawBytes, sizeof(numBins));
+
+    // swap byte order when flagged as big-endian
+    if ( m_isBigEndian )
+        SwapEndian_32(numBins);
+
+    // a short read leaves the value unusable
+    if ( bytesRead != sizeof(numBins) )
+        throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
+}
+
+// reads a reference's linear offset count from the current index file position
+// throws BamException if the full value could not be read
+void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&numLinearOffsets, sizeof(numLinearOffsets));
+
+    // swap byte order when flagged as big-endian
+    if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
+
+    // report failure under this method's own name (was mislabeled as ReadNumAlignmentChunks)
+    if ( numBytesRead != sizeof(numLinearOffsets) )
+        throw BamException("BamStandardIndex::ReadNumLinearOffsets", "could not read BAI linear offset count");
+}
+
+// reads the reference count from the current index file position
+// throws BamException if the full value could not be read
+void BamStandardIndex::ReadNumReferences(int& numReferences) {
+
+    // pull raw bytes from index file
+    char* rawBytes = (char*)&numReferences;
+    const int64_t bytesRead = m_resources.Device->Read(rawBytes, sizeof(numReferences));
+
+    // swap byte order when flagged as big-endian
+    if ( m_isBigEndian )
+        SwapEndian_32(numReferences);
+
+    // a short read leaves the value unusable
+    if ( bytesRead != sizeof(numReferences) )
+        throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
+}
+
+// resets the index file summary to @numReferences default-constructed reference summaries
+void BamStandardIndex::ReserveForSummary(const int& numReferences) {
+
+    // discard whatever was summarized before
+    m_indexFileSummary.clear();
+
+    // one blank summary per reference
+    const BaiReferenceSummary blankSummary;
+    m_indexFileSummary.assign(numReferences, blankSummary);
+}
+
+void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
+                                               const uint32_t& currentBin,
+                                               const uint64_t& currentOffset,
+                                               const uint64_t& lastOffset)
+{
+    // create new alignment chunk
+    BaiAlignmentChunk newChunk(currentOffset, lastOffset);
+
+    // if no entry exists yet for this bin, create one and store alignment chunk
+    BaiBinMap::iterator binIter = binMap.find(currentBin);
+    if ( binIter == binMap.end() ) {
+        BaiAlignmentChunkVector newChunks;
+        newChunks.push_back(newChunk);
+        binMap.insert( pair<uint32_t, BaiAlignmentChunkVector>(currentBin, newChunks));
+    }
+
+    // otherwise, just append alignment chunk
+    else {
+        BaiAlignmentChunkVector& binChunks = (*binIter).second;
+        binChunks.push_back( newChunk );
+    }
+}
+
+// records, for reference @refId, its bin count and the current index file position
+// (stored as the position of the reference's first bin)
+void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
+
+    // bounds-checked lookup of the reference's summary
+    BaiReferenceSummary& summary = m_indexFileSummary.at(refId);
+
+    summary.NumBins = numBins;
+    summary.FirstBinFilePosition = Tell();
+}
+
+void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+                                             const int& alignmentStartPosition,
+                                             const int& alignmentStopPosition,
+                                             const uint64_t& lastOffset)
+{
+    // get converted offsets
+    const int beginOffset = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
+    const int endOffset   = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
+
+    // resize vector if necessary
+    int oldSize = offsets.size();
+    int newSize = endOffset + 1;
+    if ( oldSize < newSize )
+        offsets.resize(newSize, 0);
+
+    // store offset
+    for( int i = beginOffset + 1; i <= endOffset; ++i ) {
+        if ( offsets[i] == 0 )
+            offsets[i] = lastOffset;
+    }
+}
+
+// records, for reference @refId, its linear offset count and the current index file
+// position (stored as the position of the reference's first linear offset)
+void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
+
+    // bounds-checked lookup of the reference's summary
+    BaiReferenceSummary& summary = m_indexFileSummary.at(refId);
+
+    summary.NumLinearOffsets = numLinearOffsets;
+    summary.FirstLinearOffsetFilePosition = Tell();
+}
+
+// moves the index file position to @position (interpreted relative to @origin)
+// throws BamException if the device reports failure
+void BamStandardIndex::Seek(const int64_t& position, const int origin) {
+    if ( m_resources.Device->Seek(position, origin) )
+        return;
+    throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
+}
+
+// advances the index file past @numBins bins by reading each of them
+void BamStandardIndex::SkipBins(const int& numBins) {
+
+    // read results land here but are never looked at (same for the I/O buffer)
+    uint32_t ignoredBinId;
+    int32_t ignoredChunkCount;
+
+    int binsRemaining = numBins;
+    while ( binsRemaining > 0 ) {
+        ReadBinIntoBuffer(ignoredBinId, ignoredChunkCount);
+        --binsRemaining;
+    }
+}
+
+// advances the index file past @numLinearOffsets linear offsets by reading them
+// into the I/O buffer (buffer contents are not used afterwards)
+// throws BamException on read failure, or if @numLinearOffsets is negative
+void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
+
+    // a negative count can only come from a damaged index file; reject it here,
+    // otherwise it would wrap around to a huge unsigned byte count below
+    if ( numLinearOffsets < 0 )
+        throw BamException("BamStandardIndex::SkipLinearOffsets", "invalid BAI linear offset count");
+
+    const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
+    ReadIntoBuffer(bytesRequested);
+}
+
+// sorts @linearOffsets in place, in ascending order
+void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
+    BaiLinearOffsetVector::iterator first = linearOffsets.begin();
+    BaiLinearOffsetVector::iterator last  = linearOffsets.end();
+    sort(first, last);
+}
+
+// reads a reference's bin count, records it (plus the file position of the first bin)
+// in @refSummary, then moves past the bins themselves
+void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
+
+    // bin count precedes the bin data
+    int binCount;
+    ReadNumBins(binCount);
+
+    // file position is now at the reference's first bin
+    refSummary.NumBins = binCount;
+    refSummary.FirstBinFilePosition = Tell();
+
+    // bin contents are not kept in memory
+    SkipBins(binCount);
+}
+
+// builds the in-memory summary of the index file: reads the reference count,
+// then summarizes each reference entry in file order
+// throws BamException on read failure, or if the stored reference count is negative
+void BamStandardIndex::SummarizeIndexFile(void) {
+
+    // load number of reference sequences
+    int numReferences;
+    ReadNumReferences(numReferences);
+
+    // a negative count can only come from a damaged index file; reject it here so the
+    // failure is reported as a BamException instead of escaping from the container
+    if ( numReferences < 0 )
+        throw BamException("BamStandardIndex::SummarizeIndexFile", "invalid BAI reference count");
+
+    // initialize file summary data
+    ReserveForSummary(numReferences);
+
+    // iterate over reference entries
+    BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
+    BaiFileSummary::iterator summaryEnd  = m_indexFileSummary.end();
+    for ( ; summaryIter != summaryEnd; ++summaryIter )
+        SummarizeReference(*summaryIter);
+}
+
+// reads a reference's linear offset count, records it (plus the file position of the
+// first linear offset) in @refSummary, then moves past the linear offsets themselves
+void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
+
+    // count precedes the linear offset data
+    int offsetCount;
+    ReadNumLinearOffsets(offsetCount);
+
+    // file position is now at the reference's first linear offset
+    refSummary.NumLinearOffsets = offsetCount;
+    refSummary.FirstLinearOffsetFilePosition = Tell();
+
+    // linear offsets are not kept in memory
+    SkipLinearOffsets(offsetCount);
+}
+
+// summarizes one reference entry from the index file into @refSummary
+void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
+
+    // bins are summarized first, linear offsets second
+    SummarizeBins(refSummary);
+    SummarizeLinearOffsets(refSummary);
+}
+
+// reports the current position in the index file, as given by the I/O device
+int64_t BamStandardIndex::Tell(void) const {
+    const int64_t currentPosition = m_resources.Device->Tell();
+    return currentPosition;
+}
+
+void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
+
+    // localize alignment chunk offsets
+    uint64_t start = chunk.Start;
+    uint64_t stop  = chunk.Stop;
+
+    // swap endian-ness if necessary
+    if ( m_isBigEndian ) {
+        SwapEndian_64(start);
+        SwapEndian_64(stop);
+    }
+
+    // write to index file
+    int64_t numBytesWritten = 0;
+    numBytesWritten += m_resources.Device->Write((const char*)&start, sizeof(start));
+    numBytesWritten += m_resources.Device->Write((const char*)&stop, sizeof(stop));
+    if ( numBytesWritten != (sizeof(start)+sizeof(stop)) )
+        throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
+}
+
+void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+
+    // make sure chunks are merged (simplified) before writing & saving summary
+    MergeAlignmentChunks(chunks);
+
+    // write chunks
+    int32_t chunkCount = chunks.size();
+    if ( m_isBigEndian ) SwapEndian_32(chunkCount);
+    const int64_t numBytesWritten = m_resources.Device->Write((const char*)&chunkCount, sizeof(chunkCount));
+    if ( numBytesWritten != sizeof(chunkCount) )
+        throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
+
+    // iterate over chunks
+    BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();
+    BaiAlignmentChunkVector::const_iterator chunkEnd  = chunks.end();
+    for ( ; chunkIter != chunkEnd; ++chunkIter )
+        WriteAlignmentChunk( (*chunkIter) );
+}
+
+// writes one bin to the index file: its ID, followed by its alignment chunks
+// throws BamException if the bin ID (or the chunk data) could not be written
+void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
+
+    // work on a copy of the ID (byte order swapped when flagged as big-endian)
+    uint32_t storedBinId = binId;
+    if ( m_isBigEndian )
+        SwapEndian_32(storedBinId);
+
+    // write BAM bin ID
+    const int64_t idBytesWritten = m_resources.Device->Write((const char*)&storedBinId, sizeof(storedBinId));
+    if ( idBytesWritten != sizeof(storedBinId) )
+        throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
+
+    // chunk count & chunks follow the ID
+    WriteAlignmentChunks(chunks);
+}
+
+void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
+
+    // write number of bins
+    int32_t binCount = bins.size();
+    if ( m_isBigEndian ) SwapEndian_32(binCount);
+    const int64_t numBytesWritten = m_resources.Device->Write((const char*)&binCount, sizeof(binCount));
+    if ( numBytesWritten != sizeof(binCount) )
+        throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
+
+    // save summary for reference's bins
+    SaveBinsSummary(refId, bins.size());
+
+    // iterate over bins
+    BaiBinMap::iterator binIter = bins.begin();
+    BaiBinMap::iterator binEnd  = bins.end();
+    for ( ; binIter != binEnd; ++binIter )
+        WriteBin( (*binIter).first, (*binIter).second );
+}
+
+// writes the index file header: the BAI magic number, then the number of references
+// (taken from the size of the index file summary)
+// throws BamException if the number of bytes written does not match
+void BamStandardIndex::WriteHeader(void) {
+
+    // reference count as stored in file (byte order swapped when flagged as big-endian)
+    int32_t storedReferenceCount = m_indexFileSummary.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(storedReferenceCount);
+
+    // magic number goes first, reference count second
+    int64_t totalBytesWritten = 0;
+    totalBytesWritten += m_resources.Device->Write(BamStandardIndex::BAI_MAGIC, 4);
+    totalBytesWritten += m_resources.Device->Write((const char*)&storedReferenceCount, sizeof(storedReferenceCount));
+
+    // expect 4 bytes of magic number plus the full reference count
+    if ( totalBytesWritten != sizeof(storedReferenceCount)+4 )
+        throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
+}
+
+void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
+
+    // make sure linear offsets are sorted before writing & saving summary
+    SortLinearOffsets(linearOffsets);
+
+    int64_t numBytesWritten = 0;
+
+    // write number of linear offsets
+    int32_t offsetCount = linearOffsets.size();
+    if ( m_isBigEndian ) SwapEndian_32(offsetCount);
+    numBytesWritten += m_resources.Device->Write((const char*)&offsetCount, sizeof(offsetCount));
+
+    // save summary for reference's linear offsets
+    SaveLinearOffsetsSummary(refId, linearOffsets.size());
+
+    // iterate over linear offsets
+    BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();
+    BaiLinearOffsetVector::const_iterator offsetEnd  = linearOffsets.end();
+    for ( ; offsetIter != offsetEnd; ++offsetIter ) {
+
+        // write linear offset
+        uint64_t linearOffset = (*offsetIter);
+        if ( m_isBigEndian ) SwapEndian_64(linearOffset);
+        numBytesWritten += m_resources.Device->Write((const char*)&linearOffset, sizeof(linearOffset));
+    }
+
+    if ( numBytesWritten != (sizeof(offsetCount) + linearOffsets.size()*sizeof(uint64_t)) )
+        throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
+}
+
+// writes one reference's index data to the index file
+void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
+
+    // bins are written first, linear offsets second
+    WriteBins(refEntry.ID, refEntry.Bins);
+    WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
+}
diff --git a/src/api/internal/index/BamStandardIndex_p.h b/src/api/internal/index/BamStandardIndex_p.h

new file mode 100644 (file)

index 0000000..273d56e
--- /dev/null
+++ b/src/api/internal/index/BamStandardIndex_p.h
@@ -0,0 +1,237 @@
+// ***************************************************************************
+// BamStandardIndex_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#ifndef BAM_STANDARD_INDEX_FORMAT_H
+#define BAM_STANDARD_INDEX_FORMAT_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+#include "api/IBamIODevice.h"
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// -----------------------------------------------------------------------------
+// BamStandardIndex data structures
+
+// marks where a contiguous run of alignments begins (Start) and ends (Stop)
+struct BaiAlignmentChunk {
+
+    // constructor - both bounds default to 0
+    BaiAlignmentChunk(const uint64_t& start = 0, const uint64_t& stop = 0)
+        : Start(start), Stop(stop)
+    { }
+
+    // data members
+    uint64_t Start;  // beginning of the run
+    uint64_t Stop;   // end of the run
+};
+
+// orders alignment chunks by their Start value only (for sorting)
+inline
+bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
+    return ( rhs.Start > lhs.Start );
+}
+
+// convenience typedef for a list of all alignment 'chunks' in a BAI bin
+typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
+
+// convenience typedef for a map of all BAI bins in a reference (ID => chunks)
+typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
+
+// convenience typedef for a list of all 'linear offsets' in a reference
+typedef std::vector<uint64_t> BaiLinearOffsetVector;
+
+// holds the full BAI index data of a single reference -
+// everything needed for building, loading, & writing it
+struct BaiReferenceEntry {
+
+    // ctor - an ID of -1 is used when none is given; bins & linear offsets start out empty
+    BaiReferenceEntry(const int32_t& id = -1) : ID(id) { }
+
+    // data members
+    int32_t ID;                           // reference ID
+    BaiBinMap Bins;                       // bin ID => alignment chunks
+    BaiLinearOffsetVector LinearOffsets;  // reference's linear offsets
+};
+
+// (persistent) summary of a BaiReferenceEntry's index data:
+// how many bins & linear offsets it has, and where in the index file they begin
+struct BaiReferenceSummary {
+
+    // ctor - every field starts out as 0
+    BaiReferenceSummary(void)
+        : NumBins(0), NumLinearOffsets(0)
+        , FirstBinFilePosition(0), FirstLinearOffsetFilePosition(0)
+    { }
+
+    // data members
+    int NumBins;                             // number of bins
+    int NumLinearOffsets;                    // number of linear offsets
+    uint64_t FirstBinFilePosition;           // index file position of first bin
+    uint64_t FirstLinearOffsetFilePosition;  // index file position of first linear offset
+};
+
+// convenience typedef for describing a full BAI index file summary
+typedef std::vector<BaiReferenceSummary> BaiFileSummary;
+
+// end BamStandardIndex data structures
+// -----------------------------------------------------------------------------
+
+class BamStandardIndex : public BamIndex {
+
+    // ctor & dtor
+    public:
+        BamStandardIndex(Internal::BamReaderPrivate* reader);
+        ~BamStandardIndex(void);
+
+    // BamIndex implementation
+    public:
+        // builds index from associated BAM file & writes out to index file
+        bool Create(void);
+        // returns whether reference has alignments or not
+        bool HasAlignments(const int& referenceID) const;
+        // attempts to use index data to jump to @region, returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets a flag to indicate whether there are alignments
+        //     available after the jump position
+        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+        // loads existing data from file into memory
+        bool Load(const std::string& filename);
+        BamIndex::IndexType Type(void) const { return BamIndex::STANDARD; }
+    public:
+        // returns format's file extension
+        static const std::string Extension(void);
+
+    // internal methods
+    private:
+
+        // index file ops
+        void CheckMagicNumber(void);
+        void CloseFile(void);
+        bool IsDeviceOpen(void) const;
+        void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+        void Seek(const int64_t& position, const int origin);
+        int64_t Tell(void) const;
+
+        // BAI index building methods
+        void ClearReferenceEntry(BaiReferenceEntry& refEntry);
+        void SaveAlignmentChunkToBin(BaiBinMap& binMap,
+                                     const uint32_t& currentBin,
+                                     const uint64_t& currentOffset,
+                                     const uint64_t& lastOffset);
+        void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+                                   const int& alignmentStartPosition,
+                                   const int& alignmentStopPosition,
+                                   const uint64_t& lastOffset);
+
+        // random-access methods
+        void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+        void CalculateCandidateBins(const uint32_t& begin,
+                                    const uint32_t& end,
+                                    std::set<uint16_t>& candidateBins);
+        void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+                                       const uint64_t& minOffset,
+                                       std::set<uint16_t>& candidateBins,
+                                       std::vector<int64_t>& offsets);
+        uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
+        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+        uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
+
+        // BAI summary (create/load) methods
+        void ReserveForSummary(const int& numReferences);
+        void SaveBinsSummary(const int& refId, const int& numBins);
+        void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
+        void SkipBins(const int& numBins);
+        void SkipLinearOffsets(const int& numLinearOffsets);
+        void SummarizeBins(BaiReferenceSummary& refSummary);
+        void SummarizeIndexFile(void);
+        void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+        void SummarizeReference(BaiReferenceSummary& refSummary);
+
+        // BAI full index input methods
+        void ReadBinID(uint32_t& binId);
+        void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+        void ReadIntoBuffer(const unsigned int& bytesRequested);
+        void ReadLinearOffset(uint64_t& linearOffset);
+        void ReadNumAlignmentChunks(int& numAlignmentChunks);
+        void ReadNumBins(int& numBins);
+        void ReadNumLinearOffsets(int& numLinearOffsets);
+        void ReadNumReferences(int& numReferences);
+
+        // BAI full index output methods
+        void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
+        void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
+        void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+        void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+        void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+        void WriteBins(const int& refId, BaiBinMap& bins);
+        void WriteHeader(void);
+        void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+        void WriteReferenceEntry(BaiReferenceEntry& refEntry);
+
+    // data members
+    private:
+        bool m_isBigEndian;
+        BaiFileSummary m_indexFileSummary;
+
+        // our input buffer
+        unsigned int m_bufferLength;
+        // groups the index's I/O device pointer with its raw input buffer
+        // NOTE(review): ctor/dtor are defined elsewhere - presumably the dtor
+        //               releases both members; confirm against the .cpp
+        struct RaiiWrapper {
+            IBamIODevice* Device;
+            char* Buffer;
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+        };
+        RaiiWrapper m_resources;
+
+    // static methods
+    private:
+        // checks if the buffer is large enough to accommodate the requested size
+        static void CheckBufferSize(char*& buffer,
+                                    unsigned int& bufferLength,
+                                    const unsigned int& requestedBytes);
+        // checks if the buffer is large enough to accommodate the requested size
+        static void CheckBufferSize(unsigned char*& buffer,
+                                    unsigned int& bufferLength,
+                                    const unsigned int& requestedBytes);
+    // static constants
+    private:
+        static const int MAX_BIN;
+        static const int BAM_LIDX_SHIFT;
+        static const std::string BAI_EXTENSION;
+        static const char* const BAI_MAGIC;
+        static const int SIZEOF_ALIGNMENTCHUNK;
+        static const int SIZEOF_BINCORE;
+        static const int SIZEOF_LINEAROFFSET;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAM_STANDARD_INDEX_FORMAT_H
diff --git a/src/api/internal/index/BamToolsIndex_p.cpp b/src/api/internal/index/BamToolsIndex_p.cpp

new file mode 100644 (file)

index 0000000..bb09bc9
--- /dev/null
+++ b/src/api/internal/index/BamToolsIndex_p.cpp
@@ -0,0 +1,642 @@
+// ***************************************************************************
+// BamToolsIndex.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <map>
+using namespace std;
+
+// --------------------------------
+// static BamToolsIndex constants
+// --------------------------------
+
+// number of alignments grouped into a single BTI block when building a new index
+const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
+
+// filename suffix reported by Extension() for this index format
+const string BamToolsIndex::BTI_EXTENSION = ".bti";
+
+// 4-byte signature written at the start of a BTI file
+const char* const BamToolsIndex::BTI_MAGIC = "BTI\1";
+
+// serialized size of one block: two 32-bit positions plus one 64-bit offset
+const int BamToolsIndex::SIZEOF_BLOCK = 2*sizeof(int32_t) + sizeof(int64_t);
+
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+// starts out holding no device
+BamToolsIndex::RaiiWrapper::RaiiWrapper(void)
+    : Device(0)
+{ }
+
+// closes & deletes the device, if one is still held
+BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+    // nothing to release
+    if ( Device == 0 )
+        return;
+
+    Device->Close();
+    delete Device;
+    Device = 0;
+}
+
+// ------------------------------
+// BamToolsIndex implementation
+// ------------------------------
+
+// ctor - hands @reader to the BamIndex base & sets up defaults
+// (host byte order is detected once here and reused by all read/write helpers)
+BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
+    : BamIndex(reader)
+    , m_isBigEndian( BamTools::SystemIsBigEndian() )
+    , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
+    , m_inputVersion(0)
+    , m_outputVersion(BTI_2_0) // latest version - used for writing new index files
+{ }
+
+// dtor - closes the index file on the way out
+BamToolsIndex::~BamToolsIndex(void) {
+    this->CloseFile();
+}
+
+// reads the next 4 bytes from the index device & compares them to the BTI signature
+// throws BamException if the bytes cannot be read or do not match
+void BamToolsIndex::CheckMagicNumber(void) {
+
+    static const int MAGIC_LENGTH = 4;
+
+    // pull signature bytes from file
+    char fileMagic[MAGIC_LENGTH];
+    if ( m_resources.Device->Read(fileMagic, MAGIC_LENGTH) != MAGIC_LENGTH )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
+
+    // compare against the expected signature
+    const bool magicMatches = ( strncmp(fileMagic, BamToolsIndex::BTI_MAGIC, MAGIC_LENGTH) == 0 );
+    if ( !magicMatches )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
+}
+
+// check index file version, return true if OK
+void BamToolsIndex::CheckVersion(void) {
+
+    // read version from file
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&m_inputVersion, sizeof(m_inputVersion));
+    if ( numBytesRead != sizeof(m_inputVersion) )
+        throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
+    if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
+
+    // if version is negative, or zero
+    if ( m_inputVersion <= 0 )
+        throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
+
+    // if version is newer than can be supported by this version of bamtools
+    else if ( m_inputVersion > m_outputVersion ) {
+        const string message = "unsupported format: this index was created by a newer version of BamTools. "
+                               "Update your local version of BamTools to use the index file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
+    }
+
+    // ------------------------------------------------------------------
+    // check for deprecated, unsupported versions
+    // (the format had to be modified to accomodate a particular bug fix)
+
+    // Version 2.0: introduced support for half-open intervals, instead of the old closed intervals
+    //   respondBy: throwing exception - we're not going to try to handle the old BTI files.
+    else if ( (Version)m_inputVersion < BamToolsIndex::BTI_2_0 ) {
+        const string message = "unsupported format: this version of the index may not properly handle "
+                               "coordinate intervals. Please run 'bamtools index -bti -in yourData.bam' "
+                               "to generate an up-to-date, fixed BTI file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
+    }
+}
+
+// resets @refEntry to its "no reference" state: empty block list, ID of -1
+void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
+    refEntry.Blocks.clear();
+    refEntry.ID = -1;
+}
+
+// closes & releases the index device (if any) and discards the in-memory file summary
+void BamToolsIndex::CloseFile(void) {
+
+    // release the device whenever one is held, not only when it reports itself open:
+    // OpenFile() can leave behind a device whose Open() failed, and that object would
+    // otherwise be leaked once the pointer is overwritten by the next OpenFile()
+    // (closing unconditionally mirrors what RaiiWrapper's dtor already does)
+    if ( m_resources.Device ) {
+        m_resources.Device->Close();
+        delete m_resources.Device;
+        m_resources.Device = 0;
+    }
+    m_indexFileSummary.clear();
+}
+
+// builds index from associated BAM file & writes out to index file
+//
+// The index file is named after the reader's filename plus Extension() and is
+// opened in read/write mode. Alignments are read sequentially from the rewound
+// reader and grouped into blocks of m_blockSize alignments; each block records
+// the BAM offset & position of its first alignment plus the largest end position
+// seen in the block. One reference entry is written per reference, including
+// empty entries for references that have no alignments.
+//
+// Returns false (with the error string set) if the reader is not open, cannot be
+// rewound, or if any index file operation throws a BamException.
+bool BamToolsIndex::Create(void) {
+
+    // skip if BamReader is invalid or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
+        return false;
+    }
+
+    try {
+        // open new index file (read & write)
+        const string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, IBamIODevice::ReadWrite);
+
+        // initialize BtiFileSummary with number of references
+        // (WriteHeader() below takes the reference count from this summary's size)
+        const int& numReferences = m_reader->GetReferenceCount();
+        InitializeFileSummary(numReferences);
+
+        // initialize output file header
+        WriteHeader();
+
+        // index building markers
+        uint32_t currentBlockCount      = 0;
+        int64_t currentAlignmentOffset  = m_reader->Tell();
+        int32_t blockRefId              = -1;
+        int32_t blockMaxEndPosition     = -1;
+        int64_t blockStartOffset        = currentAlignmentOffset;
+        int32_t blockStartPosition      = -1;
+
+        // plow through alignments, storing index entries
+        BamAlignment al;
+        BtiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
+
+            // if moved to new reference
+            if ( al.RefID != blockRefId ) {
+
+                // if first pass, check:
+                // NOTE(review): currentBlockCount is also 0 right after a full block
+                //               has been stored (see "if block is full" below). If the
+                //               reference changes at exactly that point, this branch is
+                //               taken instead of the 'else', so the previous reference
+                //               entry is not written here - looks unintended; confirm
+                if ( currentBlockCount == 0 ) {
+
+                    // write any empty references up to (but not including) al.RefID
+                    for ( int i = 0; i < al.RefID; ++i )
+                        WriteReferenceEntry( BtiReferenceEntry(i) );
+                }
+
+                // not first pass:
+                else {
+
+                    // store previous BTI block data in reference entry
+                    const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                    refEntry.Blocks.push_back(block);
+
+                    // write reference entry, then clear
+                    WriteReferenceEntry(refEntry);
+                    ClearReferenceEntry(refEntry);
+
+                    // write any empty references between (but not including)
+                    // the last blockRefID and current al.RefID
+                    for ( int i = blockRefId+1; i < al.RefID; ++i )
+                        WriteReferenceEntry( BtiReferenceEntry(i) );
+
+                    // reset block count
+                    currentBlockCount = 0;
+                }
+
+                // set ID for new reference entry
+                refEntry.ID = al.RefID;
+            }
+
+            // if beginning of block, update counters
+            if ( currentBlockCount == 0 ) {
+                blockRefId          = al.RefID;
+                blockStartOffset    = currentAlignmentOffset;
+                blockStartPosition  = al.Position;
+                blockMaxEndPosition = al.GetEndPosition();
+            }
+
+            // increment block counter
+            ++currentBlockCount;
+
+            // check end position
+            const int32_t alignmentEndPosition = al.GetEndPosition();
+            if ( alignmentEndPosition > blockMaxEndPosition )
+                blockMaxEndPosition = alignmentEndPosition;
+
+            // if block is full, get offset for next block, reset currentBlockCount
+            if ( currentBlockCount == m_blockSize ) {
+
+                // store previous block data in reference entry
+                const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                refEntry.Blocks.push_back(block);
+
+                // update markers
+                blockStartOffset  = m_reader->Tell();
+                currentBlockCount = 0;
+            }
+
+            // not the best name, but for the next iteration, this value will be the offset of the
+            // *current* alignment. this is necessary because we won't know if this next alignment
+            // is on a new reference until we actually read it
+            currentAlignmentOffset = m_reader->Tell();
+        }
+
+        // after finishing alignments, if any data was read, check:
+        // NOTE(review): blockRefId holds the RefID of the most recent block start, so if
+        //               the final alignments carry a negative RefID this whole section is
+        //               skipped, including the trailing empty reference entries - confirm
+        //               whether such alignments can reach this loop
+        if ( blockRefId >= 0 ) {
+
+            // store last BTI block data in reference entry
+            const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+            refEntry.Blocks.push_back(block);
+
+            // write last reference entry, then clear
+            WriteReferenceEntry(refEntry);
+            ClearReferenceEntry(refEntry);
+
+            // then write any empty references remaining at end of file
+            for ( int i = blockRefId+1; i < numReferences; ++i )
+                WriteReferenceEntry( BtiReferenceEntry(i) );
+        }
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// returns the filename suffix used for this index format
+const std::string BamToolsIndex::Extension(void) {
+    return BTI_EXTENSION;
+}
+
+// determines the BAM offset to start reading from for @region
+//
+// Loads the block list for the region's left-bound reference from the index file,
+// then searches it for a block that may overlap the region.
+//   @offset                - receives the chosen block's StartOffset; it is left
+//                            untouched when no block is selected
+//   @hasAlignmentsInRegion - set to true only if a block was selected
+// Throws BamException if the left-bound reference ID is outside the file summary
+// (or if reading the reference's blocks fails).
+void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+
+    // throw if ref ID is not a valid index in file summary data
+    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
+        throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
+
+    // retrieve reference index data for left bound reference
+    BtiReferenceEntry refEntry(region.LeftRefID);
+    ReadReferenceEntry(refEntry);
+
+    // binary search for an overlapping block (may not be first one though)
+    // the search window is [blockFirst, blockFirst + count); blockIter is the probe
+    bool found = false;
+    typedef BtiBlockVector::const_iterator BtiBlockConstIterator;
+    BtiBlockConstIterator blockFirst = refEntry.Blocks.begin();
+    BtiBlockConstIterator blockIter  = blockFirst;
+    BtiBlockConstIterator blockLast  = refEntry.Blocks.end();
+    iterator_traits<BtiBlockConstIterator>::difference_type count = distance(blockFirst, blockLast);
+    iterator_traits<BtiBlockConstIterator>::difference_type step;
+    while ( count > 0 ) {
+        blockIter = blockFirst;
+        step = count/2;
+        advance(blockIter, step);
+
+        const BtiBlock& block = (*blockIter);
+        if ( block.StartPosition <= region.RightPosition ) {
+
+            // block starts at/before the right bound AND extends past the left bound:
+            // it overlaps the region, so stop probing here
+            if ( block.MaxEndPosition > region.LeftPosition ) {
+                offset = block.StartOffset;
+                break;
+            }
+
+            // block ends at or before the left bound: continue in the upper half
+            blockFirst = ++blockIter;
+            count -= step+1;
+        }
+
+        // block starts beyond the right bound: continue in the lower half
+        else count = step;
+    }
+
+    // if we didn't search "off the end" of the blocks
+    if ( blockIter != blockLast ) {
+
+        // "walk back" until we've gone too far
+        // (blockFirst may have been advanced by the search above, so the walk stops
+        //  at the lower edge of the remaining search window, not necessarily at the
+        //  first block of the reference)
+        while ( blockIter != blockFirst ) {
+            const BtiBlock& currentBlock = (*blockIter);
+
+            --blockIter;
+            const BtiBlock& previousBlock = (*blockIter);
+            if ( previousBlock.MaxEndPosition <= region.LeftPosition ) {
+                offset = currentBlock.StartOffset;
+                found = true;
+                break;
+            }
+        }
+
+        // if we walked all the way to first block, just return that and let the reader's
+        // region overlap parsing do the rest
+        if ( blockIter == blockFirst ) {
+            const BtiBlock& block = (*blockIter);
+            offset = block.StartOffset;
+            found = true;
+        }
+    }
+
+
+    // sets to false if blocks container is empty, or if no matching block could be found
+    *hasAlignmentsInRegion = found;
+}
+
+// returns whether the loaded summary lists at least one block for @referenceID
+// (always false for an ID outside the summary's range)
+bool BamToolsIndex::HasAlignments(const int& referenceID) const {
+
+    const int numReferences = (int)m_indexFileSummary.size();
+    const bool isKnownReference = ( referenceID >= 0 && referenceID < numReferences );
+    if ( !isKnownReference )
+        return false;
+
+    return ( m_indexFileSummary.at(referenceID).NumBlocks > 0 );
+}
+
+// replaces the file summary with @numReferences default (empty) reference summaries
+void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
+
+    m_indexFileSummary.clear();
+
+    const BtiReferenceSummary emptySummary;
+    for ( int remaining = numReferences; remaining > 0; --remaining )
+        m_indexFileSummary.push_back(emptySummary);
+}
+
+// returns true only if an index device exists and reports itself as open
+bool BamToolsIndex::IsDeviceOpen(void) const {
+    return ( m_resources.Device != 0 && m_resources.Device->IsOpen() );
+}
+
+// attempts to use index data to jump to @region, returns success/fail
+// a "successful" jump indicates no error, but not whether this region has data
+//   * thus, the method sets a flag to indicate whether there are alignments
+//     available after the jump position
+bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // clear flag
+    *hasAlignmentsInRegion = false;
+
+    // skip if invalid reader or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
+        return false;
+    }
+
+    // make sure left-bound position is valid
+    const RefVector& references = m_reader->GetReferenceData();
+    if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) {
+        SetErrorString("BamToolsIndex::Jump", "could not create index: invalid region requested");
+        return false;
+    }
+
+    // calculate nearest offset to jump to
+    int64_t offset;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // return success/failure of seek
+    return m_reader->Seek(offset);
+}
+
+// loads existing data from file into memory
+bool BamToolsIndex::Load(const std::string& filename) {
+
+    try {
+
+        // attempt to open file (read-only)
+        OpenFile(filename, IBamIODevice::ReadOnly);
+
+        // load metadata & generate in-memory summary
+        LoadHeader();
+        LoadFileSummary();
+
+        // return success
+        return true;
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+}
+
+// reads the reference count from the index file, then fills in one summary
+// (block count & file position of first block) per reference
+void BamToolsIndex::LoadFileSummary(void) {
+
+    // how many references does the file describe?
+    int numReferences;
+    LoadNumReferences(numReferences);
+
+    // start from a clean set of empty summaries
+    InitializeFileSummary(numReferences);
+
+    // fill in each reference's summary, in file order
+    const size_t numSummaries = m_indexFileSummary.size();
+    for ( size_t i = 0; i < numSummaries; ++i )
+        LoadReferenceSummary( m_indexFileSummary[i] );
+}
+
+// validates the magic number & format version, then reads the block size used
+// by the index file into m_blockSize
+// throws BamException if any header field is missing or invalid
+void BamToolsIndex::LoadHeader(void) {
+
+    // check BTI file metadata
+    CheckMagicNumber();
+    CheckVersion();
+
+    // use file's BTI block size to set member variable
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&m_blockSize, sizeof(m_blockSize));
+
+    // confirm the read before touching the value (same order as CheckVersion)
+    if ( numBytesRead != sizeof(m_blockSize) )
+        throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
+    if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
+}
+
+// reads a reference's block count from the current index file position
+// throws BamException if the value cannot be read in full
+void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&numBlocks, sizeof(numBlocks));
+
+    // confirm the read first: callers pass in an uninitialized int, so byte-swapping
+    // before this check would operate on an indeterminate value after a failed read
+    if ( numBytesRead != sizeof(numBlocks) )
+        throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
+    if ( m_isBigEndian ) SwapEndian_32(numBlocks);
+}
+
+// reads the number of references from the current index file position
+// throws BamException if the value cannot be read in full
+void BamToolsIndex::LoadNumReferences(int& numReferences) {
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&numReferences, sizeof(numReferences));
+
+    // confirm the read first: callers pass in an uninitialized int, so byte-swapping
+    // before this check would operate on an indeterminate value after a failed read
+    if ( numBytesRead != sizeof(numReferences) )
+        throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+}
+
+// fills in @refSummary from the current index file position: reads the block
+// count, records where the first block begins, then skips past the blocks
+// throws BamException if the count cannot be read, is negative, or the skip fails
+void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
+
+    // load number of blocks
+    int numBlocks;
+    LoadNumBlocks(numBlocks);
+
+    // a negative count can only come from a damaged file; reject it here rather
+    // than seeking backwards below & handing a bogus count to later block reads
+    if ( numBlocks < 0 )
+        throw BamException("BamToolsIndex::LoadReferenceSummary", "invalid number of BTI blocks");
+
+    // store block summary data for this reference
+    refSummary.NumBlocks = numBlocks;
+    refSummary.FirstBlockFilePosition = Tell();
+
+    // skip reference's blocks
+    SkipBlocks(numBlocks);
+}
+
+void BamToolsIndex::OpenFile(const std::string& filename, IBamIODevice::OpenMode mode) {
+
+    // make sure any previous index file is closed
+    CloseFile();
+
+    m_resources.Device = BamDeviceFactory::CreateDevice(filename);
+    if ( m_resources.Device == 0 ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamStandardIndex::OpenFile", message);
+    }
+
+    // attempt to open file
+    m_resources.Device->Open(mode);
+    if ( !IsDeviceOpen() ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamToolsIndex::OpenFile", message);
+    }
+}
+
+void BamToolsIndex::ReadBlock(BtiBlock& block) {
+
+    // read in block data members
+    int64_t numBytesRead = 0;
+    numBytesRead += m_resources.Device->Read((char*)&block.MaxEndPosition, sizeof(block.MaxEndPosition));
+    numBytesRead += m_resources.Device->Read((char*)&block.StartOffset,    sizeof(block.StartOffset));
+    numBytesRead += m_resources.Device->Read((char*)&block.StartPosition,  sizeof(block.StartPosition));
+
+    // swap endian-ness if necessary
+    if ( m_isBigEndian ) {
+        SwapEndian_32(block.MaxEndPosition);
+        SwapEndian_64(block.StartOffset);
+        SwapEndian_32(block.StartPosition);
+    }
+
+    // check block read ok
+    const int expectedBytes = sizeof(block.MaxEndPosition) +
+                              sizeof(block.StartOffset) +
+                              sizeof(block.StartPosition);
+    if ( numBytesRead != expectedBytes )
+        throw BamException("BamToolsIndex::ReadBlock", "could not read block");
+}
+
+void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
+
+    // prep blocks container
+    blocks.clear();
+    blocks.reserve(refSummary.NumBlocks);
+
+    // skip to first block entry
+    Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
+
+    // read & store block entries
+    BtiBlock block;
+    for ( int i = 0; i < refSummary.NumBlocks; ++i ) {
+        ReadBlock(block);
+        blocks.push_back(block);
+    }
+}
+
+// loads the block list for the reference identified by @refEntry.ID
+// throws BamException if that ID is outside the range of the file summary
+void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
+
+    // reference must be covered by the file summary
+    const int numReferences = (int)m_indexFileSummary.size();
+    const bool isKnownReference = ( refEntry.ID >= 0 && refEntry.ID < numReferences );
+    if ( !isKnownReference )
+        throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
+
+    // the summary tells us where this reference's blocks live & how many to read
+    ReadBlocks( m_indexFileSummary.at(refEntry.ID), refEntry.Blocks );
+}
+
+// moves the index file position to @position, interpreted relative to @origin
+// (callers in this file pass SEEK_SET or SEEK_CUR)
+// throws BamException if the device reports that the seek failed
+void BamToolsIndex::Seek(const int64_t& position, const int origin) {
+    if ( !m_resources.Device->Seek(position, origin) )
+        throw BamException("BamToolsIndex::Seek", "could not seek in BTI file");
+}
+
+// advances the index file position past @numBlocks serialized blocks
+// throws BamException if the seek fails
+void BamToolsIndex::SkipBlocks(const int& numBlocks) {
+
+    // widen before multiplying: the block count comes straight from the file, and
+    // int*int could overflow before the result is converted to Seek()'s 64-bit position
+    const int64_t numBytesToSkip = static_cast<int64_t>(numBlocks) * BamToolsIndex::SIZEOF_BLOCK;
+    Seek( numBytesToSkip, SEEK_CUR );
+}
+
+// reports the current position within the index file, as given by the device
+int64_t BamToolsIndex::Tell(void) const {
+    const int64_t currentPosition = m_resources.Device->Tell();
+    return currentPosition;
+}
+
+void BamToolsIndex::WriteBlock(const BtiBlock& block) {
+
+    // copy entry data
+    int32_t maxEndPosition = block.MaxEndPosition;
+    int64_t startOffset    = block.StartOffset;
+    int32_t startPosition  = block.StartPosition;
+
+    // swap endian-ness if necessary
+    if ( m_isBigEndian ) {
+        SwapEndian_32(maxEndPosition);
+        SwapEndian_64(startOffset);
+        SwapEndian_32(startPosition);
+    }
+
+    // write the reference index entry
+    int64_t numBytesWritten = 0;
+    numBytesWritten += m_resources.Device->Write((const char*)&maxEndPosition, sizeof(maxEndPosition));
+    numBytesWritten += m_resources.Device->Write((const char*)&startOffset,    sizeof(startOffset));
+    numBytesWritten += m_resources.Device->Write((const char*)&startPosition,  sizeof(startPosition));
+
+    // check block written ok
+    const int expectedBytes = sizeof(maxEndPosition) +
+                              sizeof(startOffset) +
+                              sizeof(startPosition);
+    if ( numBytesWritten != expectedBytes )
+        throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
+}
+
+// writes every block in @blocks to the index file, in container order
+void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
+    const size_t numBlocks = blocks.size();
+    for ( size_t i = 0; i < numBlocks; ++i )
+        WriteBlock( blocks[i] );
+}
+
+void BamToolsIndex::WriteHeader(void) {
+
+    int64_t numBytesWritten = 0 ;
+
+    // write BTI index format 'magic number'
+    numBytesWritten += m_resources.Device->Write(BamToolsIndex::BTI_MAGIC, 4);
+
+    // write BTI index format version
+    int32_t currentVersion = (int32_t)m_outputVersion;
+    if ( m_isBigEndian ) SwapEndian_32(currentVersion);
+    numBytesWritten += m_resources.Device->Write((const char*)&currentVersion, sizeof(currentVersion));
+
+    // write block size
+    uint32_t blockSize = m_blockSize;
+    if ( m_isBigEndian ) SwapEndian_32(blockSize);
+    numBytesWritten += m_resources.Device->Write((const char*)&blockSize, sizeof(blockSize));
+
+    // write number of references
+    int32_t numReferences = m_indexFileSummary.size();
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+    numBytesWritten += m_resources.Device->Write((const char*)&numReferences, sizeof(numReferences));
+
+    // check header written ok
+    const int expectedBytes = 4 +
+                              sizeof(currentVersion) +
+                              sizeof(blockSize) +
+                              sizeof(numReferences);
+    if ( numBytesWritten != expectedBytes )
+        throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
+}
+
+// writes one reference's index data: its block count, followed by the blocks
+// throws BamException if the count (or any block) could not be written
+void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
+
+    // block count goes first (byte-swapped on big-endian hosts)
+    uint32_t blockCount = refEntry.Blocks.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(blockCount);
+
+    const int64_t bytesWritten = m_resources.Device->Write((const char*)&blockCount, sizeof(blockCount));
+    if ( bytesWritten != sizeof(blockCount) )
+        throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
+
+    // then the block entries themselves
+    WriteBlocks(refEntry.Blocks);
+}
diff --git a/src/api/internal/index/BamToolsIndex_p.h b/src/api/internal/index/BamToolsIndex_p.h

new file mode 100644 (file)

index 0000000..c1e1aa0
--- /dev/null
+++ b/src/api/internal/index/BamToolsIndex_p.h
@@ -0,0 +1,186 @@
+// ***************************************************************************
+// BamToolsIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#ifndef BAMTOOLS_INDEX_FORMAT_H
+#define BAMTOOLS_INDEX_FORMAT_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+#include "api/IBamIODevice.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// contains data for each 'block' in a BTI index
+// (a block summarizes a run of consecutive alignments on one reference;
+//  members are declared in the same order in which they are read/written)
+struct BtiBlock {
+
+    // data members
+    int32_t MaxEndPosition; // largest alignment end position seen in the block
+    int64_t StartOffset;    // BAM reader offset of the block's first alignment
+    int32_t StartPosition;  // position of the block's first alignment
+
+    // ctor
+    BtiBlock(const int32_t& maxEndPosition = 0,
+             const int64_t& startOffset    = 0,
+             const int32_t& startPosition  = 0)
+        : MaxEndPosition(maxEndPosition)
+        , StartOffset(startOffset)
+        , StartPosition(startPosition)
+    { }
+};
+
+// convenience typedef for describing a list of BTI blocks on a reference
+typedef std::vector<BtiBlock> BtiBlockVector;
+
+// contains all fields necessary for building, loading, & writing
+// full BTI index data for a single reference
+struct BtiReferenceEntry {
+
+    // data members
+    int32_t ID;             // reference ID (-1 when not associated with a reference)
+    BtiBlockVector Blocks;  // this reference's blocks, in the order they were added
+
+    // ctor
+    BtiReferenceEntry(const int& id = -1)
+        : ID(id)
+    { }
+};
+
+// provides (persistent) summary of BtiReferenceEntry's index data
+// (enough to locate & size a reference's blocks in the index file without
+//  keeping the blocks themselves in memory)
+struct BtiReferenceSummary {
+
+    // data members
+    int NumBlocks;                    // number of blocks stored for this reference
+    uint64_t FirstBlockFilePosition;  // index file position of this reference's first block
+
+    // ctor
+    BtiReferenceSummary(void)
+        : NumBlocks(0)
+        , FirstBlockFilePosition(0)
+    { }
+};
+
+// convenience typedef for describing a full BTI index file summary
+typedef std::vector<BtiReferenceSummary> BtiFileSummary;
+
+// BamIndex implementation for the BamTools index format (".bti")
+//
+// Only a per-reference summary (block count & file position of the first block)
+// is held in memory; the blocks for a reference are read from the index file
+// when an offset is requested for a region.
+class BamToolsIndex : public BamIndex {
+
+    // keep a list of any supported versions here
+    // (might be useful later to handle any 'legacy' versions if the format changes)
+    // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
+    //
+    // so a change introduced in BTI_1_2 may be handled from then on by:
+    //
+    // if ( indexVersion >= BTI_1_2 )
+    //   do something new
+    // else
+    //   do the old thing
+    enum Version { BTI_1_0 = 1
+                 , BTI_1_1
+                 , BTI_1_2
+                 , BTI_2_0
+                 };
+
+    // ctor & dtor
+    public:
+        BamToolsIndex(Internal::BamReaderPrivate* reader);
+        ~BamToolsIndex(void);
+
+    // BamIndex implementation
+    public:
+        // builds index from associated BAM file & writes out to index file
+        bool Create(void);
+        // returns whether reference has alignments or no
+        bool HasAlignments(const int& referenceID) const;
+        // attempts to use index data to jump to @region, returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets a flag to indicate whether there are alignments
+        //     available after the jump position
+        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+        // loads existing data from file into memory
+        bool Load(const std::string& filename);
+        // identifies this index's format (always BamIndex::BAMTOOLS)
+        BamIndex::IndexType Type(void) const { return BamIndex::BAMTOOLS; }
+    public:
+        // returns format's file extension
+        static const std::string Extension(void);
+
+    // internal methods
+    // (the void helpers below report failure by throwing BamException)
+    private:
+
+        // index file ops
+        void CheckMagicNumber(void);
+        void CheckVersion(void);
+        void CloseFile(void);
+        bool IsDeviceOpen(void) const;
+        void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+        void Seek(const int64_t& position, const int origin);
+        int64_t Tell(void) const;
+
+        // index-creation methods
+        void ClearReferenceEntry(BtiReferenceEntry& refEntry);
+        void WriteBlock(const BtiBlock& block);
+        void WriteBlocks(const BtiBlockVector& blocks);
+        void WriteHeader(void);
+        void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+
+        // random-access methods
+        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+        void ReadBlock(BtiBlock& block);
+        void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
+        void ReadReferenceEntry(BtiReferenceEntry& refEntry);
+
+        // BTI summary data methods
+        void InitializeFileSummary(const int& numReferences);
+        void LoadFileSummary(void);
+        void LoadHeader(void);
+        void LoadNumBlocks(int& numBlocks);
+        void LoadNumReferences(int& numReferences);
+        void LoadReferenceSummary(BtiReferenceSummary& refSummary);
+        void SkipBlocks(const int& numBlocks);
+
+    // data members
+    private:
+        bool  m_isBigEndian;                // true if host is big-endian (values are byte-swapped on I/O)
+        BtiFileSummary m_indexFileSummary;  // one summary per reference
+        uint32_t m_blockSize;               // number of alignments per block
+        int32_t m_inputVersion; // Version is serialized as int
+        Version m_outputVersion;            // version written into new index files
+
+        // holds the index file's I/O device; the dtor closes & deletes it
+        struct RaiiWrapper {
+            IBamIODevice* Device;
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+        };
+        RaiiWrapper m_resources;
+
+    // static constants
+    private:
+        static const uint32_t DEFAULT_BLOCK_LENGTH;
+        static const std::string BTI_EXTENSION;
+        static const char* const BTI_MAGIC;
+        static const int SIZEOF_BLOCK;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMTOOLS_INDEX_FORMAT_H
diff --git a/src/api/internal/index/CMakeLists.txt b/src/api/internal/index/CMakeLists.txt

new file mode 100644 (file)

index 0000000..1c78cb9
--- /dev/null
+++ b/src/api/internal/index/CMakeLists.txt
@@ -0,0 +1,17 @@
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/index
+# ==========================
+
+# directory that holds the index-related internal sources (a subdirectory of InternalDir)
+set ( InternalIndexDir "${InternalDir}/index" )
+
+# index implementation files; PARENT_SCOPE sets this variable in the enclosing
+# (parent) CMake scope rather than in this directory's own scope
+set ( InternalIndexSources
+        ${InternalIndexDir}/BamIndexFactory_p.cpp
+        ${InternalIndexDir}/BamStandardIndex_p.cpp
+        ${InternalIndexDir}/BamToolsIndex_p.cpp
+
+        PARENT_SCOPE # <-- leave this last
+)
+
diff --git a/src/api/internal/io/BamDeviceFactory_p.cpp b/src/api/internal/io/BamDeviceFactory_p.cpp

new file mode 100644 (file)

index 0000000..f9c7694
--- /dev/null
+++ b/src/api/internal/io/BamDeviceFactory_p.cpp
@@ -0,0 +1,37 @@
+// ***************************************************************************
+// BamDeviceFactory_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 September 2011 (DB)
+// ---------------------------------------------------------------------------
+// Creates built-in concrete implementations of IBamIODevices
+// ***************************************************************************
+
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BamFile_p.h"
+#include "api/internal/io/BamFtp_p.h"
+#include "api/internal/io/BamHttp_p.h"
+#include "api/internal/io/BamPipe_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+using namespace std;
+
+// Creates the concrete IBamIODevice implementation that matches @source.
+//
+// Selection rules, checked in this order:
+//   "-", "stdin" or "stdout"  -> BamPipe
+//   begins with "http://"     -> BamHttp
+//   begins with "ftp://"      -> BamFtp
+//   anything else             -> BamFile (treated as a "normal" file)
+//
+// The device is allocated with new and is not opened here.
+// NOTE(review): presumably the caller owns the returned pointer - confirm.
+IBamIODevice* BamDeviceFactory::CreateDevice(const string& source) {
+
+    static const string httpPrefix = "http://";
+    static const string ftpPrefix  = "ftp://";
+
+    // check for requested pipe
+    if ( source == "-" || source == "stdin" || source == "stdout" )
+        return new BamPipe;
+
+    // check for HTTP prefix
+    // (compare() looks at the leading characters only, whereas find() would
+    //  search the entire string before reporting "not at position 0")
+    if ( source.compare(0, httpPrefix.size(), httpPrefix) == 0 )
+        return new BamHttp(source);
+
+    // check for FTP prefix
+    if ( source.compare(0, ftpPrefix.size(), ftpPrefix) == 0 )
+        return new BamFtp(source);
+
+    // otherwise assume a "normal" file
+    return new BamFile(source);
+}
diff --git a/src/api/internal/io/BamDeviceFactory_p.h b/src/api/internal/io/BamDeviceFactory_p.h

new file mode 100644 (file)

index 0000000..1d48533
--- /dev/null
+++ b/src/api/internal/io/BamDeviceFactory_p.h
@@ -0,0 +1,37 @@
+// ***************************************************************************
+// BamDeviceFactory_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Creates built-in concrete implementations of IBamIODevices
+// ***************************************************************************
+
+#ifndef BAMDEVICEFACTORY_P_H
+#define BAMDEVICEFACTORY_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Factory for the built-in IBamIODevice implementations.
+class BamDeviceFactory {
+    public:
+        // returns a newly-created device chosen from the contents of @source
+        static IBamIODevice* CreateDevice(const std::string& source);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMDEVICEFACTORY_P_H
diff --git a/src/api/internal/io/BamFile_p.cpp b/src/api/internal/io/BamFile_p.cpp

new file mode 100644 (file)

index 0000000..990d9bf
--- /dev/null
+++ b/src/api/internal/io/BamFile_p.cpp
@@ -0,0 +1,69 @@
+// ***************************************************************************
+// BamFile_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM file-specific IO behavior
+// ***************************************************************************
+
+#include "api/internal/io/BamFile_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <iostream>
+using namespace std;
+
+// Remembers @filename for a later Open(); no file handle is acquired here.
+BamFile::BamFile(const string& filename)
+    : ILocalIODevice(),
+      m_filename(filename)
+{
+}
+
+// Nothing is released explicitly at this level.
+BamFile::~BamFile(void)
+{
+}
+
+// Closes the device if it is currently open; otherwise does nothing.
+//
+// NOTE(review): the stored filename is wiped as part of closing, and Open()
+// reads m_filename - so a closed BamFile looks like it cannot be re-opened. Confirm intent.
+void BamFile::Close(void) {
+
+    // nothing to do unless a stream is open
+    if ( !IsOpen() )
+        return;
+
+    // forget the filename, then let the base class do the actual close
+    m_filename.clear();
+    ILocalIODevice::Close();
+}
+
+// This device always reports itself as random-access.
+bool BamFile::IsRandomAccess(void) const
+{
+    return true;
+}
+
+// Opens the file named at construction in the requested @mode.
+//
+// fopen() modes used: ReadOnly -> "rb", WriteOnly -> "wb", ReadWrite -> "w+b".
+// NOTE(review): "w+b" truncates an existing file - confirm that is the intended
+// ReadWrite behavior.
+//
+// Returns true on success. On failure (unknown mode, or fopen() returned null)
+// the error string is set and false is returned.
+bool BamFile::Open(const IBamIODevice::OpenMode mode) {
+
+    // make sure we're starting with a fresh file stream
+    // (Close() wipes m_filename when a stream is open, so keep a copy & restore it;
+    //  otherwise re-opening an already-open device would always fail on an empty filename)
+    const string filename = m_filename;
+    Close();
+    m_filename = filename;
+
+    // attempt to open FILE* depending on requested openmode
+    if ( mode == IBamIODevice::ReadOnly )
+        m_stream = fopen(m_filename.c_str(), "rb");
+    else if ( mode == IBamIODevice::WriteOnly )
+        m_stream = fopen(m_filename.c_str(), "wb");
+    else if ( mode == IBamIODevice::ReadWrite )
+        m_stream = fopen(m_filename.c_str(), "w+b");
+    else {
+        SetErrorString("BamFile::Open", "unknown open mode requested");
+        return false;
+    }
+
+    // check that we obtained a valid FILE*
+    if ( m_stream == 0 ) {
+        const string message_base = string("could not open file handle for ");
+        const string message = message_base + ( (m_filename.empty()) ? "empty filename" : m_filename );
+        SetErrorString("BamFile::Open", message);
+        return false;
+    }
+
+    // store current IO mode & return success
+    m_mode = mode;
+    return true;
+}
+
+// Repositions the underlying stream via fseek64(); @origin is passed through
+// unchanged. Returns true when fseek64() reports 0.
+bool BamFile::Seek(const int64_t& position, const int origin) {
+    BT_ASSERT_X( m_stream, "BamFile::Seek() - null stream" );
+    const bool seekSucceeded = ( fseek64(m_stream, position, origin) == 0 );
+    return seekSucceeded;
+}
diff --git a/src/api/internal/io/BamFile_p.h b/src/api/internal/io/BamFile_p.h

new file mode 100644 (file)

index 0000000..ed61813
--- /dev/null
+++ b/src/api/internal/io/BamFile_p.h
@@ -0,0 +1,51 @@
+// ***************************************************************************
+// BamFile_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM file-specific IO behavior
+// ***************************************************************************
+
+#ifndef BAMFILE_P_H
+#define BAMFILE_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/ILocalIODevice_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// IO device for a BAM file addressed by filename; the stream is opened with fopen() in Open().
+class BamFile : public ILocalIODevice {
+
+    // ctor & dtor
+    public:
+        // stores @filename; the file itself is not opened until Open()
+        BamFile(const std::string& filename);
+        ~BamFile(void);
+
+    // ILocalIODevice implementation
+    public:
+        // closes the stream (if open) & clears the stored filename
+        void Close(void);
+        // always true for this device
+        bool IsRandomAccess(void) const;
+        // opens the stored filename in the requested mode; false (with error string) on failure
+        bool Open(const IBamIODevice::OpenMode mode);
+        // moves the stream position; true on success
+        bool Seek(const int64_t& position, const int origin = SEEK_SET);
+
+    // data members
+    private:
+        std::string m_filename; // name passed to fopen() by Open()
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMFILE_P_H
diff --git a/src/api/internal/io/BamFtp_p.cpp b/src/api/internal/io/BamFtp_p.cpp

new file mode 100644 (file)

index 0000000..d9f933c
--- /dev/null
+++ b/src/api/internal/io/BamFtp_p.cpp
@@ -0,0 +1,498 @@
+// ***************************************************************************
+// BamFtp_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on FTP server
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/internal/io/BamFtp_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cctype>
+#include <cstdlib>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+namespace BamTools {
+namespace Internal {
+
+// -----------
+// constants
+// -----------
+
+// port used for the command connection, and the URL scheme prefix
+// (FTP_PREFIX_LENGTH must stay equal to the length of FTP_PREFIX)
+static const uint16_t FTP_PORT          = 21;
+static const string   FTP_PREFIX        = "ftp://";
+static const size_t   FTP_PREFIX_LENGTH = 6;
+static const string   FTP_NEWLINE       = "\r\n";
+
+// credentials sent with USER/PASS (BamFtp starts out with these values)
+static const string DEFAULT_USER = "anonymous";
+static const string DEFAULT_PASS = "anonymous@";
+
+// FTP command verbs
+// NOTE(review): ABOR_CMD & REIN_CMD do not appear to be referenced in this file
+static const string ABOR_CMD = "ABOR";
+static const string USER_CMD = "USER";
+static const string PASS_CMD = "PASS";
+static const string PASV_CMD = "PASV";
+static const string REIN_CMD = "REIN";
+static const string REST_CMD = "REST";
+static const string RETR_CMD = "RETR";
+static const string TYPE_CMD = "TYPE";
+
+// separators used when building commands & parsing URLs/addresses
+static const char CMD_SEPARATOR  = ' ';
+static const char HOST_SEPARATOR = '/';
+static const char IP_SEPARATOR   = '.';
+
+// 4th character of a reply line that is followed by further lines
+static const char MULTILINE_CONTINUE = '-';
+
+// delimiters of the "(h1,h2,h3,h4,p1,p2)" section of a PASV reply
+static const char PASV_REPLY_PREFIX    = '(';
+static const char PASV_REPLY_SEPARATOR = ',';
+static const char PASV_REPLY_SUFFIX    = ')';
+
+// -----------------
+// utility methods
+// -----------------
+
+// Splits @source on every occurrence of @delim and returns the pieces in order.
+// Pieces are extracted with getline(), so an empty @source yields no fields and
+// a trailing delimiter does not add an empty final field.
+static inline
+vector<string> split(const string& source, const char delim) {
+
+    vector<string> tokens;
+    stringstream input(source);
+    for ( string token; getline(input, token, delim); )
+        tokens.push_back(token);
+    return tokens;
+}
+
+// Returns true if @source begins with @pattern (an empty @pattern always matches).
+// compare() inspects only the leading characters, instead of searching all of
+// @source the way find() does when the prefix is absent.
+static inline
+bool startsWith(const string& source, const string& pattern) {
+    return ( source.compare(0, pattern.length(), pattern) == 0 );
+}
+
+// Returns a lower-case copy of @s; @s itself is not modified.
+static inline
+string toLower(const string& s) {
+
+    // start from a full-size copy: reserve() alone would leave size() at 0,
+    // making every out[i] write out of bounds & the result empty
+    string out(s);
+    const size_t sSize = out.size();
+    for ( size_t i = 0; i < sSize; ++i ) {
+        // tolower() requires a value representable as unsigned char
+        out[i] = static_cast<char>( tolower(static_cast<unsigned char>(out[i])) );
+    }
+    return out;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// -----------------------
+// BamFtp implementation
+// -----------------------
+
+// Allocates the command & data sockets, applies the default port and
+// credentials, then parses @url. No network connection is made here.
+BamFtp::BamFtp(const string& url)
+    : IBamIODevice(),
+      m_commandSocket(new TcpSocket),
+      m_dataSocket(new TcpSocket),
+      m_port(FTP_PORT),
+      m_dataPort(0),
+      m_username(DEFAULT_USER),
+      m_password(DEFAULT_PASS),
+      m_isUrlParsed(false),
+      m_filePosition(-1)
+{
+    // fills in host/filename & sets m_isUrlParsed on success
+    ParseUrl(url);
+}
+
+// Closes both connections, then frees the two sockets.
+BamFtp::~BamFtp(void) {
+
+    Close();
+
+    // deleting a null pointer is a no-op, so no guards are needed
+    delete m_commandSocket;
+    delete m_dataSocket;
+}
+
+// Disconnects both sockets and returns the connection state to its initial values.
+//
+// NOTE(review): m_isUrlParsed is cleared here and IsOpen() depends on it, so
+// the device looks unusable after Close() - confirm that is intended.
+void BamFtp::Close(void) {
+
+    // drop the command channel first, then the data channel
+    m_commandSocket->DisconnectFromHost();
+    m_dataSocket->DisconnectFromHost();
+
+    // forget the passive-connection endpoint
+    m_dataHostname.clear();
+    m_dataPort = 0;
+
+    // back to default credentials
+    m_username = DEFAULT_USER;
+    m_password = DEFAULT_PASS;
+
+    // no parsed URL, no valid position
+    m_isUrlParsed  = false;
+    m_filePosition = -1;
+}
+
+// Connects the command socket to m_hostname:m_port, reads the server's first
+// reply, then sends USER, PASS and "TYPE I" (each followed by a reply read).
+//
+// Returns true if every step succeeded. A failed connect sets the error string;
+// any later failure calls Close() before returning false.
+bool BamFtp::ConnectCommandSocket(void) {
+
+    BT_ASSERT_X(m_commandSocket, "null command socket?");
+
+    // open the command connection
+    const bool isConnected = m_commandSocket->ConnectToHost(m_hostname, m_port, m_mode);
+    if ( !isConnected ) {
+        SetErrorString("BamFtp::ConnectCommandSocket", "could not connect to host");
+        return false;
+    }
+
+    // the server speaks first: consume its initial reply
+    if ( !ReceiveReply() ) {
+        Close();
+        return false;
+    }
+
+    // login sequence, sent in this order
+    const string loginCommands[] = {
+        USER_CMD + CMD_SEPARATOR + m_username + FTP_NEWLINE,
+        PASS_CMD + CMD_SEPARATOR + m_password + FTP_NEWLINE,
+        TYPE_CMD + CMD_SEPARATOR + 'I' + FTP_NEWLINE
+    };
+    const size_t numCommands = sizeof(loginCommands) / sizeof(loginCommands[0]);
+
+    for ( size_t i = 0; i < numCommands; ++i ) {
+        if ( !SendCommand(loginCommands[i], true) ) {
+            Close();
+            return false;
+        }
+    }
+
+    // all commands were sent & answered
+    return true;
+}
+
+// Establishes a fresh data connection for retrieving m_filename.
+//
+// Steps, in order: make sure the command socket is connected, drop any existing
+// data connection, send PASV & parse the host/port from its reply, send REST
+// (only when m_filePosition >= 0) so the transfer starts at the current file
+// position, send RETR, connect the data socket, then require a server reply
+// that begins with "150".
+//
+// Returns true on success; false as soon as any step fails.
+bool BamFtp::ConnectDataSocket(void) {
+
+    // failure if can't connect to command socket first
+    if ( !m_commandSocket->IsConnected() ) {
+        if ( !ConnectCommandSocket() )
+            return false;
+    }
+
+    // make sure we're starting with a fresh data channel
+    if ( m_dataSocket->IsConnected() ) 
+        m_dataSocket->DisconnectFromHost();
+
+    // send passive connection command
+    const string passiveCommand = PASV_CMD + FTP_NEWLINE;
+    if ( !SendCommand(passiveCommand, true) ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // retrieve passive connection port
+    // (fills m_dataHostname & m_dataPort from the PASV reply held in m_response)
+    if ( !ParsePassiveResponse() ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // set up restart command (tell server where to start fetching bytes from)
+    if ( m_filePosition >= 0 ) {
+
+        stringstream fpStream("");
+        fpStream << m_filePosition;
+        string restartCommand = REST_CMD + CMD_SEPARATOR + fpStream.str() + FTP_NEWLINE;
+        if ( !SendCommand(restartCommand, true) ) {
+            // TODO: set error string
+            return false;
+        }
+    }
+
+    // main file retrieval request
+    // (sent without waiting for the reply; it is read further below, after the
+    //  data channel has been connected)
+    string retrieveCommand = RETR_CMD + CMD_SEPARATOR + m_filename + FTP_NEWLINE;
+    if ( !SendCommand(retrieveCommand, false) ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // make data channel connection
+    if ( !m_dataSocket->ConnectToHost(m_dataHostname, m_dataPort) ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // fetch initial reply from server
+    if ( !ReceiveReply() ) {
+        // TODO: set error string
+        m_dataSocket->DisconnectFromHost();
+        return false;
+    }
+    
+    // make sure we have reply code 150 (all good)
+    if ( !startsWith(m_response, "150") ) {
+        // TODO: set error string
+        m_dataSocket->DisconnectFromHost();
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// Open means: the base class reports open AND the URL was parsed successfully.
+bool BamFtp::IsOpen(void) const {
+    if ( !IBamIODevice::IsOpen() )
+        return false;
+    return m_isUrlParsed;
+}
+
+// This device always reports itself as random-access.
+bool BamFtp::IsRandomAccess(void) const
+{
+    return true;
+}
+
+// Opens the device for reading: records the mode, resets the file position to 0
+// and connects first the command socket, then the data socket.
+//
+// Returns false (with error string set) for any mode other than ReadOnly, and
+// false if either connection step fails.
+bool BamFtp::Open(const IBamIODevice::OpenMode mode) {
+
+    // only read-only access is supported
+    const bool isReadOnly = ( mode == IBamIODevice::ReadOnly );
+    if ( !isReadOnly ) {
+        SetErrorString("BamFtp::Open", "writing on this device is not supported");
+        return false;
+    }
+
+    // basic valid state
+    m_mode = mode;
+    m_filePosition = 0;
+
+    // command channel must come up before the data channel is attempted
+    if ( !ConnectCommandSocket() )
+        return false;
+    return ConnectDataSocket();
+}
+
+// Extracts the data-connection endpoint from the PASV reply held in m_response.
+//
+// The reply is expected to contain "(h1,h2,h3,h4,p1,p2)": h1-h4 become the
+// dotted address stored in m_dataHostname, and the port stored in m_dataPort is
+// p1*256 + p2.
+//
+// Returns false (leaving both members untouched) if the reply is empty, has no
+// "(...)" section, or that section does not hold exactly 6 comma-separated fields.
+bool BamFtp::ParsePassiveResponse(void) {
+
+    // fail if empty
+    if ( m_response.empty() )
+        return false;
+
+    // find parentheses
+    // (the closing one is searched for *after* the opening one, so a stray ')'
+    //  earlier in the reply cannot produce an inverted range)
+    const size_t leftParenFound = m_response.find(PASV_REPLY_PREFIX);
+    if ( leftParenFound == string::npos )
+        return false;
+    const size_t rightParenFound = m_response.find(PASV_REPLY_SUFFIX, leftParenFound+1);
+    if ( rightParenFound == string::npos )
+        return false;
+
+    // grab everything between ( should be "h1,h2,h3,h4,p1,p2" )
+    const string hostAndPort = m_response.substr(leftParenFound+1, rightParenFound-leftParenFound-1);
+
+    // parse into string fields
+    vector<string> fields = split(hostAndPort, PASV_REPLY_SEPARATOR);
+    if ( fields.size() != 6 )
+        return false;
+
+    // fetch passive connection IP
+    m_dataHostname = fields[0] + IP_SEPARATOR +
+                     fields[1] + IP_SEPARATOR +
+                     fields[2] + IP_SEPARATOR +
+                     fields[3];
+
+    // fetch passive connection port (two bytes, upper byte first)
+    const uint8_t portUpper = static_cast<uint8_t>(atoi(fields[4].c_str()));
+    const uint8_t portLower = static_cast<uint8_t>(atoi(fields[5].c_str()));
+    m_dataPort = ( portUpper<<8 ) + portLower;
+
+    // return success
+    return true;
+}
+
+// Splits @url ("ftp://host/path") into m_hostname & m_filename and sets
+// m_isUrlParsed on success.
+//
+// The "ftp://" scheme must be at the very start of @url and is matched
+// case-insensitively; host & path keep their original case. The filename is
+// everything from the first '/' after the scheme (that '/' included). If there
+// is no such '/', only the hostname is stored and m_isUrlParsed stays false.
+void BamFtp::ParseUrl(const string& url) {
+
+    // clear flag to start
+    m_isUrlParsed = false;
+
+    // make sure url starts with "ftp://", case-insensitive
+    // (only the scheme characters are lower-cased for the comparison)
+    if ( url.length() < FTP_PREFIX_LENGTH )
+        return;
+    for ( size_t i = 0; i < FTP_PREFIX_LENGTH; ++i ) {
+        const char lowered = static_cast<char>( tolower(static_cast<unsigned char>(url[i])) );
+        if ( lowered != FTP_PREFIX[i] )
+            return;
+    }
+
+    // find end of host name portion (first '/' hit after the prefix)
+    const size_t firstSlashFound = url.find(HOST_SEPARATOR, FTP_PREFIX_LENGTH);
+
+    // fetch hostname
+    // (when no slash was found, the length wraps to a huge value & substr()
+    //  simply takes the rest of the string)
+    m_hostname = url.substr(FTP_PREFIX_LENGTH, (firstSlashFound - FTP_PREFIX_LENGTH));
+    m_port = FTP_PORT;
+
+    // no slash found... no filename given along with host
+    // (bail out here: substr(npos) would throw std::out_of_range)
+    if ( firstSlashFound == string::npos )
+        return;
+
+    // store remainder of URL as filename (starts with the '/', so never empty)
+    m_filename = url.substr(firstSlashFound);
+
+    // set parsed OK flag
+    m_isUrlParsed = true;
+}
+
+// Reads up to @numBytes bytes of file data into @data, (re)connecting the data
+// socket whenever it is found disconnected (e.g. after a Seek()).
+//
+// Returns the number of bytes stored in @data. This is less than @numBytes only
+// when a socket read yields 0 bytes, which is treated as end of data.
+// Returns -1 if the device is not open, the data connection cannot be
+// established, or a socket read fails.
+int64_t BamFtp::Read(char* data, const unsigned int numBytes) {
+
+    // if BamFtp not in a valid state
+    if ( !IsOpen() )
+        return -1;
+
+    // read until hit desired @numBytes
+    int64_t bytesReadSoFar = 0;
+    while ( bytesReadSoFar < numBytes ) {
+
+        // calculate number of bytes we're going to try to read this iteration
+        const size_t remainingBytes = ( numBytes - bytesReadSoFar );
+
+        // if either disconnected somehow, or (more likely) we have seeked since last read
+        if ( !m_dataSocket->IsConnected() ) {
+            if ( !ConnectDataSocket() ) {
+                // TODO: set error string
+                return -1;
+            }
+        }
+
+        // read bytes from data socket
+        const int64_t socketBytesRead = ReadDataSocket(data+bytesReadSoFar, remainingBytes);
+        if ( socketBytesRead < 0 )  // error
+            return -1;
+        if ( socketBytesRead == 0 ) // no more data: stop, rather than loop forever
+            return bytesReadSoFar;
+        bytesReadSoFar += socketBytesRead;
+        m_filePosition += socketBytesRead;
+    }
+
+    // return actual number bytes successfully read
+    return bytesReadSoFar;
+}
+
+// Reads at most @maxNumBytes from the command socket into @data.
+// Returns the socket's byte count, with any negative result reported as -1.
+int64_t BamFtp::ReadCommandSocket(char* data, const unsigned int maxNumBytes) {
+    const int64_t socketResult = m_commandSocket->Read(data, maxNumBytes);
+    return ( socketResult < 0 ) ? -1 : socketResult;
+}
+
+// Reads at most @maxNumBytes from the data socket into @data.
+// Returns the socket's byte count, with any negative result reported as -1.
+int64_t BamFtp::ReadDataSocket(char* data, const unsigned int maxNumBytes) {
+    const int64_t socketResult = m_dataSocket->Read(data, maxNumBytes);
+    return ( socketResult < 0 ) ? -1 : socketResult;
+}
+
+// Reads one complete server reply from the command socket into m_response
+// (any previous contents are discarded first).
+//
+// Lines are appended until one is at least 4 characters long, starts with three
+// digits and does not have '-' (MULTILINE_CONTINUE) as its 4th character.
+//
+// Returns false (with error string set) if the command socket is not connected
+// or if nothing was read; true otherwise.
+//
+// NOTE(review): the loop only exits on such a terminating line. If ReadLine()
+// can return an empty string (e.g. on a dropped connection) this would never
+// finish - confirm against TcpSocket::ReadLine().
+bool BamFtp::ReceiveReply(void) {
+
+    // failure if not connected
+    if ( !m_commandSocket->IsConnected() ) {
+        SetErrorString("BamFtp::ReceiveReply()", "command socket not connected");
+        return false;
+    }
+
+    m_response.clear();
+
+    // read reply lines, accumulating all of them in m_response
+    bool headerEnd = false;
+    while ( !headerEnd ) {
+
+        const string headerLine = m_commandSocket->ReadLine();
+        m_response += headerLine;
+
+        // if line is of form 'xyz ', quit reading lines
+        if ( (headerLine.length() >= 4 ) &&
+             isdigit(headerLine[0]) &&
+             isdigit(headerLine[1]) &&
+             isdigit(headerLine[2]) &&
+             ( headerLine[3] != MULTILINE_CONTINUE )
+           )
+        {
+            headerEnd = true;
+        }
+    }
+
+    // return success, depending on response
+    if ( m_response.empty() ) {
+        SetErrorString("BamFtp::ReceiveReply", "error reading server reply");
+        return false;
+    }
+    return true;
+}
+
+// Moves the logical file position; supports SEEK_SET and SEEK_CUR only.
+//
+// Both sockets are disconnected, so the next Read() has to reconnect, and the
+// new connection restarts the transfer from the updated m_filePosition.
+// The disconnect happens before @origin is examined, i.e. also when false is
+// returned for an unsupported @origin.
+//
+// Returns false if the device is not open or @origin is unsupported.
+bool BamFtp::Seek(const int64_t& position, const int origin) {
+
+    // if FTP device not in a valid state
+    if ( !IsOpen() ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // drop both connections: data channel first, then command channel
+    m_dataSocket->DisconnectFromHost();
+    m_commandSocket->DisconnectFromHost();
+
+    // update file position
+    switch ( origin ) {
+
+        case SEEK_CUR :
+            m_filePosition += position;
+            return true;
+
+        case SEEK_SET :
+            m_filePosition = position;
+            return true;
+
+        default :
+            // TODO: set error string
+            return false;
+    }
+}
+
+// Writes @command to the command socket and, when @waitForReply is set, reads
+// the server's reply into m_response.
+//
+// Returns false (with error string set) if the socket is not connected or the
+// write reports -1; otherwise true, or the result of ReceiveReply() when a
+// reply was requested.
+bool BamFtp::SendCommand(const string& command, bool waitForReply) {
+
+    // nothing can be sent without a connection
+    if ( !m_commandSocket->IsConnected() ) {
+        SetErrorString("BamFtp::SendCommand", "command socket not connected");
+        return false;
+    }
+
+    // push the command out
+    const int64_t writeResult = WriteCommandSocket(command.c_str(), command.length());
+    if ( writeResult == -1 ) {
+        SetErrorString("BamFtp::SendCommand", "error writing to socket");
+        // get actual error from command socket??
+        return false;
+    }
+
+    // success, unless a requested reply could not be read
+    return ( waitForReply ? ReceiveReply() : true );
+}
+
+// Returns the current file position, or -1 when the device is not open.
+int64_t BamFtp::Tell(void) const {
+    if ( !IsOpen() )
+        return -1;
+    return m_filePosition;
+}
+
+// Writing is not supported on this device: asserts, sets the error string and
+// always returns -1.
+int64_t BamFtp::Write(const char* data, const unsigned int numBytes) {
+
+    // parameters are intentionally unused
+    static_cast<void>(data);
+    static_cast<void>(numBytes);
+
+    BT_ASSERT_X(false, "BamFtp::Write : write-mode not supported on this device");
+    SetErrorString("BamFtp::Write", "write-mode not supported on this device");
+    return -1;
+}
+
+// Clears the command socket's buffer, then writes @numBytes bytes of @data to it.
+// Returns the socket's Write() result, or -1 when the socket is not connected.
+int64_t BamFtp::WriteCommandSocket(const char* data, const unsigned int numBytes) {
+    if ( m_commandSocket->IsConnected() ) {
+        m_commandSocket->ClearBuffer();
+        return m_commandSocket->Write(data, numBytes);
+    }
+    return -1;
+}
+
+// Writing is not supported on this device: asserts, sets the error string and
+// always returns -1.
+int64_t BamFtp::WriteDataSocket(const char* data, const unsigned int numBytes) {
+    (void)data;
+    (void)numBytes;
+    BT_ASSERT_X(false, "BamFtp::WriteDataSocket: write-mode not supported on this device");
+    // report the error under this method's own name (it previously said "BamFtp::Write")
+    SetErrorString("BamFtp::WriteDataSocket", "write-mode not supported on this device");
+    return -1;
+}
diff --git a/src/api/internal/io/BamFtp_p.h b/src/api/internal/io/BamFtp_p.h

new file mode 100644 (file)

index 0000000..11f549c
--- /dev/null
+++ b/src/api/internal/io/BamFtp_p.h
@@ -0,0 +1,91 @@
+// ***************************************************************************
+// BamFtp_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on FTP server
+// ***************************************************************************
+
+#ifndef BAMFTP_P_H
+#define BAMFTP_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class TcpSocket;
+
+// Read-only IO device for a BAM file on an FTP server. Uses one socket for
+// commands/replies and a second one for the file data.
+class BamFtp : public IBamIODevice {
+
+    // ctor & dtor
+    public:
+        // parses @url ("ftp://host/path"); no connection is made until Open()
+        BamFtp(const std::string& url);
+        ~BamFtp(void);
+
+    // IBamIODevice implementation
+    public:
+        // disconnects both sockets & resets connection state
+        void Close(void);
+        bool IsOpen(void) const;
+        bool IsRandomAccess(void) const;
+        // only IBamIODevice::ReadOnly is accepted
+        bool Open(const IBamIODevice::OpenMode mode);
+        int64_t Read(char* data, const unsigned int numBytes);
+        // supports SEEK_SET & SEEK_CUR
+        bool Seek(const int64_t& position, const int origin = SEEK_SET);
+        // current file position, or -1 if not open
+        int64_t Tell(void) const;
+        // not supported: always returns -1
+        int64_t Write(const char* data, const unsigned int numBytes);
+
+    // internal methods
+    private:
+        bool ConnectCommandSocket(void);
+        bool ConnectDataSocket(void);        
+        bool ParsePassiveResponse(void);
+        void ParseUrl(const std::string& url);
+        int64_t ReadCommandSocket(char* data, const unsigned int numBytes);
+        int64_t ReadDataSocket(char* data, const unsigned int numBytes);
+        bool ReceiveReply(void);
+        bool SendCommand(const std::string& command, bool waitForReply);
+        int64_t WriteCommandSocket(const char* data, const unsigned int numBytes);
+        int64_t WriteDataSocket(const char* data, const unsigned int numBytes);
+
+    // data members
+    private:
+
+        // our main sockets (allocated in the ctor, deleted in the dtor)
+        TcpSocket* m_commandSocket;
+        TcpSocket* m_dataSocket;
+
+        // our connection data
+        std::string m_hostname;     // host taken from the URL
+        uint16_t    m_port;         // command connection port
+        std::string m_dataHostname; // data connection address, from the PASV reply
+        uint16_t    m_dataPort;     // data connection port, from the PASV reply
+        std::string m_filename;     // path portion of the URL
+
+        // credentials sent with USER/PASS
+        std::string m_username;
+        std::string m_password;
+
+        // text of the most recent server reply
+        std::string m_response;
+
+        // internal state flags
+        bool m_isUrlParsed;
+
+        // file position
+        int64_t m_filePosition;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMFTP_P_H
diff --git a/src/api/internal/io/BamHttp_p.cpp b/src/api/internal/io/BamHttp_p.cpp

new file mode 100644 (file)

index 0000000..e2ade70
--- /dev/null
+++ b/src/api/internal/io/BamHttp_p.cpp
@@ -0,0 +1,411 @@
+// ***************************************************************************
+// BamHttp_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on HTTP server
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/internal/io/BamHttp_p.h"
+#include "api/internal/io/HttpHeader_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cassert>
+#include <cctype>
+#include <algorithm>
+#include <sstream>
+using namespace std;
+
+namespace BamTools {
+namespace Internal {
+
+// -----------
+// constants
+// -----------
+
+// port (kept as a string) & URL scheme prefix
+// (HTTP_PREFIX_LENGTH must stay equal to the length of HTTP_PREFIX)
+static const string HTTP_PORT   = "80";
+static const string HTTP_PREFIX = "http://";
+static const size_t HTTP_PREFIX_LENGTH = 7;
+
+static const string DOUBLE_NEWLINE = "\n\n";
+
+// request method & header names/values
+static const string GET_METHOD   = "GET";
+static const string HOST_HEADER  = "Host";
+static const string RANGE_HEADER = "Range";
+static const string BYTES_PREFIX = "bytes=";
+
+// separators used when parsing a URL
+static const char HOST_SEPARATOR  = '/';
+static const char PROXY_SEPARATOR = ':';
+
+// -----------------
+// utility methods
+// -----------------
+
+// Returns true if @source ends with @pattern (an empty @pattern always matches).
+//
+// Only the tail of @source is compared. Searching with find() instead reports
+// the *first* occurrence, which gives a wrong answer when @pattern also occurs
+// earlier in @source, and can wrongly match when @pattern is longer than @source.
+static inline
+bool endsWith(const string& source, const string& pattern) {
+    const size_t sourceLength  = source.length();
+    const size_t patternLength = pattern.length();
+    if ( patternLength > sourceLength )
+        return false;
+    return ( source.compare(sourceLength - patternLength, patternLength, pattern) == 0 );
+}
+
+// Returns a lower-case copy of @s; @s itself is not modified.
+static inline
+string toLower(const string& s) {
+
+    // start from a full-size copy: reserve() alone would leave size() at 0,
+    // making every out[i] write out of bounds & the result empty
+    string out(s);
+    const size_t sSize = out.size();
+    for ( size_t i = 0; i < sSize; ++i ) {
+        // tolower() requires a value representable as unsigned char
+        out[i] = static_cast<char>( tolower(static_cast<unsigned char>(out[i])) );
+    }
+    return out;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// ------------------------
+// BamHttp implementation
+// ------------------------
+
+// Allocates the socket, applies the default port, starts with no request or
+// response object, then parses @url. No network connection is made here.
+BamHttp::BamHttp(const string& url)
+    : IBamIODevice(),
+      m_socket(new TcpSocket),
+      m_port(HTTP_PORT),
+      m_request(0),
+      m_response(0),
+      m_isUrlParsed(false),
+      m_filePosition(-1),
+      m_endRangeFilePosition(-1)
+{
+    // fills in host/filename & sets m_isUrlParsed on success
+    ParseUrl(url);
+}
+
+// Closes the connection (which also frees any request/response), then frees the socket.
+BamHttp::~BamHttp(void) {
+
+    Close();
+
+    // deleting a null pointer is a no-op, so no guard is needed
+    delete m_socket;
+}
+
+// Disconnects the socket, frees the stored request & response objects and
+// resets the parse flag and both file positions.
+//
+// NOTE(review): m_isUrlParsed is cleared here and IsOpen() depends on it, so
+// the device looks unusable after Close() - confirm that is intended.
+void BamHttp::Close(void) {
+
+    // disconnect socket
+    m_socket->DisconnectFromHost();
+
+    // release request & response (delete on a null pointer does nothing)
+    delete m_request;
+    m_request = 0;
+    delete m_response;
+    m_response = 0;
+
+    // back to the "nothing parsed, no position" state
+    m_isUrlParsed          = false;
+    m_filePosition         = -1;
+    m_endRangeFilePosition = -1;
+}
+
+// Connects the socket to m_hostname:m_port, resets the file positions, then
+// sends the initial request and waits for the server's response.
+//
+// Returns true if all steps succeeded. If the request or the response fails,
+// Close() is called before returning false.
+bool BamHttp::ConnectSocket(void) {
+
+    BT_ASSERT_X(m_socket, "null socket?");
+
+    // any state checks, etc?
+    const bool isConnected = m_socket->ConnectToHost(m_hostname, m_port, m_mode);
+    if ( !isConnected ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // start from the beginning of the file, with no known range end
+    m_filePosition = 0;
+    m_endRangeFilePosition = -1;
+
+    // initial request, then the server's response (skipped if the request failed)
+    if ( SendRequest() && ReceiveResponse() )
+        return true;
+
+    // TODO: set error string
+    Close();
+    return false;
+}
+
+// Returns true if the socket is already connected; otherwise attempts to
+// connect and returns the outcome of that attempt.
+bool BamHttp::EnsureSocketConnection(void) {
+    if ( !m_socket->IsConnected() )
+        return ConnectSocket();
+    return true;
+}
+
+// Open means: the base class reports open AND the URL was parsed successfully.
+bool BamHttp::IsOpen(void) const {
+    if ( !IBamIODevice::IsOpen() )
+        return false;
+    return m_isUrlParsed;
+}
+
+// This device always reports itself as random-access.
+bool BamHttp::IsRandomAccess(void) const
+{
+    return true;
+}
+
+// Opens the device for reading: records the mode and connects the socket.
+//
+// Returns false (with error string set) for any mode other than ReadOnly, or
+// when the connection attempt fails - in which case the socket's own error
+// string is passed on.
+bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
+
+    // only read-only access is supported
+    const bool isReadOnly = ( mode == IBamIODevice::ReadOnly );
+    if ( !isReadOnly ) {
+        SetErrorString("BamHttp::Open", "writing on this device is not supported");
+        return false;
+    }
+    m_mode = mode;
+
+    // attempt connection to socket
+    if ( ConnectSocket() )
+        return true;
+
+    SetErrorString("BamHttp::Open", m_socket->GetErrorString());
+    return false;
+}
+
+// Splits @url ("http://host[:port]/path") into m_hostname, m_port & m_filename
+// and sets m_isUrlParsed on success.
+//
+// The "http://" scheme must be at the very start of @url and is matched
+// case-insensitively; host & path keep their original case. When the host part
+// contains ':', the text after it is used as the port (HTTP_PORT if that text
+// is empty). The filename is everything from the first '/' after the scheme
+// (that '/' included). If there is no such '/', host & port are stored but
+// m_isUrlParsed stays false.
+void BamHttp::ParseUrl(const string& url) {
+
+    // clear flag to start
+    m_isUrlParsed = false;
+
+    // make sure url starts with "http://", case-insensitive
+    // (only the scheme characters are lower-cased for the comparison)
+    if ( url.length() < HTTP_PREFIX_LENGTH )
+        return;
+    for ( size_t i = 0; i < HTTP_PREFIX_LENGTH; ++i ) {
+        const char lowered = static_cast<char>( tolower(static_cast<unsigned char>(url[i])) );
+        if ( lowered != HTTP_PREFIX[i] )
+            return;
+    }
+
+    // find end of host name portion (first '/' hit after the prefix)
+    const size_t firstSlashFound = url.find(HOST_SEPARATOR, HTTP_PREFIX_LENGTH);
+
+    // fetch hostname (check for explicit port)
+    // (when no slash was found, the length wraps to a huge value & substr()
+    //  simply takes the rest of the string)
+    const string hostAndPort = url.substr(HTTP_PREFIX_LENGTH, (firstSlashFound - HTTP_PREFIX_LENGTH));
+    const size_t colonFound = hostAndPort.find(PROXY_SEPARATOR);
+    if ( colonFound != string::npos ) {
+        const string port = hostAndPort.substr(colonFound+1);
+        m_hostname = hostAndPort.substr(0, colonFound);
+        m_port = ( port.empty() ? HTTP_PORT : port );
+    } else {
+        m_hostname = hostAndPort;
+        m_port = HTTP_PORT;
+    }
+
+    // no slash found... no filename given along with host
+    // (bail out here: substr(npos) would throw std::out_of_range)
+    if ( firstSlashFound == string::npos )
+        return;
+
+    // store remainder of URL as filename (starts with the '/', so never empty)
+    m_filename = url.substr(firstSlashFound);
+
+    // set parsed OK flag
+    m_isUrlParsed = true;
+}
+
+// Reads up to @numBytes bytes of file data into @data.
+//
+// Two cases, depending on m_endRangeFilePosition:
+//   < 0  : the socket has access to the entire file contents (status code == 200);
+//          bytes are read straight from the socket
+//   >= 0 : the socket has access to a range of data (status code == 206); bytes
+//          are read up to the end of that range, and a new range is requested
+//          whenever the current one is used up
+//
+// Returns the number of bytes stored in @data. This is less than @numBytes only
+// when a socket read yields 0 bytes, which is treated as end of data.
+// Returns -1 if the device is not open, a socket read fails, or a follow-up
+// request/response fails (the device is closed in that last case).
+int64_t BamHttp::Read(char* data, const unsigned int numBytes) {
+
+    // if BamHttp not in a valid state
+    if ( !IsOpen() )
+        return -1;
+
+    // read until hit desired @numBytes
+    int64_t bytesReadSoFar = 0;
+    while ( bytesReadSoFar < numBytes ) {
+
+        // calculate number of bytes we're going to try to read this iteration
+        const size_t remainingBytes = ( numBytes - bytesReadSoFar );
+        size_t bytesToRead = remainingBytes;
+
+        // socket has access to a range of data (might already be in buffer)
+        // i.e. we received response with partial data (status code == 206)
+        if ( m_endRangeFilePosition >= 0 ) {
+
+            // this is a 1st-time read or we already read everything
+            // from the last GET request: ask for the next range & start over
+            if ( m_endRangeFilePosition <= m_filePosition ) {
+                if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) {
+                    Close();
+                    return -1;
+                }
+                continue;
+            }
+
+            // there is data left from last request: read either the total
+            // 'remainingBytes' or whatever remains of that range
+            const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition;
+            bytesToRead = std::min(remainingBytes, rangeRemainingBytes);
+        }
+
+        // try to read 'bytesToRead' from socket
+        const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead);
+        if ( socketBytesRead < 0 )  // error
+            return -1;
+        if ( socketBytesRead == 0 ) // no more data: stop, rather than loop forever
+            return bytesReadSoFar;
+        bytesReadSoFar += socketBytesRead;
+        m_filePosition += socketBytesRead;
+    }
+
+    // return actual number bytes successfully read
+    return bytesReadSoFar;
+}
+
+// Pulls at most @maxNumBytes from the socket into @data.
+// Any negative result from the socket is reported to the caller as -1;
+// otherwise the socket's byte count is passed through unchanged.
+int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
+    const int64_t socketResult = m_socket->Read(data, maxNumBytes);
+    return ( (socketResult < 0) ? -1 : socketResult );
+}
+
+bool BamHttp::ReceiveResponse(void) {
+
+    // clear any prior response
+    if ( m_response )
+        delete m_response;
+
+    // make sure we're connected
+    if ( !EnsureSocketConnection() )
+        return false;
+
+    // fetch header, up until double new line
+    string responseHeader;
+    do {
+        // read line & append to full header
+        const string headerLine = m_socket->ReadLine();
+        responseHeader += headerLine;
+
+    } while ( !endsWith(responseHeader, DOUBLE_NEWLINE) );
+
+    // sanity check
+    if ( responseHeader.empty() ) {
+        // TODO: set error string
+        Close();
+        return false;
+    }
+
+    // create response from header text
+    m_response = new HttpResponseHeader(responseHeader);
+    if ( !m_response->IsValid() ) {
+        // TODO: set error string
+        Close();
+        return false;
+    }
+
+    // if we got range response as requested
+    if ( m_response->GetStatusCode() == 206 )
+        return true;
+
+    // if we got the full file contents instead of range
+    else if ( m_response->GetStatusCode() == 200 ) {
+
+        // skip up to current file position
+        RaiiBuffer tmp(0x8000);
+        int64_t numBytesRead = 0;
+        while ( numBytesRead < m_filePosition ) {
+            int64_t result = ReadFromSocket(tmp.Buffer, 0x8000);
+            if ( result < 0 ) {
+                Close();
+                return false;
+            }
+            numBytesRead += result;
+        }
+
+        // return success
+        return true;
+    }
+
+    // on any other reponse status
+    // TODO: set error string
+    Close();
+    return false;
+}
+
+bool BamHttp::Seek(const int64_t& position, const int origin) {
+
+    // if HTTP device not in a valid state
+    if ( !IsOpen() ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // discard socket's buffer contents, update positions, & return success
+    m_socket->ClearBuffer();
+
+    if ( origin == SEEK_CUR )
+        m_filePosition += position;
+    else if ( origin == SEEK_SET )
+        m_filePosition = position;
+    else {
+        // TODO: set error string
+        return false;
+    }
+    m_endRangeFilePosition = m_filePosition;
+    return true;
+}
+
+bool BamHttp::SendRequest(const size_t numBytes) {
+
+    // remove any currently active request
+    if ( m_request )
+        delete m_request;
+
+    // create range string
+    m_endRangeFilePosition = m_filePosition + numBytes;
+    stringstream range("");
+    range << BYTES_PREFIX << m_filePosition << '-' << m_endRangeFilePosition;
+
+    // make sure we're connected
+    if ( !EnsureSocketConnection() )
+        return false;
+
+    // create request
+    m_request = new HttpRequestHeader(GET_METHOD, m_filename);
+    m_request->SetField(HOST_HEADER,  m_hostname);
+    m_request->SetField(RANGE_HEADER, range.str());
+
+    // write request to socket
+    const string requestHeader = m_request->ToString();
+    const size_t headerSize    = requestHeader.size();
+    return ( WriteToSocket(requestHeader.c_str(), headerSize) == headerSize );
+}
+
+// Reports the current logical file position, or -1 if the device is not open.
+int64_t BamHttp::Tell(void) const {
+    if ( !IsOpen() )
+        return -1;
+    return m_filePosition;
+}
+
+int64_t BamHttp::Write(const char* data, const unsigned int numBytes) {
+    (void)data;
+    (void)numBytes;
+    BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device");
+    SetErrorString("BamHttp::Write", "write-mode not supported on this device");
+    return -1;
+}
+
+// Sends @numBytes of @data over the socket, after discarding whatever the
+// socket currently has buffered. Returns the socket's write result, or -1
+// if the socket is not connected.
+int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) {
+    if ( m_socket->IsConnected() ) {
+        m_socket->ClearBuffer();
+        return m_socket->Write(data, numBytes);
+    }
+    return -1;
+}
diff --git a/src/api/internal/io/BamHttp_p.h b/src/api/internal/io/BamHttp_p.h

new file mode 100644 (file)

index 0000000..371ccce
--- /dev/null
+++ b/src/api/internal/io/BamHttp_p.h
@@ -0,0 +1,87 @@
+// ***************************************************************************
+// BamHttp_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading of BAM files on an HTTP server (write-mode is not supported)
+// ***************************************************************************
+
+#ifndef BAMHTTP_P_H
+#define BAMHTTP_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class HttpRequestHeader;
+class HttpResponseHeader;
+class TcpSocket;
+
+// IBamIODevice implementation that reads a file from an HTTP server.
+// Data is fetched through GET requests carrying a byte-range header;
+// Write() is declared only to satisfy the interface and always fails.
+//
+// NOTE(review): the class holds raw pointers (socket, request, response)
+// and declares no copy constructor or assignment operator - presumably
+// instances are never copied; confirm.
+class BamHttp : public IBamIODevice {
+
+    // ctor & dtor
+    public:
+        BamHttp(const std::string& url);
+        ~BamHttp(void);
+
+    // IBamIODevice implementation
+    public:
+        void Close(void);
+        bool IsOpen(void) const;
+        bool IsRandomAccess(void) const;
+        bool Open(const IBamIODevice::OpenMode mode);
+        // returns number of bytes read, or -1 on error
+        int64_t Read(char* data, const unsigned int numBytes);
+        // supports SEEK_SET & SEEK_CUR origins only
+        bool Seek(const int64_t& position, const int origin = SEEK_SET);
+        // returns current file position, or -1 if device is not open
+        int64_t Tell(void) const;
+        // not supported on this device, always returns -1
+        int64_t Write(const char* data, const unsigned int numBytes);
+
+    // internal methods
+    private:
+        bool ConnectSocket(void);
+        bool EnsureSocketConnection(void);
+        // splits URL into hostname, port, & filename; sets m_isUrlParsed on success
+        void ParseUrl(const std::string& url);
+        int64_t ReadFromSocket(char* data, const unsigned int numBytes);
+        // reads & validates the response header for the last request
+        bool ReceiveResponse(void);
+        // sends a GET request for 'numBytes' starting at the current file position
+        bool SendRequest(const size_t numBytes = 0);
+        int64_t WriteToSocket(const char* data, const unsigned int numBytes);
+
+    // data members
+    private:
+
+        // our main socket
+        TcpSocket* m_socket;
+
+        // our connection data
+        std::string m_hostname;
+        std::string m_port;
+        std::string m_filename;
+
+        // our last (active) request & response info
+        HttpRequestHeader*  m_request;
+        HttpResponseHeader* m_response;
+
+        // internal state flags
+        bool m_isUrlParsed;
+
+        // file position
+        //   m_filePosition         : position of the next byte to hand to the caller
+        //   m_endRangeFilePosition : end of the byte range covered by the last
+        //                            request (Read() treats a negative value as
+        //                            "entire file available on socket")
+        int64_t m_filePosition;
+        int64_t m_endRangeFilePosition;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMHTTP_P_H
diff --git a/src/api/internal/io/BamPipe_p.cpp b/src/api/internal/io/BamPipe_p.cpp

new file mode 100644 (file)

index 0000000..2d571fd
--- /dev/null
+++ b/src/api/internal/io/BamPipe_p.cpp
@@ -0,0 +1,61 @@
+// ***************************************************************************
+// BamPipe_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM pipe-specific IO behavior
+// ***************************************************************************
+
+#include "api/internal/io/BamPipe_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <iostream>
+using namespace std;
+
+// default constructor - only initializes the ILocalIODevice base
+BamPipe::BamPipe(void)
+    : ILocalIODevice()
+{ }
+
+// destructor - empty body
+BamPipe::~BamPipe(void)
+{ }
+
+// pipes never report random access (see also BamPipe::Seek, which always fails)
+bool BamPipe::IsRandomAccess(void) const {
+    const bool supportsRandomAccess = false;
+    return supportsRandomAccess;
+}
+
+bool BamPipe::Open(const IBamIODevice::OpenMode mode) {
+
+    // make sure we're starting with a fresh pipe
+    Close();
+
+    // open stdin/stdout depending on requested openmode
+    if ( mode == IBamIODevice::ReadOnly )
+        m_stream = freopen(0, "rb", stdin);
+    else if ( mode == IBamIODevice::WriteOnly )
+        m_stream = freopen(0, "wb", stdout);
+    else {
+        const string errorType = string( (mode == IBamIODevice::ReadWrite) ? "unsupported"
+                                                                           : "unknown" );
+        const string message = errorType + " open mode requested";
+        SetErrorString("BamPipe::Open", message);
+        return false;
+    }
+
+    // check that we obtained a valid FILE*
+    if ( m_stream == 0 ) {
+        const string message_base = string("could not open handle on ");
+        const string message = message_base + ( (mode == IBamIODevice::ReadOnly) ? "stdin"
+                                                                                 : "stdout" );
+        SetErrorString("BamPipe::Open", message);
+        return false;
+    }
+
+    // store current IO mode & return success
+    m_mode = mode;
+    return true;
+}
+
+bool BamPipe::Seek(const int64_t&, const int) {
+    SetErrorString("BamPipe::Seek", "random access not allowed in FIFO pipe");
+    return false;
+}
diff --git a/src/api/internal/io/BamPipe_p.h b/src/api/internal/io/BamPipe_p.h

new file mode 100644 (file)

index 0000000..1a95cc7
--- /dev/null
+++ b/src/api/internal/io/BamPipe_p.h
@@ -0,0 +1,46 @@
+// ***************************************************************************
+// BamPipe_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM pipe-specific IO behavior
+// ***************************************************************************
+
+#ifndef BAMPIPE_P_H
+#define BAMPIPE_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/ILocalIODevice_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Local IO device bound to the process's standard streams: Open() attaches
+// to stdin (read) or stdout (write). Only the members below are overridden
+// here; everything else comes from ILocalIODevice.
+class BamPipe : public ILocalIODevice {
+
+    // ctor & dtor
+    public:
+        BamPipe(void);
+        ~BamPipe(void);
+
+    // IBamIODevice implementation
+    public:
+        // always false for a pipe
+        bool IsRandomAccess(void) const;
+        // accepts ReadOnly (stdin) or WriteOnly (stdout) only
+        bool Open(const IBamIODevice::OpenMode mode);
+        // always fails & sets an error string
+        bool Seek(const int64_t& position, const int origin = SEEK_SET);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMPIPE_P_H
diff --git a/src/api/internal/io/BgzfStream_p.cpp b/src/api/internal/io/BgzfStream_p.cpp

new file mode 100644 (file)

index 0000000..7f73d67
--- /dev/null
+++ b/src/api/internal/io/BgzfStream_p.cpp
@@ -0,0 +1,470 @@
+// ***************************************************************************
+// BgzfStream_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Based on BGZF routines developed at the Broad Institute.
+// Provides the basic functionality for reading & writing BGZF files
+// Replaces the old BGZF.* files to avoid clashing with other toolkits
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/BamConstants.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include "zlib.h"
+
+#include <cstring>
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+using namespace std;
+
+// ---------------------------
+// BgzfStream implementation
+// ---------------------------
+
+// constructor - starts with no IO device attached, compression enabled,
+// zeroed block bookkeeping, and both block buffers allocated up front
+BgzfStream::BgzfStream(void) :
+    m_blockLength(0),
+    m_blockOffset(0),
+    m_blockAddress(0),
+    m_isWriteCompressed(true),
+    m_device(0),
+    m_uncompressedBlock(Constants::BGZF_DEFAULT_BLOCK_SIZE),
+    m_compressedBlock(Constants::BGZF_MAX_BLOCK_SIZE)
+{ }
+
+// destructor - closes the stream (a no-op if no device is attached)
+BgzfStream::~BgzfStream(void) {
+    this->Close();
+}
+
+// checks BGZF block header
+bool BgzfStream::CheckBlockHeader(char* header) {
+    return (header[0] == Constants::GZIP_ID1 &&
+            header[1] == Constants::GZIP_ID2 &&
+            header[2] == Z_DEFLATED &&
+            (header[3] & Constants::FLG_FEXTRA) != 0 &&
+            BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
+            header[12] == Constants::BGZF_ID1 &&
+            header[13] == Constants::BGZF_ID2 &&
+            BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
+}
+
+// closes BGZF file
+void BgzfStream::Close(void) {
+
+    // skip if no device open
+    if ( m_device == 0 ) return;
+
+    // if writing to file, flush the current BGZF block,
+    // then write an empty block (as EOF marker)
+    if ( m_device->IsOpen() && (m_device->Mode() == IBamIODevice::WriteOnly) ) {
+        FlushBlock();
+        const size_t blockLength = DeflateBlock();
+        m_device->Write(m_compressedBlock.Buffer, blockLength);
+    }
+
+    // close device
+    m_device->Close();
+    delete m_device;
+    m_device = 0;
+
+    // ensure our buffers are cleared out
+    m_uncompressedBlock.Clear();
+    m_compressedBlock.Clear();
+
+    // reset state
+    m_blockLength = 0;
+    m_blockOffset = 0;
+    m_blockAddress = 0;
+    m_isWriteCompressed = true;
+}
+
+// compresses the current block
+size_t BgzfStream::DeflateBlock(void) {
+
+    // initialize the gzip header
+    char* buffer = m_compressedBlock.Buffer;
+    memset(buffer, 0, 18);
+    buffer[0]  = Constants::GZIP_ID1;
+    buffer[1]  = Constants::GZIP_ID2;
+    buffer[2]  = Constants::CM_DEFLATE;
+    buffer[3]  = Constants::FLG_FEXTRA;
+    buffer[9]  = Constants::OS_UNKNOWN;
+    buffer[10] = Constants::BGZF_XLEN;
+    buffer[12] = Constants::BGZF_ID1;
+    buffer[13] = Constants::BGZF_ID2;
+    buffer[14] = Constants::BGZF_LEN;
+
+    // set compression level
+    const int compressionLevel = ( m_isWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );
+
+    // loop to retry for blocks that do not compress enough
+    int inputLength = m_blockOffset;
+    size_t compressedLength = 0;
+    const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
+
+    while ( true ) {
+
+        // initialize zstream values
+        z_stream zs;
+        zs.zalloc    = NULL;
+        zs.zfree     = NULL;
+        zs.next_in   = (Bytef*)m_uncompressedBlock.Buffer;
+        zs.avail_in  = inputLength;
+        zs.next_out  = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
+        zs.avail_out = bufferSize -
+                       Constants::BGZF_BLOCK_HEADER_LENGTH -
+                       Constants::BGZF_BLOCK_FOOTER_LENGTH;
+
+        // initialize the zlib compression algorithm
+        int status = deflateInit2(&zs,
+                                  compressionLevel,
+                                  Z_DEFLATED,
+                                  Constants::GZIP_WINDOW_BITS,
+                                  Constants::Z_DEFAULT_MEM_LEVEL,
+                                  Z_DEFAULT_STRATEGY);
+        if ( status != Z_OK )
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
+
+        // compress the data
+        status = deflate(&zs, Z_FINISH);
+
+        // if not at stream end
+        if ( status != Z_STREAM_END ) {
+
+            deflateEnd(&zs);
+
+            // there was not enough space available in buffer
+            // try to reduce the input length & re-start loop
+            if ( status == Z_OK ) {
+                inputLength -= 1024;
+                if ( inputLength < 0 )
+                    throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
+                continue;
+            }
+
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
+        }
+
+        // finalize the compression routine
+        status = deflateEnd(&zs);
+        if ( status != Z_OK )
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
+
+        // update compressedLength
+        compressedLength = zs.total_out +
+                           Constants::BGZF_BLOCK_HEADER_LENGTH +
+                           Constants::BGZF_BLOCK_FOOTER_LENGTH;
+        if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
+            throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
+
+        // quit while loop
+        break;
+    }
+
+    // store the compressed length
+    BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
+
+    // store the CRC32 checksum
+    uint32_t crc = crc32(0, NULL, 0);
+    crc = crc32(crc, (Bytef*)m_uncompressedBlock.Buffer, inputLength);
+    BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
+    BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
+
+    // ensure that we have less than a block of data left
+    int remaining = m_blockOffset - inputLength;
+    if ( remaining > 0 ) {
+        if ( remaining > inputLength )
+            throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
+        memcpy(m_uncompressedBlock.Buffer, m_uncompressedBlock.Buffer + inputLength, remaining);
+    }
+
+    // update block data
+    m_blockOffset = remaining;
+
+    // return result
+    return compressedLength;
+}
+
+// flushes the data in the BGZF block
+void BgzfStream::FlushBlock(void) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::FlushBlock() - attempting to flush to null device" );
+
+    // flush all of the remaining blocks
+    while ( m_blockOffset > 0 ) {
+
+        // compress the data block
+        const size_t blockLength = DeflateBlock();
+
+        // flush the data to our output device
+        const int64_t numBytesWritten = m_device->Write(m_compressedBlock.Buffer, blockLength);
+
+        // check for device error
+        if ( numBytesWritten < 0 ) {
+            const string message = string("device error: ") + m_device->GetErrorString();
+            throw BamException("BgzfStream::FlushBlock", message);
+        }
+
+        // check that we wrote expected numBytes
+        if ( numBytesWritten != static_cast<int64_t>(blockLength) ) {
+            stringstream s("");
+            s << "expected to write " << blockLength
+              << " bytes during flushing, but wrote " << numBytesWritten;
+            throw BamException("BgzfStream::FlushBlock", s.str());
+        }
+
+        // update block data
+        m_blockAddress += blockLength;
+    }
+}
+
+// decompresses the current block
+size_t BgzfStream::InflateBlock(const size_t& blockLength) {
+
+    // setup zlib stream object
+    z_stream zs;
+    zs.zalloc    = NULL;
+    zs.zfree     = NULL;
+    zs.next_in   = (Bytef*)m_compressedBlock.Buffer + 18;
+    zs.avail_in  = blockLength - 16;
+    zs.next_out  = (Bytef*)m_uncompressedBlock.Buffer;
+    zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+
+    // initialize
+    int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
+    if ( status != Z_OK )
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
+
+    // decompress
+    status = inflate(&zs, Z_FINISH);
+    if ( status != Z_STREAM_END ) {
+        inflateEnd(&zs);
+        throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
+    }
+
+    // finalize
+    status = inflateEnd(&zs);
+    if ( status != Z_OK ) {
+        inflateEnd(&zs);
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
+    }
+
+    // return result
+    return zs.total_out;
+}
+
+// true only if a device is attached AND that device reports itself open
+bool BgzfStream::IsOpen(void) const {
+    return ( (m_device != 0) && m_device->IsOpen() );
+}
+
+void BgzfStream::Open(const string& filename, const IBamIODevice::OpenMode mode) {
+
+    // close current device if necessary
+    Close();
+    BT_ASSERT_X( (m_device == 0), "BgzfStream::Open() - unable to properly close previous IO device" );
+
+    // retrieve new IO device depending on filename
+    m_device = BamDeviceFactory::CreateDevice(filename);
+    BT_ASSERT_X( m_device, "BgzfStream::Open() - unable to create IO device from filename" );
+
+    // if device fails to open
+    if ( !m_device->Open(mode) ) {
+        const string deviceError = m_device->GetErrorString();
+        const string message = string("could not open BGZF stream: \n\t") + deviceError;
+        throw BamException("BgzfStream::Open", message);
+    }
+}
+
+// reads BGZF data into a byte buffer
+size_t BgzfStream::Read(char* data, const size_t dataLength) {
+
+    if ( dataLength == 0 )
+        return 0;
+
+    // if stream not open for reading
+    BT_ASSERT_X( m_device, "BgzfStream::Read() - trying to read from null device");
+    if ( !m_device->IsOpen() || (m_device->Mode() != IBamIODevice::ReadOnly) )
+        return 0;
+
+    // read blocks as needed until desired data length is retrieved
+    char* output = data;
+    size_t numBytesRead = 0;
+    while ( numBytesRead < dataLength ) {
+
+        // determine bytes available in current block
+        int bytesAvailable = m_blockLength - m_blockOffset;
+
+        // read (and decompress) next block if needed
+        if ( bytesAvailable <= 0 ) {
+            ReadBlock();
+            bytesAvailable = m_blockLength - m_blockOffset;
+            if ( bytesAvailable <= 0 )
+                break;
+        }
+
+        // copy data from uncompressed source buffer into data destination buffer
+        const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable );
+        memcpy(output, m_uncompressedBlock.Buffer + m_blockOffset, copyLength);
+
+        // update counters
+        m_blockOffset += copyLength;
+        output        += copyLength;
+        numBytesRead  += copyLength;
+    }
+
+    // update block data
+    if ( m_blockOffset == m_blockLength ) {
+        m_blockAddress = m_device->Tell();
+        m_blockOffset  = 0;
+        m_blockLength  = 0;
+
+    }
+
+    // return actual number of bytes read
+    return numBytesRead;
+}
+
+// reads a BGZF block
+void BgzfStream::ReadBlock(void) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::ReadBlock() - trying to read from null IO device");
+
+    // store block's starting address
+    int64_t blockAddress = m_device->Tell();
+
+    // read block header from file
+    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
+    int64_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH);
+
+    // check for device error
+    if ( numBytesRead < 0 ) {
+        const string message = string("device error: ") + m_device->GetErrorString();
+        throw BamException("BgzfStream::ReadBlock", message);
+    }
+
+    // if block header empty
+    if ( numBytesRead == 0 ) {
+        m_blockLength = 0;
+        return;
+    }
+
+    // if block header invalid size
+    if ( numBytesRead != static_cast<int8_t>(Constants::BGZF_BLOCK_HEADER_LENGTH) )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header size");
+
+    // validate block header contents
+    if ( !BgzfStream::CheckBlockHeader(header) )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
+
+    // copy header contents to compressed buffer
+    const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
+    memcpy(m_compressedBlock.Buffer, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
+
+    // read remainder of block
+    const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
+    numBytesRead = m_device->Read(&m_compressedBlock.Buffer[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining);
+
+    // check for device error
+    if ( numBytesRead < 0 ) {
+        const string message = string("device error: ") + m_device->GetErrorString();
+        throw BamException("BgzfStream::ReadBlock", message);
+    }
+
+    // check that we read in expected numBytes
+    if ( numBytesRead != static_cast<int64_t>(remaining) )
+        throw BamException("BgzfStream::ReadBlock", "could not read data from block");
+
+    // decompress block data
+    const size_t newBlockLength = InflateBlock(blockLength);
+
+    // update block data
+    if ( m_blockLength != 0 )
+        m_blockOffset = 0;
+    m_blockAddress = blockAddress;
+    m_blockLength  = newBlockLength;
+}
+
+// seek to position in BGZF file
+void BgzfStream::Seek(const int64_t& position) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::Seek() - trying to seek on null IO device");
+
+    // skip if device is not open
+    if ( !IsOpen() ) return;
+
+    // determine adjusted offset & address
+    int     blockOffset  = (position & 0xFFFF);
+    int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
+
+    // attempt seek in file
+    if ( m_device->IsRandomAccess() && m_device->Seek(blockAddress) ) {
+
+        // update block data & return success
+        m_blockLength  = 0;
+        m_blockAddress = blockAddress;
+        m_blockOffset  = blockOffset;
+    }
+    else {
+        stringstream s("");
+        s << "unable to seek to position: " << position;
+        throw BamException("BgzfStream::Seek", s.str());
+    }
+}
+
+// enables (true) or disables (false) compression of written blocks;
+// DeflateBlock() uses compression level 0 when this is off
+void BgzfStream::SetWriteCompressed(bool ok) {
+    this->m_isWriteCompressed = ok;
+}
+
+// Returns the current BGZF virtual position: block address shifted left by
+// 16 bits, combined with the low 16 bits of the in-block offset.
+// Returns 0 if the stream is not open.
+int64_t BgzfStream::Tell(void) const {
+    if ( IsOpen() )
+        return ( (m_blockAddress << 16) | (m_blockOffset & 0xFFFF) );
+    return 0;
+}
+
+// writes the supplied data into the BGZF buffer
+size_t BgzfStream::Write(const char* data, const size_t dataLength) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::Write() - trying to write to null IO device");
+    BT_ASSERT_X( (m_device->Mode() == IBamIODevice::WriteOnly),
+                 "BgzfStream::Write() - trying to write to non-writable IO device");
+
+    // skip if file not open for writing
+    if ( !IsOpen() )
+        return 0;
+
+    // write blocks as needed til all data is written
+    size_t numBytesWritten = 0;
+    const char* input = data;
+    const size_t blockLength = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+    while ( numBytesWritten < dataLength ) {
+
+        // copy data contents to uncompressed output buffer
+        unsigned int copyLength = min(blockLength - m_blockOffset, dataLength - numBytesWritten);
+        char* buffer = m_uncompressedBlock.Buffer;
+        memcpy(buffer + m_blockOffset, input, copyLength);
+
+        // update counter
+        m_blockOffset   += copyLength;
+        input           += copyLength;
+        numBytesWritten += copyLength;
+
+        // flush (& compress) output buffer when full
+        if ( m_blockOffset == blockLength )
+            FlushBlock();
+    }
+
+    // return actual number of bytes written
+    return numBytesWritten;
+}
diff --git a/src/api/internal/io/BgzfStream_p.h b/src/api/internal/io/BgzfStream_p.h

new file mode 100644 (file)

index 0000000..47b3609
--- /dev/null
+++ b/src/api/internal/io/BgzfStream_p.h
@@ -0,0 +1,93 @@
+// ***************************************************************************
+// BgzfStream_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Based on BGZF routines developed at the Broad Institute.
+// Provides the basic functionality for reading & writing BGZF files
+// Replaces the old BGZF.* files to avoid clashing with other toolkits
+// ***************************************************************************
+
+#ifndef BGZFSTREAM_P_H
+#define BGZFSTREAM_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include "api/BamAux.h"
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Reads & writes BGZF (blocked gzip) data through an IBamIODevice.
+// Data is staged in two fixed-size buffers: one holding uncompressed block
+// contents, one holding a single compressed block.
+//
+// NOTE(review): the class owns m_device (it is deleted in Close()) but
+// declares no copy constructor or assignment operator - presumably instances
+// are never copied; confirm.
+class BgzfStream {
+
+    // constructor & destructor
+    public:
+        BgzfStream(void);
+        ~BgzfStream(void);
+
+    // main interface methods
+    public:
+        // closes BGZF file
+        void Close(void);
+        // returns true if BgzfStream open for IO
+        bool IsOpen(void) const;
+        // opens the BGZF file (throws BamException on failure)
+        void Open(const std::string& filename, const IBamIODevice::OpenMode mode);
+        // reads BGZF data into a byte buffer, returns number of bytes read
+        size_t Read(char* data, const size_t dataLength);
+        // seek to (virtual) position in BGZF file (throws BamException on failure)
+        void Seek(const int64_t& position);
+        // sets IO device (closes previous, if any, but does not attempt to open)
+        void SetIODevice(IBamIODevice* device);
+        // enable/disable compressed output
+        void SetWriteCompressed(bool ok);
+        // get (virtual) file position in BGZF file, 0 if not open
+        int64_t Tell(void) const;
+        // writes the supplied data into the BGZF buffer, returns number of bytes written
+        size_t Write(const char* data, const size_t dataLength);
+
+    // internal methods
+    private:
+        // compresses the current block, returns compressed block length
+        size_t DeflateBlock(void);
+        // flushes the data in the BGZF block
+        void FlushBlock(void);
+        // de-compresses the current block, returns uncompressed length
+        size_t InflateBlock(const size_t& blockLength);
+        // reads a BGZF block
+        void ReadBlock(void);
+
+    // static 'utility' methods
+    public:
+        // checks BGZF block header
+        static bool CheckBlockHeader(char* header);
+
+    // data members
+    public:
+        // length of the uncompressed data in the current block (reading)
+        unsigned int m_blockLength;
+        // current position within the uncompressed block buffer
+        unsigned int m_blockOffset;
+        // device position at which the current block starts
+        uint64_t     m_blockAddress;
+
+        // if false, blocks are written with compression level 0
+        bool m_isWriteCompressed;
+        // underlying IO device (owned; deleted in Close())
+        IBamIODevice* m_device;
+
+        // staging buffers for block contents
+        RaiiBuffer m_uncompressedBlock;
+        RaiiBuffer m_compressedBlock;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BGZFSTREAM_P_H
diff --git a/src/api/internal/io/ByteArray_p.cpp b/src/api/internal/io/ByteArray_p.cpp

new file mode 100644 (file)

index 0000000..5f54c83
--- /dev/null
+++ b/src/api/internal/io/ByteArray_p.cpp
@@ -0,0 +1,111 @@
+// ***************************************************************************
+// ByteArray_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic, variable-length byte buffer
+// ***************************************************************************
+
+#include "api/internal/io/ByteArray_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// --------------------------
+// ByteArray implementation
+// --------------------------
+
+ByteArray::ByteArray(void)
+    : m_data()
+{ }
+
+ByteArray::ByteArray(const string& value)
+    : m_data(value.begin(), value.end())
+{ }
+
+ByteArray::ByteArray(const vector<char>& value)
+    : m_data(value)
+{ }
+
+// copies the n bytes starting at 'value' (embedded '\0' bytes included) straight into the buffer
+ByteArray::ByteArray(const char* value, size_t n)
+    : m_data(value, value + n)
+{ }
+
+ByteArray::ByteArray(const ByteArray& other)
+    : m_data(other.m_data)
+{ }
+
+ByteArray::~ByteArray(void) { }
+
+ByteArray& ByteArray::operator=(const ByteArray& other) {
+    m_data = other.m_data;
+    return *this;
+}
+
+void ByteArray::Clear(void) {
+    m_data.clear();
+}
+
+const char* ByteArray::ConstData(void) const {
+    return ( m_data.empty() ? 0 : &m_data[0] ); // read-only first byte; null when empty (&m_data[0] would be undefined)
+}
+
+char* ByteArray::Data(void) {
+    return ( m_data.empty() ? 0 : &m_data[0] ); // writable first byte; null when empty (&m_data[0] would be undefined)
+}
+
+const char& ByteArray::operator[](size_t i) const {
+    return m_data[i];
+}
+
+char& ByteArray::operator[](size_t i) {
+    return m_data[i];
+}
+
+size_t ByteArray::IndexOf(const char c, const size_t from, const size_t to) const {
+    const size_t size = ( (to == 0 ) ? m_data.size() : to );
+    for ( size_t i = from; i < size; ++i ) {
+        if ( m_data.at(i) == c ) 
+            return i;
+    }
+    return m_data.size();
+}
+
+ByteArray& ByteArray::Remove(size_t from, size_t n) {
+
+    // if 'from' outside range, just return
+    const size_t originalSize = m_data.size();
+    if ( from >= originalSize )
+        return *this;
+
+    // if asked to clip from 'from' to end (or beyond), simply resize
+    if ( from + n >= originalSize )
+        Resize(from);
+
+    // otherwise, shift data & resize
+    else {
+        memmove( &m_data[from], &m_data[from+n], (originalSize-from-n) );
+        Resize(originalSize - n);
+    }
+
+    // return reference to modified byte array
+    return *this;
+}
+
+void ByteArray::Resize(size_t n) {
+    m_data.resize(n, 0);
+}
+
+size_t ByteArray::Size(void) const {
+    return m_data.size();
+}
+
+void ByteArray::Squeeze(void) {
+    vector<char> t(m_data); // fresh copy of the current contents
+    t.swap(m_data);         // m_data takes the copy's storage; the old storage is released with t (presumably to trim capacity)
+}
diff --git a/src/api/internal/io/ByteArray_p.h b/src/api/internal/io/ByteArray_p.h

new file mode 100644 (file)

index 0000000..7e95f6e
--- /dev/null
+++ b/src/api/internal/io/ByteArray_p.h
@@ -0,0 +1,69 @@
+// ***************************************************************************
+// ByteArray_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic, variable-length byte buffer
+// ***************************************************************************
+
+#ifndef BYTEARRAY_P_H
+#define BYTEARRAY_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// provides a wrapper around a byte vector (std::vector<char>)
+class ByteArray {
+
+    // ctors & dtor
+    public:
+        ByteArray(void);                            // empty array
+        ByteArray(const std::string& value);        // copies the string's characters
+        ByteArray(const std::vector<char>& value);  // copies the vector
+        ByteArray(const char* value, size_t n);     // copies n bytes starting at value
+        ByteArray(const ByteArray& other);
+        ~ByteArray(void);
+
+        ByteArray& operator=(const ByteArray& other);
+
+    // ByteArray interface
+    public:
+
+        // data access (operator[] does no bounds checking)
+        const char* ConstData(void) const;          // pointer to first byte; only meaningful while Size() > 0
+        char* Data(void);                           // writable pointer to first byte; only meaningful while Size() > 0
+        const char& operator[](size_t i) const;
+        char& operator[](size_t i);
+
+        // byte array manipulation
+        void Clear(void);                           // removes all bytes
+        size_t IndexOf(const char c, const size_t from = 0, const size_t to = 0) const; // Size() if not found; to == 0 means "to end"
+        ByteArray& Remove(size_t from, size_t n);   // removes up to n bytes starting at from; returns *this
+        void Resize(size_t n);                      // bytes added by growing are zero-filled
+        size_t Size(void) const;                    // number of bytes stored
+        void Squeeze(void);                         // swaps contents with a fresh copy of themselves
+
+    // data members
+    private:
+        std::vector<char> m_data;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BYTEARRAY_P_H
diff --git a/src/api/internal/io/CMakeLists.txt b/src/api/internal/io/CMakeLists.txt

new file mode 100644 (file)

index 0000000..d9da416
--- /dev/null
+++ b/src/api/internal/io/CMakeLists.txt
@@ -0,0 +1,52 @@
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/io
+# ==========================
+
+set ( InternalIODir "${InternalDir}/io" )
+
+#--------------------------
+# platform-independent IO
+#--------------------------
+set ( CommonIOSources
+        ${InternalIODir}/BamDeviceFactory_p.cpp
+        ${InternalIODir}/BamFile_p.cpp
+        ${InternalIODir}/BamFtp_p.cpp
+        ${InternalIODir}/BamHttp_p.cpp
+        ${InternalIODir}/BamPipe_p.cpp
+        ${InternalIODir}/BgzfStream_p.cpp
+        ${InternalIODir}/ByteArray_p.cpp
+        ${InternalIODir}/HostAddress_p.cpp
+        ${InternalIODir}/HostInfo_p.cpp
+        ${InternalIODir}/HttpHeader_p.cpp
+        ${InternalIODir}/ILocalIODevice_p.cpp
+        ${InternalIODir}/RollingBuffer_p.cpp
+        ${InternalIODir}/TcpSocket_p.cpp
+        ${InternalIODir}/TcpSocketEngine_p.cpp
+)
+
+#------------------------
+# platform-dependent IO (test CMake's WIN32 variable; _WIN32 is only a compiler macro)
+#------------------------
+if ( WIN32 )
+    set ( PlatformIOSources
+            ${InternalIODir}/TcpSocketEngine_win_p.cpp
+    )
+else ( WIN32 )
+    set ( PlatformIOSources
+            ${InternalIODir}/TcpSocketEngine_unix_p.cpp
+    )
+endif ( WIN32 )
+
+#---------------------------
+# make build-specific list
+#---------------------------
+set ( InternalIOSources 
+        ${CommonIOSources} 
+        ${PlatformIOSources} 
+
+        PARENT_SCOPE # <-- leave this last
+)
+
diff --git a/src/api/internal/io/HostAddress_p.cpp b/src/api/internal/io/HostAddress_p.cpp

new file mode 100644 (file)

index 0000000..873087b
--- /dev/null
+++ b/src/api/internal/io/HostAddress_p.cpp
@@ -0,0 +1,396 @@
+// ***************************************************************************
+// HostAddress_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic IP address container
+// ***************************************************************************
+
+#include "api/internal/io/HostAddress_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cctype>
+#include <cstdlib>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace BamTools {
+namespace Internal {
+
+// split a string into fields on delimiter character (a trailing delimiter adds no empty last field)
+static inline
+vector<string> Split(const string& source, char delim) {
+    stringstream ss(source);
+    string field;
+    vector<string> fields;
+    while ( getline(ss, field, delim) ) // loop ends once nothing more can be extracted
+        fields.push_back(field);
+    return fields;
+}
+
+// return number of occurrences of @pattern in @source
+static inline
+uint8_t CountHits(const string& source, const string& pattern) {
+    // note: the counter is 8-bit, so a count above 255 wraps around
+    uint8_t count(0);
+    size_t found = source.find(pattern);
+    while ( found != string::npos ) {
+        ++count;
+        found = source.find(pattern, found+1); // resume one char later, so overlapping hits are counted too
+    }
+    return count;
+}
+
+static
+bool ParseIp4(const string& address, uint32_t& maybeIp4 ) {
+
+    // split IP address into string fields
+    vector<string> addressFields = Split(address, '.');
+    if ( addressFields.size() != 4 )
+        return false;
+
+    // convert each field to integer value
+    uint32_t ipv4(0);
+    for ( uint8_t i = 0; i < 4; ++i ) {
+
+        const string& field = addressFields.at(i);
+        const size_t fieldSize = field.size();
+        for ( size_t j = 0; j < fieldSize; ++j ) {
+            if ( !isdigit(field[j]) )
+                return false;
+        }
+
+        int value = atoi( addressFields.at(i).c_str() );
+        if ( value < 0 || value > 255 )
+            return false;
+
+        // append byte value
+        ipv4 <<= 8;
+        ipv4 += value;
+    }
+
+    // store 32-bit IP address & return success
+    maybeIp4 = ipv4;
+    return true;
+}
+
+static
+bool ParseIp6(const string& address, uint8_t* maybeIp6 ) {
+
+    string tmp = address;
+
+    // look for '%' char (if found, lop off that part of address)
+    // we're going to ignore any link-local zone index, for now at least
+    const size_t percentFound = tmp.rfind('%');
+    if ( percentFound != string::npos )
+        tmp = tmp.substr(0, percentFound);
+
+    // split IP address into string fields
+    vector<string> fields = Split(tmp, ':');
+    const uint8_t numFields = fields.size();
+    if ( numFields < 3 || numFields > 8 )
+        return false;
+
+    // get number of '::' separators
+    const uint8_t numColonColons = CountHits(tmp, "::");
+    if ( numFields == 8 && numColonColons > 1 )
+        return false;
+
+    // check valid IPv6 'compression'
+    // must be valid 'pure' IPv6 or mixed IPv4/6 notation
+    const size_t dotFound = tmp.find('.');
+    const bool isMixed = ( dotFound != string::npos );
+    if ( numColonColons != 1 && (numFields < (isMixed ? 7 : 8)) )
+        return false;
+
+    // iterate over provided fields
+    size_t index = 16;
+    size_t fillCount = 9 - numFields;
+    for ( int8_t i = numFields - 1; i >= 0; --i ) {
+        if ( index == 0 )
+            return false;
+        const string& field = fields.at(i);
+
+        // if field empty
+        if ( field.empty() ) {
+
+            // if last field empty
+            if ( i == numFields - 1 ) {
+                const string& previousField = fields.at(i-1);
+                if ( previousField.empty() )
+                    return false;
+                maybeIp6[--index] = 0;
+                maybeIp6[--index] = 0;
+            }
+
+            // if first field empty
+            else if ( i == 0 ) {
+                // make sure ':' isn't first character
+                const string& nextField = fields.at(i+1);
+                if ( nextField.empty() ) return false;
+                maybeIp6[--index] = 0;
+                maybeIp6[--index] = 0;
+            }
+
+            // fill in 'compressed' 0s
+            else {
+                for ( uint8_t j = 0; j < fillCount; ++j ) {
+                    if ( index == 0 ) return false;
+                    maybeIp6[--index] = 0;
+                    maybeIp6[--index] = 0;
+                }
+            }
+        }
+
+        // field has data
+        else {
+            uint32_t value = static_cast<uint32_t>( strtoul(field.c_str(), 0, 16) );
+
+            if ( value <= 0xffff ) {
+                maybeIp6[--index] =  value       & 0xff;
+                maybeIp6[--index] = (value >> 8) & 0xff;
+            }
+
+            // possible mixed IPv4/6 notation
+            else {
+
+                // mixed field must be last
+                if ( i != numFields - 1 )
+                    return false;
+
+                // parse the IPv4 section
+                uint32_t maybeIp4;
+                if ( !ParseIp4(field, maybeIp4) )
+                    return false;
+
+                // store IPv4 fields in IPv6 container
+                maybeIp6[--index] =  maybeIp4        & 0xff;
+                maybeIp6[--index] = (maybeIp4 >> 8)  & 0xff;
+                maybeIp6[--index] = (maybeIp4 >> 16) & 0xff;
+                maybeIp6[--index] = (maybeIp4 >> 24) & 0xff;
+                --fillCount;
+            }
+        }
+    }
+
+    // should have parsed OK, return success
+    return true;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// ----------------------------
+// HostAddress implementation
+// ----------------------------
+
+HostAddress::HostAddress(void)
+    : m_protocol(HostAddress::UnknownNetworkProtocol)
+    , m_ip4Address(0)
+    , m_hasIpAddress(true)
+{ }
+
+HostAddress::HostAddress(const uint32_t ip4Address)
+    : m_protocol(HostAddress::UnknownNetworkProtocol)
+    , m_ip4Address(0)
+    , m_hasIpAddress(true)
+{
+    SetAddress(ip4Address);
+}
+
+HostAddress::HostAddress(const uint8_t* ip6Address)
+    : m_protocol(HostAddress::UnknownNetworkProtocol)
+    , m_ip4Address(0)
+    , m_hasIpAddress(true)
+{
+    SetAddress(ip6Address);
+}
+
+HostAddress::HostAddress(const IPv6Address& ip6Address)
+    : m_protocol(HostAddress::UnknownNetworkProtocol)
+    , m_ip4Address(0)
+    , m_hasIpAddress(true)
+{
+    SetAddress(ip6Address);
+}
+
+HostAddress::HostAddress(const std::string& address)
+    : m_protocol(HostAddress::UnknownNetworkProtocol)
+    , m_ip4Address(0)
+{
+    SetAddress(address); // stores the string and sets m_hasIpAddress from the parse result
+}
+
+HostAddress::HostAddress(const HostAddress& other)
+    : m_protocol(other.m_protocol)
+    , m_ip4Address(other.m_ip4Address)
+    , m_ip6Address(other.m_ip6Address)
+    , m_ipString(other.m_ipString)
+    , m_hasIpAddress(other.m_hasIpAddress)
+{ }
+
+HostAddress::~HostAddress(void) { }
+
+bool HostAddress::operator==(const HostAddress& other) const {
+
+    // if self is IPv4
+    if ( m_protocol == HostAddress::IPv4Protocol ) {
+        return ( other.m_protocol == HostAddress::IPv4Protocol &&
+                 m_ip4Address == other.m_ip4Address
+               );
+    }
+
+    // if self is IPv6
+    else if ( m_protocol == HostAddress::IPv6Protocol ) {
+        return ( other.m_protocol == HostAddress::IPv6Protocol &&
+                 memcmp(&m_ip6Address, &other.m_ip6Address, sizeof(IPv6Address)) == 0
+               );
+    }
+
+    // otherwise compare protocols
+    else return m_protocol == other.m_protocol;
+}
+
+bool HostAddress::operator<(const HostAddress& other) const {
+
+    // if self is IPv4
+    if ( m_protocol == HostAddress::IPv4Protocol ) {
+        if ( other.m_protocol == HostAddress::IPv4Protocol )
+            return m_ip4Address < m_ip4Address;
+    }
+
+    // if self is IPv6
+    else if ( m_protocol == HostAddress::IPv6Protocol ) {
+        if ( other.m_protocol == HostAddress::IPv6Protocol )
+            return (memcmp(&m_ip6Address, &other.m_ip6Address, sizeof(IPv6Address)) < 0);
+    }
+
+    // otherwise compare protocol types
+    return m_protocol < other.m_protocol;
+}
+
+void HostAddress::Clear(void) {
+
+    m_protocol = HostAddress::UnknownNetworkProtocol;
+    m_ip4Address = 0;
+    memset(&m_ip6Address, 0, sizeof(IPv6Address));
+    m_ipString.clear();
+
+    // this may feel funny, but cleared IP (equivalent to '0.0.0.0') is technically valid
+    // and that's not really what this flag is checking anyway
+    //
+    // this flag is false *iff* the string passed in is a 'plain-text' hostname (www.foo.bar)
+    m_hasIpAddress = true;
+}
+
+bool HostAddress::HasIPAddress(void) const {
+    return m_hasIpAddress;
+}
+
+bool HostAddress::IsNull(void) const {
+    return m_protocol == HostAddress::UnknownNetworkProtocol;
+}
+
+uint32_t HostAddress::GetIPv4Address(void) const {
+    return m_ip4Address;
+}
+
+IPv6Address HostAddress::GetIPv6Address(void) const {
+    return m_ip6Address;
+}
+
+std::string HostAddress::GetIPString(void) const {
+
+    stringstream ss("");
+
+    // IPv4 format
+    if ( m_protocol == HostAddress::IPv4Protocol ) {
+        ss << ( (m_ip4Address>>24) & 0xff ) << '.'
+           << ( (m_ip4Address>>16) & 0xff ) << '.'
+           << ( (m_ip4Address>> 8) & 0xff ) << '.'
+           << (  m_ip4Address      & 0xff );
+
+    }
+
+    // IPv6 format
+    else if ( m_protocol == HostAddress::IPv6Protocol ) {
+        for ( uint8_t i = 0; i < 8; ++i ) {
+            if ( i != 0 )
+                ss << ':';
+                ss << hex << ( (uint16_t(m_ip6Address[2*i]) << 8) |
+                               (uint16_t(m_ip6Address[2*i+1]))
+                             );
+        }
+    }
+
+    // return result (empty string if unknown protocol)
+    return ss.str();
+}
+
+HostAddress::NetworkProtocol HostAddress::GetProtocol(void) const {
+    return m_protocol;
+}
+
+bool HostAddress::ParseAddress(void) {
+
+    // all IPv6 addresses should have a ':'
+    string s = m_ipString;
+    size_t found = s.find(':');
+    if ( found != string::npos ) {
+        // try parse IP6 address
+        uint8_t maybeIp6[16];
+        if ( ParseIp6(s, maybeIp6) ) {
+            SetAddress(maybeIp6);
+            m_protocol = HostAddress::IPv6Protocol;
+            return true;
+        }
+    }
+
+    // all IPv4 addresses should have a '.'
+    found = s.find('.');
+    if ( found != string::npos ) {
+        uint32_t maybeIp4(0);
+        if ( ParseIp4(s, maybeIp4) ) {
+            SetAddress(maybeIp4);
+            m_protocol = HostAddress::IPv4Protocol;
+            return true;
+        }
+    }
+
+    // else likely just a plain-text host name "www.foo.bar"
+    // will need to look up IP address info later
+    m_protocol = HostAddress::UnknownNetworkProtocol;
+    return false;
+}
+
+void HostAddress::SetAddress(const uint32_t ip4Address) {
+    m_ip4Address = ip4Address;
+    m_protocol = HostAddress::IPv4Protocol;
+    m_hasIpAddress = true;
+}
+
+void HostAddress::SetAddress(const uint8_t* ip6Address) {
+    memcpy(m_ip6Address.data, ip6Address, sizeof(m_ip6Address.data)); // reads 16 bytes from ip6Address
+    m_ip4Address = 0; // drop any stale IPv4 value, matching the IPv6Address overload
+    m_protocol = HostAddress::IPv6Protocol;
+    m_hasIpAddress = true;
+}
+
+void HostAddress::SetAddress(const IPv6Address& ip6Address) {
+    m_ip6Address = ip6Address;
+    m_ip4Address = 0;
+    m_protocol = HostAddress::IPv6Protocol;
+    m_hasIpAddress = true;
+}
+
+void HostAddress::SetAddress(const std::string& address) {
+    m_ipString = address;
+    m_hasIpAddress = ParseAddress();
+}
diff --git a/src/api/internal/io/HostAddress_p.h b/src/api/internal/io/HostAddress_p.h

new file mode 100644 (file)

index 0000000..4c1b360
--- /dev/null
+++ b/src/api/internal/io/HostAddress_p.h
@@ -0,0 +1,100 @@
+// ***************************************************************************
+// HostAddress_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic IP address container
+// ***************************************************************************
+
+#ifndef HOSTADDRESS_P_H
+#define HOSTADDRESS_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include <cstring>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+struct IPv6Address {
+
+    // ctor
+    inline IPv6Address(void) { memset(&data, 0, sizeof(uint8_t)*16); }
+
+    // data access (no bounds checking)
+    inline uint8_t& operator[](size_t index)       { return data[index]; }
+    inline uint8_t  operator[](size_t index) const { return data[index]; }
+
+    // data
+    uint8_t data[16];
+};
+
+class HostAddress {
+
+    // enums
+    public:
+        enum NetworkProtocol { UnknownNetworkProtocol = -1
+                             , IPv4Protocol = 0
+                             , IPv6Protocol
+                             };
+
+    // ctors & dtor
+    public:
+        HostAddress(void);
+        explicit HostAddress(const uint32_t ip4Address);
+        explicit HostAddress(const uint8_t* ip6Address);
+        explicit HostAddress(const IPv6Address& ip6Address);
+        explicit HostAddress(const std::string& address);
+        HostAddress(const HostAddress& other);
+        ~HostAddress(void);
+
+    // HostAddress interface
+    public:
+        void Clear(void);
+        bool HasIPAddress(void) const; // returns whether string address could be converted to IP address
+        bool IsNull(void) const;
+
+        uint32_t    GetIPv4Address(void) const;
+        IPv6Address GetIPv6Address(void) const;
+        std::string GetIPString(void) const;
+        HostAddress::NetworkProtocol GetProtocol(void) const;
+
+        void SetAddress(const uint32_t ip4Address);
+        void SetAddress(const uint8_t* ip6Address);
+        void SetAddress(const IPv6Address& ip6Address);
+        void SetAddress(const std::string& address);
+
+    // HostAddress comparison operators
+    public:
+        bool operator==(const HostAddress& other) const;
+        bool operator!=(const HostAddress& other) const { return !( operator==(other) ); }
+        bool operator<(const HostAddress& other) const;
+
+    // internal methods
+    private:
+        bool ParseAddress(void);
+
+    // data members
+    private:
+        HostAddress::NetworkProtocol m_protocol;
+        uint32_t    m_ip4Address;
+        IPv6Address m_ip6Address;
+        std::string m_ipString;
+        bool        m_hasIpAddress; // true until string passed in, then signifies whether string was an IP
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // HOSTADDRESS_P_H
diff --git a/src/api/internal/io/HostInfo_p.cpp b/src/api/internal/io/HostInfo_p.cpp

new file mode 100644 (file)

index 0000000..80343f1
--- /dev/null
+++ b/src/api/internal/io/HostInfo_p.cpp
@@ -0,0 +1,223 @@
+// ***************************************************************************
+// HostInfo_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides DNS lookup functionality for hostname & its discovered addresses
+// ***************************************************************************
+
+#include "api/internal/io/HostInfo_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+// platform-specifics
+#ifdef _WIN32
+#  include "api/internal/io/NetWin_p.h"
+#else
+#  include "api/internal/io/NetUnix_p.h"
+#endif
+
+// standard C++ includes
+#include <cstdlib>
+#include <cstring>
+#include <set>
+using namespace std;
+
+// -------------------------
+// HostInfo implementation
+// -------------------------
+
+HostInfo::HostInfo(void)
+    : m_error(HostInfo::NoError)
+{ }
+
+HostInfo::HostInfo(const HostInfo& other)
+    : m_hostName(other.m_hostName)
+    , m_addresses(other.m_addresses)
+    , m_error(other.m_error)
+    , m_errorString(other.m_errorString)
+{ }
+
+HostInfo::~HostInfo(void) { }
+
+vector<HostAddress> HostInfo::Addresses(void) const {
+    return m_addresses;
+}
+
+HostInfo::ErrorType HostInfo::GetError(void) const {
+    return m_error;
+}
+
+string HostInfo::GetErrorString(void) const {
+    return m_errorString;
+}
+
+string HostInfo::HostName(void) const {
+    return m_hostName;
+}
+
+void HostInfo::SetAddresses(const std::vector<HostAddress>& addresses) {
+    m_addresses = addresses;
+}
+
+void HostInfo::SetError(const HostInfo::ErrorType error) {
+    m_error = error;
+}
+
+void HostInfo::SetErrorString(const std::string& errorString) {
+    m_errorString = errorString;
+}
+
+void HostInfo::SetHostName(const string& name) {
+    m_hostName = name;
+}
+
+// ---------------------------------
+// HostInfo::Lookup(host, port)
+//  - the real "heavy-lifter" here
+// ---------------------------------
+
+HostInfo HostInfo::Lookup(const string& hostname, const string& port) {
+
+    HostInfo result;
+    set<HostAddress> uniqueAddresses;
+
+#ifdef _WIN32
+    WindowsSockInit init;
+#endif
+
+    HostAddress address;
+    address.SetAddress(hostname);
+
+    // if hostname is an IP string ('0.0.0.0' or IPv6 format)
+    // do reverse lookup for host domain name
+    //
+    // TODO: might just remove this... not sure if proper 'hostname' from IP string is needed
+    //
+    //       so far, haven't been able to successfully fetch a domain name with reverse DNS
+    //       getnameinfo() on test sites just returns original IP string. BUT this is likely a rare
+    //       case that client code tries to use an IP string and the connection should work fine
+    //       anyway. GetHostName() just won't quite show what I was hoping for. :(
+    if ( address.HasIPAddress() ) {
+
+        const uint16_t portNum = static_cast<uint16_t>( atoi(port.c_str()) );
+
+        sockaddr_in  sa4;
+        sockaddr_in6 sa6;
+        sockaddr* sa = 0;
+        BT_SOCKLEN_T saSize = 0;
+
+        // IPv4
+        if ( address.GetProtocol() == HostAddress::IPv4Protocol ) {
+            sa = (sockaddr*)&sa4;
+            saSize = sizeof(sa4);
+            memset(&sa4, 0, sizeof(sa4));
+            sa4.sin_family = AF_INET;
+            sa4.sin_addr.s_addr = htonl(address.GetIPv4Address());
+            sa4.sin_port = htons(portNum);
+        }
+
+        // IPv6
+        else if ( address.GetProtocol() == HostAddress::IPv4Protocol ){
+            sa = (sockaddr*)&sa6;
+            saSize = sizeof(sa6);
+            memset(&sa6, 0, sizeof(sa6));
+            sa6.sin6_family = AF_INET6;
+            memcpy(sa6.sin6_addr.s6_addr, address.GetIPv6Address().data, sizeof(sa6.sin6_addr.s6_addr));
+            sa6.sin6_port = htons(portNum);
+        }
+
+        // unknown (should be unreachable)
+        else BT_ASSERT_X(false, "HostInfo::Lookup: unknown network protocol");
+
+        // lookup name for IP
+        char hbuf[NI_MAXHOST];
+        char serv[NI_MAXSERV];
+        if ( sa && (getnameinfo(sa, saSize, hbuf, sizeof(hbuf), serv, sizeof(serv), 0) == 0) )
+            result.SetHostName(string(hbuf));
+
+        // if no domain name found, just use the original address's IP string
+        if ( result.HostName().empty() )
+            result.SetHostName(address.GetIPString());
+
+        // store address in HostInfo
+        uniqueAddresses.insert(address);
+    }
+
+    // otherwise, hostname is a domain name ('www.foo.bar')
+    // do 'normal' lookup
+    else {
+
+        // setup address lookup 'hints'
+        addrinfo hints;
+        memset(&hints, 0, sizeof(hints));
+        hints.ai_family   = AF_UNSPEC;   // allow either IPv4 or IPv6
+        hints.ai_socktype = SOCK_STREAM; // for TCP
+        hints.ai_protocol = IPPROTO_TCP;
+
+        // fetch addresses for requested hostname/port
+        addrinfo* res;
+        int status = getaddrinfo(hostname.c_str(), port.c_str(), &hints, &res );
+
+        // if everything OK
+        if ( status == 0 ) {
+
+            // iterate over all IP addresses found
+            addrinfo* p = res;
+            for ( ; p != NULL; p = p->ai_next ) {
+
+                // IPv4
+                if ( p->ai_family == AF_INET ) {
+                    sockaddr_in* ipv4 = (sockaddr_in*)p->ai_addr;
+                    HostAddress a( ntohl(ipv4->sin_addr.s_addr) );
+                    uniqueAddresses.insert(a);
+                }
+
+                // IPv6
+                else if ( p->ai_family == AF_INET6 ) {
+                    sockaddr_in6* ipv6 = (sockaddr_in6*)p->ai_addr;
+                    HostAddress a(ipv6->sin6_addr.s6_addr);
+                    uniqueAddresses.insert(a);
+                }
+            }
+
+            // if we iterated, but no addresses were stored
+            if ( uniqueAddresses.empty() && (p == NULL) ) {
+                result.SetError(HostInfo::UnknownError);
+                result.SetErrorString("HostInfo: unknown address types found");
+            }
+        }
+
+        // handle error cases
+        else if (
+#ifndef _WIN32
+                     status == EAI_NONAME
+                  || status == EAI_FAIL
+#  ifdef EAI_NODATA
+                  || status == EAI_NODATA  // officially deprecated, but just in case we happen to hit it
+#  endif // EAI_NODATA
+
+#else  // _WIN32
+                     WSAGetLastError() == WSAHOST_NOT_FOUND
+                  || WSAGetLastError() == WSANO_DATA
+                  || WSAGetLastError() == WSANO_RECOVERY
+#endif // _WIN32
+                )
+        {
+            result.SetError(HostInfo::HostNotFound);
+            result.SetErrorString("HostInfo: host not found");
+        }
+        else {
+            result.SetError(HostInfo::UnknownError);
+            result.SetErrorString("HostInfo: unknown error encountered");
+        }
+
+        // cleanup
+        freeaddrinfo(res);
+    }
+
+    // store fetched addresses (converting set -> vector) in result & return
+    result.SetAddresses( vector<HostAddress>(uniqueAddresses.begin(), uniqueAddresses.end()) );
+    return result;
+}
diff --git a/src/api/internal/io/HostInfo_p.h b/src/api/internal/io/HostInfo_p.h

new file mode 100644 (file)

index 0000000..ad03d37
--- /dev/null
+++ b/src/api/internal/io/HostInfo_p.h
@@ -0,0 +1,76 @@
+// ***************************************************************************
+// HostInfo_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides DNS lookup functionality for hostname/IP addresses
+// ***************************************************************************
+
+#ifndef HOSTINFO_P_H
+#define HOSTINFO_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/HostAddress_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+class HostInfo {
+
+    public:
+        enum ErrorType { NoError = 0
+                       , HostNotFound
+                       , UnknownError
+                       };
+
+    // ctors & dtor
+    public:
+        HostInfo(void);
+        HostInfo(const HostInfo& other);
+        ~HostInfo(void);
+
+    // HostInfo interface
+    public:
+        std::string HostName(void) const;
+        void SetHostName(const std::string& name);
+
+        std::vector<HostAddress> Addresses(void) const;
+        void SetAddresses(const std::vector<HostAddress>& addresses);
+
+        HostInfo::ErrorType GetError(void) const;
+        std::string GetErrorString(void) const;
+
+    // internal methods
+    private:
+        void SetError(const HostInfo::ErrorType error);
+        void SetErrorString(const std::string& errorString);
+
+    // static methods
+    public:
+        static HostInfo Lookup(const std::string& hostname,
+                               const std::string& port);
+
+    // data members
+    private:
+        std::string m_hostName;
+        std::vector<HostAddress> m_addresses;
+        HostInfo::ErrorType m_error;
+        std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // HOSTINFO_P_H
diff --git a/src/api/internal/io/HttpHeader_p.cpp b/src/api/internal/io/HttpHeader_p.cpp

new file mode 100644 (file)

index 0000000..c4f78b6
--- /dev/null
+++ b/src/api/internal/io/HttpHeader_p.cpp
@@ -0,0 +1,395 @@
+// ***************************************************************************
+// HttpHeader_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic interface for parsing/generating HTTP headers, along
+// with specialized request & response header types
+// ***************************************************************************
+
+#include "api/internal/io/HttpHeader_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+namespace BamTools {
+
+// -----------
+// constants
+// -----------
+
+namespace Constants {   // character & string literals shared by the header parsing/formatting code in this file
+
+static const char CAR_RET_CHAR = '\r';    // stripped from the end of each raw header line
+static const char COLON_CHAR   = ':';     // separates a field's key from its value
+static const char DOT_CHAR     = '.';     // separates major & minor version numbers
+static const char NEWLINE_CHAR = '\n';    // delimiter used to split raw header text into lines
+static const char SPACE_CHAR   = ' ';     // separates the tokens of a request/status line
+static const char TAB_CHAR     = '\t';    // not referenced elsewhere in this file
+
+static const string FIELD_NEWLINE   = "\r\n";     // line terminator emitted by the ToString() methods
+static const string FIELD_SEPARATOR = ": ";       // emitted between key & value by HttpHeader::ToString()
+static const string HTTP_STRING     = "HTTP/";    // prefix of the version token, e.g. "HTTP/1.1"
+
+} // namespace Constants
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace Internal {
+
+// Returns true if @c should be treated as trimmable whitespace: NUL, a plain
+// space, or one of the ASCII control characters 9..13 ('\t', '\n', '\v', '\f', '\r').
+//
+// The plain space must be included here: header lines look like "Key: value",
+// so the value handed to Trim() always starts with a space that would
+// otherwise be stored as part of the value.
+static inline
+bool IsSpace(const char c) {
+    const int n = static_cast<int>(c);
+    return ( n == 0 || c == ' ' || (n <= 13 && n >= 9) );
+}
+
+// split @source on each occurrence of the single char @delim, returning the pieces in their original order
+static vector<string> Split(const string& source, const char delim) {
+    stringstream ss(source);
+    string field;
+    vector<string> fields;
+    while ( getline(ss, field, delim) )   // one field per iteration, until the stream is exhausted
+        fields.push_back(field);
+    return fields;
+}
+
+static string Trim(const string& source) {
+
+    // skip if empty string
+    if ( source.empty() )
+        return source;
+
+    // fetch string data
+    const char*  s    = source.data(); // ignoring null-term on purpose
+    const size_t size = source.size();
+    size_t start = 0;
+    size_t end   = size-1;
+
+    // skip if no spaces at start or end
+    if ( !IsSpace(s[start]) && !IsSpace( s[end] ) )
+        return source;
+
+    // remove leading whitespace
+    while ( (start != end) && IsSpace(s[start]) )
+        ++start;
+
+    // remove trailing whitespace
+    if ( start <= end ) {
+        while ( end && IsSpace(s[end]) )
+            --end;
+    }
+
+    // return result
+    return string(s + start, (end-start) + 1);
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// ---------------------------
+// HttpHeader implementation
+// ---------------------------
+
+HttpHeader::HttpHeader(void)   // default ctor: valid header with no fields, reporting version 1.1
+    : m_isValid(true)
+    , m_majorVersion(1)
+    , m_minorVersion(1)
+{ }
+
+HttpHeader::HttpHeader(const string& s)   // builds a header by parsing raw text @s; starts out valid, version 1.1
+    : m_isValid(true)
+    , m_majorVersion(1)
+    , m_minorVersion(1)
+{
+    Parse(s);   // runs inside the base-class ctor, so the virtual ParseLine() call resolves to HttpHeader::ParseLine
+}
+
+HttpHeader::~HttpHeader(void) { }   // empty body; members clean up after themselves
+
+bool HttpHeader::ContainsKey(const string& key) const {   // true if a field named exactly @key is stored
+    return ( m_fields.find(key) != m_fields.end() );
+}
+
+int HttpHeader::GetMajorVersion(void) const {   // the 'X' in "HTTP/X.Y"
+    return m_majorVersion;
+}
+
+int HttpHeader::GetMinorVersion(void) const {   // the 'Y' in "HTTP/X.Y"
+    return m_minorVersion;
+}
+
+// Returns the value stored for @key, or an empty string if no such field exists.
+string HttpHeader::GetValue(const string& key) const {
+
+    // single lookup via find() - avoids searching the map twice, and avoids
+    // map::at(), which is not available before C++11
+    const map<string, string>::const_iterator fieldIter = m_fields.find(key);
+    if ( fieldIter == m_fields.end() )
+        return string();
+    return (*fieldIter).second;
+}
+
+bool HttpHeader::IsValid(void) const {   // false once Parse() has hit a line it could not parse (or after SetValid(false))
+    return m_isValid;
+}
+
+void HttpHeader::Parse(const string& s) {
+
+    // trim whitespace from input string
+    const string trimmed = Trim(s);
+
+    // split into list of header lines
+    vector<string> rawFields = Split(trimmed, Constants::NEWLINE_CHAR);
+
+    // prep our 'cleaned' fields container
+    vector<string> cleanFields;
+    cleanFields.reserve(rawFields.size());
+
+    // remove any empty fields and clean any trailing windows-style carriage returns ('\r')
+    vector<string>::iterator rawFieldIter = rawFields.begin();
+    vector<string>::iterator rawFieldEnd  = rawFields.end();
+    for ( ; rawFieldIter != rawFieldEnd; ++rawFieldIter ) {
+        string& field = (*rawFieldIter);
+
+        // skip empty fields
+        if ( field.empty() )
+            continue;
+
+        // remove carriage returns
+        const size_t fieldSize = field.size();
+        if ( field[fieldSize-1] == Constants::CAR_RET_CHAR )
+            field.resize(fieldSize-1);
+
+        // store cleaned field
+        cleanFields.push_back(field);
+    }
+
+    // skip add'l processing if nothing here
+    if ( cleanFields.empty() )
+        return;
+
+    // parse header lines
+    int lineNumber = 0;
+    vector<string>::const_iterator fieldIter = cleanFields.begin();
+    vector<string>::const_iterator fieldEnd  = cleanFields.end();
+    for ( ; fieldIter != fieldEnd; ++fieldIter, ++lineNumber ) {
+        if ( !ParseLine( (*fieldIter), lineNumber ) ) {
+            m_isValid = false;
+            return;
+        }
+    }
+}
+
+bool HttpHeader::ParseLine(const string& line, int) {   // parses one "key: value" line; the line number is unused by the base class
+
+    // find position of the FIRST colon, return failure if not found
+    const size_t colonFound = line.find(Constants::COLON_CHAR);
+    if ( colonFound == string::npos )
+        return false;
+
+    // store key/value (each passed through Trim) & return success; an existing entry for the same key is overwritten
+    const string key   = Trim(line.substr(0, colonFound));
+    const string value = Trim(line.substr(colonFound+1));
+    m_fields[key] = value;
+    return true;
+}
+
+void HttpHeader::RemoveField(const string& key) {   // erases the field named @key (map::erase by key)
+    m_fields.erase(key);
+}
+
+void HttpHeader::SetField(const string& key, const string& value) {   // inserts @key, or overwrites its value if already present
+    m_fields[key] = value;
+}
+
+void HttpHeader::SetValid(bool ok) {   // sets the flag reported by IsValid()
+    m_isValid = ok;
+}
+
+void HttpHeader::SetVersion(int major, int minor) {   // stores the version numbers as given (no range checking)
+    m_majorVersion = major;
+    m_minorVersion = minor;
+}
+
+string HttpHeader::ToString(void) const {   // formats all fields as "key: value\r\n" lines, in the map's key order
+    string result("");
+    if ( m_isValid ) {   // an invalid header yields an empty string
+        map<string, string>::const_iterator fieldIter = m_fields.begin();
+        map<string, string>::const_iterator fieldEnd  = m_fields.end();
+        for ( ; fieldIter != fieldEnd; ++fieldIter ) {
+            const string& key   = (*fieldIter).first;
+            const string& value = (*fieldIter).second;
+            const string& line  = key   + Constants::FIELD_SEPARATOR +    // const-ref bound to the temporary built by the concatenation
+                                  value + Constants::FIELD_NEWLINE;
+            result += line;
+        }
+    }
+    return result;
+}
+
+// ----------------------------------
+// HttpRequestHeader implementation
+// ----------------------------------
+
+HttpRequestHeader::HttpRequestHeader(const string& method,     // stored as-is & written first on the request line
+                                     const string& resource,   // stored as-is & written second on the request line
+                                     int majorVersion,
+                                     int minorVersion)
+    : HttpHeader()
+    , m_method(method)
+    , m_resource(resource)
+{
+    SetVersion(majorVersion, minorVersion);   // replaces the 1.1 default set by the base ctor
+}
+
+HttpRequestHeader::~HttpRequestHeader(void) { }   // empty body; members clean up after themselves
+
+string HttpRequestHeader::GetMethod(void) const {   // request method, as given to the ctor or parsed from a request line
+    return m_method;
+}
+
+string HttpRequestHeader::GetResource(void) const {   // requested resource, as given to the ctor or parsed from a request line
+    return m_resource;
+}
+
+bool HttpRequestHeader::ParseLine(const string& line, int lineNumber) {
+
+    // if not 'request line', just let base class parse
+    if ( lineNumber != 0 )
+        return HttpHeader::ParseLine(line, lineNumber);
+
+    // fail if empty line
+    if ( line.empty() )
+        return false;
+
+    // walk through request line, storing positions
+    //    GET /path/to/resource HTTP/1.1
+    //    ^  ^^                ^^
+    const size_t foundMethod = line.find_first_not_of(Constants::SPACE_CHAR); // skip any leading whitespace
+    if ( foundMethod == string::npos ) return false;
+    const size_t foundFirstSpace = line.find(Constants::SPACE_CHAR, foundMethod+1);
+    if ( foundFirstSpace == string::npos ) return false;
+    const size_t foundResource = line.find_first_not_of(Constants::SPACE_CHAR, foundFirstSpace+1);
+    if ( foundResource == string::npos ) return false;
+    const size_t foundSecondSpace = line.find(Constants::SPACE_CHAR, foundResource+1);
+    if ( foundSecondSpace == string::npos ) return false;
+    const size_t foundVersion= line.find_first_not_of(Constants::SPACE_CHAR, foundSecondSpace+1);
+    if ( foundVersion == string::npos ) return false;
+
+    // parse out method & resource
+    m_method   = line.substr(foundMethod,   foundFirstSpace  - foundMethod);
+    m_resource = line.substr(foundResource, foundSecondSpace - foundResource);
+
+    // parse out version numbers
+    const string temp = line.substr(foundVersion);
+    if ( (temp.find(Constants::HTTP_STRING) != 0) || (temp.size() != 8) )
+        return false;
+    const int major = static_cast<int>(temp.at(5) - '0');
+    const int minor = static_cast<int>(temp.at(7) - '0');
+    SetVersion(major, minor);
+
+    // if we get here, return success
+    return true;
+}
+
+string HttpRequestHeader::ToString(void) const {   // "<method> <resource> HTTP/<major>.<minor>\r\n" + base-class field lines + a closing "\r\n"
+    stringstream request("");
+    request << m_method   << Constants::SPACE_CHAR
+            << m_resource << Constants::SPACE_CHAR
+            << Constants::HTTP_STRING << GetMajorVersion() << Constants::DOT_CHAR << GetMinorVersion()
+            << Constants::FIELD_NEWLINE
+            << HttpHeader::ToString()       // empty if the header is marked invalid; the request line is still emitted
+            << Constants::FIELD_NEWLINE;    // blank line that ends the header block
+    return request.str();
+}
+
+// -----------------------------------
+// HttpResponseHeader implementation
+// -----------------------------------
+
+HttpResponseHeader::HttpResponseHeader(const int statusCode,     // stored as-is (no range checking)
+                                       const string& reason,     // 'reason phrase' written after the status code
+                                       int majorVersion,
+                                       int minorVersion)
+
+    : HttpHeader()
+    , m_statusCode(statusCode)
+    , m_reason(reason)
+{
+    SetVersion(majorVersion, minorVersion);   // replaces the 1.1 default set by the base ctor
+}
+
+HttpResponseHeader::HttpResponseHeader(const string& s)   // builds a response header by parsing raw text @s
+    : HttpHeader()
+    , m_statusCode(0)   // stays 0 if no status line gets parsed successfully
+{
+    Parse(s);   // called from this class's ctor body, so the virtual ParseLine() resolves to HttpResponseHeader::ParseLine
+}
+
+HttpResponseHeader::~HttpResponseHeader(void) { }   // empty body; members clean up after themselves
+
+string HttpResponseHeader::GetReason(void) const  {   // 'reason phrase', as given to the ctor or parsed from a status line
+    return m_reason;
+}
+
+int HttpResponseHeader::GetStatusCode(void) const {   // status code, as given to the ctor or parsed from a status line
+    return m_statusCode;
+}
+
+bool HttpResponseHeader::ParseLine(const string& line, int lineNumber) {
+
+    // if not 'status line', just let base class
+    if ( lineNumber != 0 )
+        return HttpHeader::ParseLine(line, lineNumber);
+
+    // fail if empty line
+    if ( line.empty() )
+        return false;
+
+    // walk through status line, storing positions
+    //    HTTP/1.1 200 OK
+    //    ^       ^^  ^^
+
+    const size_t foundVersion = line.find_first_not_of(Constants::SPACE_CHAR); // skip any leading whitespace
+    if ( foundVersion == string::npos ) return false;
+    const size_t foundFirstSpace = line.find(Constants::SPACE_CHAR, foundVersion+1);
+    if ( foundFirstSpace == string::npos ) return false;
+    const size_t foundStatusCode = line.find_first_not_of(Constants::SPACE_CHAR, foundFirstSpace+1);
+    if ( foundStatusCode == string::npos ) return false;
+    const size_t foundSecondSpace = line.find(Constants::SPACE_CHAR, foundStatusCode+1);
+    if ( foundSecondSpace == string::npos ) return false;
+    const size_t foundReason= line.find_first_not_of(Constants::SPACE_CHAR, foundSecondSpace+1);
+    if ( foundReason == string::npos ) return false;
+
+    // parse version numbers
+    string temp = line.substr(foundVersion, foundFirstSpace - foundVersion);
+    if ( (temp.find(Constants::HTTP_STRING) != 0) || (temp.size() != 8) )
+        return false;
+    const int major = static_cast<int>(temp.at(5) - '0');
+    const int minor = static_cast<int>(temp.at(7) - '0');
+    SetVersion(major, minor);
+
+    // parse status code
+    temp = line.substr(foundStatusCode, foundSecondSpace - foundStatusCode);
+    if ( temp.size() != 3 ) return false;
+    m_statusCode = atoi( temp.c_str() );
+
+    // reason phrase should be everything else left
+    m_reason = line.substr(foundReason);
+
+    // if we get here, return success
+    return true;
+}
+
+string HttpResponseHeader::ToString(void) const {   // "HTTP/<major>.<minor> <code> <reason>\r\n" + base-class field lines + a closing "\r\n"
+    stringstream response("");
+    response << Constants::HTTP_STRING << GetMajorVersion() << Constants::DOT_CHAR << GetMinorVersion()
+             << Constants::SPACE_CHAR  << m_statusCode
+             << Constants::SPACE_CHAR  << m_reason
+             << Constants::FIELD_NEWLINE
+             << HttpHeader::ToString()       // empty if the header is marked invalid; the status line is still emitted
+             << Constants::FIELD_NEWLINE;    // blank line that ends the header block
+    return response.str();
+}
diff --git a/src/api/internal/io/HttpHeader_p.h b/src/api/internal/io/HttpHeader_p.h

new file mode 100644 (file)

index 0000000..6330235
--- /dev/null
+++ b/src/api/internal/io/HttpHeader_p.h
@@ -0,0 +1,132 @@
+// ***************************************************************************
+// HttpHeader_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic interface for parsing/generating HTTP headers, along
+// with specialized request & response header types
+// ***************************************************************************
+
+#ifndef HTTP_HEADER_P_H
+#define HTTP_HEADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include <map>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class HttpHeader {    // generic header: a key=>value field map, plus HTTP version numbers & a 'parsed OK' flag
+
+    // ctors & dtor
+    public:
+        HttpHeader(void);
+        HttpHeader(const std::string& s);    // parses @s as raw header text
+        virtual ~HttpHeader(void);
+
+    // HttpHeader interface
+    public:
+
+        // header field=>value access
+        bool ContainsKey(const std::string& key) const;
+        std::string GetValue(const std::string& key) const;    // empty string if @key is not present
+        void RemoveField(const std::string& key);
+        void SetField(const std::string& key, const std::string& value);
+
+        // get formatted header string
+        virtual std::string ToString(void) const;
+
+        // query HTTP version used
+        int GetMajorVersion(void) const;
+        int GetMinorVersion(void) const;
+
+        // see if header was parsed OK
+        bool IsValid(void) const;
+
+    // internal methods
+    protected:
+        void Parse(const std::string& s);                                   // splits @s into lines & feeds each to ParseLine()
+        virtual bool ParseLine(const std::string& line, int lineNumber);    // subclasses override to handle line 0 (request/status line)
+        void SetValid(bool ok);
+        void SetVersion(int major, int minor);
+
+    // data members
+    private:
+        std::map<std::string, std::string> m_fields;
+
+        bool m_isValid;       // should usually be true, only false if error processing a header line
+        int  m_majorVersion;
+        int  m_minorVersion;
+};
+
+class HttpRequestHeader : public HttpHeader {    // header whose first line is a request line: "<method> <resource> HTTP/<major>.<minor>"
+
+    // ctor & dtor
+    public:
+        HttpRequestHeader(const std::string& method,      // "GET", "PUT", etc
+                          const std::string& resource,    // filename
+                          int majorVersion = 1,           // version info
+                          int minorVersion = 1);
+        ~HttpRequestHeader(void);
+
+    // HttpRequestHeader interface
+    public:
+        std::string GetMethod(void) const;
+        std::string GetResource(void) const;
+
+    // HttpHeader implementation
+    public:
+        std::string ToString(void) const;    // request line + field lines + blank line
+    protected:
+        bool ParseLine(const std::string& line, int lineNumber);    // line 0 is parsed as the request line
+
+    // data members
+    private:
+        std::string m_method;
+        std::string m_resource;
+};
+
+class HttpResponseHeader : public HttpHeader {    // header whose first line is a status line: "HTTP/<major>.<minor> <code> <reason>"
+
+    // ctor & dtor
+    public:
+        HttpResponseHeader(const int statusCode,                       // 200, 404, etc
+                           const std::string& reason = std::string(),  // 'reason phrase' for code
+                           int majorVersion = 1,                       // version info
+                           int minorVersion = 1);
+        HttpResponseHeader(const std::string& s);                      // parses @s as raw response header text
+        ~HttpResponseHeader(void);
+
+    // HttpResponseHeader interface
+    public:
+        std::string GetReason(void) const;
+        int GetStatusCode(void) const;
+
+    // HttpHeader implementation
+    public:
+        std::string ToString(void) const;    // status line + field lines + blank line
+    protected:
+        bool ParseLine(const std::string& line, int lineNumber);    // line 0 is parsed as the status line
+
+    // data members
+    private:
+        int m_statusCode;
+        std::string m_reason;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // HTTP_HEADER_P_H
diff --git a/src/api/internal/io/ILocalIODevice_p.cpp b/src/api/internal/io/ILocalIODevice_p.cpp

new file mode 100644 (file)

index 0000000..19cc1da
--- /dev/null
+++ b/src/api/internal/io/ILocalIODevice_p.cpp
@@ -0,0 +1,56 @@
+// ***************************************************************************
+// ILocalIODevice_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides shared behavior for files & pipes
+// ***************************************************************************
+
+#include "api/internal/io/ILocalIODevice_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+using namespace std;
+
+ILocalIODevice::ILocalIODevice(void)   // starts out with no stream attached
+    : IBamIODevice()
+    , m_stream(0)
+{ }
+
+ILocalIODevice::~ILocalIODevice(void) {   // makes sure the stream gets closed on destruction
+    Close();   // inside a dtor this virtual call resolves to ILocalIODevice::Close, not to a subclass override
+}
+
+void ILocalIODevice::Close(void) {   // flushes & closes the stream, then marks the device as not open; no-op if not open
+
+    // skip if not open
+    if ( !IsOpen() )
+        return;
+
+    // flush & close FILE* (return values are not checked)
+    fflush(m_stream);
+    fclose(m_stream);
+    m_stream = 0;
+
+    // reset other device state
+    m_mode = IBamIODevice::NotOpen;
+}
+
+int64_t ILocalIODevice::Read(char* data, const unsigned int numBytes) {   // reads up to @numBytes bytes into @data; returns fread()'s item count (items are 1 byte each)
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Read: trying to read from null stream" );
+    BT_ASSERT_X( (m_mode == IBamIODevice::ReadOnly), "ILocalIODevice::Read: device not in read-only mode");
+    return static_cast<int64_t>( fread(data, sizeof(char), numBytes, m_stream) );
+}
+
+// Returns the current position in the underlying stream, as reported by ftell64().
+int64_t ILocalIODevice::Tell(void) const {
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Tell: trying to get file position from null stream" );
+    return ftell64(m_stream);
+}
+
+// Writes up to @numBytes bytes from @data to the stream.
+// Returns fwrite()'s item count (items are 1 byte each).
+int64_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) {
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Write: trying to write to null stream" );
+    BT_ASSERT_X( (m_mode == IBamIODevice::WriteOnly), "ILocalIODevice::Write: device not in write-only mode" );
+    return static_cast<int64_t>( fwrite(data, sizeof(char), numBytes, m_stream) );
+}
diff --git a/src/api/internal/io/ILocalIODevice_p.h b/src/api/internal/io/ILocalIODevice_p.h

new file mode 100644 (file)

index 0000000..cf01f90
--- /dev/null
+++ b/src/api/internal/io/ILocalIODevice_p.h
@@ -0,0 +1,50 @@
+// ***************************************************************************
+// ILocalIODevice_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides shared behavior for files & pipes
+// ***************************************************************************
+
+#ifndef ILOCALIODEVICE_P_H
+#define ILOCALIODEVICE_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+
+namespace BamTools {
+namespace Internal {
+
+class ILocalIODevice : public IBamIODevice {    // IBamIODevice built on a C stdio FILE*; implements Close/Read/Tell/Write on that stream
+
+    // ctor & dtor
+    public:
+        ILocalIODevice(void);
+        virtual ~ILocalIODevice(void);
+
+    // IBamIODevice implementation
+    public:
+        virtual void Close(void);
+        virtual int64_t Read(char* data, const unsigned int numBytes);
+        virtual int64_t Tell(void) const;
+        virtual int64_t Write(const char* data, const unsigned int numBytes);
+
+    // data members
+    protected:
+        FILE* m_stream;    // 0 while no stream is attached. NOTE(review): <cstdio> is not included by this header itself - presumably FILE arrives via api/IBamIODevice.h, confirm
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // ILOCALIODEVICE_P_H
diff --git a/src/api/internal/io/NetUnix_p.h b/src/api/internal/io/NetUnix_p.h

new file mode 100644 (file)

index 0000000..8cf75f8
--- /dev/null
+++ b/src/api/internal/io/NetUnix_p.h
@@ -0,0 +1,39 @@
+// ***************************************************************************
+// NetUnix_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides common networking-related includes, etc. for all UNIX-like systems
+// ***************************************************************************
+
+#ifndef NETUNIX_P_H
+#define NETUNIX_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#ifndef _WIN32 // <-- source files only include the proper Net*_p.h, but this is a double-check
+
+#include <arpa/inet.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <netdb.h>
+#include <unistd.h>
+
+#ifndef   BT_SOCKLEN_T
+#  define BT_SOCKLEN_T socklen_t
+#endif
+
+#endif // _WIN32
+#endif // NETUNIX_P_H
diff --git a/src/api/internal/io/NetWin_p.h b/src/api/internal/io/NetWin_p.h

new file mode 100644 (file)

index 0000000..bcef955
--- /dev/null
+++ b/src/api/internal/io/NetWin_p.h
@@ -0,0 +1,60 @@
+// ***************************************************************************
+// NetWin_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides common networking-related includes, etc. for Windows systems
+//
+// Note: only supports XP and later
+// ***************************************************************************
+
+#ifndef NETWIN_P_H
+#define NETWIN_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#ifdef _WIN32 // <-- source files only include the proper Net*_p.h, but this is a double-check
+
+#include <winsock2.h>  // <-- should bring 'windows.h' along with it
+#include <Ws2tcpip.h>
+
+#ifndef   BT_SOCKLEN_T
+#  define BT_SOCKLEN_T int
+#endif
+
+#ifdef _MSC_VER
+#  pragma comment(lib, "ws2_32.lib")
+#endif
+
+namespace BamTools {
+namespace Internal {
+
+// use RAII to ensure WSA is initialized on construction & released on destruction
+//
+// WSACleanup() is only called if the matching WSAStartup() actually succeeded;
+// use IsInitialized() to find out whether Winsock is usable.
+class WindowsSockInit {
+    public:
+        WindowsSockInit(void)
+            : m_isInitialized(false)
+        {
+            WSAData wsadata;
+            // WSAStartup() returns 0 on success, otherwise an error code
+            m_isInitialized = ( WSAStartup(MAKEWORD(2,2), &wsadata) == 0 );
+        }
+
+        ~WindowsSockInit(void) {
+            // a failed WSAStartup() must not be paired with a WSACleanup()
+            if ( m_isInitialized )
+                WSACleanup();
+        }
+
+        // returns true if WSAStartup() succeeded
+        bool IsInitialized(void) const {
+            return m_isInitialized;
+        }
+
+    private:
+        bool m_isInitialized;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // _WIN32
+
+#endif // NETWIN_P_H
+
diff --git a/src/api/internal/io/RollingBuffer_p.cpp b/src/api/internal/io/RollingBuffer_p.cpp

new file mode 100644 (file)

index 0000000..c3f709d
--- /dev/null
+++ b/src/api/internal/io/RollingBuffer_p.cpp
@@ -0,0 +1,310 @@
+// ***************************************************************************
+// RollingBuffer_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic I/O FIFO byte queue, which removes bytes as they are
+// read from the front of the buffer and grows to accept bytes being written
+// to buffer end.
+//
+// implementation note: basically a 'smart' wrapper around 1..* ByteArrays
+// ***************************************************************************
+
+#include "api/internal/io/RollingBuffer_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <climits>
+#include <cstring>
+#include <algorithm>
+#include <string>
+using namespace std;
+
+// ------------------------------
+// RollingBuffer implementation
+// ------------------------------
+
+RollingBuffer::RollingBuffer(size_t growth)   // @growth: minimum size given to a byte array when Reserve() (re)sizes or appends one
+    : m_bufferGrowth(growth)
+{
+    // buffer always contains at least 1 (maybe empty) byte array
+    m_data.push_back( ByteArray() );
+
+    // set cleared state (this is also what initializes the head/tail/index/size markers)
+    Clear();
+}
+
+RollingBuffer::~RollingBuffer(void) { }   // empty body; members clean up after themselves
+
+size_t RollingBuffer::BlockSize(void) const {   // number of readable bytes left in the FIRST byte array (not the whole buffer)
+
+    // if only one byte array in buffer, readable data is whatever lies between head & tail
+    if ( m_tailBufferIndex == 0 )
+        return m_tail - m_head;
+
+    // otherwise return remaining num bytes in first array
+    const ByteArray& first = m_data.front();
+    return first.Size() - m_head;
+}
+
+bool RollingBuffer::CanReadLine(void) const {   // true if the buffered data contains a '\n'
+    return IndexOf('\n') != string::npos;
+}
+
+void RollingBuffer::Chop(size_t n) {
+
+    // update buffer size
+    if ( n > m_totalBufferSize )
+        m_totalBufferSize = 0;
+    else
+        m_totalBufferSize -= n;
+
+    // loop until target case hit
+    for ( ; ; ) {
+
+        // if only one array, decrement tail
+        if ( m_tailBufferIndex == 0 ) {
+            m_tail -= n;
+
+            // if all data chopped
+            if ( m_tail <= m_head ) {
+                m_head = 0;
+                m_tail = 0;
+            }
+            return;
+        }
+
+        // if there's room in last byte array to 'chop', just decrement tail
+        if ( n <= m_tail ) {
+            m_tail -= n;
+            return;
+        }
+
+        // otherwise we're going to overlap our internal byte arrays
+        // reduce our chop amount by the amount of data in the last byte array
+        n -= m_tail;
+
+        // remove last byte array & set tail to it's end
+        m_data.pop_back();
+        --m_tailBufferIndex;
+        m_tail = m_data.at(m_tailBufferIndex).Size();
+    }
+
+    // if buffer is now empty, reset state & clear up memory
+    if ( IsEmpty() )
+        Clear();
+}
+
+void RollingBuffer::Clear(void) {   // drops all stored data; expects m_data to hold at least one byte array
+
+    // remove all byte arrays (except first)
+    m_data.erase( m_data.begin()+1, m_data.end() );
+
+    // clear out first byte array
+    m_data[0].Resize(0);
+    m_data[0].Squeeze();
+
+    // reset index & size markers
+    m_head = 0;
+    m_tail = 0;
+    m_tailBufferIndex = 0;
+    m_totalBufferSize = 0;
+}
+
+void RollingBuffer::Free(size_t n) {   // frees @n bytes from the FRONT of the buffer
+
+    // update buffer size (clamped at 0 if @n exceeds what is stored)
+    if ( n > m_totalBufferSize )
+        m_totalBufferSize = 0;
+    else
+        m_totalBufferSize -= n;
+
+    // loop until target case hit
+    for ( ; ; ) {
+
+        const size_t blockSize = BlockSize();   // readable bytes left in the current first array
+
+        // if there's room in current array
+        if ( n < blockSize ) {
+
+            // shift 'head' over @n bytes
+            m_head += n;
+
+            // check for emptied, single byte array
+            if ( m_head == m_tail && m_tailBufferIndex == 0 ) {
+                m_head = 0;
+                m_tail = 0;
+            }
+
+            break;
+        }
+
+        // otherwise the whole first array is consumed and we need to check next byte array
+        // first update amount to remove
+        n -= blockSize;
+
+        // special case - there was only 1 array, so everything has been freed
+        if ( m_data.size() == 1 ) {
+            if ( m_data.at(0).Size() != m_bufferGrowth )
+                m_data[0].Resize(m_bufferGrowth);
+            m_head = 0;
+            m_tail = 0;
+            m_tailBufferIndex = 0;
+            break;
+        }
+
+        // otherwise, remove first array and move to next iteration
+        m_data.pop_front();
+        --m_tailBufferIndex;   // tail array moved one slot toward the front
+        m_head = 0;
+    }
+
+    // if buffer is now empty, reset state & clear up memory
+    if ( IsEmpty() )
+        Clear();
+}
+
+size_t RollingBuffer::IndexOf(char c) const {   // offset (counted from the current read position) of the first @c, or string::npos
+
+    size_t index(0);   // running count of bytes examined so far, across all byte arrays
+
+    // iterate over byte arrays
+    const size_t numBuffers = m_data.size();
+    for ( size_t i = 0; i < numBuffers; ++i ) {
+        const ByteArray& current = m_data.at(i);
+
+        // if on first array, use head; else 0
+        const size_t start = ( (i==0) ? m_head : 0 );
+
+        // if on last array, set end; else use current byte array size
+        const size_t end   = ( (i==m_tailBufferIndex) ? m_tail : current.Size());
+
+        // look through this iteration's byte array for @c
+        const char* p = current.ConstData()+start;
+        for ( size_t j = start; j < end; ++j ) {
+            if ( *p++ == c )
+                return index;
+            ++index;
+        }
+    }
+
+    // no match found
+    return string::npos;
+}
+
+bool RollingBuffer::IsEmpty(void) const {   // empty == single byte array whose tail marker sits at 0
+    return (m_tailBufferIndex == 0) && (m_tail == 0);
+}
+
+size_t RollingBuffer::Read(char* dest, size_t max) {   // moves up to @max bytes out of the buffer into @dest; returns the number of bytes consumed
+
+    size_t bytesToRead    = std::min(Size(), max);   // can not read more than is stored
+    size_t bytesReadSoFar = 0;
+
+    while ( bytesReadSoFar < bytesToRead ) {   // one iteration per byte array touched
+        const char* readPtr = ReadPointer();
+        size_t blockBytes = std::min( (bytesToRead - bytesReadSoFar), BlockSize() );
+        if ( dest )   // a null @dest skips the copy, but the bytes are still freed below
+            memcpy(dest+bytesReadSoFar, readPtr, blockBytes);
+        bytesReadSoFar += blockBytes;
+        Free(blockBytes);
+    }
+
+    return bytesReadSoFar;
+}
+
+size_t RollingBuffer::ReadLine(char* dest, size_t max) {
+
+    // if we can't read line or if max is 0
+    if ( !CanReadLine() || max == 0 )
+        return 0;
+
+    // otherwise, read until we hit newline
+    size_t bytesReadSoFar = 0;
+    bool finished = false;
+    while ( !finished ) {
+
+        const size_t index = IndexOf('\n');
+        const char* readPtr = ReadPointer();
+        size_t bytesToRead = std::min( (index+1)-bytesReadSoFar, BlockSize() );
+        bytesToRead = std::min( bytesToRead, (max-1)-bytesReadSoFar );
+        memcpy(dest+bytesReadSoFar, readPtr, bytesToRead);
+        bytesReadSoFar += bytesToRead;
+        Free(bytesToRead);
+
+        if ( !((bytesReadSoFar < index+1)&&(bytesReadSoFar < max-1)) )
+            finished = true;
+    }
+
+    // null terminate 'dest' & return numBytesRead
+    dest[bytesReadSoFar] = '\0';
+    return bytesReadSoFar;
+}
+
+const char* RollingBuffer::ReadPointer(void) const {   // pointer to the next unread byte (head position within the first byte array)
+
+    // return null if there are no byte arrays at all
+    if ( m_data.empty() )
+        return 0;
+
+    // otherwise return pointer to current position
+    const ByteArray& first = m_data.front();
+    return first.ConstData() + m_head;
+}
+
+char* RollingBuffer::Reserve(size_t n) {   // makes room for @n bytes at the buffer's end & returns a pointer to write them to; the bytes count toward Size() immediately
+
+    // if empty buffer
+    if ( m_totalBufferSize == 0 ) {
+        m_data[0].Resize( std::max(m_bufferGrowth, n) );
+        m_totalBufferSize += n;
+        m_tail = n;
+        return m_data[m_tailBufferIndex].Data();
+    }
+
+    // increment buffer's byte count
+    m_totalBufferSize += n;
+
+    // if buffer already contains enough space to fit @n more bytes
+    if ( (m_tail + n) <= m_data.at(m_tailBufferIndex).Size() ) {
+
+        // fetch write pointer at current 'tail', increment tail by @n & return
+        char* ptr = m_data[m_tailBufferIndex].Data() + m_tail;
+        m_tail += n;
+        return ptr;
+    }
+
+    // if last byte array isn't half full
+    if ( m_tail < m_data.at(m_tailBufferIndex).Size()/2 ) {
+
+        // we'll allow simple resize
+        m_data[m_tailBufferIndex].Resize(m_tail + n);
+
+        // fetch write pointer at current 'tail', increment tail by @n & return
+        char* ptr = m_data[m_tailBufferIndex].Data() + m_tail;
+        m_tail += n;
+        return ptr;
+    }
+
+    // otherwise, shrink last byte array to current used size
+    m_data[m_tailBufferIndex].Resize(m_tail);
+
+    // then append new byte array (sized to at least the growth amount)
+    m_data.push_back( ByteArray() );
+    ++m_tailBufferIndex;
+    m_data[m_tailBufferIndex].Resize( std::max(m_bufferGrowth, n) );
+    m_tail = n;
+
+    // return write-able pointer on new array
+    return m_data[m_tailBufferIndex].Data();
+}
+
+size_t RollingBuffer::Size(void) const {   // total number of bytes currently held, across all byte arrays
+    return m_totalBufferSize;
+}
+
+void RollingBuffer::Write(const char* src, size_t n) {   // appends @n bytes from @src to the end of the buffer
+    char* writePtr = Reserve(n);   // grows the buffer as needed & accounts for the new bytes
+    memcpy(writePtr, src, n);
+}
diff --git a/src/api/internal/io/RollingBuffer_p.h b/src/api/internal/io/RollingBuffer_p.h

new file mode 100644 (file)

index 0000000..e995f26
--- /dev/null
+++ b/src/api/internal/io/RollingBuffer_p.h
@@ -0,0 +1,84 @@
+// ***************************************************************************
+// RollingBuffer_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic I/O FIFO byte queue, which removes bytes as they are
+// read from the front of the buffer and grows to accept bytes being written
+// to buffer end.
+//
+// implementation note: basically a 'smart' wrapper around 1..* ByteArrays
+// ***************************************************************************
+
+#ifndef ROLLINGBUFFER_P_H
+#define ROLLINGBUFFER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include "api/internal/io/ByteArray_p.h"
+#include <deque>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// FIFO byte queue built on a deque of ByteArrays: bytes are written at the tail
+// (see Reserve/Write) and consumed from the head (see Read/ReadLine/Free).
+class RollingBuffer {
+
+    // ctors & dtor
+    public:
+        // @growth is stored as the typical size for newly initialized byte arrays
+        // (presumably - confirm against the constructor definition)
+        RollingBuffer(size_t growth);
+        ~RollingBuffer(void);
+
+    // RollingBuffer interface
+    public:
+
+        // returns current buffer size
+        size_t BlockSize(void) const;
+        // checks buffer for new line
+        bool CanReadLine(void) const;
+        // frees @n bytes from end of buffer
+        void Chop(size_t n);
+        // clears entire buffer structure
+        void Clear(void);
+        // frees @n bytes from front of buffer
+        void Free(size_t n);
+        // checks buffer for @c
+        size_t IndexOf(char c) const;
+        // returns whether buffer contains data
+        bool IsEmpty(void) const;
+        // reads up to @max bytes into @dest
+        // returns exactly how many bytes were read from buffer
+        size_t Read(char* dest, size_t max);
+        // reads until newline (or up to @max bytes)
+        // returns exactly how many bytes were read from buffer
+        size_t ReadLine(char* dest, size_t max);
+
+        const char* ReadPointer(void) const;   // returns a C-fxn compatible char* to byte data
+        char* Reserve(size_t n);               // ensures that buffer contains space for @n incoming bytes, returns write-able char*
+        size_t Size(void) const;               // returns current number of bytes stored in buffer
+        void Write(const char* src, size_t n); // reserves space for @n bytes, then appends contents of @src to buffer
+
+    // data members
+    private:
+        size_t m_head;                // index into current data (next char)
+        size_t m_tail;                // index into last data position (write offset within the last byte array)
+        size_t m_tailBufferIndex;     // index of the last byte array in m_data, i.e. m_data.size() - 1
+        size_t m_totalBufferSize;     // total buffer size (number of bytes reserved/stored)
+        size_t m_bufferGrowth;        // new buffers are typically initialized with this size
+        std::deque<ByteArray> m_data; // basic 'buffer of buffers'
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // ROLLINGBUFFER_P_H
diff --git a/src/api/internal/io/TcpSocketEngine_p.cpp b/src/api/internal/io/TcpSocketEngine_p.cpp

new file mode 100644 (file)

index 0000000..467eaeb
--- /dev/null
+++ b/src/api/internal/io/TcpSocketEngine_p.cpp
@@ -0,0 +1,195 @@
+// ***************************************************************************
+// TcpSocketEngine_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O
+// ***************************************************************************
+
+// N.B. - this file contains the top-level, platform-independent logic. "Native" methods
+//        are called as needed; they are implemented in the TcpSocketEngine_<X>_p.cpp files.
+//        Selection of the proper native method file should have been handled at build-time by CMake.
+
+#include "api/internal/io/HostInfo_p.h"
+#include "api/internal/io/TcpSocketEngine_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+// Constructs an engine that does not hold a socket yet: the descriptor is -1
+// (the value IsValid() treats as 'no socket'), the remote port is 0 and the
+// state is 'unconnected'.
+TcpSocketEngine::TcpSocketEngine(void)
+    : m_socketDescriptor(-1)
+//    , m_localPort(0)
+    , m_remotePort(0)
+    , m_socketError(TcpSocket::UnknownSocketError)
+    , m_socketState(TcpSocket::UnconnectedState)
+{ }
+
+// Member-wise copy of @other, including its raw socket descriptor.
+//
+// NOTE(review): after copying, both engines refer to the same descriptor, and the
+// destructor of each one calls Close() - so the same descriptor can end up being
+// closed twice. Confirm how copies are expected to be used relative to the original.
+TcpSocketEngine::TcpSocketEngine(const TcpSocketEngine& other)
+    : m_socketDescriptor(other.m_socketDescriptor)
+//    , m_localAddress(other.m_localAddress)
+    , m_remoteAddress(other.m_remoteAddress)
+//    , m_localPort(other.m_localPort)
+    , m_remotePort(other.m_remotePort)
+    , m_socketError(other.m_socketError)
+    , m_socketState(other.m_socketState)
+    , m_errorString(other.m_errorString)
+{ }
+
+// Closes the socket (if one is held) and resets connection state via Close().
+TcpSocketEngine::~TcpSocketEngine(void) {
+    Close();
+}
+
+// Releases the native socket (when one is held) and returns the engine to its
+// 'unconnected' state with no remote address or port.
+void TcpSocketEngine::Close(void) {
+
+    // only hand a real descriptor to the native layer, then mark it as gone
+    if ( IsValid() ) {
+        nativeClose();
+        m_socketDescriptor = -1;
+    }
+
+    // forget everything about the previous connection
+//    m_localPort = 0;
+//    m_localAddress.Clear();
+    m_remotePort = 0;
+    m_remoteAddress.Clear();
+    m_socketState = TcpSocket::UnconnectedState;
+}
+
+// Connects the (already created) socket to @address on @port.
+//
+// returns false if no socket is held, if the engine is already connected, or if
+// the native connect attempt fails; on success the remote address & port are
+// remembered and true is returned
+bool TcpSocketEngine::Connect(const HostAddress& address, const uint16_t port) {
+
+    // nothing to connect with
+    // TODO: set error string
+    if ( !IsValid() )
+        return false;
+
+    // refuse to connect twice
+    // TODO: set error string
+    if ( m_socketState == TcpSocket::ConnectedState )
+        return false;
+
+    // let the platform layer do the actual connect
+    // TODO: set error string
+    const bool connected = nativeConnect(address, port);
+    if ( !connected )
+        return false;
+
+    // remember who we are talking to
+    // TODO: (later) fetch proxied remote & local host/port  here
+    m_remotePort    = port;
+    m_remoteAddress = address;
+    return true;
+}
+
+// returns a copy of the most recently stored error message
+std::string TcpSocketEngine::GetErrorString(void) const {
+    return m_errorString;
+}
+
+//HostAddress TcpSocketEngine::GetLocalAddress(void) const {
+//    return m_localAddress;
+//}
+
+//uint16_t TcpSocketEngine::GetLocalPort(void) const {
+//    return m_localPort;
+//}
+
+// returns the address stored by the last successful Connect() (cleared by Close())
+HostAddress TcpSocketEngine::GetRemoteAddress(void) const {
+    return m_remoteAddress;
+}
+
+// returns the port stored by the last successful Connect() (reset to 0 by Close())
+uint16_t TcpSocketEngine::GetRemotePort(void) const {
+    return m_remotePort;
+}
+
+// returns the raw socket descriptor, or -1 when no socket is held
+int TcpSocketEngine::GetSocketDescriptor(void) const {
+    return m_socketDescriptor;
+}
+
+// returns the most recently stored socket error type
+TcpSocket::SocketError TcpSocketEngine::GetSocketError(void) {
+    return m_socketError;
+}
+
+// returns the engine's current connection state
+TcpSocket::SocketState TcpSocketEngine::GetSocketState(void) {
+    return m_socketState;
+}
+
+// (Re)creates the underlying socket for @protocol.
+// A socket that is currently held is closed first.
+//
+// returns whether the native layer managed to create the new socket
+bool TcpSocketEngine::Initialize(HostAddress::NetworkProtocol protocol) {
+
+    // drop the socket we are holding, if any
+    const bool holdingSocket = IsValid();
+    if ( holdingSocket )
+        Close();
+
+    // ask the native layer for a fresh one
+    const bool created = nativeCreateSocket(protocol);
+    return created;
+}
+
+// returns true if a socket descriptor is held (-1 is the 'no socket' marker)
+bool TcpSocketEngine::IsValid(void) const {
+    return (m_socketDescriptor != -1);
+}
+
+// Reports how many bytes are ready to be read from the socket.
+//
+// returns -1 if no socket descriptor is held; otherwise whatever the
+// platform-specific query reports
+int64_t TcpSocketEngine::NumBytesAvailable(void) const {
+
+    // only ask the native layer when we actually hold a descriptor
+    // TODO: set error string for the invalid-descriptor case
+    const bool haveSocket = IsValid();
+    return ( haveSocket ? nativeNumBytesAvailable() : -1 );
+}
+
+// Reads up to @max bytes from the socket into @dest.
+//
+// returns -1 if no socket is held or the engine is not connected; otherwise the
+// result of the native read
+int64_t TcpSocketEngine::Read(char* dest, size_t max) {
+
+    // reading requires a live descriptor AND an established connection
+    const bool readable = IsValid() && (m_socketState == TcpSocket::ConnectedState);
+    if ( !readable )
+        return -1;
+
+    // hand off to the platform layer
+    return nativeRead(dest, max);
+}
+
+// Waits for the socket to become ready for reading, using @msec as the timeout
+// handed to the native select.
+//
+// @timedOut is always written: true only when the wait ended because of the timeout
+// (the socket error & error string are updated in that case as well)
+// returns true if the socket became ready for reading
+bool TcpSocketEngine::WaitForRead(int msec, bool* timedOut) {
+
+    // start from a clean 'timed out' flag
+    *timedOut = false;
+
+    // let the native layer wait on our socket (read direction)
+    const int numReady = nativeSelect(msec, true);
+
+    // zero ready sockets means the timeout expired first
+    const bool expired = ( numReady == 0 );
+    if ( expired ) {
+        m_errorString = "socket timed out";
+        m_socketError = TcpSocket::SocketTimeoutError;
+        *timedOut = true;
+    }
+
+    // ready only if select reported at least one socket
+    return ( numReady > 0 );
+}
+
+// Waits for the socket to become ready for writing, using @msec as the timeout
+// handed to the native select.
+//
+// @timedOut is always written: true only when the wait ended because of the timeout
+// (the socket error & error string are updated in that case as well)
+// returns true if the socket became ready for writing
+bool TcpSocketEngine::WaitForWrite(int msec, bool* timedOut) {
+
+    // start from a clean 'timed out' flag
+    *timedOut = false;
+
+    // let the native layer wait on our socket (write direction)
+    const int numReady = nativeSelect(msec, false);
+
+    // zero ready sockets means the timeout expired first
+    const bool expired = ( numReady == 0 );
+    if ( expired ) {
+        m_errorString = "socket timed out";
+        m_socketError = TcpSocket::SocketTimeoutError;
+        *timedOut = true;
+    }
+
+    // ready only if select reported at least one socket available for writing
+    return ( numReady > 0 );
+}
+
+// Writes up to @length bytes from @data to the socket.
+//
+// returns -1 if no socket is held or the engine is not connected; otherwise the
+// result of the native write
+int64_t TcpSocketEngine::Write(const char* data, size_t length) {
+
+    // writing requires a live descriptor AND an established connection
+    // TODO: set error string
+    const bool writable = IsValid() && (m_socketState == TcpSocket::ConnectedState);
+    if ( !writable )
+        return -1;
+
+    // hand off to the platform layer
+    return nativeWrite(data, length);
+}
diff --git a/src/api/internal/io/TcpSocketEngine_p.h b/src/api/internal/io/TcpSocketEngine_p.h

new file mode 100644 (file)

index 0000000..1a1a944
--- /dev/null
+++ b/src/api/internal/io/TcpSocketEngine_p.h
@@ -0,0 +1,95 @@
+// ***************************************************************************
+// TcpSocketEngine_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O
+// ***************************************************************************
+
+#ifndef TCPSOCKETENGINE_P_H
+#define TCPSOCKETENGINE_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/HostAddress_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+
+namespace BamTools {
+namespace Internal {
+
+// Thin wrapper around one native TCP socket descriptor. The platform-independent
+// methods validate state and then delegate to the native* methods, which are
+// implemented per platform.
+struct TcpSocketEngine {
+
+    // ctors & dtor
+    public:
+        TcpSocketEngine(void);
+        // copies all members, including the raw socket descriptor
+        TcpSocketEngine(const TcpSocketEngine& other);
+        // calls Close()
+        ~TcpSocketEngine(void);
+
+    // TcpSocketEngine interface
+    public:
+
+        // connection-related methods
+        void Close(void);                                              // closes socket (if held) & resets connection state
+        bool Connect(const HostAddress& address, const uint16_t port); // connects an initialized socket to @address:@port
+        bool Initialize(HostAddress::NetworkProtocol protocol);        // closes any held socket, then creates a new one
+        bool IsValid(void) const;                                      // true if a socket descriptor is held
+
+        // IO-related methods
+        // (each returns -1 when no socket descriptor is held; Read/Write also when not connected)
+        int64_t NumBytesAvailable(void) const;
+        int64_t Read(char* dest, size_t max);
+        int64_t Write(const char* data, size_t length);
+
+        // wait for readiness; @timedOut is set to true only if the wait timed out
+        bool WaitForRead(int msec, bool* timedOut);
+        bool WaitForWrite(int msec, bool* timedOut);
+
+        // query connection state
+//        HostAddress GetLocalAddress(void) const;
+//        uint16_t GetLocalPort(void) const;
+        HostAddress GetRemoteAddress(void) const;
+        uint16_t    GetRemotePort(void) const;
+
+        int GetSocketDescriptor(void) const;
+        TcpSocket::SocketError GetSocketError(void);
+        TcpSocket::SocketState GetSocketState(void);
+
+        std::string GetErrorString(void) const;
+
+    // platform-dependent internal methods
+    // provided in the corresponding TcpSocketEngine_<OS>_p.cpp
+    private:
+        void    nativeClose(void);
+        bool    nativeConnect(const HostAddress& address, const uint16_t port);
+        bool    nativeCreateSocket(HostAddress::NetworkProtocol protocol);
+        // NOTE(review): no definition of nativeDisconnect appears alongside the other
+        // native* methods - confirm it is defined somewhere, or drop the declaration
+        void    nativeDisconnect(void);
+        int64_t nativeNumBytesAvailable(void) const;
+        int64_t nativeRead(char* dest, size_t max);
+        int     nativeSelect(int msecs, bool isRead) const;
+        int64_t nativeWrite(const char* data, size_t length);
+
+    // data members
+    private:
+        int m_socketDescriptor;                 // native socket descriptor, -1 when none is held
+
+//        HostAddress m_localAddress;
+        HostAddress m_remoteAddress;            // set by Connect(), cleared by Close()
+//        uint16_t m_localPort;
+        uint16_t m_remotePort;                  // set by Connect(), reset to 0 by Close()
+
+        TcpSocket::SocketError m_socketError;   // type of the most recently recorded error
+        TcpSocket::SocketState m_socketState;   // current connection state
+        std::string m_errorString;              // message for the most recently recorded error
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // TCPSOCKETENGINE_P_H
diff --git a/src/api/internal/io/TcpSocketEngine_unix_p.cpp b/src/api/internal/io/TcpSocketEngine_unix_p.cpp

new file mode 100644 (file)

index 0000000..efcdf8d
--- /dev/null
+++ b/src/api/internal/io/TcpSocketEngine_unix_p.cpp
@@ -0,0 +1,247 @@
+// ***************************************************************************
+// TcpSocketEngine_unix_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 15 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O for all UNIX-like systems
+// ***************************************************************************
+
+#include "api/internal/io/TcpSocketEngine_p.h"
+#include "api/internal/io/NetUnix_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cerrno>
+#include <ctime>
+#include <iostream>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace BamTools {
+namespace Internal {
+
+} // namespace Internal
+} // namespace BamTools
+
+// --------------------------------
+// TcpSocketEngine implementation
+// --------------------------------
+
+// Closes the held descriptor with close(); the result of close() is not checked.
+// Resetting m_socketDescriptor is left to the caller.
+void TcpSocketEngine::nativeClose(void) {
+    close(m_socketDescriptor);
+}
+
+// Connects m_socketDescriptor to @address on @port using connect().
+//
+// Builds a sockaddr_in6 or sockaddr_in depending on @address's protocol, then
+// attempts the connection. On failure, errno is mapped onto m_socketError &
+// m_errorString (errno values not listed in the switch leave both untouched).
+// EISCONN is treated as success, since the socket is already connected.
+//
+// returns true (with m_socketState set to ConnectedState) on success, false otherwise
+bool TcpSocketEngine::nativeConnect(const HostAddress& address, const uint16_t port) {
+
+    // setup connection parameters from address/port
+    sockaddr_in  sockAddrIPv4;
+    sockaddr_in6 sockAddrIPv6;
+    sockaddr*    sockAddrPtr  = 0;
+    BT_SOCKLEN_T sockAddrSize = 0;
+
+    // IPv6
+    if ( address.GetProtocol() == HostAddress::IPv6Protocol ) {
+
+        memset(&sockAddrIPv6, 0, sizeof(sockAddrIPv6));
+        sockAddrIPv6.sin6_family = AF_INET6;
+        sockAddrIPv6.sin6_port   = htons(port);
+
+        // raw copy of the address bytes
+        // NOTE(review): assumes sizeof(IPv6Address) does not exceed the 16-byte s6_addr - confirm
+        IPv6Address ip6 = address.GetIPv6Address();
+        memcpy(&sockAddrIPv6.sin6_addr.s6_addr, &ip6, sizeof(ip6));
+
+        sockAddrSize = sizeof(sockAddrIPv6);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv6;
+    }
+
+    // IPv4
+    else if ( address.GetProtocol() == HostAddress::IPv4Protocol ) {
+
+        memset(&sockAddrIPv4, 0, sizeof(sockAddrIPv4));
+        sockAddrIPv4.sin_family      = AF_INET;
+        sockAddrIPv4.sin_port        = htons(port);
+        sockAddrIPv4.sin_addr.s_addr = htonl(address.GetIPv4Address());
+
+        sockAddrSize = sizeof(sockAddrIPv4);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv4;
+    }
+
+    // unknown (should be unreachable)
+    else BT_ASSERT_X(false, "TcpSocketEngine::nativeConnect() : unknown network protocol");
+
+    // attempt connection
+    int connectResult = connect(m_socketDescriptor, sockAddrPtr, sockAddrSize);
+
+    // if failed, handle error
+    if ( connectResult == -1 ) {
+
+        // ensure state is set before checking errno
+        m_socketState = TcpSocket::UnconnectedState;
+
+        // set error type/message depending on errno
+        switch ( errno ) { // <-- potential thread issues later? but can't get error type from connectResult
+
+            case EISCONN:
+                m_socketState = TcpSocket::ConnectedState; // socket was already connected
+                break;
+            case ECONNREFUSED:
+            case EINVAL:
+                m_socketError = TcpSocket::ConnectionRefusedError;
+                m_errorString = "connection refused";
+                break;
+            case ETIMEDOUT:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "connection timed out";
+                break;
+            case EHOSTUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "host unreachable";
+                break;
+            case ENETUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "network unreachable";
+                break;
+            case EADDRINUSE:
+                m_socketError = TcpSocket::SocketResourceError;
+                m_errorString = "address already in use";
+                break;
+            case EACCES:
+            case EPERM:
+                m_socketError = TcpSocket::SocketAccessError;
+                m_errorString = "permission denied";
+                break;
+            default:
+                // any other errno: fail without updating error type/message
+                break;
+        }
+
+        // double check that we're not in 'connected' state; if so, return failure
+        if ( m_socketState != TcpSocket::ConnectedState )
+            return false;
+    }
+
+    // otherwise, we should be good
+    // update state & return success
+    m_socketState = TcpSocket::ConnectedState;
+    return true;
+}
+
+// Creates a TCP stream socket for the requested protocol family and stores its
+// descriptor in m_socketDescriptor.
+//
+// @protocol - IPv6Protocol selects AF_INET6; any other value selects AF_INET
+// returns true on success; on failure returns false and leaves m_socketDescriptor
+// untouched (m_socketError & m_errorString are set for the errno values listed below)
+bool TcpSocketEngine::nativeCreateSocket(HostAddress::NetworkProtocol protocol) {
+
+    // get protocol value for requested protocol type
+    const int protocolNum = ( (protocol == HostAddress::IPv6Protocol) ? AF_INET6
+                                                                      : AF_INET );
+
+    // attempt to create socket
+    const int socketFd = socket(protocolNum, SOCK_STREAM, IPPROTO_TCP);
+
+    // socket() signals failure with -1; descriptor 0 is legal (e.g. if stdin was
+    // closed) and must not be rejected - doing so would leak it and read a stale errno
+    if ( socketFd < 0 ) {
+
+        // see what error we got
+        switch ( errno ) {
+            case EPROTONOSUPPORT:
+            case EAFNOSUPPORT:
+            case EINVAL:
+                m_socketError = TcpSocket::UnsupportedSocketOperationError;
+                m_errorString = "protocol not supported";
+                break;
+            case ENFILE:
+            case EMFILE:
+            case ENOBUFS:
+            case ENOMEM:
+                m_socketError = TcpSocket::SocketResourceError;
+                m_errorString = "out of resources";
+                break;
+            case EACCES:
+                m_socketError = TcpSocket::SocketAccessError;
+                m_errorString = "permission denied";
+                break;
+            default:
+                // any other errno: fail without updating error type/message
+                break;
+        }
+
+        // return failure
+        return false;
+    }
+
+    // otherwise, store our socket FD & return success
+    m_socketDescriptor = socketFd;
+    return true;
+}
+
+// Queries the socket (ioctl / FIONREAD) for the number of bytes waiting to be read.
+//
+// returns that count, or -1 if the ioctl call fails
+int64_t TcpSocketEngine::nativeNumBytesAvailable(void) const {
+
+    int pendingBytes(0);
+    const int ioctlResult = ioctl(m_socketDescriptor, FIONREAD, (char*)&pendingBytes);
+
+    // a negative ioctl result signals failure, reported to the caller as -1
+    return ( ioctlResult < 0 ? -1 : static_cast<int64_t>(pendingBytes) );
+}
+
+// Reads up to @max bytes from the socket into @dest using read().
+//
+// returns the number of bytes read, or:
+//    0  if the connection was reset (ECONNRESET)
+//   -1  if no socket is held, or on any other read error
+//   -2  if no data was available for reading (EAGAIN)
+int64_t TcpSocketEngine::nativeRead(char* dest, size_t max) {
+
+    if ( !IsValid() )
+        return -1;
+
+    // success: report exactly what read() delivered
+    const ssize_t numRead = read(m_socketDescriptor, dest, max);
+    if ( numRead >= 0 )
+        return static_cast<int64_t>(numRead);
+
+    // failure: translate errno into our return codes
+    const int readError = errno;
+    if ( readError == EAGAIN )
+        return -2;      // No data was available for reading
+    if ( readError == ECONNRESET )
+        return 0;
+    return -1;
+}
+
+// Waits, via select(), until the socket is ready for reading (@isRead == true) or
+// writing (@isRead == false), or until @msecs milliseconds have elapsed.
+// A negative value for @msecs will block (forever) until ready.
+//
+// returns select()'s result: > 0 if the socket is ready, 0 on timeout, -1 on error
+// (also -1, with errno set to EBADF, if the descriptor cannot be placed in an fd_set)
+int TcpSocketEngine::nativeSelect(int msecs, bool isRead) const {
+
+    // FD_SET is undefined for descriptors outside [0, FD_SETSIZE); callers do not
+    // check IsValid() before waiting, so reject a closed (-1) or oversized descriptor
+    if ( m_socketDescriptor < 0 || m_socketDescriptor >= FD_SETSIZE ) {
+        errno = EBADF;
+        return -1;
+    }
+
+    // set up FD set
+    fd_set fds;
+    FD_ZERO(&fds);
+    FD_SET(m_socketDescriptor, &fds);
+
+    // setup our timeout (ignored below when msecs is negative)
+    timeval tv;
+    tv.tv_sec  = msecs / 1000;
+    tv.tv_usec = (msecs % 1000) * 1000;
+
+    // do 'select'
+    if ( isRead )
+        return select(m_socketDescriptor + 1, &fds, 0, 0, (msecs < 0 ? 0 : &tv));
+    else
+        return select(m_socketDescriptor + 1, 0, &fds, 0, (msecs < 0 ? 0 : &tv));
+}
+
+// Writes up to @length bytes from @data to the socket using write().
+//
+// returns the number of bytes written, or:
+//    0  if the write would have blocked (EAGAIN)
+//   -1  if the remote host closed the connection (EPIPE / ECONNRESET); the error
+//       type & message are recorded and the engine is closed
+//   write()'s own (negative) result on any other error
+int64_t TcpSocketEngine::nativeWrite(const char* data, size_t length) {
+
+    // success: report exactly what write() accepted
+    const ssize_t numWritten = write(m_socketDescriptor, data, length);
+    if ( numWritten >= 0 )
+        return static_cast<int64_t>(numWritten);
+
+    // failure: capture errno before doing anything else
+    const int writeError = errno;
+
+    // nothing written this time
+    if ( writeError == EAGAIN )
+        return 0;
+
+    // peer is gone: record why, then shut our side down
+    if ( writeError == EPIPE || writeError == ECONNRESET ) {
+        m_socketError = TcpSocket::RemoteHostClosedError;
+        m_errorString = "remote host closed connection";
+        Close();
+        return -1;
+    }
+
+    // any other error: pass write()'s result through
+    return static_cast<int64_t>(numWritten);
+}
diff --git a/src/api/internal/io/TcpSocketEngine_win_p.cpp b/src/api/internal/io/TcpSocketEngine_win_p.cpp

new file mode 100644 (file)

index 0000000..d1691ac
--- /dev/null
+++ b/src/api/internal/io/TcpSocketEngine_win_p.cpp
@@ -0,0 +1,275 @@
+// ***************************************************************************
+// TcpSocketEngine_win_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 15 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O for all Windows systems
+// ***************************************************************************
+
+#include "api/internal/io/TcpSocketEngine_p.h"
+#include "api/internal/io/NetWin_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstring>
+#include <iostream>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace BamTools {
+namespace Internal {
+
+
+} // namespace Internal
+} // namespace BamTools
+
+// --------------------------------
+// TcpSocketEngine implementation
+// --------------------------------
+
+// Closes the held socket with closesocket(); the result is not checked.
+// Resetting m_socketDescriptor is left to the caller.
+void TcpSocketEngine::nativeClose(void) {
+    closesocket(m_socketDescriptor);
+}
+
+// Connects m_socketDescriptor to @address on @port using WSAConnect().
+//
+// Builds a sockaddr_in6 or sockaddr_in depending on @address's protocol, then
+// attempts the connection. On failure, the WSAGetLastError() code is mapped onto
+// m_socketError & m_errorString (codes not listed in the switch leave both untouched).
+// WSAEISCONN is treated as success, since the socket is already connected.
+//
+// returns true (with m_socketState set to ConnectedState) on success, false otherwise
+bool TcpSocketEngine::nativeConnect(const HostAddress& address, const uint16_t port) {
+
+    // setup connection parameters from address/port
+    sockaddr_in  sockAddrIPv4;
+    sockaddr_in6 sockAddrIPv6;
+    sockaddr*    sockAddrPtr  = 0;
+    BT_SOCKLEN_T sockAddrSize = 0;
+
+    // IPv6
+    if ( address.GetProtocol() == HostAddress::IPv6Protocol ) {
+
+        memset(&sockAddrIPv6, 0, sizeof(sockAddrIPv6));
+        sockAddrIPv6.sin6_family = AF_INET6;
+        sockAddrIPv6.sin6_port   = htons(port);
+
+        // raw copy of the address bytes
+        // NOTE(review): assumes sizeof(IPv6Address) does not exceed the 16-byte s6_addr - confirm
+        IPv6Address ip6 = address.GetIPv6Address();
+        memcpy(&sockAddrIPv6.sin6_addr.s6_addr, &ip6, sizeof(ip6));
+
+        sockAddrSize = sizeof(sockAddrIPv6);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv6;
+    }
+
+    // IPv4
+    else if ( address.GetProtocol() == HostAddress::IPv4Protocol ) {
+
+        memset(&sockAddrIPv4, 0, sizeof(sockAddrIPv4));
+        sockAddrIPv4.sin_family      = AF_INET;
+        sockAddrIPv4.sin_port        = htons(port);
+        sockAddrIPv4.sin_addr.s_addr = htonl(address.GetIPv4Address());
+
+        sockAddrSize = sizeof(sockAddrIPv4);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv4;
+    }
+
+    // unknown (should be unreachable)
+    else BT_ASSERT_X(false, "TcpSocketEngine::nativeConnect() : unknown network protocol");
+
+    // attempt connection
+    const int connectResult = WSAConnect(m_socketDescriptor, sockAddrPtr, sockAddrSize, 0, 0, 0, 0);
+
+    // if failed, handle error
+    if ( connectResult == SOCKET_ERROR ) {
+
+        // ensure state is set before checking error code
+        m_socketState = TcpSocket::UnconnectedState;
+
+        // set error type/message depending on errorCode
+        const int errorCode = WSAGetLastError();
+        switch ( errorCode ) {
+            case WSANOTINITIALISED:
+                m_socketError = TcpSocket::UnknownSocketError;
+                m_errorString = "Windows socket functionality not properly initialized";
+                break;
+            case WSAEISCONN:
+                m_socketState = TcpSocket::ConnectedState; // socket already connected
+                break;
+            case WSAECONNREFUSED:
+            case WSAEINVAL:
+                m_socketError = TcpSocket::ConnectionRefusedError;
+                m_errorString = "connection refused";
+                break;
+            case WSAETIMEDOUT:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "connection timed out";
+                break;
+            case WSAEHOSTUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "host unreachable";
+                break;
+            case WSAENETUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "network unreachable";
+                break;
+            case WSAEADDRINUSE:
+                m_socketError = TcpSocket::SocketResourceError;
+                m_errorString = "address already in use";
+                break;
+            case WSAEACCES:
+                m_socketError = TcpSocket::SocketAccessError;
+                m_errorString = "permission denied";
+                break;
+            default:
+                // any other code: fail without updating error type/message
+                break;
+        }
+
+        // double check that we're not in 'connected' state; if so, return failure
+        if ( m_socketState != TcpSocket::ConnectedState )
+            return false;
+    }
+
+    // otherwise, we should be good
+    // update state & return success
+    m_socketState = TcpSocket::ConnectedState;
+    return true;
+}
+
+// Creates a TCP stream socket (WSASocket) for the requested protocol family and
+// stores it in m_socketDescriptor.
+//
+// @protocol - IPv6Protocol selects AF_INET6; any other value selects AF_INET
+// returns true on success; on failure returns false and leaves m_socketDescriptor
+// untouched (m_socketError & m_errorString are set for the codes listed below)
+bool TcpSocketEngine::nativeCreateSocket(HostAddress::NetworkProtocol protocol) {
+
+    // get protocol value for requested protocol type
+    const int protocolNum = ( (protocol == HostAddress::IPv6Protocol) ? AF_INET6 : AF_INET );
+
+    // attempt to create socket
+    SOCKET socketFd = WSASocket(protocolNum, SOCK_STREAM, IPPROTO_TCP, 0, 0, WSA_FLAG_OVERLAPPED);
+
+    // if we fetched an invalid socket descriptor
+    if ( socketFd == INVALID_SOCKET ) {
+
+        // set error type/message depending on error code
+        const int errorCode = WSAGetLastError();
+        switch ( errorCode ) {
+            case WSANOTINITIALISED:
+                m_socketError = TcpSocket::UnknownSocketError;
+                m_errorString = "Windows socket functionality not properly initialized";
+                break;
+            case WSAEAFNOSUPPORT:
+            case WSAESOCKTNOSUPPORT:
+            case WSAEPROTOTYPE:
+            case WSAEINVAL:
+                m_socketError = TcpSocket::UnsupportedSocketOperationError;
+                m_errorString = "protocol not supported";
+                break;
+            case WSAEMFILE:
+            case WSAENOBUFS:
+                m_socketError = TcpSocket::SocketResourceError;
+                m_errorString = "out of resources";
+                break;
+            default:
+                // any other code: fail without updating error type/message
+                break;
+        }
+
+        // return failure
+        return false;
+    }
+
+    // otherwise, store our socket FD & return success
+    // NOTE(review): SOCKET is narrowed to int here - confirm this is acceptable on 64-bit builds
+    m_socketDescriptor = static_cast<int>(socketFd);
+    return true;
+}
+
+// Queries the socket (WSAIoctl / FIONREAD) for the number of bytes waiting to be read.
+//
+// returns that count, or -1 if the ioctl call fails
+int64_t TcpSocketEngine::nativeNumBytesAvailable(void) const {
+
+    // FIONREAD reports its result as an unsigned long, so the in/out buffers use
+    // exactly that type (rather than relying on a wider, zero-initialized integer)
+    unsigned long numBytes(0);
+    unsigned long dummy(0);
+    DWORD bytesWritten(0);
+
+    const int ioctlResult = WSAIoctl( m_socketDescriptor, FIONREAD
+                                    , &dummy, sizeof(dummy)
+                                    , &numBytes, sizeof(numBytes)
+                                    , &bytesWritten, 0, 0
+                                    );
+    return ( ioctlResult == SOCKET_ERROR ? -1 : static_cast<int64_t>(numBytes) );
+}
+
+// Reads up to @max bytes from the socket into @dest using WSARecv().
+//
+// returns the number of bytes read, or:
+//   -1  if no socket is held, or on any error other than WSAEWOULDBLOCK
+//   -2  if nothing was read this time, but more may come later (WSAEWOULDBLOCK)
+int64_t TcpSocketEngine::nativeRead(char* dest, size_t max) {
+
+    // skip if invalid socket
+    if ( !IsValid() )
+        return -1;
+
+    // set up our WSA output buffer
+    WSABUF buf;
+    buf.buf = dest;
+    buf.len = static_cast<unsigned long>(max);
+
+    // attempt to read bytes
+    DWORD flags = 0;
+    DWORD bytesRead = 0;
+    const int readResult = WSARecv(m_socketDescriptor, &buf, 1, &bytesRead, &flags, 0, 0);
+
+    // if error encountered
+    if ( readResult == SOCKET_ERROR ) {
+        const int errorCode = WSAGetLastError();
+        switch ( errorCode ) {
+            case WSAEWOULDBLOCK: // nothing read this time, but more coming later
+                return -2;
+            default:
+                return -1;        // on any other errors
+        }
+    }
+
+    // WSARecv succeeded: the last-error value is only meaningful after a failed call
+    // (a success does not reset it), so it must not be consulted here - a stale
+    // WSAEWOULDBLOCK would otherwise make us discard bytes that were actually read
+    return static_cast<int64_t>(bytesRead);
+}
+
+// Waits, via select(), until the socket is ready for reading (@isRead == true) or
+// writing (@isRead == false), or until @msecs milliseconds have elapsed.
+// A negative value for @msecs will block (forever) until ready.
+//
+// returns the result of select()
+int TcpSocketEngine::nativeSelect(int msecs, bool isRead) const {
+
+    // the only socket we wait on is our own
+    fd_set socketSet;
+    FD_ZERO(&socketSet);
+    FD_SET(m_socketDescriptor, &socketSet);
+
+    // convert milliseconds to a timeval; no timeval at all means 'wait forever'
+    timeval timeout;
+    timeout.tv_sec  = msecs / 1000;
+    timeout.tv_usec = (msecs % 1000) * 1000;
+    timeval* timeoutPtr = ( msecs < 0 ? 0 : &timeout );
+
+    // place the set in the slot matching the requested direction
+    fd_set* readSet  = ( isRead ? &socketSet : 0 );
+    fd_set* writeSet = ( isRead ? 0 : &socketSet );
+
+    // do 'select'
+    return select(0, readSet, writeSet, 0, timeoutPtr);
+}
+
+// Writes up to @length bytes from @data to the socket using WSASend().
+//
+// returns the number of bytes written, or:
+//    0  if the write would have blocked (WSAEWOULDBLOCK)
+//   -1  on any other error (for a reset/aborted connection, the error type &
+//       message are recorded as well)
+int64_t TcpSocketEngine::nativeWrite(const char* data, size_t length) {
+
+    // describe the caller's bytes to WSA
+    WSABUF outBuffer;
+    outBuffer.buf = (char*)data;
+    outBuffer.len = length;
+
+    // attempt to write bytes
+    DWORD sendFlags = 0;
+    DWORD numSent = 0;
+    const int sendResult = WSASend(m_socketDescriptor, &outBuffer, 1, &numSent, sendFlags, 0, 0);
+
+    // success: report how much went out
+    if ( sendResult != SOCKET_ERROR )
+        return static_cast<int64_t>(numSent);
+
+    // failure: translate the WSA error code
+    const int errorCode = WSAGetLastError();
+
+    // nothing written this time
+    if ( errorCode == WSAEWOULDBLOCK )
+        return 0;
+
+    // connection is gone: record why before failing
+    if ( errorCode == WSAECONNRESET || errorCode == WSAECONNABORTED ) {
+        m_socketError = TcpSocket::NetworkError;
+        m_errorString = "connection reset or aborted";
+    }
+    return -1;
+}
diff --git a/src/api/internal/io/TcpSocket_p.cpp b/src/api/internal/io/TcpSocket_p.cpp

new file mode 100644 (file)

index 0000000..4ff53a8
--- /dev/null
+++ b/src/api/internal/io/TcpSocket_p.cpp
@@ -0,0 +1,432 @@
+// ***************************************************************************
+// TcpSocket_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic TCP I/O interface
+// ***************************************************************************
+
+#include "api/internal/io/ByteArray_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+#include "api/internal/io/TcpSocketEngine_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// ------------------------------------
+// static utility methods & constants
+// ------------------------------------
+
+namespace BamTools {
+namespace Internal {
+
+// constants
+
+// 0x4000 (16384) bytes: passed to the read buffer's constructor and used
+// as the growth step when TcpSocket::ReadLine(int64_t) enlarges its result
+static const size_t DEFAULT_BUFFER_SIZE = 0x4000;
+
+} // namespace Internal
+} // namespace BamTools
+
+// --------------------------
+// TcpSocket implementation
+// --------------------------
+
+// creates an unconnected socket: no engine, no cached descriptor, no remote port
+TcpSocket::TcpSocket(void)
+    : m_mode( IBamIODevice::NotOpen )
+//    , m_localPort( 0 )
+    , m_remotePort( 0 )
+    , m_engine( 0 )
+    , m_cachedSocketDescriptor( -1 )
+    , m_readBuffer( DEFAULT_BUFFER_SIZE )
+    , m_error( TcpSocket::UnknownSocketError )
+    , m_state( TcpSocket::UnconnectedState )
+{
+    // nothing else to do - all members are set up in the initializer list
+}
+
+// closes any open connection and releases the socket engine
+TcpSocket::~TcpSocket(void) {
+
+    // tear down an active connection (also clears cached connection data)
+    if ( m_state == TcpSocket::ConnectedState )
+        DisconnectFromHost();
+
+    // a failed connection attempt leaves an allocated engine behind while the
+    // state is still 'unconnected' - release it here so it is not leaked
+    // (ResetSocketEngine does nothing to the engine if it is already gone)
+    ResetSocketEngine();
+}
+
+// returns the size reported by the internal read buffer
+size_t TcpSocket::BufferBytesAvailable(void) const {
+    const size_t numBuffered = m_readBuffer.Size();
+    return numBuffered;
+}
+
+// returns true if the read buffer reports that a line can be read from it
+bool TcpSocket::CanReadLine(void) const {
+    const bool lineAvailable = m_readBuffer.CanReadLine();
+    return lineAvailable;
+}
+
+// discards the read buffer's contents (a 'discard', not a 'flush')
+void TcpSocket::ClearBuffer(void)
+{
+    m_readBuffer.Clear();
+}
+
+bool TcpSocket::ConnectImpl(const HostInfo& hostInfo,
+                            const std::string& port,
+                            IBamIODevice::OpenMode mode)
+{
+    // skip if we're already connected
+    if ( m_state == TcpSocket::ConnectedState ) {
+        m_error = TcpSocket::SocketResourceError; 
+        m_errorString = "socket already connected";
+        return false;
+    }
+
+    // reset socket state
+    m_hostName   = hostInfo.HostName();
+    m_mode       = mode;    
+    m_state      = TcpSocket::UnconnectedState;
+    m_error      = TcpSocket::UnknownSocketError;
+//    m_localPort  = 0;
+    m_remotePort = 0;
+//    m_localAddress.Clear();
+    m_remoteAddress.Clear();
+    m_readBuffer.Clear();
+
+    // fetch candidate addresses for requested host
+    vector<HostAddress> addresses = hostInfo.Addresses();
+    if ( addresses.empty() ) {
+        m_error = TcpSocket::HostNotFoundError;
+        m_errorString = "no IP addresses found for host";
+        return false;
+    }
+
+    // convert port string to integer
+    stringstream ss(port);
+    uint16_t portNumber(0);
+    ss >> portNumber;
+
+    // iterate through adddresses
+    vector<HostAddress>::const_iterator addrIter = addresses.begin();
+    vector<HostAddress>::const_iterator addrEnd  = addresses.end();
+    for ( ; addrIter != addrEnd; ++addrIter) {
+        const HostAddress& addr = (*addrIter);
+
+        // try to initialize socket engine with this address
+        if ( !InitializeSocketEngine(addr.GetProtocol()) ) {
+            // failure to initialize is OK here
+            // we'll just try the next available address
+            continue;
+        }
+
+        // attempt actual connection
+        if ( m_engine->Connect(addr, portNumber) ) {
+
+            // if connection successful, update our state & return true
+            m_mode = mode;
+//            m_localAddress  = m_engine->GetLocalAddress();
+//            m_localPort     = m_engine->GetLocalPort();
+            m_remoteAddress = m_engine->GetRemoteAddress();
+            m_remotePort    = m_engine->GetRemotePort();
+            m_cachedSocketDescriptor = m_engine->GetSocketDescriptor();
+            m_state = TcpSocket::ConnectedState;
+            return true;
+        }
+    }
+
+    // if we get here, no connection could be made
+    m_error = TcpSocket::HostNotFoundError;
+    m_errorString = "could not connect to any host addresses";
+    return false;
+}
+
+// convenience overload: formats the numeric port as text,
+// then forwards to the string-based ConnectToHost()
+bool TcpSocket::ConnectToHost(const string& hostName,
+                              uint16_t port,
+                              IBamIODevice::OpenMode mode)
+{
+    stringstream portText("");
+    portText << port;
+    const string portString = portText.str();
+    return ConnectToHost(hostName, portString, mode);
+}
+
+bool TcpSocket::ConnectToHost(const string& hostName,
+                              const string& port,
+                              IBamIODevice::OpenMode mode)
+{
+    // create new address object with requested host name
+    HostAddress hostAddress;
+    hostAddress.SetAddress(hostName);
+
+    HostInfo info;
+    // if host name was IP address ("x.x.x.x" or IPv6 format)
+    // otherwise host name was 'plain-text' ("www.foo.bar")
+    // we need to look up IP address(es)
+    if ( hostAddress.HasIPAddress() ) 
+        info.SetAddresses( vector<HostAddress>(1, hostAddress) );
+    else
+        info = HostInfo::Lookup(hostName, port);
+
+    // attempt connection on requested port
+    return ConnectImpl(info, port, mode);
+}
+
+// closes the connection (if any) and forgets all cached connection data
+void TcpSocket::DisconnectFromHost(void) {
+
+    // close socket engine & delete (only when actually connected)
+    const bool wasConnected = ( m_state == TcpSocket::ConnectedState );
+    if ( wasConnected ) {
+        ResetSocketEngine();
+    }
+
+    // forget who we were talking to
+    m_hostName.clear();
+    m_remoteAddress.Clear();
+//    m_localAddress.Clear();
+    m_remotePort = 0;
+//    m_localPort = 0;
+    m_cachedSocketDescriptor = -1;
+
+    // drop any unread data
+    // for future, make sure there's outgoing data that needs to be flushed
+    m_readBuffer.Clear();
+}
+
+TcpSocket::SocketError TcpSocket::GetError(void) const {
+    return m_error;
+}
+
+std::string TcpSocket::GetErrorString(void) const {
+    return m_errorString;
+}
+
+// returns a copy of the stored host name (empty after DisconnectFromHost)
+std::string TcpSocket::GetHostName(void) const {
+    return std::string(m_hostName);
+}
+
+//HostAddress TcpSocket::GetLocalAddress(void) const {
+//    return m_localAddress;
+//}
+
+//uint16_t TcpSocket::GetLocalPort(void) const {
+//    return m_localPort;
+//}
+
+// returns a copy of the stored remote address
+HostAddress TcpSocket::GetRemoteAddress(void) const {
+    return HostAddress(m_remoteAddress);
+}
+
+// returns the stored remote port (0 when no connection has been made)
+uint16_t TcpSocket::GetRemotePort(void) const {
+    const uint16_t remotePort = m_remotePort;
+    return remotePort;
+}
+
+// returns the current connection state (connected or unconnected)
+TcpSocket::SocketState TcpSocket::GetState(void) const {
+    const TcpSocket::SocketState currentState = m_state;
+    return currentState;
+}
+
+// replaces any existing socket engine with a fresh one for 'protocol'
+// returns: the result of the new engine's Initialize() call
+bool TcpSocket::InitializeSocketEngine(HostAddress::NetworkProtocol protocol) {
+
+    // shut down & delete the previous engine (if any)
+    ResetSocketEngine();
+
+    // the new engine is kept even if its initialization fails
+    m_engine = new TcpSocketEngine;
+    const bool initialized = m_engine->Initialize(protocol);
+    return initialized;
+}
+
+// true only if we have an engine, the engine is valid, and our state says 'connected'
+bool TcpSocket::IsConnected(void) const {
+    return ( m_engine != 0 )
+        && m_engine->IsValid()
+        && ( m_state == TcpSocket::ConnectedState );
+}
+
+// may be read in a look until desired data amount has been read
+// returns: number of bytes read, or -1 if error
+int64_t TcpSocket::Read(char* data, const unsigned int numBytes) {
+
+    // if we have data in buffer, just return it
+    if ( !m_readBuffer.IsEmpty() ) {
+        const size_t bytesRead = m_readBuffer.Read(data, numBytes);
+        return static_cast<int64_t>(bytesRead);
+    }
+
+    // otherwise, we'll need to fetch data from socket
+    // first make sure we have a valid socket engine
+    if ( m_engine == 0 ) {
+        // TODO: set error string/state?
+        return -1;
+    }
+
+    // fetch data from socket, return 0 for success, -1 for failure
+    // since this should be called in a loop, we'll pull the actual bytes on next iteration
+    return ( ReadFromSocket() ? 0 : -1 );
+}
+
+bool TcpSocket::ReadFromSocket(void) {
+
+    // check for any socket engine errors
+    if ( !m_engine->IsValid() ) {
+        m_errorString = "TcpSocket::ReadFromSocket - socket disconnected";
+        ResetSocketEngine();
+        return false;
+    }
+
+    // wait for ready read
+    bool timedOut;
+    bool isReadyRead = m_engine->WaitForRead(5000, &timedOut);
+
+    // if not ready
+    if ( !isReadyRead ) {
+
+        // if we simply timed out
+        if ( timedOut ) {
+            m_errorString = "TcpSocket::ReadFromSocket - timed out waiting for ready read";
+            // get error from engine ?
+            return false;
+        }
+
+        // otherwise, there was an error
+        else {
+            m_errorString = "TcpSocket::ReadFromSocket - encountered error while waiting for ready read";
+            // get error from engine ?
+            return false;
+        }
+    }
+
+    // #########################################################################
+    // clean this up - smells funky, but it's a key step so it has to be right
+    // #########################################################################
+
+    // get number of bytes available from socket
+    // (if 0, still try to read some data so we don't trigger any OS event behavior
+    //  that respond to repeated access to a remote closed socket)
+    int64_t bytesToRead = m_engine->NumBytesAvailable();
+    if ( bytesToRead < 0 ) {
+        m_errorString = "TcpSocket::ReadFromSocket - encountered error while determining numBytesAvailable";
+        // get error from engine ?
+        return false;
+    }
+    else if ( bytesToRead == 0 )
+        bytesToRead = 4096;
+
+    // make space in buffer & read from socket
+    char* buffer = m_readBuffer.Reserve(bytesToRead);
+    int64_t numBytesRead = m_engine->Read(buffer, bytesToRead);
+
+    // if error while reading
+    if ( numBytesRead == -1 ) {
+        m_errorString = "TcpSocket::ReadFromSocket - encountered error while reading bytes";
+        // get error from engine ?
+        return false;
+    }
+
+    // handle special case (no data, but not error)
+    if ( numBytesRead == -2 ) 
+        m_readBuffer.Chop(bytesToRead);
+
+    // return success
+    return true;
+}
+
+string TcpSocket::ReadLine(int64_t max) {
+
+    // prep result byte buffer
+    ByteArray result;
+
+    size_t bufferMax = ((max > static_cast<int64_t>(string::npos)) ? string::npos : static_cast<size_t>(max));
+    result.Resize(bufferMax);
+
+    // read data
+    int64_t readBytes(0);
+    if ( result.Size() == 0 ) {
+
+        if ( bufferMax == 0 )
+            bufferMax = string::npos;
+
+        result.Resize(1);
+
+        int64_t readResult;
+        do {
+            result.Resize( static_cast<size_t>(std::min(bufferMax, result.Size() + DEFAULT_BUFFER_SIZE)) );
+            readResult = ReadLine(result.Data()+readBytes, result.Size()-readBytes);
+            if ( readResult > 0 || readBytes == 0 )
+                readBytes += readResult;
+        } while ( readResult == DEFAULT_BUFFER_SIZE && result[static_cast<size_t>(readBytes-1)] != '\n' );
+
+    } else
+        readBytes = ReadLine(result.Data(), result.Size());
+
+    // clean up byte buffer
+    if ( readBytes <= 0 )
+        result.Clear();
+    else
+        result.Resize(static_cast<size_t>(readBytes));
+
+    // return byte buffer as string
+    return string( result.ConstData(), result.Size() );
+}
+
+int64_t TcpSocket::ReadLine(char* dest, size_t max) {
+    
+    // wait for buffer to contain line contents
+    if ( !WaitForReadLine() ) {
+        m_errorString = "TcpSocket::ReadLine - error waiting for read line";
+        return -1;
+    }
+    
+    // leave room for null term
+    if ( max < 2 )
+        return -1;
+    --max;
+
+    // read from buffer, handle newlines
+    int64_t readSoFar = m_readBuffer.ReadLine(dest, max);
+    if ( readSoFar && dest[readSoFar-1] == '\n' ) {
+
+        // adjust for windows-style '\r\n'
+        if ( readSoFar > 1 && dest[readSoFar-2] == '\r') {
+            --readSoFar;
+            dest[readSoFar-1] = '\n';
+        }
+    }
+
+    // null terminate & return number of bytes read
+    dest[readSoFar] = '\0';
+    return readSoFar;
+}
+
+// closes & deletes the socket engine (if one exists),
+// then marks this socket as unconnected
+void TcpSocket::ResetSocketEngine(void) {
+
+    // mark ourselves unconnected & forget the cached socket handle
+    m_cachedSocketDescriptor = -1;
+    m_state = TcpSocket::UnconnectedState;
+
+    // nothing more to do without an engine
+    if ( m_engine == 0 )
+        return;
+
+    // shut down socket engine
+    m_engine->Close();
+    delete m_engine;
+    m_engine = 0;
+}
+
+// keeps fetching from the socket until the read buffer can supply a line
+// returns: true once a line can be read (immediately, if already possible),
+//          false as soon as a fetch from the socket fails
+bool TcpSocket::WaitForReadLine(void) {
+
+    for ( ; ; ) {
+
+        // done as soon as a line can be read
+        if ( CanReadLine() )
+            return true;
+
+        // otherwise pull more data, giving up on failure
+        if ( !ReadFromSocket() ) 
+            return false;
+    }
+}
+
+int64_t TcpSocket::Write(const char* data, const unsigned int numBytes) {
+
+    // single-shot attempt at write (not buffered, just try to shove the data through socket)
+    // this method purely exists to send 'small' HTTP requests/FTP commands from client to server
+
+    int64_t bytesWritten(0);
+
+    // wait for our socket to be write-able
+    bool timedOut;
+    bool isReadyWrite = m_engine->WaitForWrite(3000, &timedOut);
+    if ( isReadyWrite )
+        bytesWritten = m_engine->Write(data, numBytes);
+    else {
+        // timeout is OK (with current setup), we'll just return 0 & try again
+        // but we need to report if engine encountered some other error
+        if ( !timedOut ) {
+            // TODO: set error string
+            bytesWritten = -1;
+        }
+    }
+
+    // return actual number of bytes written to socket
+    return bytesWritten;
+}
diff --git a/src/api/internal/io/TcpSocket_p.h b/src/api/internal/io/TcpSocket_p.h

new file mode 100644 (file)

index 0000000..3c5f2fc
--- /dev/null
+++ b/src/api/internal/io/TcpSocket_p.h
@@ -0,0 +1,124 @@
+// ***************************************************************************
+// TcpSocket_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic TCP I/O interface
+// ***************************************************************************
+
+#ifndef TCPSOCKET_P_H
+#define TCPSOCKET_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include "api/internal/io/HostInfo_p.h"
+#include "api/internal/io/RollingBuffer_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class TcpSocketEngine;
+
+// TcpSocket: client-side TCP connection with a buffered, line-aware read
+// interface and a simple single-shot write.
+//
+// The actual socket calls are delegated to a TcpSocketEngine, which this class
+// creates for each connection attempt; incoming data is staged in a RollingBuffer.
+class TcpSocket {
+
+    // enums
+    public:
+        // error categories reported by GetError()
+        enum SocketError { UnknownSocketError     = -1
+                         , ConnectionRefusedError = 0
+                         , RemoteHostClosedError
+                         , HostNotFoundError
+                         , SocketAccessError
+                         , SocketResourceError
+                         , SocketTimeoutError
+                         , NetworkError
+                         , UnsupportedSocketOperationError
+                         };
+
+        // connection states reported by GetState()
+        enum SocketState { UnconnectedState = 0
+                         , ConnectedState
+                         };
+
+    // ctor & dtor
+    public:
+        TcpSocket(void);
+        ~TcpSocket(void);
+
+    // TcpSocket interface
+    public:
+
+        // connection methods
+        // (both ConnectToHost overloads return true if a connection was made)
+        bool ConnectToHost(const std::string& hostName,
+                           const uint16_t port,        // Connect("host", 80)
+                           IBamIODevice::OpenMode mode = IBamIODevice::ReadOnly);
+        bool ConnectToHost(const std::string& hostName,
+                           const std::string& port,    // Connect("host", "80")
+                           IBamIODevice::OpenMode mode = IBamIODevice::ReadOnly);
+        void DisconnectFromHost(void);
+        bool IsConnected(void) const;
+
+        // I/O methods
+        size_t BufferBytesAvailable(void) const;
+        bool CanReadLine(void) const;
+        void ClearBuffer(void); // force buffer to clear (not a 'flush', just a 'discard')
+        // returns bytes copied from the buffer, 0 if data was only fetched from the
+        // socket (call again to receive it), or -1 on error
+        int64_t Read(char* data, const unsigned int numBytes);
+        // max == 0 means 'no limit'
+        std::string ReadLine(int64_t max = 0);
+        // 'dest' must have room for 'max' bytes, including the null terminator
+        int64_t ReadLine(char* dest, size_t max);
+        bool WaitForReadLine(void);
+        // single, unbuffered write attempt; returns bytes written, 0 on timeout, -1 on error
+        int64_t Write(const char* data, const unsigned int numBytes);
+
+        // connection values
+        std::string GetHostName(void) const;
+//        HostAddress GetLocalAddress(void) const;
+//        uint16_t    GetLocalPort(void) const;
+        HostAddress GetRemoteAddress(void) const;
+        uint16_t    GetRemotePort(void) const;
+
+        // connection status
+        TcpSocket::SocketError GetError(void) const;
+        TcpSocket::SocketState GetState(void) const;
+        std::string GetErrorString(void) const;
+
+    // internal methods
+    private:
+        bool ConnectImpl(const HostInfo& hostInfo,
+                         const std::string& port,
+                         IBamIODevice::OpenMode mode);
+        bool InitializeSocketEngine(HostAddress::NetworkProtocol protocol);
+        bool ReadFromSocket(void);
+        void ResetSocketEngine(void);
+
+    // data members
+    private:
+        IBamIODevice::OpenMode m_mode;
+
+        std::string m_hostName;
+//        uint16_t    m_localPort;
+        uint16_t    m_remotePort;
+//        HostAddress m_localAddress;
+        HostAddress m_remoteAddress;
+
+        // created with 'new' in InitializeSocketEngine(), deleted in ResetSocketEngine();
+        // null whenever no engine exists
+        TcpSocketEngine* m_engine;
+        // descriptor reported by the engine after a successful connect, -1 otherwise
+        int m_cachedSocketDescriptor;
+
+        // staging area for data fetched from the socket but not yet consumed
+        RollingBuffer m_readBuffer;
+
+        TcpSocket::SocketError m_error;
+        TcpSocket::SocketState m_state;
+        std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // TCPSOCKET_P_H
diff --git a/src/api/internal/sam/CMakeLists.txt b/src/api/internal/sam/CMakeLists.txt

new file mode 100644 (file)

index 0000000..4b2bce2
--- /dev/null
+++ b/src/api/internal/sam/CMakeLists.txt
@@ -0,0 +1,17 @@
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/sam
+# ==========================
+
+# directory holding the internal SAM sources
+# (InternalDir is not set in this file - presumably provided by the including CMakeLists.txt)
+set ( InternalSamDir "${InternalDir}/sam" )
+
+# list of internal SAM source files, exported to the parent scope
+set ( InternalSamSources
+        ${InternalSamDir}/SamFormatParser_p.cpp
+        ${InternalSamDir}/SamFormatPrinter_p.cpp
+        ${InternalSamDir}/SamHeaderValidator_p.cpp
+
+        PARENT_SCOPE # <-- leave this last
+)
+
diff --git a/src/api/internal/sam/SamFormatParser_p.cpp b/src/api/internal/sam/SamFormatParser_p.cpp

new file mode 100644 (file)

index 0000000..74c1fed
--- /dev/null
+++ b/src/api/internal/sam/SamFormatParser_p.cpp
@@ -0,0 +1,222 @@
+// ***************************************************************************
+// SamFormatParser.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for parsing SAM header text into SamHeader object
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamFormatParser_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// binds the parser to the header object that Parse() will populate
+SamFormatParser::SamFormatParser(SamHeader& header)
+    : m_header( header )
+{
+    // nothing else to set up
+}
+
+// nothing to release here - the dtor body is empty
+SamFormatParser::~SamFormatParser(void)
+{
+}
+
+// replaces the header's contents with the data parsed from 'headerText'
+// (an empty text simply leaves the header cleared)
+void SamFormatParser::Parse(const string& headerText) {
+
+    // always start from an empty header
+    m_header.Clear();
+
+    // parse the text one line at a time (if there is any text at all)
+    if ( !headerText.empty() ) {
+        istringstream headerStream(headerText);
+        for ( string headerLine(""); getline(headerStream, headerLine); )
+            ParseSamLine(headerLine);
+    }
+}
+
+void SamFormatParser::ParseSamLine(const string& line) {
+
+    // skip if line is not long enough to contain true values
+    if ( line.length() < 5 ) return;
+
+    // determine token at beginning of line
+    const string firstToken = line.substr(0,3);
+    string restOfLine = line.substr(4);
+    if      ( firstToken == Constants::SAM_HD_BEGIN_TOKEN) ParseHDLine(restOfLine);
+    else if ( firstToken == Constants::SAM_SQ_BEGIN_TOKEN) ParseSQLine(restOfLine);
+    else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);
+    else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);
+    else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
+    else {
+        const string message = string("unknown token: ") + firstToken;
+        throw BamException("SamFormatParser::ParseSamLine", message);
+    }
+}
+
+void SamFormatParser::ParseHDLine(const string& line) {
+
+    // split HD lines into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+
+        // get tag/value
+        const string tokenTag = (*tokenIter).substr(0,2);
+        const string tokenValue = (*tokenIter).substr(3);
+
+        // set header contents
+        if      ( tokenTag == Constants::SAM_HD_VERSION_TAG    ) m_header.Version    = tokenValue;
+        else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG  ) m_header.SortOrder  = tokenValue;
+        else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
+        else {
+            const string message = string("unknown HD tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseHDLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !m_header.HasVersion() )
+        throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
+}
+
+void SamFormatParser::ParseSQLine(const string& line) {
+
+    SamSequence seq;
+
+    // split SQ line into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+
+        // get tag/value
+        const string tokenTag = (*tokenIter).substr(0,2);
+        const string tokenValue = (*tokenIter).substr(3);
+
+        // set sequence contents
+        if      ( tokenTag == Constants::SAM_SQ_NAME_TAG       ) seq.Name = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG     ) seq.Length = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG   ) seq.Checksum = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG    ) seq.Species = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_URI_TAG        ) seq.URI = tokenValue;
+        else {
+            const string message = string("unknown SQ tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseSQLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !seq.HasName() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
+    if ( !seq.HasLength() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
+
+    // store SAM sequence entry
+    m_header.Sequences.Add(seq);
+}
+
+void SamFormatParser::ParseRGLine(const string& line) {
+
+    SamReadGroup rg;
+
+    // split string into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+
+        // get token tag/value
+        const string tokenTag = (*tokenIter).substr(0,2);
+        const string tokenValue = (*tokenIter).substr(3);
+
+        // set read group contents
+        if      ( tokenTag == Constants::SAM_RG_ID_TAG                  ) rg.ID = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG         ) rg.Description = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG           ) rg.FlowOrder = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG         ) rg.KeySequence = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG             ) rg.Library = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG        ) rg.PlatformUnit = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG      ) rg.ProductionDate = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG             ) rg.Program = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG              ) rg.Sample = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG           ) rg.SequencingCenter = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG       ) rg.SequencingTechnology = tokenValue;
+        else {
+            const string message = string("unknown RG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseRGLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !rg.HasID() )
+        throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
+
+    // store SAM read group entry
+    m_header.ReadGroups.Add(rg);
+}
+
+void SamFormatParser::ParsePGLine(const string& line) {
+
+    SamProgram pg;
+
+    // split string into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+
+        // get token tag/value
+        const string tokenTag = (*tokenIter).substr(0,2);
+        const string tokenValue = (*tokenIter).substr(3);
+
+        // set program record contents
+        if      ( tokenTag == Constants::SAM_PG_ID_TAG              ) pg.ID = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_NAME_TAG            ) pg.Name = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG     ) pg.CommandLine = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_VERSION_TAG         ) pg.Version = tokenValue;
+        else {
+            const string message = string("unknown PG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParsePGLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !pg.HasID() )
+        throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
+
+    // store SAM program entry
+    m_header.Programs.Add(pg);
+}
+
+// comment lines need no parsing - the text is appended to the header's comment list as-is
+void SamFormatParser::ParseCOLine(const string& line)
+{
+    m_header.Comments.push_back(line);
+}
+
+// breaks 'line' into the pieces found between occurrences of 'delim'
+// (pieces are extracted with getline, so an empty piece after a trailing delimiter is not reported)
+const vector<string> SamFormatParser::Split(const string& line, const char delim) {
+
+    vector<string> pieces;
+
+    // let the stream do the splitting, one piece per getline call
+    stringstream source(line);
+    for ( string piece; getline(source, piece, delim); )
+        pieces.push_back(piece);
+
+    return pieces;
+}
diff --git a/src/api/internal/sam/SamFormatParser_p.h b/src/api/internal/sam/SamFormatParser_p.h

new file mode 100644 (file)

index 0000000..cf6d54c
--- /dev/null
+++ b/src/api/internal/sam/SamFormatParser_p.h
@@ -0,0 +1,61 @@
+// ***************************************************************************
+// SamFormatParser.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 23 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for parsing SAM header text into SamHeader object
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PARSER_H
+#define SAM_FORMAT_PARSER_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+// SamFormatParser: fills a SamHeader object from SAM-formatted header text.
+// The parser keeps a reference to the header it was constructed with, so that
+// header must outlive the parser.
+class SamFormatParser {
+
+    // ctor & dtor
+    public:
+        SamFormatParser(BamTools::SamHeader& header);
+        ~SamFormatParser(void);
+
+    // parse text & populate header data
+    public:
+        // clears the header, then parses 'headerText' line by line
+        void Parse(const std::string& headerText);
+
+    // internal methods
+    private:
+        // dispatches one header line to the matching Parse??Line method
+        void ParseSamLine(const std::string& line);
+        // one parser per header record type; each receives the line without its leading token
+        void ParseHDLine(const std::string& line);
+        void ParseSQLine(const std::string& line);
+        void ParseRGLine(const std::string& line);
+        void ParsePGLine(const std::string& line);
+        void ParseCOLine(const std::string& line);
+        // splits 'line' into the pieces separated by 'delim'
+        const std::vector<std::string> Split(const std::string& line, const char delim);
+
+    // data members
+    private:
+        // header being populated (not owned)
+        SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PARSER_H
diff --git a/src/api/internal/sam/SamFormatPrinter_p.cpp b/src/api/internal/sam/SamFormatPrinter_p.cpp

new file mode 100644 (file)

index 0000000..5a51a2f
--- /dev/null
+++ b/src/api/internal/sam/SamFormatPrinter_p.cpp
@@ -0,0 +1,219 @@
+// ***************************************************************************
+// SamFormatPrinter.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamFormatPrinter_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+// Builds one header field: SAM_TAB, then the tag, then SAM_COLON, then the value.
+static inline
+const string FormatTag(const string& tag, const string& value) {
+    string field;
+    field += Constants::SAM_TAB;
+    field += tag;
+    field += Constants::SAM_COLON;
+    field += value;
+    return field;
+}
+
+// ---------------------------------
+// SamFormatPrinter implementation
+// ---------------------------------
+
+// Binds the printer to the header it will format (stored in m_header).
+SamFormatPrinter::SamFormatPrinter(const SamHeader& header) : m_header(header) { }
+
+// Nothing to clean up.
+SamFormatPrinter::~SamFormatPrinter(void) { }
+
+// Renders the header as SAM text, one record group after another:
+// @HD, @SQ, @RG, @PG, then @CO.
+const string SamFormatPrinter::ToString(void) const {
+
+    // start from an empty buffer
+    stringstream buffer("");
+
+    // each helper appends its own section to the buffer
+    PrintHD(buffer);
+    PrintSQ(buffer);
+    PrintRG(buffer);
+    PrintPG(buffer);
+    PrintCO(buffer);
+
+    // hand back the accumulated text
+    const string headerText = buffer.str();
+    return headerText;
+}
+
+// Writes the @HD line to 'out'. Nothing is written unless the header has a version.
+void SamFormatPrinter::PrintHD(std::stringstream& out) const {
+
+    // without a version there is no @HD line at all
+    if ( !m_header.HasVersion() )
+        return;
+
+    // @HD VN:<Version>
+    out << Constants::SAM_HD_BEGIN_TOKEN;
+    out << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version);
+
+    // SO:<SortOrder>
+    if ( m_header.HasSortOrder() ) {
+        out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder);
+    }
+
+    // GO:<GroupOrder>
+    if ( m_header.HasGroupOrder() ) {
+        out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder);
+    }
+
+    // terminate the line
+    out << endl;
+}
+
+// Writes one @SQ line to 'out' per entry in the header's sequence dictionary.
+// Name and length are always written; the other tags only when present.
+void SamFormatPrinter::PrintSQ(std::stringstream& out) const {
+
+    for ( SamSequenceConstIterator it   = m_header.Sequences.ConstBegin(),
+                                   stop = m_header.Sequences.ConstEnd();
+          it != stop; ++it )
+    {
+        // @SQ SN:<Name> LN:<Length>
+        out << Constants::SAM_SQ_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_SQ_NAME_TAG, it->Name);
+        out << FormatTag(Constants::SAM_SQ_LENGTH_TAG, it->Length);
+
+        // AS:<AssemblyID>
+        if ( it->HasAssemblyID() ) {
+            out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, it->AssemblyID);
+        }
+
+        // M5:<Checksum>
+        if ( it->HasChecksum() ) {
+            out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, it->Checksum);
+        }
+
+        // SP:<Species>
+        if ( it->HasSpecies() ) {
+            out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, it->Species);
+        }
+
+        // UR:<URI>
+        if ( it->HasURI() ) {
+            out << FormatTag(Constants::SAM_SQ_URI_TAG, it->URI);
+        }
+
+        // terminate the line
+        out << endl;
+    }
+}
+
+// Writes one @RG line to 'out' per entry in the header's read group dictionary.
+// The ID is always written; every other tag only when the read group has it.
+void SamFormatPrinter::PrintRG(std::stringstream& out) const {
+
+    for ( SamReadGroupConstIterator it   = m_header.ReadGroups.ConstBegin(),
+                                    stop = m_header.ReadGroups.ConstEnd();
+          it != stop; ++it )
+    {
+        // @RG ID:<ID>
+        out << Constants::SAM_RG_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_RG_ID_TAG, it->ID);
+
+        // CN:<SequencingCenter>
+        if ( it->HasSequencingCenter() ) {
+            out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, it->SequencingCenter);
+        }
+
+        // DS:<Description>
+        if ( it->HasDescription() ) {
+            out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, it->Description);
+        }
+
+        // DT:<ProductionDate>
+        if ( it->HasProductionDate() ) {
+            out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, it->ProductionDate);
+        }
+
+        // FO:<FlowOrder>
+        if ( it->HasFlowOrder() ) {
+            out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, it->FlowOrder);
+        }
+
+        // KS:<KeySequence>
+        if ( it->HasKeySequence() ) {
+            out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, it->KeySequence);
+        }
+
+        // LB:<Library>
+        if ( it->HasLibrary() ) {
+            out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, it->Library);
+        }
+
+        // PG:<Program>
+        if ( it->HasProgram() ) {
+            out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, it->Program);
+        }
+
+        // PI:<PredictedInsertSize>
+        if ( it->HasPredictedInsertSize() ) {
+            out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, it->PredictedInsertSize);
+        }
+
+        // PL:<SequencingTechnology>
+        if ( it->HasSequencingTechnology() ) {
+            out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, it->SequencingTechnology);
+        }
+
+        // PU:<PlatformUnit>
+        if ( it->HasPlatformUnit() ) {
+            out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, it->PlatformUnit);
+        }
+
+        // SM:<Sample>
+        if ( it->HasSample() ) {
+            out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, it->Sample);
+        }
+
+        // terminate the line
+        out << endl;
+    }
+}
+
+// Writes one @PG line to 'out' per program record in the header.
+// The ID is always written; the remaining tags only when present.
+void SamFormatPrinter::PrintPG(std::stringstream& out) const {
+
+    for ( SamProgramConstIterator it   = m_header.Programs.ConstBegin(),
+                                  stop = m_header.Programs.ConstEnd();
+          it != stop; ++it )
+    {
+        // @PG ID:<ID>
+        out << Constants::SAM_PG_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_PG_ID_TAG, it->ID);
+
+        // PN:<Name>
+        if ( it->HasName() ) {
+            out << FormatTag(Constants::SAM_PG_NAME_TAG, it->Name);
+        }
+
+        // CL:<CommandLine>
+        if ( it->HasCommandLine() ) {
+            out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, it->CommandLine);
+        }
+
+        // PP:<PreviousProgramID>
+        if ( it->HasPreviousProgramID() ) {
+            out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, it->PreviousProgramID);
+        }
+
+        // VN:<Version>
+        if ( it->HasVersion() ) {
+            out << FormatTag(Constants::SAM_PG_VERSION_TAG, it->Version);
+        }
+
+        // terminate the line
+        out << endl;
+    }
+}
+
+// Writes one "@CO <Comment>" line to 'out' per header comment, in stored order.
+void SamFormatPrinter::PrintCO(std::stringstream& out) const {
+
+    const vector<string>& comments = m_header.Comments;
+    for ( vector<string>::size_type i = 0; i < comments.size(); ++i ) {
+
+        // @CO <Comment>
+        out << Constants::SAM_CO_BEGIN_TOKEN << Constants::SAM_TAB;
+        out << comments[i] << endl;
+    }
+}
diff --git a/src/api/internal/sam/SamFormatPrinter_p.h b/src/api/internal/sam/SamFormatPrinter_p.h

new file mode 100644 (file)

index 0000000..ea29181
--- /dev/null
+++ b/src/api/internal/sam/SamFormatPrinter_p.h
@@ -0,0 +1,59 @@
+// ***************************************************************************
+// SamFormatPrinter.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PRINTER_H
+#define SAM_FORMAT_PRINTER_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+// Formats the contents of a SamHeader as SAM header text.
+// Holds only a const reference to the header being printed.
+class SamFormatPrinter {
+
+    public:
+        // ctor & dtor
+        SamFormatPrinter(const BamTools::SamHeader& header);
+        ~SamFormatPrinter(void);
+
+        // generates SAM-formatted string from header data
+        const std::string ToString(void) const;
+
+    private:
+        // internal methods: each one appends a single kind of record to 'out'
+        void PrintHD(std::stringstream& out) const;
+        void PrintSQ(std::stringstream& out) const;
+        void PrintRG(std::stringstream& out) const;
+        void PrintPG(std::stringstream& out) const;
+        void PrintCO(std::stringstream& out) const;
+
+        // data members
+        const SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PRINTER_H
diff --git a/src/api/internal/sam/SamHeaderValidator_p.cpp b/src/api/internal/sam/SamHeaderValidator_p.cpp

new file mode 100644 (file)

index 0000000..6bcb8a9
--- /dev/null
+++ b/src/api/internal/sam/SamHeaderValidator_p.cpp
@@ -0,0 +1,524 @@
+// ***************************************************************************
+// SamHeaderValidator.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for validating SamHeader data
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamHeaderValidator_p.h"
+#include "api/internal/sam/SamHeaderVersion_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cctype>
+#include <set>
+#include <sstream>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// -------------------------
+
+// Returns true if 'lhs' and 'rhs' are equal when letter case is ignored.
+// Characters are compared one at a time after toupper(), so only whatever
+// case mapping toupper() provides for single chars is applied.
+static
+bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
+
+    // can omit checking chars if lengths not equal
+    // (kept as size_type: no narrowing of the string lengths to int)
+    const string::size_type length = lhs.length();
+    if ( length != rhs.length() )
+        return false;
+
+    // do *basic* toupper checks on each string's chars
+    // NOTE: chars go through unsigned char first - handing toupper() a negative
+    //       value (any char >= 0x80 where char is signed) is undefined behavior
+    for ( string::size_type i = 0; i < length; ++i ) {
+        const int lhsUpper = toupper( static_cast<unsigned char>(lhs[i]) );
+        const int rhsUpper = toupper( static_cast<unsigned char>(rhs[i]) );
+        if ( lhsUpper != rhsUpper )
+            return false;
+    }
+
+    // otherwise OK
+    return true;
+}
+
+// ------------------------------------------------------------------------
+// Allow validation rules to vary, as needed, between SAM header versions
+//
+// use SAM_VERSION_X_Y to tag important changes
+//
+// Together, they will allow for comparisons like:
+// if ( m_version < SAM_VERSION_2_0 ) {
+//     // use some older rule
+// } else {
+//     // use rule introduced with version 2.0
+// }
+
+// one constant per known SAM header version (major, minor)
+static const SamHeaderVersion SAM_VERSION_1_0(1, 0);
+static const SamHeaderVersion SAM_VERSION_1_1(1, 1);
+static const SamHeaderVersion SAM_VERSION_1_2(1, 2);
+static const SamHeaderVersion SAM_VERSION_1_3(1, 3);
+static const SamHeaderVersion SAM_VERSION_1_4(1, 4);
+
+// TODO: This functionality is currently unused.
+//       Make validation "version-aware."
+//
+// ------------------------------------------------------------------------
+
+// fixed text wrapped around every stored message (see AddError / AddWarning)
+const string SamHeaderValidator::ERROR_PREFIX("ERROR: ");
+const string SamHeaderValidator::WARN_PREFIX("WARNING: ");
+const string SamHeaderValidator::NEWLINE("\n");
+
+// Binds the validator to the header it will check (stored in m_header).
+SamHeaderValidator::SamHeaderValidator(const SamHeader& header) : m_header(header) { }
+
+// Nothing to clean up.
+SamHeaderValidator::~SamHeaderValidator(void) { }
+
+// Stores 'message' as an error, wrapped as ERROR_PREFIX + message + NEWLINE.
+void SamHeaderValidator::AddError(const string& message) {
+    string entry(ERROR_PREFIX);
+    entry += message;
+    entry += NEWLINE;
+    m_errorMessages.push_back(entry);
+}
+
+// Stores 'message' as a warning, wrapped as WARN_PREFIX + message + NEWLINE.
+void SamHeaderValidator::AddWarning(const string& message) {
+    string entry(WARN_PREFIX);
+    entry += message;
+    entry += NEWLINE;
+    m_warningMessages.push_back(entry);
+}
+
+// Writes a summary line followed by every stored error message to 'stream'.
+// Writes nothing at all when there are no errors.
+void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
+
+    // nothing to report
+    const vector<string>::size_type errorCount = m_errorMessages.size();
+    if ( errorCount == 0 )
+        return;
+
+    // summary line first
+    stream << "* SAM header has " << errorCount << " errors:" << endl;
+
+    // then the messages themselves (each already ends in NEWLINE)
+    for ( vector<string>::size_type i = 0; i < errorCount; ++i )
+        stream << m_errorMessages[i];
+}
+
+// Writes all collected messages to 'stream': errors first, then warnings.
+void SamHeaderValidator::PrintMessages(ostream& stream) {
+
+    // errors
+    PrintErrorMessages(stream);
+
+    // warnings
+    PrintWarningMessages(stream);
+}
+
+// Writes a summary line followed by every stored warning message to 'stream'.
+// Writes nothing at all when there are no warnings.
+void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
+
+    // nothing to report
+    const vector<string>::size_type warningCount = m_warningMessages.size();
+    if ( warningCount == 0 )
+        return;
+
+    // summary line first
+    stream << "* SAM header has " << warningCount << " warnings:" << endl;
+
+    // then the messages themselves (each already ends in NEWLINE)
+    for ( vector<string>::size_type i = 0; i < warningCount; ++i )
+        stream << m_warningMessages[i];
+}
+
+// Entry point for validation. Returns true only if every stage passes.
+bool SamHeaderValidator::Validate(void) {
+
+    // every stage runs, even after an earlier one has failed,
+    // so that all problems end up in the message lists
+    const bool metadataOk   = ValidateMetadata();
+    const bool sequencesOk  = ValidateSequenceDictionary();
+    const bool readGroupsOk = ValidateReadGroupDictionary();
+    const bool programsOk   = ValidateProgramChain();
+
+    return ( metadataOk && sequencesOk && readGroupsOk && programsOk );
+}
+
+// Checks the header 'metadata': version, sort order and group order.
+// All three checks always run; returns true only if all of them pass.
+bool SamHeaderValidator::ValidateMetadata(void) {
+    const bool versionOk    = ValidateVersion();
+    const bool sortOrderOk  = ValidateSortOrder();
+    const bool groupOrderOk = ValidateGroupOrder();
+    return ( versionOk && sortOrderOk && groupOrderOk );
+}
+
+// Checks the syntax of the header version (VN) tag.
+// A missing version only earns a warning. A version that is present must look
+// like <digits><period><digits>, otherwise an error is recorded.
+bool SamHeaderValidator::ValidateVersion(void) {
+
+    const string& version = m_header.Version;
+
+    // warn if version not present
+    if ( version.empty() ) {
+        AddWarning("Version (VN) missing. Not required, but strongly recommended");
+        return true;
+    }
+
+    // must contain a period...
+    const size_t periodPosition = version.find(Constants::SAM_PERIOD);
+    bool wellFormed = ( periodPosition != string::npos );
+
+    // ...with a non-empty, all-digit major version before it...
+    if ( wellFormed ) {
+        const string majorVersion = version.substr(0, periodPosition);
+        wellFormed = ( !majorVersion.empty() && ContainsOnlyDigits(majorVersion) );
+    }
+
+    // ...and a non-empty, all-digit minor version after it
+    if ( wellFormed ) {
+        const string minorVersion = version.substr(periodPosition + 1);
+        wellFormed = ( !minorVersion.empty() && ContainsOnlyDigits(minorVersion) );
+    }
+
+    // any of the above failing is reported the same way
+    if ( !wellFormed ) {
+        AddError("Invalid version (VN) format: " + version);
+        return false;
+    }
+
+    // TODO: check if version is not just syntactically OK,
+    // but is also a valid SAM version ( 1.0 .. CURRENT )
+
+    // all checked out this far, then version is OK
+    return true;
+}
+
+// Returns true if every char of 's' appears in Constants::SAM_DIGITS.
+// Callers are expected to pass a non-empty string (an empty one yields true).
+bool SamHeaderValidator::ContainsOnlyDigits(const string& s) {
+    return s.find_first_not_of(Constants::SAM_DIGITS) == string::npos;
+}
+
+// Checks the header sort order (SO) tag.
+// A missing value only earns a warning; a value that is present must be one of
+// the recognized sort order keywords.
+bool SamHeaderValidator::ValidateSortOrder(void) {
+
+    const string& order = m_header.SortOrder;
+
+    // warn if sort order not present
+    if ( order.empty() ) {
+        AddWarning("Sort order (SO) missing. Not required, but strongly recommended");
+        return true;
+    }
+
+    // anything other than a recognized keyword is an error
+    const bool isKnownKeyword = ( order == Constants::SAM_HD_SORTORDER_COORDINATE ) ||
+                                ( order == Constants::SAM_HD_SORTORDER_QUERYNAME  ) ||
+                                ( order == Constants::SAM_HD_SORTORDER_UNSORTED   );
+    if ( !isKnownKeyword ) {
+        AddError("Invalid sort order (SO): " + order);
+        return false;
+    }
+
+    return true;
+}
+
+// Checks the header group order (GO) tag.
+// A missing value is fine; a value that is present must be one of the
+// recognized group order keywords.
+bool SamHeaderValidator::ValidateGroupOrder(void) {
+
+    const string& order = m_header.GroupOrder;
+
+    // if no group order, no problem, just return OK
+    if ( order.empty() )
+        return true;
+
+    // anything other than a recognized keyword is an error
+    const bool isKnownKeyword = ( order == Constants::SAM_HD_GROUPORDER_NONE      ) ||
+                                ( order == Constants::SAM_HD_GROUPORDER_QUERY     ) ||
+                                ( order == Constants::SAM_HD_GROUPORDER_REFERENCE );
+    if ( !isKnownKeyword ) {
+        AddError("Invalid group order (GO): " + order);
+        return false;
+    }
+
+    return true;
+}
+
+// Validates the sequence dictionary: names must be unique, and every entry
+// must pass ValidateSequence(). Every entry is checked even after a failure.
+bool SamHeaderValidator::ValidateSequenceDictionary(void) {
+
+    // check for unique sequence names
+    bool isValid = ContainsUniqueSequenceNames();
+
+    // then check each entry on its own
+    const SamSequenceDictionary& sequences = m_header.Sequences;
+    for ( SamSequenceConstIterator it   = sequences.ConstBegin(),
+                                   stop = sequences.ConstEnd();
+          it != stop; ++it )
+    {
+        if ( !ValidateSequence(*it) )
+            isValid = false;
+    }
+
+    // return validation state
+    return isValid;
+}
+
+// Makes sure all @SQ names are unique.
+// Records one error per repeated occurrence; returns false if any was found.
+bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
+
+    bool isValid = true;
+    set<string> seenNames;
+
+    // iterate over sequences
+    const SamSequenceDictionary& sequences = m_header.Sequences;
+    for ( SamSequenceConstIterator it   = sequences.ConstBegin(),
+                                   stop = sequences.ConstEnd();
+          it != stop; ++it )
+    {
+        const string& name = it->Name;
+
+        // insert() reports whether the name was new; if not, it is a duplicate
+        if ( !seenNames.insert(name).second ) {
+            AddError("Sequence name (SN): " + name + " is not unique");
+            isValid = false;
+        }
+    }
+
+    // return validation state
+    return isValid;
+}
+
+// Validates a single sequence entry: its name format and its length.
+// Both checks always run; returns true only if both pass.
+bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
+    const bool nameOk   = CheckNameFormat(seq.Name);
+    const bool lengthOk = CheckLengthInRange(seq.Length);
+    return ( nameOk && lengthOk );
+}
+
+// Checks that a sequence name (SN) is present and does not start with one of
+// the reserved characters (Constants::SAM_EQUAL, Constants::SAM_STAR).
+bool SamHeaderValidator::CheckNameFormat(const string& name) {
+
+    // invalid if name is empty
+    if ( name.empty() ) {
+        AddError("Sequence entry (@SQ) is missing SN tag");
+        return false;
+    }
+
+    // invalid if first character is a reserved char
+    const char leadingChar = name[0];
+    const bool startsWithReserved = ( leadingChar == Constants::SAM_EQUAL ) ||
+                                    ( leadingChar == Constants::SAM_STAR  );
+    if ( startsWithReserved ) {
+        AddError("Invalid sequence name (SN): " + name);
+        return false;
+    }
+
+    // otherwise OK
+    return true;
+}
+
+// Checks that a sequence length (LN) is a number within the accepted range.
+//
+// Records an error and returns false if 'length' is empty, cannot be read as
+// an unsigned integer (optionally followed by whitespace only), or lies outside
+// [Constants::SAM_SQ_LENGTH_MIN, Constants::SAM_SQ_LENGTH_MAX].
+bool SamHeaderValidator::CheckLengthInRange(const string& length) {
+
+    // invalid if empty
+    if ( length.empty() ) {
+        AddError("Sequence entry (@SQ) is missing LN tag");
+        return false;
+    }
+
+    // convert string length to numeric
+    // (initialized up front, so a failed extraction can never leave an
+    //  indeterminate value behind to be compared against the range)
+    stringstream lengthStream(length);
+    unsigned int sequenceLength = 0;
+    const bool parsedNumber = !( lengthStream >> sequenceLength ).fail();
+
+    // invalid if no number could be read, or if anything but whitespace follows
+    // it (e.g. "abc", "12abc") - otherwise "12abc" would silently pass as 12
+    char trailing;
+    const bool hasTrailingText = parsedNumber && !( lengthStream >> trailing ).fail();
+    if ( !parsedNumber || hasTrailingText ) {
+        AddError("Invalid sequence length (LN): " + length);
+        return false;
+    }
+
+    // invalid if length outside accepted range
+    if ( sequenceLength < Constants::SAM_SQ_LENGTH_MIN || sequenceLength > Constants::SAM_SQ_LENGTH_MAX ) {
+        AddError("Sequence length (LN): " + length + " out of range");
+        return false;
+    }
+
+    // otherwise OK
+    return true;
+}
+
+// Validates the read group dictionary: IDs and platform units must be unique,
+// and every entry must pass ValidateReadGroup(). Every entry is checked even
+// after a failure.
+bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
+
+    // check for unique read group IDs & platform units
+    bool isValid = ContainsUniqueIDsAndPlatformUnits();
+
+    // then check each entry on its own
+    const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
+    for ( SamReadGroupConstIterator it   = readGroups.ConstBegin(),
+                                    stop = readGroups.ConstEnd();
+          it != stop; ++it )
+    {
+        if ( !ValidateReadGroup(*it) )
+            isValid = false;
+    }
+
+    // return validation state
+    return isValid;
+}
+
+// Makes sure read group IDs and platform units are unique.
+//
+// Records one error per repeated occurrence and returns false if any was found.
+// A read group with no platform unit (empty PU) is left out of the PU check:
+// otherwise two read groups that simply omit the tag would both contribute an
+// empty string and be reported as duplicates of each other.
+bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
+
+    bool isValid = true;
+    set<string> readGroupIds;
+    set<string> platformUnits;
+
+    // iterate over read groups
+    const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
+    SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
+    SamReadGroupConstIterator rgEnd  = readGroups.ConstEnd();
+    for ( ; rgIter != rgEnd; ++rgIter ) {
+        const SamReadGroup& rg = (*rgIter);
+
+        // --------------------------------
+        // check for unique ID
+
+        // insert() reports whether the ID was new; if not, it is a duplicate
+        const string& id = rg.ID;
+        if ( !readGroupIds.insert(id).second ) {
+            AddError("Read group ID (ID): " + id + " is not unique");
+            isValid = false;
+        }
+
+        // --------------------------------
+        // check for unique platform unit
+
+        // nothing to compare if this read group has no platform unit
+        const string& pu = rg.PlatformUnit;
+        if ( pu.empty() )
+            continue;
+
+        // error if already seen (duplicate entry)
+        if ( !platformUnits.insert(pu).second ) {
+            AddError("Platform unit (PU): " + pu + " is not unique");
+            isValid = false;
+        }
+    }
+
+    // return validation state
+    return isValid;
+}
+
+// Validates a single read group entry: its ID and its sequencing technology.
+// Both checks always run; returns true only if both pass.
+bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
+    const bool idOk         = CheckReadGroupID(rg.ID);
+    const bool technologyOk = CheckSequencingTechnology(rg.SequencingTechnology);
+    return ( idOk && technologyOk );
+}
+
+// Makes sure a read group ID is present; records an error if it is empty.
+bool SamHeaderValidator::CheckReadGroupID(const string& id) {
+
+    // OK as long as there is something there
+    if ( !id.empty() )
+        return true;
+
+    // otherwise the required tag is missing
+    AddError("Read group entry (@RG) is missing ID tag");
+    return false;
+}
+
+// Makes sure a read group's sequencing technology (PL) is one of the accepted
+// keywords, compared without regard to letter case. An empty value is fine.
+bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
+
+    // if no technology provided, no problem, just return OK
+    if ( technology.empty() )
+        return true;
+
+    // accept on the first keyword that matches
+    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_CAPILLARY) )
+        return true;
+    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS) )
+        return true;
+    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA) )
+        return true;
+    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) )
+        return true;
+    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454) )
+        return true;
+    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO) )
+        return true;
+    if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID) )
+        return true;
+
+    // no keyword matched
+    AddError("Invalid read group sequencing platform (PL): " + technology);
+    return false;
+}
+
+// Validates the header's "program chain": program IDs must be unique and every
+// PP tag must refer to a known ID. Both checks always run.
+bool SamHeaderValidator::ValidateProgramChain(void) {
+    const bool idsAreUnique   = ContainsUniqueProgramIds();
+    const bool previousIdsOk  = ValidatePreviousProgramIds();
+    return ( idsAreUnique && previousIdsOk );
+}
+
+// Makes sure all @PG IDs are unique.
+// Records one error per repeated occurrence; returns false if any was found.
+bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
+
+    bool isValid = true;
+    set<string> seenIds;
+
+    // iterate over program records
+    const SamProgramChain& programs = m_header.Programs;
+    for ( SamProgramConstIterator it   = programs.ConstBegin(),
+                                  stop = programs.ConstEnd();
+          it != stop; ++it )
+    {
+        const string& programId = it->ID;
+
+        // insert() reports whether the ID was new; if not, it is a duplicate
+        if ( !seenIds.insert(programId).second ) {
+            AddError("Program ID (ID): " + programId + " is not unique");
+            isValid = false;
+        }
+    }
+
+    // return validation state
+    return isValid;
+}
+
+// Makes sure that any PP tags present point to existing @PG IDs.
+// Records one error per dangling PP value; returns false if any was found.
+bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
+
+    bool isValid = true;
+
+    // iterate over program records
+    const SamProgramChain& programs = m_header.Programs;
+    for ( SamProgramConstIterator it   = programs.ConstBegin(),
+                                  stop = programs.ConstEnd();
+          it != stop; ++it )
+    {
+        // only records that actually name a predecessor need checking;
+        // the name must then be found in the program "chain"
+        const string& previousId = it->PreviousProgramID;
+        if ( !previousId.empty() && !programs.Contains(previousId) ) {
+            AddError("PreviousProgramID (PP): " + previousId + " is not a known ID");
+            isValid = false;
+        }
+    }
+
+    // return validation state
+    return isValid;
+}
diff --git a/src/api/internal/sam/SamHeaderValidator_p.h b/src/api/internal/sam/SamHeaderValidator_p.h

new file mode 100644 (file)

index 0000000..7d0c60a
--- /dev/null
+++ b/src/api/internal/sam/SamHeaderValidator_p.h
@@ -0,0 +1,105 @@
+// ***************************************************************************
+// SamHeaderValidator.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for validating SamHeader data
+// ***************************************************************************
+
+#ifndef SAM_HEADER_VALIDATOR_P_H
+#define SAM_HEADER_VALIDATOR_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+class SamReadGroup;
+class SamSequence;
+
+namespace Internal {
+
+// Validates the data held in a SamHeader and collects error & warning
+// messages describing whatever it finds. The header is held by const reference.
+class SamHeaderValidator {
+
+    public:
+        // ctor & dtor
+        SamHeaderValidator(const SamHeader& header);
+        ~SamHeaderValidator(void);
+
+        // prints error & warning messages
+        void PrintMessages(std::ostream& stream);
+
+        // validates SamHeader data, returns true/false accordingly
+        bool Validate(void);
+
+    private:
+        // ----- internal methods -----
+
+        // validate header metadata
+        bool ValidateMetadata(void);
+        bool ValidateVersion(void);
+        bool ContainsOnlyDigits(const std::string& s);
+        bool ValidateSortOrder(void);
+        bool ValidateGroupOrder(void);
+
+        // validate sequence dictionary
+        bool ValidateSequenceDictionary(void);
+        bool ContainsUniqueSequenceNames(void);
+        bool CheckNameFormat(const std::string& name);
+        bool ValidateSequence(const SamSequence& seq);
+        bool CheckLengthInRange(const std::string& length);
+
+        // validate read group dictionary
+        bool ValidateReadGroupDictionary(void);
+        bool ContainsUniqueIDsAndPlatformUnits(void);
+        bool ValidateReadGroup(const SamReadGroup& rg);
+        bool CheckReadGroupID(const std::string& id);
+        bool CheckSequencingTechnology(const std::string& technology);
+
+        // validate program data
+        bool ValidateProgramChain(void);
+        bool ContainsUniqueProgramIds(void);
+        bool ValidatePreviousProgramIds(void);
+
+        // error reporting
+        void AddError(const std::string& message);
+        void AddWarning(const std::string& message);
+        void PrintErrorMessages(std::ostream& stream);
+        void PrintWarningMessages(std::ostream& stream);
+
+        // ----- data members -----
+
+        // SamHeader being validated
+        const SamHeader& m_header;
+
+        // error reporting helpers
+        static const std::string ERROR_PREFIX;
+        static const std::string WARN_PREFIX;
+        static const std::string NEWLINE;
+
+        // error reporting messages
+        std::vector<std::string> m_errorMessages;
+        std::vector<std::string> m_warningMessages;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_HEADER_VALIDATOR_P_H
diff --git a/src/api/internal/sam/SamHeaderVersion_p.h b/src/api/internal/sam/SamHeaderVersion_p.h

new file mode 100644 (file)

index 0000000..4f85df0
--- /dev/null
+++ b/src/api/internal/sam/SamHeaderVersion_p.h
@@ -0,0 +1,134 @@
+// ***************************************************************************
+// SamHeaderVersion_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for comparing SAM header versions
+// *************************************************************************
+
+#ifndef SAM_HEADERVERSION_P_H
+#define SAM_HEADERVERSION_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamConstants.h"
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Stores a SAM header version as separate major & minor numbers.
+class SamHeaderVersion {
+
+    // ctors (no user-defined dtor: the class owns no resources)
+    public:
+        // both numbers start at zero
+        SamHeaderVersion(void)
+            : m_majorVersion(0)
+            , m_minorVersion(0)
+        { }
+
+        // starts from zero, then applies SetVersion(version)
+        explicit SamHeaderVersion(const std::string& version)
+            : m_majorVersion(0)
+            , m_minorVersion(0)
+        {
+            SetVersion(version);
+        }
+
+        // stores the given numbers unchanged
+        SamHeaderVersion(const unsigned int& major, const unsigned int& minor)
+            : m_majorVersion(major)
+            , m_minorVersion(minor)
+        { }
+
+    // access data
+    public:
+        unsigned int MajorVersion(void) const { return m_majorVersion; }
+        unsigned int MinorVersion(void) const { return m_minorVersion; }
+
+        // both are defined inline after the class
+        void SetVersion(const std::string& version);
+        std::string ToString(void) const;
+
+    // data members
+    private:
+        unsigned int m_majorVersion;
+        unsigned int m_minorVersion;
+};
+
+inline
+void SamHeaderVersion::SetVersion(const std::string& version) {
+
+    // Parses text of the form <major><SAM_PERIOD><minor>. Each component is
+    // stored only if it is non-empty and consists solely of SAM_DIGITS chars;
+    // otherwise the corresponding member keeps its current value. An empty
+    // string, or one without SAM_PERIOD, leaves both members untouched.
+    const size_t periodFound = version.find(Constants::SAM_PERIOD);
+    if ( periodFound == std::string::npos )
+        return;
+
+    // store major version if non-empty and contains only digits
+    const std::string majorVersion = version.substr(0, periodFound);
+    if ( !majorVersion.empty() &&
+         majorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
+    {
+        std::stringstream majorStream(majorVersion);
+        majorStream >> m_majorVersion;
+    }
+
+    // store minor version if non-empty and contains only digits
+    // (uses its own stream: extracting the major component runs to end-of-input
+    // and sets eofbit, which str() does not reset, so a shared stream would
+    // refuse the next extraction and leave the minor version unparsed)
+    const std::string minorVersion = version.substr(periodFound + 1);
+    if ( !minorVersion.empty() &&
+         minorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
+    {
+        std::stringstream minorStream(minorVersion);
+        minorStream >> m_minorVersion;
+    }
+}
+
+// -----------------------------------------------------
+// printing
+
+inline std::string SamHeaderVersion::ToString(void) const {
+    std::ostringstream formatted;  // receives major, SAM_PERIOD, minor in that order
+    formatted << m_majorVersion << Constants::SAM_PERIOD << m_minorVersion;
+    return formatted.str();
+}
+
+// -----------------------------------------------------
+// comparison operators
+
+inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    if ( lhs.MajorVersion() != rhs.MajorVersion() ) return false;  // majors differ
+    return lhs.MinorVersion() == rhs.MinorVersion();               // majors match: minors decide
+}
+
+inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    // order by major version first; the minor version only breaks ties
+    if ( lhs.MajorVersion() != rhs.MajorVersion() )
+        return lhs.MajorVersion() < rhs.MajorVersion();
+    return lhs.MinorVersion() < rhs.MinorVersion();
+}
+
+inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return  operator<(rhs, lhs); }  // operands swapped
+inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !operator<(rhs, lhs); }  // i.e. not greater
+inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !operator<(lhs, rhs); }  // i.e. not less
+
+} // namespace Internal 
+} // namespace BamTools
+
+#endif // SAM_HEADERVERSION_P_H
diff --git a/src/api/internal/utils/BamException_p.cpp b/src/api/internal/utils/BamException_p.cpp

new file mode 100644 (file)

index 0000000..103e34b
--- /dev/null
+++ b/src/api/internal/utils/BamException_p.cpp
@@ -0,0 +1,15 @@
+// ***************************************************************************
+// BamException_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+const string BamException::SEPARATOR(": ");  // placed between 'where' and 'message' by the BamException ctor
diff --git a/src/api/internal/utils/BamException_p.h b/src/api/internal/utils/BamException_p.h

new file mode 100644 (file)

index 0000000..5199737
--- /dev/null
+++ b/src/api/internal/utils/BamException_p.h
@@ -0,0 +1,51 @@
+// ***************************************************************************
+// BamException_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#ifndef BAMEXCEPTION_P_H
+#define BAMEXCEPTION_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <exception>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BamException : public std::exception {
+
+    public:
+        // composes the full message once, as: where, SEPARATOR, message
+        // (the std::exception base is default-constructed)
+        BamException(const std::string& where, const std::string& message)
+            : m_errorString(where + SEPARATOR + message)
+        { }
+
+        ~BamException(void) throw() { }
+
+        // hands back the composed message held in m_errorString
+        const char* what(void) const throw() { return m_errorString.c_str(); }
+
+    private:
+        std::string m_errorString;
+        static const std::string SEPARATOR;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMEXCEPTION_P_H
diff --git a/src/api/internal/utils/CMakeLists.txt b/src/api/internal/utils/CMakeLists.txt

new file mode 100644 (file)

index 0000000..38a6957
--- /dev/null
+++ b/src/api/internal/utils/CMakeLists.txt
@@ -0,0 +1,15 @@
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/utils
+# ==========================
+
+# directory of the utility sources (InternalDir is not set in this file)
+set( InternalUtilsDir "${InternalDir}/utils" )
+
+# published to the parent scope; list new files above PARENT_SCOPE (must stay last)
+set( InternalUtilsSources
+     ${InternalUtilsDir}/BamException_p.cpp
+     PARENT_SCOPE )
+
author	derek <derekwbarnett@gmail.com>
	Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)
committer	derek <derekwbarnett@gmail.com>
	Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)
src/api/BamAux.h		patch \| blob \| history
src/api/BamMultiReader.cpp		patch \| blob \| history
src/api/BamMultiReader.h		patch \| blob \| history
src/api/BamReader.cpp		patch \| blob \| history
src/api/BamWriter.cpp		patch \| blob \| history
src/api/CMakeLists.txt		patch \| blob \| history
src/api/IBamIODevice.h		patch \| blob \| history
src/api/SamHeader.cpp		patch \| blob \| history
src/api/internal/BamDeviceFactory_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamDeviceFactory_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamException_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamException_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamFile_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamFile_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamFtp_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamFtp_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamHeader_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamHeader_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamHttp_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamHttp_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamIndexFactory_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamIndexFactory_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamMultiMerger_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamMultiReader_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamMultiReader_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamPipe_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamPipe_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamRandomAccessController_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamRandomAccessController_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamReader_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamReader_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamStandardIndex_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamStandardIndex_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamToolsIndex_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamToolsIndex_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BamWriter_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BamWriter_p.h	[deleted file]	patch \| blob \| history
src/api/internal/BgzfStream_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/BgzfStream_p.h	[deleted file]	patch \| blob \| history
src/api/internal/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/api/internal/ILocalIODevice_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/ILocalIODevice_p.h	[deleted file]	patch \| blob \| history
src/api/internal/IRemoteIODevice_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/IRemoteIODevice_p.h	[deleted file]	patch \| blob \| history
src/api/internal/SamFormatParser_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/SamFormatParser_p.h	[deleted file]	patch \| blob \| history
src/api/internal/SamFormatPrinter_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/SamFormatPrinter_p.h	[deleted file]	patch \| blob \| history
src/api/internal/SamHeaderValidator_p.cpp	[deleted file]	patch \| blob \| history
src/api/internal/SamHeaderValidator_p.h	[deleted file]	patch \| blob \| history
src/api/internal/SamHeaderVersion_p.h	[deleted file]	patch \| blob \| history
src/api/internal/bam/BamHeader_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamHeader_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamMultiMerger_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamMultiReader_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamMultiReader_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamRandomAccessController_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamRandomAccessController_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamReader_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamReader_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamWriter_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/BamWriter_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/bam/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/api/internal/index/BamIndexFactory_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/index/BamIndexFactory_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/index/BamStandardIndex_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/index/BamStandardIndex_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/index/BamToolsIndex_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/index/BamToolsIndex_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/index/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamDeviceFactory_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamDeviceFactory_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamFile_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamFile_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamFtp_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamFtp_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamHttp_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamHttp_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamPipe_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BamPipe_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BgzfStream_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/BgzfStream_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/ByteArray_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/ByteArray_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/api/internal/io/HostAddress_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/HostAddress_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/HostInfo_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/HostInfo_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/HttpHeader_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/HttpHeader_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/ILocalIODevice_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/ILocalIODevice_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/NetUnix_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/NetWin_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/RollingBuffer_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/RollingBuffer_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/TcpSocketEngine_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/TcpSocketEngine_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/io/TcpSocketEngine_unix_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/TcpSocketEngine_win_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/TcpSocket_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/io/TcpSocket_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/SamFormatParser_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/SamFormatParser_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/SamFormatPrinter_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/SamFormatPrinter_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/SamHeaderValidator_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/SamHeaderValidator_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/sam/SamHeaderVersion_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/utils/BamException_p.cpp	[new file with mode: 0644]	patch \| blob
src/api/internal/utils/BamException_p.h	[new file with mode: 0644]	patch \| blob
src/api/internal/utils/CMakeLists.txt	[new file with mode: 0644]	patch \| blob