git.donarmstrong.com Git - bamtools.git/commitdiff
Organized api/internal into subdirs
author derek <derekwbarnett@gmail.com>
Tue, 25 Oct 2011 16:33:54 +0000 (12:33 -0400)
committer derek <derekwbarnett@gmail.com>
Tue, 25 Oct 2011 16:33:54 +0000 (12:33 -0400)
90 files changed:
src/api/BamMultiReader.cpp
src/api/BamMultiReader.h
src/api/BamReader.cpp
src/api/BamWriter.cpp
src/api/CMakeLists.txt
src/api/SamHeader.cpp
src/api/internal/BamDeviceFactory_p.cpp [deleted file]
src/api/internal/BamDeviceFactory_p.h [deleted file]
src/api/internal/BamException_p.cpp [deleted file]
src/api/internal/BamException_p.h [deleted file]
src/api/internal/BamFile_p.cpp [deleted file]
src/api/internal/BamFile_p.h [deleted file]
src/api/internal/BamFtp_p.cpp [deleted file]
src/api/internal/BamFtp_p.h [deleted file]
src/api/internal/BamHeader_p.cpp [deleted file]
src/api/internal/BamHeader_p.h [deleted file]
src/api/internal/BamHttp_p.cpp [deleted file]
src/api/internal/BamHttp_p.h [deleted file]
src/api/internal/BamIndexFactory_p.cpp [deleted file]
src/api/internal/BamIndexFactory_p.h [deleted file]
src/api/internal/BamMultiMerger_p.h [deleted file]
src/api/internal/BamMultiReader_p.cpp [deleted file]
src/api/internal/BamMultiReader_p.h [deleted file]
src/api/internal/BamPipe_p.cpp [deleted file]
src/api/internal/BamPipe_p.h [deleted file]
src/api/internal/BamRandomAccessController_p.cpp [deleted file]
src/api/internal/BamRandomAccessController_p.h [deleted file]
src/api/internal/BamReader_p.cpp [deleted file]
src/api/internal/BamReader_p.h [deleted file]
src/api/internal/BamStandardIndex_p.cpp [deleted file]
src/api/internal/BamStandardIndex_p.h [deleted file]
src/api/internal/BamToolsIndex_p.cpp [deleted file]
src/api/internal/BamToolsIndex_p.h [deleted file]
src/api/internal/BamWriter_p.cpp [deleted file]
src/api/internal/BamWriter_p.h [deleted file]
src/api/internal/BgzfStream_p.cpp [deleted file]
src/api/internal/BgzfStream_p.h [deleted file]
src/api/internal/ILocalIODevice_p.cpp [deleted file]
src/api/internal/ILocalIODevice_p.h [deleted file]
src/api/internal/IRemoteIODevice_p.cpp [deleted file]
src/api/internal/IRemoteIODevice_p.h [deleted file]
src/api/internal/SamFormatParser_p.cpp [deleted file]
src/api/internal/SamFormatParser_p.h [deleted file]
src/api/internal/SamFormatPrinter_p.cpp [deleted file]
src/api/internal/SamFormatPrinter_p.h [deleted file]
src/api/internal/SamHeaderValidator_p.cpp [deleted file]
src/api/internal/SamHeaderValidator_p.h [deleted file]
src/api/internal/SamHeaderVersion_p.h [deleted file]
src/api/internal/bam/BamHeader_p.cpp [new file with mode: 0644]
src/api/internal/bam/BamHeader_p.h [new file with mode: 0644]
src/api/internal/bam/BamMultiMerger_p.h [new file with mode: 0644]
src/api/internal/bam/BamMultiReader_p.cpp [new file with mode: 0644]
src/api/internal/bam/BamMultiReader_p.h [new file with mode: 0644]
src/api/internal/bam/BamRandomAccessController_p.cpp [new file with mode: 0644]
src/api/internal/bam/BamRandomAccessController_p.h [new file with mode: 0644]
src/api/internal/bam/BamReader_p.cpp [new file with mode: 0644]
src/api/internal/bam/BamReader_p.h [new file with mode: 0644]
src/api/internal/bam/BamWriter_p.cpp [new file with mode: 0644]
src/api/internal/bam/BamWriter_p.h [new file with mode: 0644]
src/api/internal/index/BamIndexFactory_p.cpp [new file with mode: 0644]
src/api/internal/index/BamIndexFactory_p.h [new file with mode: 0644]
src/api/internal/index/BamStandardIndex_p.cpp [new file with mode: 0644]
src/api/internal/index/BamStandardIndex_p.h [new file with mode: 0644]
src/api/internal/index/BamToolsIndex_p.cpp [new file with mode: 0644]
src/api/internal/index/BamToolsIndex_p.h [new file with mode: 0644]
src/api/internal/io/BamDeviceFactory_p.cpp [new file with mode: 0644]
src/api/internal/io/BamDeviceFactory_p.h [new file with mode: 0644]
src/api/internal/io/BamFile_p.cpp [new file with mode: 0644]
src/api/internal/io/BamFile_p.h [new file with mode: 0644]
src/api/internal/io/BamFtp_p.cpp [new file with mode: 0644]
src/api/internal/io/BamFtp_p.h [new file with mode: 0644]
src/api/internal/io/BamHttp_p.cpp [new file with mode: 0644]
src/api/internal/io/BamHttp_p.h [new file with mode: 0644]
src/api/internal/io/BamPipe_p.cpp [new file with mode: 0644]
src/api/internal/io/BamPipe_p.h [new file with mode: 0644]
src/api/internal/io/BgzfStream_p.cpp [new file with mode: 0644]
src/api/internal/io/BgzfStream_p.h [new file with mode: 0644]
src/api/internal/io/ILocalIODevice_p.cpp [new file with mode: 0644]
src/api/internal/io/ILocalIODevice_p.h [new file with mode: 0644]
src/api/internal/io/IRemoteIODevice_p.cpp [new file with mode: 0644]
src/api/internal/io/IRemoteIODevice_p.h [new file with mode: 0644]
src/api/internal/sam/SamFormatParser_p.cpp [new file with mode: 0644]
src/api/internal/sam/SamFormatParser_p.h [new file with mode: 0644]
src/api/internal/sam/SamFormatPrinter_p.cpp [new file with mode: 0644]
src/api/internal/sam/SamFormatPrinter_p.h [new file with mode: 0644]
src/api/internal/sam/SamHeaderValidator_p.cpp [new file with mode: 0644]
src/api/internal/sam/SamHeaderValidator_p.h [new file with mode: 0644]
src/api/internal/sam/SamHeaderVersion_p.h [new file with mode: 0644]
src/api/internal/utils/BamException_p.cpp [new file with mode: 0644]
src/api/internal/utils/BamException_p.h [new file with mode: 0644]

index ef38469651c292e49fdbfec9317cc903791c24f3..f61aa2648f372c7214df85a8cb38aa0f14631d07 100644 (file)
@@ -2,7 +2,7 @@
 // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
 // Marth Lab, Department of Biology, Boston College
 // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
 // ---------------------------------------------------------------------------
 // Convenience class for reading multiple BAM files.
 //
@@ -13,7 +13,7 @@
 // ***************************************************************************
 
 #include "api/BamMultiReader.h"
-#include "api/internal/BamMultiReader_p.h"
+#include "api/internal/bam/BamMultiReader_p.h"
 using namespace BamTools;
 
 #include <string>
index ea068d0c0ad58a9736fde8b4abb34de6632fcefe..e5fc9c987fb3d1f78be383cd355126205aa2524c 100644 (file)
@@ -2,7 +2,7 @@
 // BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
 // Marth Lab, Department of Biology, Boston College
 // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
 // ---------------------------------------------------------------------------
 // Convenience class for reading multiple BAM files.
 // ***************************************************************************
index 6080b3656198d29fa22f6d6cb82d64f102ae5edf..ae2adec94e498dadc97115807ab0d52dbe178804 100644 (file)
@@ -2,13 +2,13 @@
 // BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
 // Marth Lab, Department of Biology, Boston College
 // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
 // ---------------------------------------------------------------------------
 // Provides read access to BAM files.
 // ***************************************************************************
 
 #include "api/BamReader.h"
-#include "api/internal/BamReader_p.h"
+#include "api/internal/bam/BamReader_p.h"
 using namespace BamTools;
 using namespace BamTools::Internal;
 
index b1582a86a8236ff4d9bca170b06a4148f4870dd5..cbbfdae7f02eef0b6f49c1205bcd8e0dd55cfdf2 100644 (file)
@@ -2,7 +2,7 @@
 // BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett\r
 // Marth Lab, Department of Biology, Boston College\r
 // ---------------------------------------------------------------------------\r
-// Last modified: 10 October 2011 (DB)\r
+// Last modified: 25 October 2011 (DB)\r
 // ---------------------------------------------------------------------------\r
 // Provides the basic functionality for producing BAM files\r
 // ***************************************************************************\r
@@ -10,7 +10,7 @@
 #include "api/BamAlignment.h"\r
 #include "api/BamWriter.h"\r
 #include "api/SamHeader.h"\r
-#include "api/internal/BamWriter_p.h"\r
+#include "api/internal/bam/BamWriter_p.h"\r
 using namespace BamTools;\r
 using namespace BamTools::Internal;\r
 using namespace std;\r
index 55f256025c00834a4c0df3c98bcc88649324dc75..d6a5b100a7de36e34a103cc7b965b94ecfee37be 100644 (file)
@@ -25,26 +25,26 @@ set( BamToolsAPISources
         SamReadGroupDictionary.cpp
         SamSequence.cpp
         SamSequenceDictionary.cpp
-        internal/BamDeviceFactory_p.cpp
-        internal/BamException_p.cpp
-        internal/BamFile_p.cpp
-        internal/BamFtp_p.cpp
-        internal/BamHeader_p.cpp
-        internal/BamHttp_p.cpp
-        internal/BamIndexFactory_p.cpp
-        internal/BamMultiReader_p.cpp
-        internal/BamPipe_p.cpp
-        internal/BamRandomAccessController_p.cpp
-        internal/BamReader_p.cpp
-        internal/BamStandardIndex_p.cpp
-        internal/BamToolsIndex_p.cpp
-        internal/BamWriter_p.cpp
-        internal/BgzfStream_p.cpp
-        internal/ILocalIODevice_p.cpp
-        internal/IRemoteIODevice_p.cpp
-        internal/SamFormatParser_p.cpp
-        internal/SamFormatPrinter_p.cpp
-        internal/SamHeaderValidator_p.cpp
+        internal/bam/BamHeader_p.cpp
+        internal/bam/BamMultiReader_p.cpp
+        internal/bam/BamRandomAccessController_p.cpp
+        internal/bam/BamReader_p.cpp
+        internal/bam/BamWriter_p.cpp
+        internal/index/BamIndexFactory_p.cpp
+        internal/index/BamStandardIndex_p.cpp
+        internal/index/BamToolsIndex_p.cpp
+        internal/io/BamDeviceFactory_p.cpp
+        internal/io/BamFile_p.cpp
+        internal/io/BamFtp_p.cpp
+        internal/io/BamHttp_p.cpp
+        internal/io/BamPipe_p.cpp
+        internal/io/BgzfStream_p.cpp
+        internal/io/ILocalIODevice_p.cpp
+        internal/io/IRemoteIODevice_p.cpp
+        internal/sam/SamFormatParser_p.cpp
+        internal/sam/SamFormatPrinter_p.cpp
+        internal/sam/SamHeaderValidator_p.cpp
+        internal/utils/BamException_p.cpp
 )
 
 # create main BamTools API shared library
index 5de2abcd9006d6eb86693973c416b0a61b9b2998..b2925f2a5d6575729ba785e4926c85bab8bb9fd5 100644 (file)
@@ -2,17 +2,17 @@
 // SamHeader.cpp (c) 2010 Derek Barnett
 // Marth Lab, Department of Biology, Boston College
 // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
 // ---------------------------------------------------------------------------
 // Provides direct read/write access to the SAM header data fields.
 // ***************************************************************************
 
 #include "api/SamConstants.h"
 #include "api/SamHeader.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/SamFormatParser_p.h"
-#include "api/internal/SamFormatPrinter_p.h"
-#include "api/internal/SamHeaderValidator_p.h"
+#include "api/internal/utils/BamException_p.h"
+#include "api/internal/sam/SamFormatParser_p.h"
+#include "api/internal/sam/SamFormatPrinter_p.h"
+#include "api/internal/sam/SamHeaderValidator_p.h"
 using namespace BamTools;
 using namespace BamTools::Internal;
 using namespace std;
diff --git a/src/api/internal/BamDeviceFactory_p.cpp b/src/api/internal/BamDeviceFactory_p.cpp
deleted file mode 100644 (file)
index 895d08c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-// ***************************************************************************
-// BamDeviceFactory_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 September 2011 (DB)
-// ---------------------------------------------------------------------------
-// Creates built-in concrete implementations of IBamIODevices
-// ***************************************************************************
-
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamFile_p.h"
-#include "api/internal/BamFtp_p.h"
-#include "api/internal/BamHttp_p.h"
-#include "api/internal/BamPipe_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-using namespace std;
-
-IBamIODevice* BamDeviceFactory::CreateDevice(const string& source) {
-
-    // check for requested pipe
-    if ( source == "-" || source == "stdin" || source == "stdout" )
-        return new BamPipe;
-
-    // check for HTTP prefix
-    if ( source.find("http://") == 0 )
-        return new BamHttp(source);
-
-    // check for FTP prefix
-    if ( source.find("ftp://") == 0 )
-        return new BamFtp(source);
-
-    // otherwise assume a "normal" file
-    return new BamFile(source);
-}
diff --git a/src/api/internal/BamDeviceFactory_p.h b/src/api/internal/BamDeviceFactory_p.h
deleted file mode 100644 (file)
index 1d48533..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-// ***************************************************************************
-// BamDeviceFactory_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Creates built-in concrete implementations of IBamIODevices
-// ***************************************************************************
-
-#ifndef BAMDEVICEFACTORY_P_H
-#define BAMDEVICEFACTORY_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamDeviceFactory {
-    public:
-        static IBamIODevice* CreateDevice(const std::string& source);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMDEVICEFACTORY_P_H
diff --git a/src/api/internal/BamException_p.cpp b/src/api/internal/BamException_p.cpp
deleted file mode 100644 (file)
index 38469e7..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-// ***************************************************************************
-// BamException_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides a basic exception class for BamTools internals
-// ***************************************************************************
-
-#include "api/internal/BamException_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-using namespace std;
-
-const string BamException::SEPARATOR = ": ";
diff --git a/src/api/internal/BamException_p.h b/src/api/internal/BamException_p.h
deleted file mode 100644 (file)
index 5199737..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-// ***************************************************************************
-// BamException_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides a basic exception class for BamTools internals
-// ***************************************************************************
-
-#ifndef BAMEXCEPTION_P_H
-#define BAMEXCEPTION_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <exception>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamException : public std::exception {
-
-    public:
-        inline BamException(const std::string& where, const std::string& message)
-            : std::exception()
-            , m_errorString(where + SEPARATOR + message)
-        { }
-
-        inline ~BamException(void) throw() { }
-
-        inline const char* what(void) const throw() {
-            return m_errorString.c_str();
-        }
-
-    private:
-        std::string m_errorString;
-        static const std::string SEPARATOR;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMEXCEPTION_P_H
diff --git a/src/api/internal/BamFile_p.cpp b/src/api/internal/BamFile_p.cpp
deleted file mode 100644 (file)
index 74c4ed6..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-// ***************************************************************************
-// BamFile_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM file-specific IO behavior
-// ***************************************************************************
-
-#include "api/internal/BamFile_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <iostream>
-using namespace std;
-
-BamFile::BamFile(const string& filename)
-    : ILocalIODevice()
-    , m_filename(filename)
-{ }
-
-BamFile::~BamFile(void) { }
-
-void BamFile::Close(void) {
-    if ( IsOpen() ) {
-        m_filename.clear();
-        ILocalIODevice::Close();
-    }
-}
-
-bool BamFile::IsRandomAccess(void) const {
-    return true;
-}
-
-bool BamFile::Open(const IBamIODevice::OpenMode mode) {
-
-    // make sure we're starting with a fresh file stream
-    Close();
-
-    // attempt to open FILE* depending on requested openmode
-    if ( mode == IBamIODevice::ReadOnly )
-        m_stream = fopen(m_filename.c_str(), "rb");
-    else if ( mode == IBamIODevice::WriteOnly )
-        m_stream = fopen(m_filename.c_str(), "wb");
-    else {
-        SetErrorString("BamFile::Open", "unknown open mode requested");
-        return false;
-    }
-
-    // check that we obtained a valid FILE*
-    if ( m_stream == 0 ) {
-        const string message_base = string("could not open file handle for ");
-        const string message = message_base + ( (m_filename.empty()) ? "empty filename" : m_filename );
-        SetErrorString("BamFile::Open", message);
-        return false;
-    }
-
-    // store current IO mode & return success
-    m_mode = mode;
-    return true;
-}
-
-bool BamFile::Seek(const int64_t& position) {
-    BT_ASSERT_X( m_stream, "BamFile::Seek() - null stream" );
-    return ( fseek64(m_stream, position, SEEK_SET) == 0 );
-}
diff --git a/src/api/internal/BamFile_p.h b/src/api/internal/BamFile_p.h
deleted file mode 100644 (file)
index 873e71a..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-// ***************************************************************************
-// BamFile_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM file-specific IO behavior
-// ***************************************************************************
-
-#ifndef BAMFILE_P_H
-#define BAMFILE_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/internal/ILocalIODevice_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamFile : public ILocalIODevice {
-
-    // ctor & dtor
-    public:
-        BamFile(const std::string& filename);
-        ~BamFile(void);
-
-    // ILocalIODevice implementation
-    public:
-        void Close(void);
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        bool Seek(const int64_t& position);
-
-    // data members
-    private:
-        std::string m_filename;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMFILE_P_H
diff --git a/src/api/internal/BamFtp_p.cpp b/src/api/internal/BamFtp_p.cpp
deleted file mode 100644 (file)
index 779d099..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamFtp_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on FTP server
-// ***************************************************************************
-
-#include "api/internal/BamFtp_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-using namespace std;
-
-BamFtp::BamFtp(const string& url)
-    : IBamIODevice()
-{
-    BT_ASSERT_X(false, "BamFtp not yet implemented");
-}
-
-BamFtp::~BamFtp(void) { }
-
-void BamFtp::Close(void) {
-    return ;
-}
-
-bool BamFtp::IsRandomAccess(void) const {
-    return true;
-}
-
-bool BamFtp::Open(const IBamIODevice::OpenMode mode) {
-    (void) mode;
-    return true;
-}
-
-size_t BamFtp::Read(char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
-
-bool BamFtp::Seek(const int64_t& position) {
-    (void)position;
-    return true;
-}
-
-int64_t BamFtp::Tell(void) const {
-    return -1;
-}
-
-size_t BamFtp::Write(const char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
diff --git a/src/api/internal/BamFtp_p.h b/src/api/internal/BamFtp_p.h
deleted file mode 100644 (file)
index 1f5ee0f..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamFtp_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on FTP server
-// ***************************************************************************
-
-#ifndef BAMFTP_P_H
-#define BAMFTP_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamFtp : public IBamIODevice {
-
-    // ctor & dtor
-    public:
-        BamFtp(const std::string& url);
-        ~BamFtp(void);
-
-    // IBamIODevice implementation
-    public:
-        void Close(void);
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        size_t Read(char* data, const unsigned int numBytes);
-        bool Seek(const int64_t& position);
-        int64_t Tell(void) const;
-        size_t Write(const char* data, const unsigned int numBytes);
-
-    // internal methods
-    private:
-
-    // data members
-    private:
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMFTP_P_H
diff --git a/src/api/internal/BamHeader_p.cpp b/src/api/internal/BamHeader_p.cpp
deleted file mode 100644 (file)
index dc734bf..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-// ***************************************************************************
-// BamHeader_p.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for handling BAM headers.
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/BamConstants.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdlib>
-#include <cstring>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// ------------------------
-
-static inline
-bool isValidMagicNumber(const char* buffer) {
-    return ( strncmp(buffer, Constants::BAM_HEADER_MAGIC,
-                     Constants::BAM_HEADER_MAGIC_LENGTH) == 0 );
-}
-
-// --------------------------
-// BamHeader implementation
-// --------------------------
-
-// ctor
-BamHeader::BamHeader(void) { }
-
-// dtor
-BamHeader::~BamHeader(void) { }
-
-// reads magic number from BGZF stream, returns true if valid
-void BamHeader::CheckMagicNumber(BgzfStream* stream) {
-
-    // try to read magic number
-    char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
-    const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
-    if ( numBytesRead != (int)Constants::BAM_HEADER_MAGIC_LENGTH )
-        throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
-
-    // validate magic number
-    if ( !isValidMagicNumber(buffer) )
-        throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
-}
-
-// clear SamHeader data
-void BamHeader::Clear(void) {
-    m_header.Clear();
-}
-
-// return true if SamHeader data is valid
-bool BamHeader::IsValid(void) const {
-    return m_header.IsValid();
-}
-
-// load BAM header ('magic number' and SAM header text) from BGZF stream
-void BamHeader::Load(BgzfStream* stream) {
-
-    // read & check magic number
-    CheckMagicNumber(stream);
-
-    // read header (length, then actual text)
-    uint32_t length(0);
-    ReadHeaderLength(stream, length);
-    ReadHeaderText(stream, length);
-}
-
-// reads SAM header text length from BGZF stream, stores it in @length
-void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
-
-    // read BAM header text length
-    char buffer[sizeof(uint32_t)];
-    const size_t numBytesRead = stream->Read(buffer, sizeof(uint32_t));
-    if ( numBytesRead != sizeof(uint32_t) )
-        throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
-
-    // convert char buffer to length
-    length = BamTools::UnpackUnsignedInt(buffer);
-    if ( BamTools::SystemIsBigEndian() )
-        BamTools::SwapEndian_32(length);
-}
-
-// reads SAM header text from BGZF stream, stores in SamHeader object
-void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
-
-    // read header text
-    char* headerText = (char*)calloc(length + 1, 1);
-    const size_t bytesRead = stream->Read(headerText, length);
-
-    // if error reading, clean up buffer & throw
-    if ( bytesRead != length ) {
-        free(headerText);
-        throw BamException("BamHeader::ReadHeaderText", "could not read header text");
-    }
-
-    // otherwise, text was read OK
-    // store & cleanup
-    m_header.SetHeaderText( (string)((const char*)headerText) );
-    free(headerText);
-}
-
-// returns *copy* of SamHeader data object
-SamHeader BamHeader::ToSamHeader(void) const {
-    return m_header;
-}
-
-// returns SAM-formatted string of header data
-string BamHeader::ToString(void) const {
-    return m_header.ToString();
-}
diff --git a/src/api/internal/BamHeader_p.h b/src/api/internal/BamHeader_p.h
deleted file mode 100644 (file)
index 499ad96..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-// ***************************************************************************
-// BamHeader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for handling BAM headers.
-// ***************************************************************************
-
-#ifndef BAMHEADER_P_H
-#define BAMHEADER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamHeader.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BgzfStream;
-
-class BamHeader {
-
-    // ctor & dtor
-    public:
-        BamHeader(void);
-        ~BamHeader(void);
-
-    // BamHeader interface
-    public:
-        // clear SamHeader data
-        void Clear(void);
-        // return true if SamHeader data is valid
-        bool IsValid(void) const;
-        // load BAM header ('magic number' and SAM header text) from BGZF stream
-        // returns true if all OK
-        void Load(BgzfStream* stream);
-        // returns (editable) copy of SamHeader data object
-        SamHeader ToSamHeader(void) const;
-        // returns SAM-formatted string of header data
-        std::string ToString(void) const;
-
-    // internal methods
-    private:
-        // reads magic number from BGZF stream
-        void CheckMagicNumber(BgzfStream* stream);
-        // reads SAM header length from BGZF stream, stores it in @length
-        void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
-        // reads SAM header text from BGZF stream, stores in SamHeader object
-        void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
-
-    // data members
-    private:
-        SamHeader m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMHEADER_P_H
diff --git a/src/api/internal/BamHttp_p.cpp b/src/api/internal/BamHttp_p.cpp
deleted file mode 100644 (file)
index 83b4c3b..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamHttp_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on HTTP server
-// ***************************************************************************
-
-#include "api/internal/BamHttp_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-using namespace std;
-
-BamHttp::BamHttp(const string& url)
-    : IBamIODevice()
-{
-    BT_ASSERT_X(false, "BamHttp not yet implemented");
-}
-
-BamHttp::~BamHttp(void) { }
-
-void BamHttp::Close(void) {
-    return ;
-}
-
-bool BamHttp::IsRandomAccess(void) const {
-    return true;
-}
-
-bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
-    (void) mode;
-    return true;
-}
-
-size_t BamHttp::Read(char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
-
-bool BamHttp::Seek(const int64_t& position) {
-    (void)position;
-    return true;
-}
-
-int64_t BamHttp::Tell(void) const {
-    return -1;
-}
-
-size_t BamHttp::Write(const char* data, const unsigned int numBytes) {
-    (void)data;
-    (void)numBytes;
-    return 0;
-}
diff --git a/src/api/internal/BamHttp_p.h b/src/api/internal/BamHttp_p.h
deleted file mode 100644 (file)
index 38e94b7..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// BamHttp_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on HTTP server
-// ***************************************************************************
-
-#ifndef BAMHTTP_P_H
-#define BAMHTTP_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamHttp : public IBamIODevice {
-
-    // ctor & dtor
-    public:
-        BamHttp(const std::string& url);
-        ~BamHttp(void);
-
-    // IBamIODevice implementation
-    public:
-        void Close(void);
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        size_t Read(char* data, const unsigned int numBytes);
-        bool Seek(const int64_t& position);
-        int64_t Tell(void) const;
-        size_t Write(const char* data, const unsigned int numBytes);
-
-    // internal methods
-    private:
-
-    // data members
-    private:
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMHTTP_P_H
diff --git a/src/api/internal/BamIndexFactory_p.cpp b/src/api/internal/BamIndexFactory_p.cpp
deleted file mode 100644 (file)
index 2cf871f..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-// ***************************************************************************
-// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides interface for generating BamIndex implementations
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/internal/BamIndexFactory_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-using namespace std;
-
-// generates index filename from BAM filename (depending on requested type)
-// if type is unknown, returns empty string
-const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
-                                                  const BamIndex::IndexType& type)
-{
-    switch ( type ) {
-        case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
-        case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
-        default :
-            return string();
-    }
-}
-
-// creates a new BamIndex object, depending on extension of @indexFilename
-BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) {
-
-    // if file doesn't exist, return null index
-    if ( !BamTools::FileExists(indexFilename) )
-        return 0;
-
-    // get file extension from index filename, including dot (".EXT")
-    // if can't get file extension, return null index
-    const string extension = FileExtension(indexFilename);
-    if ( extension.empty() )
-        return 0;
-
-    // create index based on extension
-    if      ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader);
-    else if ( extension == BamToolsIndex::Extension()    ) return new BamToolsIndex(reader);
-    else
-        return 0;
-}
-
-// creates a new BamIndex, object of requested @type
-BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
-                                             BamReaderPrivate* reader)
-{
-    switch ( type ) {
-        case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader);
-        case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader);
-        default :
-            return 0;
-    }
-}
-
-// retrieves file extension (including '.')
-const string BamIndexFactory::FileExtension(const string& filename) {
-
-    // if filename cannot contain valid path + extension, return empty string
-    if ( filename.empty() || filename.length() <= 4 )
-        return string();
-
-    // look for last dot in filename
-    const size_t lastDotPosition = filename.find_last_of('.');
-
-    // if none found, return empty string
-    if ( lastDotPosition == string::npos )
-        return string();
-
-    // return substring from last dot position
-    return filename.substr(lastDotPosition);
-}
-
-// returns name of existing index file that corresponds to @bamFilename
-// will defer to @preferredType if possible, if not will attempt to load any supported type
-// returns empty string if not found
-const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
-                                                const BamIndex::IndexType& preferredType)
-{
-    // skip if BAM filename provided is empty
-    if ( bamFilename.empty() )
-        return string();
-
-    // try to find index of preferred type first
-    // return index filename if found
-    string indexFilename = CreateIndexFilename(bamFilename, preferredType);
-    if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
-        return indexFilename;
-
-    // couldn't find preferred type, try the other supported types
-    // return index filename if found
-    if ( preferredType != BamIndex::STANDARD ) {
-        indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD);
-        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
-            return indexFilename;
-    }
-    if ( preferredType != BamIndex::BAMTOOLS ) {
-        indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS);
-        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
-            return indexFilename;
-    }
-
-    // otherwise couldn't find any index matching this filename
-    return string();
-}
diff --git a/src/api/internal/BamIndexFactory_p.h b/src/api/internal/BamIndexFactory_p.h
deleted file mode 100644 (file)
index 4e4f1cf..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-// ***************************************************************************
-// BamIndexFactory_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides interface for generating BamIndex implementations
-// ***************************************************************************
-
-#ifndef BAMINDEX_FACTORY_P_H
-#define BAMINDEX_FACTORY_P_H
-
-#include "api/BamIndex.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamIndexFactory {
-
-    // static interface methods
-    public:
-        // creates a new BamIndex object, depending on extension of @indexFilename
-        static BamIndex* CreateIndexFromFilename(const std::string& indexFilename,
-                                                 BamReaderPrivate* reader);
-        // creates a new BamIndex object, of requested @type
-        static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type,
-                                           BamReaderPrivate* reader);
-        // returns name of existing index file that corresponds to @bamFilename
-        // will defer to @preferredType if possible
-        // if @preferredType not found, will attempt to load any supported index type
-        // returns empty string if no index file (of any type) is found
-        static const std::string FindIndexFilename(const std::string& bamFilename,
-                                                   const BamIndex::IndexType& preferredType);
-
-    // internal methods
-    public:
-        // generates index filename from BAM filename (depending on requested type)
-        // if type is unknown, returns empty string
-        static const std::string CreateIndexFilename(const std::string& bamFilename,
-                                                     const BamIndex::IndexType& type);
-        // retrieves file extension (including '.')
-        static const std::string FileExtension(const std::string& filename);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMINDEX_FACTORY_P_H
diff --git a/src/api/internal/BamMultiMerger_p.h b/src/api/internal/BamMultiMerger_p.h
deleted file mode 100644 (file)
index 3000097..0000000
+++ /dev/null
@@ -1,266 +0,0 @@
-// ***************************************************************************
-// BamMultiMerger_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides merging functionality for BamMultiReader.  At this point, supports
-// sorting results by (refId, position) or by read name.
-// ***************************************************************************
-
-#ifndef BAMMULTIMERGER_P_H
-#define BAMMULTIMERGER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAlignment.h"
-#include "api/BamReader.h"
-#include "api/algorithms/Sort.h"
-#include <deque>
-#include <functional>
-#include <set>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-struct MergeItem {
-
-    // data members
-    BamReader*    Reader;
-    BamAlignment* Alignment;
-
-    // ctors & dtor
-    MergeItem(BamReader* reader = 0,
-              BamAlignment* alignment = 0)
-        : Reader(reader)
-        , Alignment(alignment)
-    { }
-
-    MergeItem(const MergeItem& other)
-        : Reader(other.Reader)
-        , Alignment(other.Alignment)
-    { }
-
-    ~MergeItem(void) { }
-};
-
-template<typename Compare>
-struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool> {
-
-    public:
-        MergeItemSorter(const Compare& comp = Compare())
-            : m_comp(comp)
-        { }
-
-        bool operator()(const MergeItem& lhs, const MergeItem& rhs) {
-            const BamAlignment& l = *lhs.Alignment;
-            const BamAlignment& r = *rhs.Alignment;
-            return m_comp(l,r);
-        }
-
-    private:
-        Compare m_comp;
-};
-
-// pure ABC so we can just work polymorphically with any specific merger implementation
-class IMultiMerger {
-
-    public:
-        IMultiMerger(void) { }
-        virtual ~IMultiMerger(void) { }
-    public:
-        virtual void Add(MergeItem item) =0;
-        virtual void Clear(void) =0;
-        virtual const MergeItem& First(void) const =0;
-        virtual bool IsEmpty(void) const =0;
-        virtual void Remove(BamReader* reader) =0;
-        virtual int Size(void) const =0;
-        virtual MergeItem TakeFirst(void) =0;
-};
-
-// general merger
-template<typename Compare>
-class MultiMerger : public IMultiMerger {
-
-    public:
-        typedef Compare                      CompareType;
-        typedef MergeItemSorter<CompareType> MergeType;
-
-    public:
-        explicit MultiMerger(const Compare& comp = Compare())
-            : IMultiMerger()
-            , m_data( MergeType(comp) )
-        { }
-        ~MultiMerger(void) { }
-
-    public:
-        void Add(MergeItem item);
-        void Clear(void);
-        const MergeItem& First(void) const;
-        bool IsEmpty(void) const;
-        void Remove(BamReader* reader);
-        int Size(void) const;
-        MergeItem TakeFirst(void);
-
-    private:
-        typedef MergeItem                              ValueType;
-        typedef std::multiset<ValueType, MergeType>    ContainerType;
-        typedef typename ContainerType::iterator       DataIterator;
-        typedef typename ContainerType::const_iterator DataConstIterator;
-        ContainerType m_data;
-};
-
-template <typename Compare>
-inline void MultiMerger<Compare>::Add(MergeItem item) {
-
-    // N.B. - any future custom Compare types must define this method
-    //        see algorithms/Sort.h
-
-    if ( CompareType::UsesCharData() )
-        item.Alignment->BuildCharData();
-    m_data.insert(item);
-}
-
-template <typename Compare>
-inline void MultiMerger<Compare>::Clear(void) {
-    m_data.clear();
-}
-
-template <typename Compare>
-inline const MergeItem& MultiMerger<Compare>::First(void) const {
-    const ValueType& entry = (*m_data.begin());
-    return entry;
-}
-
-template <typename Compare>
-inline bool MultiMerger<Compare>::IsEmpty(void) const {
-    return m_data.empty();
-}
-template <typename Compare>
-inline void MultiMerger<Compare>::Remove(BamReader* reader) {
-
-    if ( reader == 0 ) return;
-    const std::string& filenameToRemove = reader->GetFilename();
-
-    // iterate over readers in cache
-    DataIterator dataIter = m_data.begin();
-    DataIterator dataEnd  = m_data.end();
-    for ( ; dataIter != dataEnd; ++dataIter ) {
-        const MergeItem& item = (*dataIter);
-        const BamReader* itemReader = item.Reader;
-        if ( itemReader == 0 ) continue;
-
-        // remove iterator on match
-        if ( itemReader->GetFilename() == filenameToRemove ) {
-            m_data.erase(dataIter);
-            return;
-        }
-    }
-}
-template <typename Compare>
-inline int MultiMerger<Compare>::Size(void) const {
-    return m_data.size();
-}
-
-template <typename Compare>
-inline MergeItem MultiMerger<Compare>::TakeFirst(void) {
-    DataIterator firstIter = m_data.begin();
-    MergeItem    firstItem = (*firstIter);
-    m_data.erase(firstIter);
-    return firstItem;
-}
-
-// unsorted "merger"
-template<>
-class MultiMerger<Algorithms::Sort::Unsorted> : public IMultiMerger {
-
-    public:
-        explicit MultiMerger(const Algorithms::Sort::Unsorted& comp = Algorithms::Sort::Unsorted())
-            : IMultiMerger()
-        { }
-        ~MultiMerger(void) { }
-
-    public:
-        void Add(MergeItem item);
-        void Clear(void);
-        const MergeItem& First(void) const;
-        bool IsEmpty(void) const;
-        void Remove(BamReader* reader);
-        int Size(void) const;
-        MergeItem TakeFirst(void);
-
-    private:
-        typedef MergeItem                     ValueType;
-        typedef std::deque<ValueType>         ContainerType;
-        typedef ContainerType::iterator       DataIterator;
-        typedef ContainerType::const_iterator DataConstIterator;
-        ContainerType m_data;
-};
-
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Add(MergeItem item) {
-    m_data.push_back(item);
-}
-
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Clear(void) {
-    m_data.clear();
-}
-
-inline
-const MergeItem& MultiMerger<Algorithms::Sort::Unsorted>::First(void) const {
-    return m_data.front();
-}
-
-inline
-bool MultiMerger<Algorithms::Sort::Unsorted>::IsEmpty(void) const {
-    return m_data.empty();
-}
-
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Remove(BamReader* reader) {
-
-    if ( reader == 0 ) return;
-    const std::string filenameToRemove = reader->GetFilename();
-
-    // iterate over readers in cache
-    DataIterator dataIter = m_data.begin();
-    DataIterator dataEnd  = m_data.end();
-    for ( ; dataIter != dataEnd; ++dataIter ) {
-        const MergeItem& item = (*dataIter);
-        const BamReader* itemReader = item.Reader;
-        if ( itemReader == 0 ) continue;
-
-        // remove iterator on match
-        if ( itemReader->GetFilename() == filenameToRemove ) {
-            m_data.erase(dataIter);
-            return;
-        }
-    }
-}
-
-inline
-int MultiMerger<Algorithms::Sort::Unsorted>::Size(void) const {
-    return m_data.size();
-}
-
-inline
-MergeItem MultiMerger<Algorithms::Sort::Unsorted>::TakeFirst(void) {
-    MergeItem firstItem = m_data.front();
-    m_data.pop_front();
-    return firstItem;
-}
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMMULTIMERGER_P_H
diff --git a/src/api/internal/BamMultiReader_p.cpp b/src/api/internal/BamMultiReader_p.cpp
deleted file mode 100644 (file)
index 55ae615..0000000
+++ /dev/null
@@ -1,799 +0,0 @@
-// ***************************************************************************
-// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Functionality for simultaneously reading multiple BAM files
-// *************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/BamMultiReader.h"
-#include "api/SamConstants.h"
-#include "api/algorithms/Sort.h"
-#include "api/internal/BamMultiReader_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <sstream>
-using namespace std;
-
-// ctor
-BamMultiReaderPrivate::BamMultiReaderPrivate(void)
-    : m_alignmentCache(0)
-{ }
-
-// dtor
-BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {
-    Close();
-}
-
-// close all BAM files
-bool BamMultiReaderPrivate::Close(void) {
-
-    m_errorString.clear();
-
-    if ( CloseFiles(Filenames()) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("error encountered while closing all files: \n\t") + currentError;
-        SetErrorString("BamMultiReader::Close", message);
-        return false;
-    }
-}
-
-// close requested BAM file
-bool BamMultiReaderPrivate::CloseFile(const string& filename) {
-
-    m_errorString.clear();
-
-    vector<string> filenames(1, filename);
-    if ( CloseFiles(filenames) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("error while closing file: ") + filename + "\n" + currentError;
-        SetErrorString("BamMultiReader::CloseFile", message);
-        return false;
-    }
-}
-
-// close requested BAM files
-bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over filenames
-    vector<string>::const_iterator filesIter = filenames.begin();
-    vector<string>::const_iterator filesEnd  = filenames.end();
-    for ( ; filesIter != filesEnd; ++filesIter ) {
-        const string& filename = (*filesIter);
-        if ( filename.empty() ) continue;
-
-        // iterate over readers
-        vector<MergeItem>::iterator readerIter = m_readers.begin();
-        vector<MergeItem>::iterator readerEnd  = m_readers.end();
-        for ( ; readerIter != readerEnd; ++readerIter ) {
-            MergeItem& item = (*readerIter);
-            BamReader* reader = item.Reader;
-            if ( reader == 0 ) continue;
-
-            // if reader matches requested filename
-            if ( reader->GetFilename() == filename ) {
-
-                // remove reader's entry from alignment cache
-                m_alignmentCache->Remove(reader);
-
-                // clean up reader & its alignment
-                if ( !reader->Close() ) {
-                    m_errorString.append(1, '\t');
-                    m_errorString.append(reader->GetErrorString());
-                    m_errorString.append(1, '\n');
-                    errorsEncountered = true;
-                }
-                delete reader;
-                reader = 0;
-
-                // delete reader's alignment entry
-                BamAlignment* alignment = item.Alignment;
-                delete alignment;
-                alignment = 0;
-
-                // remove reader from reader list
-                m_readers.erase(readerIter);
-
-                // on match, just go on to next filename
-                // (no need to keep looking and item iterator is invalid now anyway)
-                break;
-            }
-        }
-    }
-
-    // make sure alignment cache is cleaned up if all readers closed
-    if ( m_readers.empty() && m_alignmentCache ) {
-        m_alignmentCache->Clear();
-        delete m_alignmentCache;
-        m_alignmentCache = 0;
-    }
-
-    // return whether all readers closed OK
-    return !errorsEncountered;
-}
-
-// creates index files for BAM files that don't have them
-bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over readers
-    vector<MergeItem>::iterator itemIter = m_readers.begin();
-    vector<MergeItem>::iterator itemEnd  = m_readers.end();
-    for ( ; itemIter != itemEnd; ++itemIter ) {
-        MergeItem& item = (*itemIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // if reader doesn't have an index, create one
-        if ( !reader->HasIndex() ) {
-            if ( !reader->CreateIndex(type) ) {
-                m_errorString.append(1, '\t');
-                m_errorString.append(reader->GetErrorString());
-                m_errorString.append(1, '\n');
-                errorsEncountered = true;
-            }
-        }
-    }
-
-    // check for errors encountered before returning success/fail
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("error while creating index files: ") + "\n" + currentError;
-        SetErrorString("BamMultiReader::CreateIndexes", message);
-        return false;
-    } else
-        return true;
-}
-
-IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
-
-    // fetch SamHeader
-    SamHeader header = GetHeader();
-
-    // if BAM files are sorted by position
-    if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
-        return new MultiMerger<Algorithms::Sort::ByPosition>();
-
-    // if BAM files are sorted by read name
-    if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
-        return new MultiMerger<Algorithms::Sort::ByName>();
-
-    // otherwise "unknown" or "unsorted", use unsorted merger and just read in
-    return new MultiMerger<Algorithms::Sort::Unsorted>();
-}
-
-const vector<string> BamMultiReaderPrivate::Filenames(void) const {
-
-    // init filename container
-    vector<string> filenames;
-    filenames.reserve( m_readers.size() );
-
-    // iterate over readers
-    vector<MergeItem>::const_iterator itemIter = m_readers.begin();
-    vector<MergeItem>::const_iterator itemEnd  = m_readers.end();
-    for ( ; itemIter != itemEnd; ++itemIter ) {
-        const MergeItem& item = (*itemIter);
-        const BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // store filename if not empty
-        const string& filename = reader->GetFilename();
-        if ( !filename.empty() )
-            filenames.push_back(filename);
-    }
-
-    // return result
-    return filenames;
-}
-
-string BamMultiReaderPrivate::GetErrorString(void) const {
-    return m_errorString;
-}
-
-SamHeader BamMultiReaderPrivate::GetHeader(void) const {
-    const string& text = GetHeaderText();
-    return SamHeader(text);
-}
-
-// makes a virtual, unified header for all the bam files in the multireader
-string BamMultiReaderPrivate::GetHeaderText(void) const {
-
-    // N.B. - right now, simply copies all header data from first BAM,
-    //        and then appends RG's from other BAM files
-    // TODO: make this more intelligent wrt other header lines/fields
-
-    // if no readers open
-    const size_t numReaders = m_readers.size();
-    if ( numReaders == 0 ) return string();
-
-    // retrieve first reader's header
-    const MergeItem& firstItem = m_readers.front();
-    const BamReader* reader = firstItem.Reader;
-    if ( reader == 0 ) return string();
-    SamHeader mergedHeader = reader->GetHeader();
-
-    // iterate over any remaining readers (skipping the first)
-    for ( size_t i = 1; i < numReaders; ++i ) {
-        const MergeItem& item = m_readers.at(i);
-        const BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // retrieve current reader's header
-        const SamHeader currentHeader = reader->GetHeader();
-
-        // append current reader's RG entries to merged header
-        // N.B. - SamReadGroupDictionary handles duplicate-checking
-        mergedHeader.ReadGroups.Add(currentHeader.ReadGroups);
-
-        // TODO: merge anything else??
-    }
-
-    // return stringified header
-    return mergedHeader.ToString();
-}
-
-// get next alignment among all files
-bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
-    return PopNextCachedAlignment(al, true);
-}
-
-// get next alignment among all files without parsing character data from alignments
-bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
-    return PopNextCachedAlignment(al, false);
-}
-
-// ---------------------------------------------------------------------------------------
-//
-// NB: The following GetReferenceX() functions assume that we have identical
-// references for all BAM files.  We enforce this by invoking the
-// ValidateReaders() method to verify that our reference data is the same
-// across all files on Open - so we will not encounter a situation in which
-// there is a mismatch and we are still live.
-//
-// ---------------------------------------------------------------------------------------
-
-// returns the number of reference sequences
-int BamMultiReaderPrivate::GetReferenceCount(void) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() ) return 0;
-
-    // return reference count from first reader
-    const MergeItem& item = m_readers.front();
-    const BamReader* reader = item.Reader;
-    if ( reader == 0 ) return 0;
-    else
-        return reader->GetReferenceCount();
-}
-
-// returns vector of reference objects
-const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() ) return RefVector();
-
-    // return reference data from first BamReader
-    const MergeItem& item = m_readers.front();
-    const BamReader* reader = item.Reader;
-    if ( reader == 0 ) return RefVector();
-    else
-        return reader->GetReferenceData();
-}
-
-// returns refID from reference name
-int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() ) return -1;
-
-    // return reference ID from first BamReader
-    const MergeItem& item = m_readers.front();
-    const BamReader* reader = item.Reader;
-    if ( reader == 0 ) return -1;
-    else
-        return reader->GetReferenceID(refName);
-}
-// ---------------------------------------------------------------------------------------
-
-// returns true if all readers have index data available
-// this is useful to indicate whether Jump() or SetRegion() are possible
-bool BamMultiReaderPrivate::HasIndexes(void) const {
-
-    // handle empty multireader
-    if ( m_readers.empty() )
-        return false;
-
-    bool result = true;
-
-    // iterate over readers
-    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
-    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        const MergeItem& item = (*readerIter);
-        const BamReader* reader = item.Reader;
-        if ( reader  == 0 ) continue;
-
-        // see if current reader has index data
-        result &= reader->HasIndex();
-    }
-
-    return result;
-}
-
-// returns true if multireader has open readers
-bool BamMultiReaderPrivate::HasOpenReaders(void) {
-
-    // iterate over readers
-    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
-    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        const MergeItem& item = (*readerIter);
-        const BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // return true whenever an open reader is found
-        if ( reader->IsOpen() ) return true;
-    }
-
-    // no readers open
-    return false;
-}
-
-// performs random-access jump using (refID, position) as a left-bound
-bool BamMultiReaderPrivate::Jump(int refID, int position) {
-
-    // NB: While it may make sense to track readers in which we can
-    // successfully Jump, in practice a failure of Jump means "no
-    // alignments here."  It makes sense to simply accept the failure,
-    // UpdateAlignments(), and continue.
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // jump in each BamReader to position of interest
-        reader->Jump(refID, position);
-    }
-
-    // returns status of cache update
-    return UpdateAlignmentCache();
-}
-
-// locate (& load) index files for BAM readers that don't already have one loaded
-bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // if reader has no index, try to locate one
-        if ( !reader->HasIndex() ) {
-            if ( !reader->LocateIndex(preferredType) ) {
-                m_errorString.append(1, '\t');
-                m_errorString.append(reader->GetErrorString());
-                m_errorString.append(1, '\n');
-                errorsEncountered = true;
-            }
-        }
-    }
-
-    // check for errors encountered before returning success/fail
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("error while locating index files: ") + "\n" + currentError;
-        SetErrorString("BamMultiReader::LocatingIndexes", message);
-        return false;
-    } else
-        return true;
-}
-
-// opens BAM files
-bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
-
-    m_errorString.clear();
-
-    // put all current readers back at beginning (refreshes alignment cache)
-    if ( !Rewind() ) {
-        const string currentError = m_errorString;
-        const string message = string("unable to rewind existing readers: \n\t") + currentError;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // iterate over filenames
-    bool errorsEncountered = false;
-    vector<string>::const_iterator filenameIter = filenames.begin();
-    vector<string>::const_iterator filenameEnd  = filenames.end();
-    for ( ; filenameIter != filenameEnd; ++filenameIter ) {
-        const string& filename = (*filenameIter);
-        if ( filename.empty() ) continue;
-
-        // attempt to open BamReader
-        BamReader* reader = new BamReader;
-        const bool readerOpened = reader->Open(filename);
-
-        // if opened OK, store it
-        if ( readerOpened )
-            m_readers.push_back( MergeItem(reader, new BamAlignment) );
-
-        // otherwise store error & clean up invalid reader
-        else {
-            m_errorString.append(1, '\t');
-            m_errorString += string("unable to open file: ") + filename;
-            m_errorString.append(1, '\n');
-            errorsEncountered = true;
-
-            delete reader;
-            reader = 0;
-        }
-    }
-
-    // check for errors while opening
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("unable to open all files: \t\n") + currentError;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // check for BAM file consistency
-    if ( !ValidateReaders() ) {
-        const string currentError = m_errorString;
-        const string message = string("unable to open inconsistent files: \t\n") + currentError;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // update alignment cache
-    return UpdateAlignmentCache();
-}
-
-bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
-    vector<string> filenames(1, filename);
-    if ( Open(filenames) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("could not open file: ") + filename + "\n\t" + currentError;
-        SetErrorString("BamMultiReader::OpenFile", message);
-        return false;
-    }
-}
-
-bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
-
-    // TODO: This needs to be cleaner - should not assume same order.
-    //       And either way, shouldn't start at first reader.  Should start at
-    //       first reader without an index?
-
-    // make sure same number of index filenames as readers
-    if ( m_readers.size() != indexFilenames.size() ) {
-        const string message("size of index file list does not match current BAM file count");
-        SetErrorString("BamMultiReader::OpenIndexes", message);
-        return false;
-    }
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over BamReaders
-    vector<string>::const_iterator indexFilenameIter = indexFilenames.begin();
-    vector<string>::const_iterator indexFilenameEnd  = indexFilenames.end();
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-
-        // open index filename on reader
-        if ( reader ) {
-            const string& indexFilename = (*indexFilenameIter);
-            if ( !reader->OpenIndex(indexFilename) ) {
-                m_errorString.append(1, '\t');
-                m_errorString += reader->GetErrorString();
-                m_errorString.append(1, '\n');
-                errorsEncountered = true;
-            }
-        }
-
-        // increment filename iterator, skip if no more index files to open
-        if ( ++indexFilenameIter == indexFilenameEnd )
-            break;
-    }
-
-    // return success/fail
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("could not open all index files: \n\t") + currentError;
-        SetErrorString("BamMultiReader::OpenIndexes", message);
-        return false;
-    } else
-        return true;
-}
-
-bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
-
-    // skip if no alignments available
-    if ( m_alignmentCache == 0 || m_alignmentCache->IsEmpty() )
-        return false;
-
-    // pop next merge item entry from cache
-    MergeItem item = m_alignmentCache->TakeFirst();
-    BamReader* reader = item.Reader;
-    BamAlignment* alignment = item.Alignment;
-    if ( reader == 0 || alignment == 0 )
-        return false;
-
-    // set char data if requested
-    if ( needCharData ) {
-        alignment->BuildCharData();
-        alignment->Filename = reader->GetFilename();
-    }
-
-    // store cached alignment into destination parameter (by copy)
-    al = *alignment;
-
-    // load next alignment from reader & store in cache
-    SaveNextAlignment(reader, alignment);
-    return true;
-}
-
-// returns BAM file pointers to beginning of alignment data & resets alignment cache
-bool BamMultiReaderPrivate::Rewind(void) {
-
-    // skip if no readers open
-    if ( m_readers.empty() )
-        return true;
-
-    // attempt to rewind files
-    if ( !RewindReaders() ) {
-        const string currentError = m_errorString;
-        const string message = string("could not rewind readers: \n\t") + currentError;
-        SetErrorString("BamMultiReader::Rewind", message);
-        return false;
-    }
-
-    // return status of cache update
-    return UpdateAlignmentCache();
-}
-
-// returns BAM file pointers to beginning of alignment data
-bool BamMultiReaderPrivate::RewindReaders(void) {
-
-    m_errorString.clear();
-    bool errorsEncountered = false;
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // attempt rewind on BamReader
-        if ( !reader->Rewind() ) {
-            m_errorString.append(1, '\t');
-            m_errorString.append( reader->GetErrorString() );
-            m_errorString.append(1, '\n');
-            errorsEncountered = true;
-        }
-    }
-
-    return !errorsEncountered;
-}
-
-void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
-
-    // if can read alignment from reader, store in cache
-    //
-    // N.B. - lazy building of alignment's char data - populated only:
-    //        automatically by alignment cache to maintain its sorting OR
-    //        on demand from client call to future call to GetNextAlignment()
-
-    if ( reader->GetNextAlignmentCore(*alignment) )
-        m_alignmentCache->Add( MergeItem(reader, alignment) );
-}
-
-void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
-    static const string SEPARATOR = ": ";
-    m_errorString = where + SEPARATOR + what;
-}
-
-bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
-
-    // NB: While it may make sense to track readers in which we can
-    // successfully SetRegion, In practice a failure of SetRegion means "no
-    // alignments here."  It makes sense to simply accept the failure,
-    // UpdateAlignments(), and continue.
-
-    // iterate over alignments
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // set region of interest
-        reader->SetRegion(region);
-    }
-
-    // return status of cache update
-    return UpdateAlignmentCache();
-}
-
-// updates our alignment cache
-bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
-
-    // create alignment cache if not created yet
-    if ( m_alignmentCache == 0 ) {
-        m_alignmentCache = CreateAlignmentCache();
-        if ( m_alignmentCache == 0 ) {
-            SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
-            return false;
-        }
-    }
-
-    // clear any prior cache data
-    m_alignmentCache->Clear();
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        BamAlignment* alignment = item.Alignment;
-        if ( reader == 0 || alignment == 0 ) continue;
-
-        // save next alignment from each reader in cache
-        SaveNextAlignment(reader, alignment);
-    }
-
-    // if we get here, ok
-    return true;
-}
-
-// ValidateReaders checks that all the readers point to BAM files representing
-// alignments against the same set of reference sequences, and that the
-// sequences are identically ordered.  If these checks fail the operation of
-// the multireader is undefined, so we force program exit.
-bool BamMultiReaderPrivate::ValidateReaders(void) const {
-
-    m_errorString.clear();
-
-    // skip if 0 or 1 readers opened
-    if ( m_readers.empty() || (m_readers.size() == 1) )
-        return true;
-
-    // retrieve first reader
-    const MergeItem& firstItem = m_readers.front();
-    const BamReader* firstReader = firstItem.Reader;
-    if ( firstReader == 0 ) return false;
-
-    // retrieve first reader's header data
-    const SamHeader& firstReaderHeader = firstReader->GetHeader();
-    const string& firstReaderSortOrder = firstReaderHeader.SortOrder;
-
-    // retrieve first reader's reference data
-    const RefVector& firstReaderRefData = firstReader->GetReferenceData();
-    const int firstReaderRefCount = firstReader->GetReferenceCount();
-    const int firstReaderRefSize = firstReaderRefData.size();
-
-    // iterate over all readers
-    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
-    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        const MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // get current reader's header data
-        const SamHeader& currentReaderHeader = reader->GetHeader();
-        const string& currentReaderSortOrder = currentReaderHeader.SortOrder;
-
-        // check compatible sort order
-        if ( currentReaderSortOrder != firstReaderSortOrder ) {
-            const string message = string("mismatched sort order in ") + reader->GetFilename() +
-                                   ", expected " + firstReaderSortOrder +
-                                   ", but found " + currentReaderSortOrder;
-            SetErrorString("BamMultiReader::ValidateReaders", message);
-            return false;
-        }
-
-        // get current reader's reference data
-        const RefVector currentReaderRefData = reader->GetReferenceData();
-        const int currentReaderRefCount = reader->GetReferenceCount();
-        const int currentReaderRefSize  = currentReaderRefData.size();
-
-        // init reference data iterators
-        RefVector::const_iterator firstRefIter   = firstReaderRefData.begin();
-        RefVector::const_iterator firstRefEnd    = firstReaderRefData.end();
-        RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
-
-        // compare reference counts from BamReader ( & container size, in case of BR error)
-        if ( (currentReaderRefCount != firstReaderRefCount) ||
-             (firstReaderRefSize    != currentReaderRefSize) )
-        {
-            stringstream s("");
-            s << "mismatched reference count in " << reader->GetFilename()
-              << ", expected " << firstReaderRefCount
-              << ", but found " << currentReaderRefCount;
-            SetErrorString("BamMultiReader::ValidateReaders", s.str());
-            return false;
-        }
-
-        // this will be ok; we just checked above that we have identically-sized sets of references
-        // here we simply check if they are all, in fact, equal in content
-        while ( firstRefIter != firstRefEnd ) {
-            const RefData& firstRef   = (*firstRefIter);
-            const RefData& currentRef = (*currentRefIter);
-
-            // compare reference name & length
-            if ( (firstRef.RefName   != currentRef.RefName) ||
-                 (firstRef.RefLength != currentRef.RefLength) )
-            {
-                stringstream s("");
-                s << "mismatched references found in" << reader->GetFilename()
-                  << "expected: " << endl;
-
-                // print first reader's reference data
-                RefVector::const_iterator refIter = firstReaderRefData.begin();
-                RefVector::const_iterator refEnd  = firstReaderRefData.end();
-                for ( ; refIter != refEnd; ++refIter ) {
-                    const RefData& entry = (*refIter);
-                    stringstream s("");
-                    s << entry.RefName << " " << endl;
-                }
-
-                s << "but found: " << endl;
-
-                // print current reader's reference data
-                refIter = currentReaderRefData.begin();
-                refEnd  = currentReaderRefData.end();
-                for ( ; refIter != refEnd; ++refIter ) {
-                    const RefData& entry = (*refIter);
-                    s << entry.RefName << " " << entry.RefLength << endl;
-                }
-
-                SetErrorString("BamMultiReader::ValidateReaders", s.str());
-                return false;
-            }
-
-            // update iterators
-            ++firstRefIter;
-            ++currentRefIter;
-        }
-    }
-
-    // if we get here, everything checks out
-    return true;
-}
diff --git a/src/api/internal/BamMultiReader_p.h b/src/api/internal/BamMultiReader_p.h
deleted file mode 100644 (file)
index 9d001f5..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-// ***************************************************************************
-// BamMultiReader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Functionality for simultaneously reading multiple BAM files
-// *************************************************************************
-
-#ifndef BAMMULTIREADER_P_H
-#define BAMMULTIREADER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamHeader.h"
-#include "api/BamMultiReader.h"
-#include "api/internal/BamMultiMerger_p.h"
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-class BamMultiReaderPrivate {
-
-    // typedefs
-    public:
-        typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
-
-    // constructor / destructor
-    public:
-        BamMultiReaderPrivate(void);
-        ~BamMultiReaderPrivate(void);
-
-    // public interface
-    public:
-
-        // file operations
-        bool Close(void);
-        bool CloseFile(const std::string& filename);
-        const std::vector<std::string> Filenames(void) const;
-        bool Jump(int refID, int position = 0);
-        bool Open(const std::vector<std::string>& filenames);
-        bool OpenFile(const std::string& filename);
-        bool Rewind(void);
-        bool SetRegion(const BamRegion& region);
-
-        // access alignment data
-        bool GetNextAlignment(BamAlignment& al);
-        bool GetNextAlignmentCore(BamAlignment& al);
-        bool HasOpenReaders(void);
-
-        // access auxiliary data
-        SamHeader GetHeader(void) const;
-        std::string GetHeaderText(void) const;
-        int GetReferenceCount(void) const;
-        const BamTools::RefVector GetReferenceData(void) const;
-        int GetReferenceID(const std::string& refName) const;
-
-        // BAM index operations
-        bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);
-        bool HasIndexes(void) const;
-        bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
-        bool OpenIndexes(const std::vector<std::string>& indexFilenames);
-
-        // error handling
-        std::string GetErrorString(void) const;
-
-    // 'internal' methods
-    public:
-
-        bool CloseFiles(const std::vector<std::string>& filenames);
-        IMultiMerger* CreateAlignmentCache(void) const;
-        bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
-        bool RewindReaders(void);
-        void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
-        void SetErrorString(const std::string& where, const std::string& what) const; //
-        bool UpdateAlignmentCache(void);
-        bool ValidateReaders(void) const;
-
-    // data members
-    public:
-        std::vector<MergeItem> m_readers;
-        IMultiMerger* m_alignmentCache;
-        mutable std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMMULTIREADER_P_H
diff --git a/src/api/internal/BamPipe_p.cpp b/src/api/internal/BamPipe_p.cpp
deleted file mode 100644 (file)
index e13ad7c..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-// ***************************************************************************
-// BamPipe_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM pipe-specific IO behavior
-// ***************************************************************************
-
-#include "api/internal/BamPipe_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <iostream>
-using namespace std;
-
-BamPipe::BamPipe(void) : ILocalIODevice() { }
-
-BamPipe::~BamPipe(void) { }
-
-bool BamPipe::IsRandomAccess(void) const {
-    return false;
-}
-
-bool BamPipe::Open(const IBamIODevice::OpenMode mode) {
-
-    // make sure we're starting with a fresh pipe
-    Close();
-
-    // open stdin/stdout depending on requested openmode
-    if ( mode == IBamIODevice::ReadOnly )
-        m_stream = freopen(0, "rb", stdin);
-    else if ( mode == IBamIODevice::WriteOnly )
-        m_stream = freopen(0, "wb", stdout);
-    else {
-        SetErrorString("BamPipe::Open", "unknown open mode requested");
-        return false;
-    }
-
-    // check that we obtained a valid FILE*
-    if ( m_stream == 0 ) {
-        const string message_base = string("could not open handle on ");
-        const string message = message_base + ( (mode == IBamIODevice::ReadOnly) ? "stdin" : "stdout" );
-        SetErrorString("BamPipe::Open", message);
-        return false;
-    }
-
-    // store current IO mode & return success
-    m_mode = mode;
-    return true;
-}
-
-bool BamPipe::Seek(const int64_t& ) {
-    SetErrorString("BamPipe::Seek", "random access not allowed in FIFO pipe");
-    return false;
-}
diff --git a/src/api/internal/BamPipe_p.h b/src/api/internal/BamPipe_p.h
deleted file mode 100644 (file)
index 8996766..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-// ***************************************************************************
-// BamPipe_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM pipe-specific IO behavior
-// ***************************************************************************
-
-#ifndef BAMPIPE_P_H
-#define BAMPIPE_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/internal/ILocalIODevice_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamPipe : public ILocalIODevice {
-
-    // ctor & dtor
-    public:
-        BamPipe(void);
-        ~BamPipe(void);
-
-    // IBamIODevice implementation
-    public:
-        bool IsRandomAccess(void) const;
-        bool Open(const IBamIODevice::OpenMode mode);
-        bool Seek(const int64_t& position);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMPIPE_P_H
diff --git a/src/api/internal/BamRandomAccessController_p.cpp b/src/api/internal/BamRandomAccessController_p.cpp
deleted file mode 100644 (file)
index c223ed7..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-// ***************************************************************************
-// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Manages random access operations in a BAM file
-// **************************************************************************
-
-#include "api/BamIndex.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamIndexFactory_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cassert>
-#include <sstream>
-using namespace std;
-
-BamRandomAccessController::BamRandomAccessController(void)
-    : m_index(0)
-    , m_hasAlignmentsInRegion(true)
-{ }
-
-BamRandomAccessController::~BamRandomAccessController(void) {
-    Close();
-}
-
-void BamRandomAccessController::AdjustRegion(const int& referenceCount) {
-
-    // skip if no index available
-    if ( m_index == 0 )
-        return;
-
-    // see if any references in region have alignments
-    m_hasAlignmentsInRegion = false;
-    int currentId = m_region.LeftRefID;
-    const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? m_region.RightRefID : referenceCount - 1 );
-    while ( currentId <= rightBoundRefId ) {
-        m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);
-        if ( m_hasAlignmentsInRegion ) break;
-        ++currentId;
-    }
-
-    // if no data found on any reference in region
-    if ( !m_hasAlignmentsInRegion )
-        return;
-
-    // if left bound of desired region had no data, use first reference that had data
-    // otherwise, leave requested region as-is
-    if ( currentId != m_region.LeftRefID ) {
-        m_region.LeftRefID = currentId;
-        m_region.LeftPosition = 0;
-    }
-}
-
-// returns alignments' "RegionState": { Before|Overlaps|After } current region
-BamRandomAccessController::RegionState
-BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {
-
-    // if region has no left bound at all
-    if ( !m_region.isLeftBoundSpecified() )
-        return OverlapsRegion;
-
-    // handle unmapped reads - return AFTER region to halt processing
-    if ( alignment.RefID == -1 )
-        return AfterRegion;
-
-    // if alignment is on any reference before left bound reference
-    if ( alignment.RefID < m_region.LeftRefID )
-        return BeforeRegion;
-
-    // if alignment is on left bound reference
-    else if ( alignment.RefID == m_region.LeftRefID ) {
-
-        // if alignment starts at or after left bound position
-        if ( alignment.Position >= m_region.LeftPosition) {
-
-            if ( m_region.isRightBoundSpecified() &&             // right bound is specified AND
-                 m_region.LeftRefID == m_region.RightRefID &&    // left & right bounds on same reference AND
-                 alignment.Position >= m_region.RightPosition )  // alignment starts on or after right bound position
-                return AfterRegion;
-
-            // otherwise, alignment overlaps region
-            else return OverlapsRegion;
-        }
-
-        // alignment starts before left bound position
-        else {
-
-            // if alignment overlaps left bound position
-            if ( alignment.GetEndPosition() > m_region.LeftPosition )
-                return OverlapsRegion;
-            else
-                return BeforeRegion;
-        }
-    }
-
-    // otherwise alignment is on a reference after left bound reference
-    else {
-
-        // if region has a right bound
-        if ( m_region.isRightBoundSpecified() ) {
-
-            // alignment is on any reference between boundaries
-            if ( alignment.RefID < m_region.RightRefID )
-                return OverlapsRegion;
-
-            // alignment is on any reference after right boundary
-            else if ( alignment.RefID > m_region.RightRefID )
-                return AfterRegion;
-
-            // alignment is on right bound reference
-            else {
-
-                // if alignment starts before right bound position
-                if ( alignment.Position < m_region.RightPosition )
-                    return OverlapsRegion;
-                else
-                    return AfterRegion;
-            }
-        }
-
-        // otherwise, alignment starts after left bound and there is no right bound given
-        else return OverlapsRegion;
-    }
-}
-
-void BamRandomAccessController::Close(void) {
-    ClearIndex();
-    ClearRegion();
-}
-
-void BamRandomAccessController::ClearIndex(void) {
-    if ( m_index ) {
-        delete m_index;
-        m_index = 0;
-    }
-}
-
-void BamRandomAccessController::ClearRegion(void) {
-    m_region.clear();
-    m_hasAlignmentsInRegion = true;
-}
-
-bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
-                                            const BamIndex::IndexType& type)
-{
-    // skip if reader is invalid
-    assert(reader);
-    if ( !reader->IsOpen() ) {
-        SetErrorString("BamRandomAccessController::CreateIndex",
-                       "cannot create index for unopened reader");
-        return false;
-    }
-
-    // create new index of requested type
-    BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
-    if ( newIndex == 0 ) {
-        stringstream s("");
-        s << "could not create index of type: " << type;
-        SetErrorString("BamRandomAccessController::CreateIndex", s.str());
-        return false;
-    }
-
-    // attempt to build index from current BamReader file
-    if ( !newIndex->Create() ) {
-        const string indexError = newIndex->GetErrorString();
-        const string message = "could not create index: \n\t" + indexError;
-        SetErrorString("BamRandomAccessController::CreateIndex", message);
-        return false;
-    }
-
-    // save new index & return success
-    SetIndex(newIndex);
-    return true;
-}
-
-string BamRandomAccessController::GetErrorString(void) const {
-    return m_errorString;
-}
-
-bool BamRandomAccessController::HasIndex(void) const {
-    return ( m_index != 0 );
-}
-
-bool BamRandomAccessController::HasRegion(void) const  {
-    return ( !m_region.isNull() );
-}
-
-bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {
-    return m_index->HasAlignments(refId);
-}
-
-bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,
-                                            const BamIndex::IndexType& preferredType)
-{
-    // look up index filename, deferring to preferredType if possible
-    assert(reader);
-    const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
-
-    // if no index file found (of any type)
-    if ( indexFilename.empty() ) {
-        const string message = string("could not find index file for:") + reader->Filename();
-        SetErrorString("BamRandomAccessController::LocateIndex", message);
-        return false;
-    }
-
-    // otherwise open & use index file that was found
-    return OpenIndex(indexFilename, reader);
-}
-
-bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {
-
-    // attempt create new index of type based on filename
-    BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
-    if ( index == 0 ) {
-        const string message = string("could not open index file: ") + indexFilename;
-        SetErrorString("BamRandomAccessController::OpenIndex", message);
-        return false;
-    }
-
-    // attempt to load data from index file
-    if ( !index->Load(indexFilename) ) {
-        const string indexError = index->GetErrorString();
-        const string message = string("could not load index data from file: ") + indexFilename +
-                               "\n\t" + indexError;
-        SetErrorString("BamRandomAccessController::OpenIndex", message);
-        return false;
-    }
-
-    // save new index & return success
-    SetIndex(index);
-    return true;
-}
-
-bool BamRandomAccessController::RegionHasAlignments(void) const {
-    return m_hasAlignmentsInRegion;
-}
-
-void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
-    m_errorString = where + ": " + what;
-}
-
-void BamRandomAccessController::SetIndex(BamIndex* index) {
-    if ( m_index )
-        ClearIndex();
-    m_index = index;
-}
-
-bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
-
-    // store region
-    m_region = region;
-
-    // cannot jump when no index is available
-    if ( !HasIndex() ) {
-        SetErrorString("BamRandomAccessController", "cannot jump if no index data available");
-        return false;
-    }
-
-    // adjust region as necessary to reflect where data actually begins
-    AdjustRegion(referenceCount);
-
-    // if no data present, return true
-    //   * Not an error, but future attempts to access alignments in this region will not return data
-    //     Returning true is useful in a BamMultiReader setting where some BAM files may
-    //     lack alignments in regions where other files still have data available.
-    if ( !m_hasAlignmentsInRegion )
-        return true;
-
-    // return success/failure of jump to specified region,
-    //
-    //  * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag
-    //    This covers 'corner case' where a region is requested that lies beyond the last
-    //    alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
-    //    will not return data. BamMultiReader will still be able to successfully pull alignments
-    //    from a region from other files even if this one has no data.
-    if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
-        const string indexError = m_index->GetErrorString();
-        const string message = string("could not set region\n\t") + indexError;
-        SetErrorString("BamRandomAccessController::OpenIndex", message);
-        return false;
-    }
-    else
-        return true;
-}
diff --git a/src/api/internal/BamRandomAccessController_p.h b/src/api/internal/BamRandomAccessController_p.h
deleted file mode 100644 (file)
index 9262a61..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-// ***************************************************************************
-// BamRandomAccessController_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Manages random access operations in a BAM file
-// ***************************************************************************
-
-#ifndef BAMRACONTROLLER_P_H
-#define BAMRACONTROLLER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-
-namespace BamTools {
-
-class BamAlignment;
-
-namespace Internal {
-
-class BamReaderPrivate;
-
-class BamRandomAccessController {
-
-    // enums
-    public: enum RegionState { BeforeRegion = 0
-                             , OverlapsRegion
-                             , AfterRegion
-                             };
-
-    // ctor & dtor
-    public:
-        BamRandomAccessController(void);
-        ~BamRandomAccessController(void);
-
-    // BamRandomAccessController interface
-    public:
-
-        // index methods
-        void ClearIndex(void);
-        bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);
-        bool HasIndex(void) const;
-        bool IndexHasAlignmentsForReference(const int& refId);
-        bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType);
-        bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader);
-        void SetIndex(BamIndex* index);
-
-        // region methods
-        void ClearRegion(void);
-        bool HasRegion(void) const;
-        RegionState AlignmentState(const BamAlignment& alignment) const;
-        bool RegionHasAlignments(void) const;
-        bool SetRegion(const BamRegion& region, const int& referenceCount);
-
-        // general methods
-        void Close(void);
-        std::string GetErrorString(void) const;
-
-    // internal methods
-    private:
-        // adjusts requested region if necessary (depending on where data actually begins)
-        void AdjustRegion(const int& referenceCount);
-        // error-string handling
-        void SetErrorString(const std::string& where, const std::string& what);
-
-    // data members
-    private:
-
-        // index data
-        BamIndex* m_index;  // owns the index, not a copy - responsible for deleting
-
-        // region data
-        BamRegion m_region;
-        bool m_hasAlignmentsInRegion;
-
-        // general data
-        std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMRACONTROLLER_P_H
diff --git a/src/api/internal/BamReader_p.cpp b/src/api/internal/BamReader_p.cpp
deleted file mode 100644 (file)
index dc6792f..0000000
+++ /dev/null
@@ -1,466 +0,0 @@
-// ***************************************************************************
-// BamReader_p.cpp (c) 2009 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-#include "api/BamConstants.h"
-#include "api/BamReader.h"
-#include "api/IBamIODevice.h"
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-#include <iterator>
-#include <vector>
-using namespace std;
-
-// constructor
-BamReaderPrivate::BamReaderPrivate(BamReader* parent)
-    : m_alignmentsBeginOffset(0)
-    , m_parent(parent)
-{
-    m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// destructor
-BamReaderPrivate::~BamReaderPrivate(void) {
-    Close();
-}
-
-// closes the BAM file
-bool BamReaderPrivate::Close(void) {
-
-    // clear BAM metadata
-    m_references.clear();
-    m_header.Clear();
-
-    // clear filename
-    m_filename.clear();
-
-    // close random access controller
-    m_randomAccessController.Close();
-
-    // if stream is open, attempt close
-    if ( IsOpen() ) {
-        try {
-            m_stream.Close();
-        } catch ( BamException& e ) {
-            const string streamError = e.what();
-            const string message = string("encountered error closing BAM file: \n\t") + streamError;
-            SetErrorString("BamReader::Close", message);
-            return false;
-        }
-    }
-
-    // return success
-    return true;
-}
-
-// creates an index file of requested type on current BAM file
-bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
-
-    // skip if BAM file not open
-    if ( !IsOpen() ) {
-        SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
-        return false;
-    }
-
-    // attempt to create index
-    if ( m_randomAccessController.CreateIndex(this, type) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not create index: \n\t") + bracError;
-        SetErrorString("BamReader::CreateIndex", message);
-        return false;
-    }
-}
-
-// return path & filename of current BAM file
-const string BamReaderPrivate::Filename(void) const {
-    return m_filename;
-}
-
-string BamReaderPrivate::GetErrorString(void) const {
-    return m_errorString;
-}
-
-// return header data as std::string
-string BamReaderPrivate::GetHeaderText(void) const {
-    return m_header.ToString();
-}
-
-// return header data as SamHeader object
-SamHeader BamReaderPrivate::GetSamHeader(void) const {
-    return m_header.ToSamHeader();
-}
-
-// get next alignment (with character data fully parsed)
-bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
-
-    // if valid alignment found
-    if ( GetNextAlignmentCore(alignment) ) {
-
-        // store alignment's "source" filename
-        alignment.Filename = m_filename;
-
-        // return success/failure of parsing char data
-        if ( alignment.BuildCharData() )
-            return true;
-        else {
-            const string alError = alignment.GetErrorString();
-            const string message = string("could not populate alignment data: \n\t") + alError;
-            SetErrorString("BamReader::GetNextAlignment", message);
-            return false;
-        }
-    }
-
-    // no valid alignment found
-    return false;
-}
-
-// retrieves next available alignment core data (returns success/fail)
-// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)
-//    these can be accessed, if necessary, from the supportData
-// useful for operations requiring ONLY positional or other alignment-related information
-bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
-
-    // skip if stream not opened
-    if ( !m_stream.IsOpen() )
-        return false;
-
-    try {
-
-        // skip if region is set but has no alignments
-        if ( m_randomAccessController.HasRegion() &&
-             !m_randomAccessController.RegionHasAlignments() )
-        {
-            return false;
-        }
-
-        // if can't read next alignment
-        if ( !LoadNextAlignment(alignment) )
-            return false;
-
-        // check alignment's region-overlap state
-        BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
-
-        // if alignment starts after region, no need to keep reading
-        if ( state == BamRandomAccessController::AfterRegion )
-            return false;
-
-        // read until overlap is found
-        while ( state != BamRandomAccessController::OverlapsRegion ) {
-
-            // if can't read next alignment
-            if ( !LoadNextAlignment(alignment) )
-                return false;
-
-            // check alignment's region-overlap state
-            state = m_randomAccessController.AlignmentState(alignment);
-
-            // if alignment starts after region, no need to keep reading
-            if ( state == BamRandomAccessController::AfterRegion )
-                return false;
-        }
-
-        // if we get here, we found the next 'valid' alignment
-        // (e.g. overlaps current region if one was set, simply the next alignment if not)
-        alignment.SupportData.HasCoreOnly = true;
-        return true;
-
-    } catch ( BamException& e ) {
-        const string streamError = e.what();
-        const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
-        SetErrorString("BamReader::GetNextAlignmentCore", message);
-        return false;
-    }
-}
-
-int BamReaderPrivate::GetReferenceCount(void) const {
-    return m_references.size();
-}
-
-const RefVector& BamReaderPrivate::GetReferenceData(void) const {
-    return m_references;
-}
-
-// returns RefID for given RefName (returns References.size() if not found)
-int BamReaderPrivate::GetReferenceID(const string& refName) const {
-
-    // retrieve names from reference data
-    vector<string> refNames;
-    RefVector::const_iterator refIter = m_references.begin();
-    RefVector::const_iterator refEnd  = m_references.end();
-    for ( ; refIter != refEnd; ++refIter)
-        refNames.push_back( (*refIter).RefName );
-
-    // return 'index-of' refName (or -1 if not found)
-    int index = distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));
-    if ( index == (int)m_references.size() ) return -1;
-    else return index;
-}
-
-bool BamReaderPrivate::HasIndex(void) const {
-    return m_randomAccessController.HasIndex();
-}
-
-bool BamReaderPrivate::IsOpen(void) const {
-    return m_stream.IsOpen();
-}
-
-// load BAM header data
-void BamReaderPrivate::LoadHeaderData(void) {
-    m_header.Load(&m_stream);
-}
-
-// populates BamAlignment with alignment data under file pointer, returns success/fail
-bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
-
-    // read in the 'block length' value, make sure it's not zero
-    char buffer[sizeof(uint32_t)];
-    m_stream.Read(buffer, sizeof(uint32_t));
-    alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
-    if ( alignment.SupportData.BlockLength == 0 )
-        return false;
-
-    // read in core alignment data, make sure the right size of data was read
-    char x[Constants::BAM_CORE_SIZE];
-    if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
-        return false;
-
-    // swap core endian-ness if necessary
-    if ( m_isBigEndian ) {
-        for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
-            BamTools::SwapEndian_32p(&x[i]);
-    }
-
-    // set BamAlignment 'core' and 'support' data
-    alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
-    alignment.Position = BamTools::UnpackSignedInt(&x[4]);
-
-    unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
-    alignment.Bin        = tempValue >> 16;
-    alignment.MapQuality = tempValue >> 8 & 0xff;
-    alignment.SupportData.QueryNameLength = tempValue & 0xff;
-
-    tempValue = BamTools::UnpackUnsignedInt(&x[12]);
-    alignment.AlignmentFlag = tempValue >> 16;
-    alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
-
-    alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
-    alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
-    alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
-    alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);
-
-    // set BamAlignment length
-    alignment.Length = alignment.SupportData.QuerySequenceLength;
-
-    // read in character data - make sure proper data size was read
-    bool readCharDataOK = false;
-    const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
-    RaiiBuffer allCharData(dataLength);
-
-    if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) {
-
-        // store 'allCharData' in supportData structure
-        alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
-
-        // set success flag
-        readCharDataOK = true;
-
-        // save CIGAR ops
-        // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
-        // even when GetNextAlignmentCore() is called
-        const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
-        uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
-        CigarOp op;
-        alignment.CigarData.clear();
-        alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
-        for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
-
-            // swap endian-ness if necessary
-            if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);
-
-            // build CigarOp structure
-            op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
-            op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];
-
-            // save CigarOp
-            alignment.CigarData.push_back(op);
-        }
-    }
-
-    // return success/failure
-    return readCharDataOK;
-}
-
-// loads reference data from BAM file
-bool BamReaderPrivate::LoadReferenceData(void) {
-
-    // get number of reference sequences
-    char buffer[sizeof(uint32_t)];
-    m_stream.Read(buffer, sizeof(uint32_t));
-    uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
-    m_references.reserve((int)numberRefSeqs);
-
-    // iterate over all references in header
-    for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {
-
-        // get length of reference name
-        m_stream.Read(buffer, sizeof(uint32_t));
-        uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
-        RaiiBuffer refName(refNameLength);
-
-        // get reference name and reference sequence length
-        m_stream.Read(refName.Buffer, refNameLength);
-        m_stream.Read(buffer, sizeof(int32_t));
-        int32_t refLength = BamTools::UnpackSignedInt(buffer);
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
-
-        // store data for reference
-        RefData aReference;
-        aReference.RefName   = (string)((const char*)refName.Buffer);
-        aReference.RefLength = refLength;
-        m_references.push_back(aReference);
-    }
-
-    // return success
-    return true;
-}
-
-bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
-
-    if ( m_randomAccessController.LocateIndex(this, preferredType) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not locate index: \n\t") + bracError;
-        SetErrorString("BamReader::LocateIndex", message);
-        return false;
-    }
-}
-
-// opens BAM file (and index)
-bool BamReaderPrivate::Open(const string& filename) {
-
-    try {
-
-        // make sure we're starting with fresh state
-        Close();
-
-        // open BgzfStream
-        m_stream.Open(filename, IBamIODevice::ReadOnly);
-        assert(m_stream);
-
-        // load BAM metadata
-        LoadHeaderData();
-        LoadReferenceData();
-
-        // store filename & offset of first alignment
-        m_filename = filename;
-        m_alignmentsBeginOffset = m_stream.Tell();
-
-        // return success
-        return true;
-
-    } catch ( BamException& e ) {
-        const string error = e.what();
-        const string message = string("could not open file: ") + filename +
-                               "\n\t" + error;
-        SetErrorString("BamReader::Open", message);
-        return false;
-    }
-}
-
-bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
-
-    if ( m_randomAccessController.OpenIndex(indexFilename, this) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not open index: \n\t") + bracError;
-        SetErrorString("BamReader::OpenIndex", message);
-        return false;
-    }
-}
-
-// returns BAM file pointer to beginning of alignment data
-bool BamReaderPrivate::Rewind(void) {
-
-    // reset region
-    m_randomAccessController.ClearRegion();
-
-    // return status of seeking back to first alignment
-    if ( Seek(m_alignmentsBeginOffset) )
-        return true;
-    else {
-        const string currentError = m_errorString;
-        const string message = string("could not rewind: \n\t") + currentError;
-        SetErrorString("BamReader::Rewind", message);
-        return false;
-    }
-}
-
-bool BamReaderPrivate::Seek(const int64_t& position) {
-
-    // skip if BAM file not open
-    if ( !IsOpen() ) {
-        SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
-        return false;
-    }
-
-    try {
-        m_stream.Seek(position);
-        return true;
-    }
-    catch ( BamException& e ) {
-        const string streamError = e.what();
-        const string message = string("could not seek in BAM file: \n\t") + streamError;
-        SetErrorString("BamReader::Seek", message);
-        return false;
-    }
-}
-
-void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
-    static const string SEPARATOR = ": ";
-    m_errorString = where + SEPARATOR + what;
-}
-
-void BamReaderPrivate::SetIndex(BamIndex* index) {
-    m_randomAccessController.SetIndex(index);
-}
-
-// sets current region & attempts to jump to it
-// returns success/failure
-bool BamReaderPrivate::SetRegion(const BamRegion& region) {
-
-    if ( m_randomAccessController.SetRegion(region, m_references.size()) )
-        return true;
-    else {
-        const string bracError = m_randomAccessController.GetErrorString();
-        const string message = string("could not set region: \n\t") + bracError;
-        SetErrorString("BamReader::SetRegion", message);
-        return false;
-    }
-}
-
-int64_t BamReaderPrivate::Tell(void) const {
-    return m_stream.Tell();
-}
diff --git a/src/api/internal/BamReader_p.h b/src/api/internal/BamReader_p.h
deleted file mode 100644 (file)
index f928273..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-// ***************************************************************************
-// BamReader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-#ifndef BAMREADER_P_H
-#define BAMREADER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAlignment.h"
-#include "api/BamIndex.h"
-#include "api/BamReader.h"
-#include "api/SamHeader.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BgzfStream_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamReaderPrivate {
-
-    // ctor & dtor
-    public:
-        BamReaderPrivate(BamReader* parent);
-        ~BamReaderPrivate(void);
-
-    // BamReader interface
-    public:
-
-        // file operations
-        bool Close(void);
-        const std::string Filename(void) const;
-        bool IsOpen(void) const;
-        bool Open(const std::string& filename);
-        bool Rewind(void);
-        bool SetRegion(const BamRegion& region);
-
-        // access alignment data
-        bool GetNextAlignment(BamAlignment& alignment);
-        bool GetNextAlignmentCore(BamAlignment& alignment);
-
-        // access auxiliary data
-        std::string GetHeaderText(void) const;
-        SamHeader GetSamHeader(void) const;
-        int GetReferenceCount(void) const;
-        const RefVector& GetReferenceData(void) const;
-        int GetReferenceID(const std::string& refName) const;
-
-        // index operations
-        bool CreateIndex(const BamIndex::IndexType& type);
-        bool HasIndex(void) const;
-        bool LocateIndex(const BamIndex::IndexType& preferredType);
-        bool OpenIndex(const std::string& indexFilename);
-        void SetIndex(BamIndex* index);
-
-        // error handling
-        std::string GetErrorString(void) const;
-        void SetErrorString(const std::string& where, const std::string& what);
-
-    // internal methods, but available as a BamReaderPrivate 'interface'
-    //
-    // these methods should only be used by BamTools::Internal classes
-    // (currently only used by the BamIndex subclasses)
-    public:
-        // retrieves header text from BAM file
-        void LoadHeaderData(void);
-        // retrieves BAM alignment under file pointer
-        // (does no overlap checking or character data parsing)
-        bool LoadNextAlignment(BamAlignment& alignment);
-        // builds reference data structure from BAM file
-        bool LoadReferenceData(void);
-        // seek reader to file position
-        bool Seek(const int64_t& position);
-        // return reader's file position
-        int64_t Tell(void) const;
-
-    // data members
-    public:
-
-        // general BAM file data
-        int64_t     m_alignmentsBeginOffset;
-        std::string m_filename;
-        RefVector   m_references;
-
-        // system data
-        bool m_isBigEndian;
-
-        // parent BamReader
-        BamReader* m_parent;
-
-        // BamReaderPrivate components
-        BamHeader m_header;
-        BamRandomAccessController m_randomAccessController;
-        BgzfStream m_stream;
-
-        // error handling
-        std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMREADER_P_H
diff --git a/src/api/internal/BamStandardIndex_p.cpp b/src/api/internal/BamStandardIndex_p.cpp
deleted file mode 100644 (file)
index 8b23f74..0000000
+++ /dev/null
@@ -1,954 +0,0 @@
-// ***************************************************************************
-// BamStandardIndex.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the standardized BAM index format (".bai")
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-#include <sstream>
-using namespace std;
-
-// -----------------------------------
-// static BamStandardIndex constants
-// -----------------------------------
-
-const int BamStandardIndex::MAX_BIN               = 37450;  // =(8^6-1)/7+1
-const int BamStandardIndex::BAM_LIDX_SHIFT        = 14;
-const string BamStandardIndex::BAI_EXTENSION      = ".bai";
-const char* const BamStandardIndex::BAI_MAGIC     = "BAI\1";
-const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
-const int BamStandardIndex::SIZEOF_BINCORE        = sizeof(uint32_t) + sizeof(int32_t);
-const int BamStandardIndex::SIZEOF_LINEAROFFSET   = sizeof(uint64_t);
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
-    : IndexStream(0)
-    , Buffer(0)
-{ }
-
-BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
-
-    if ( IndexStream ) {
-        fclose(IndexStream);
-        IndexStream = 0;
-    }
-
-    if ( Buffer ) {
-        delete[] Buffer;
-        Buffer = 0;
-    }
-}
-
-// ---------------------------------
-// BamStandardIndex implementation
-// ---------------------------------
-
-// ctor
-BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
-    : BamIndex(reader)
-    , m_bufferLength(0)
-{
-     m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// dtor
-BamStandardIndex::~BamStandardIndex(void) {
-    CloseFile();
-}
-
-void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
-
-    // retrieve references from reader
-    const RefVector& references = m_reader->GetReferenceData();
-
-    // LeftPosition cannot be greater than or equal to reference length
-    if ( region.LeftPosition >= references.at(region.LeftRefID).RefLength )
-        throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
-
-    // set region 'begin'
-    begin = (unsigned int)region.LeftPosition;
-
-    // if right bound specified AND left&right bounds are on same reference
-    // OK to use right bound position as region 'end'
-    if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )
-        end = (unsigned int)region.RightPosition;
-
-    // otherwise, set region 'end' to last reference base
-    else end = (unsigned int)references.at(region.LeftRefID).RefLength;
-}
-
-// [begin, end)
-void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
-                                              const uint32_t& end,
-                                              set<uint16_t>& candidateBins)
-{
-    // initialize list, bin '0' is always a valid bin
-    candidateBins.insert(0);
-
-    // get rest of bins that contain this region
-    unsigned int k;
-    for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { candidateBins.insert(k); }
-    for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { candidateBins.insert(k); }
-    for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { candidateBins.insert(k); }
-    for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { candidateBins.insert(k); }
-    for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
-}
-
-void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
-                                                 const uint64_t& minOffset,
-                                                 set<uint16_t>& candidateBins,
-                                                 vector<int64_t>& offsets)
-{
-    // seek to first bin
-    Seek(refSummary.FirstBinFilePosition, SEEK_SET);
-
-    // iterate over reference bins
-    uint32_t binId;
-    int32_t numAlignmentChunks;
-    set<uint16_t>::iterator candidateBinIter;
-    for ( int i = 0; i < refSummary.NumBins; ++i ) {
-
-        // read bin contents (if successful, alignment chunks are now in m_buffer)
-        ReadBinIntoBuffer(binId, numAlignmentChunks);
-
-        // see if bin is a 'candidate bin'
-        candidateBinIter = candidateBins.find(binId);
-
-        // if not, move on to next bin
-        if ( candidateBinIter == candidateBins.end() )
-            continue;
-
-        // otherwise, check bin's contents against for overlap
-        else {
-
-            size_t offset = 0;
-            uint64_t chunkStart;
-            uint64_t chunkStop;
-
-            // iterate over alignment chunks
-            for ( int j = 0; j < numAlignmentChunks; ++j ) {
-
-                // read chunk start & stop from buffer
-                memcpy((char*)&chunkStart, Resources.Buffer+offset, sizeof(uint64_t));
-                offset += sizeof(uint64_t);
-                memcpy((char*)&chunkStop, Resources.Buffer+offset, sizeof(uint64_t));
-                offset += sizeof(uint64_t);
-
-                // swap endian-ness if necessary
-                if ( m_isBigEndian ) {
-                    SwapEndian_64(chunkStart);
-                    SwapEndian_64(chunkStop);
-                }
-
-                // store alignment chunk's start offset
-                // if its stop offset is larger than our 'minOffset'
-                if ( chunkStop >= minOffset )
-                    offsets.push_back(chunkStart);
-            }
-
-            // 'pop' bin ID from candidate bins set
-            candidateBins.erase(candidateBinIter);
-
-            // quit if no more candidates
-            if ( candidateBins.empty() )
-                break;
-        }
-    }
-}
-
-uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
-                                              const uint32_t& begin)
-{
-    // if no linear offsets exist, return 0
-    if ( refSummary.NumLinearOffsets == 0 )
-        return 0;
-
-    // if 'begin' starts beyond last linear offset, use the last linear offset as minimum
-    // else use the offset corresponding to the requested start position
-    const int shiftedBegin = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
-    if ( shiftedBegin >= refSummary.NumLinearOffsets )
-        return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
-    else
-        return LookupLinearOffset( refSummary, shiftedBegin );
-}
-
-void BamStandardIndex::CheckBufferSize(char*& buffer,
-                                       unsigned int& bufferLength,
-                                       const unsigned int& requestedBytes)
-{
-    try {
-        if ( requestedBytes > bufferLength ) {
-            bufferLength = requestedBytes + 10;
-            delete[] buffer;
-            buffer = new char[bufferLength];
-        }
-    } catch ( std::bad_alloc&  ) {
-        stringstream s("");
-        s << "out of memory when allocating " << requestedBytes << " bytes";
-        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
-    }
-}
-
-void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
-                                       unsigned int& bufferLength,
-                                       const unsigned int& requestedBytes)
-{
-    try {
-        if ( requestedBytes > bufferLength ) {
-            bufferLength = requestedBytes + 10;
-            delete[] buffer;
-            buffer = new unsigned char[bufferLength];
-        }
-    } catch ( std::bad_alloc& ) {
-        stringstream s("");
-        s << "out of memory when allocating " << requestedBytes << " bytes";
-        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
-    }
-}
-
-// Reads 4 bytes from the index stream and verifies that they match BAI_MAGIC.
-// Throws BamException if the bytes cannot be read or do not match.
-void BamStandardIndex::CheckMagicNumber(void) {
-
-    // pull the 4-byte signature from the index stream
-    char fileMagic[4];
-    if ( fread(fileMagic, sizeof(char), 4, Resources.IndexStream) != 4 )
-        throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
-
-    // signature must match the expected BAI value exactly
-    const bool isBaiMagic = ( strncmp(fileMagic, BamStandardIndex::BAI_MAGIC, 4) == 0 );
-    if ( !isBaiMagic )
-        throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
-}
-
-// Resets a reference entry to its empty state: ID of -1, no bins, no linear offsets.
-void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
-    refEntry.LinearOffsets.clear();
-    refEntry.Bins.clear();
-    refEntry.ID = -1;
-}
-
-// Releases everything tied to the current index file:
-// the file stream (if one is open), the in-memory summary, and the I/O buffer.
-void BamStandardIndex::CloseFile(void) {
-
-    // shut down the stream, but only if one is actually open
-    if ( IsFileOpen() ) {
-        fclose(Resources.IndexStream);
-        Resources.IndexStream = 0;
-    }
-
-    // drop the cached per-reference summary
-    m_indexFileSummary.clear();
-
-    // free the I/O buffer & reset its recorded length
-    m_bufferLength = 0;
-    delete[] Resources.Buffer;
-    Resources.Buffer = 0;
-}
-
-// builds index from associated BAM file & writes out to index file
-//
-// Overview:
-//   1. requires an open reader; rewinds it
-//   2. opens "<reader filename><Extension()>" for read/write & writes the header
-//   3. walks every alignment, accumulating bins/chunks & linear offsets per reference,
-//      writing each reference entry out when the reference ID changes
-//   4. writes the last reference entry plus empty entries for any remaining references
-//   5. rewinds the reader again
-//
-// Returns true on success. On failure returns false with the error string set
-// (BamExceptions thrown by helpers are caught here and converted to the error string).
-bool BamStandardIndex::Create(void) {
-
-    // skip if BamReader is invalid or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamStandardIndex::Create", message);
-        return false;
-    }
-
-    try {
-
-        // open new index file (read & write)
-        string indexFilename = m_reader->Filename() + Extension();
-        OpenFile(indexFilename, "w+b");
-
-        // initialize BaiFileSummary with number of references
-        const int& numReferences = m_reader->GetReferenceCount();
-        ReserveForSummary(numReferences);
-
-        // initialize output file
-        WriteHeader();
-
-        // set up bin, ID, offset, & coordinate markers
-        // (0xffffffff doubles as the "nothing seen yet" sentinel; when stored in the
-        //  signed int32_t markers below it is expected to read back as -1)
-        const uint32_t defaultValue = 0xffffffffu;
-        uint32_t currentBin    = defaultValue;
-        uint32_t lastBin       = defaultValue;
-        int32_t  currentRefID  = defaultValue;
-        int32_t  lastRefID     = defaultValue;
-        uint64_t currentOffset = (uint64_t)m_reader->Tell();
-        uint64_t lastOffset    = currentOffset;
-        int32_t  lastPosition  = defaultValue;
-
-        // iterate through alignments in BAM file
-        BamAlignment al;
-        BaiReferenceEntry refEntry;
-        while ( m_reader->LoadNextAlignment(al) ) {
-
-            // changed to new reference
-            if ( lastRefID != al.RefID ) {
-
-                // if not first reference, save previous reference data
-                if ( lastRefID != (int32_t)defaultValue ) {
-
-                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-                    WriteReferenceEntry(refEntry);
-                    ClearReferenceEntry(refEntry);
-
-                    // write any empty references between (but *NOT* including) lastRefID & al.RefID
-                    for ( int i = lastRefID+1; i < al.RefID; ++i ) {
-                        BaiReferenceEntry emptyEntry(i);
-                        WriteReferenceEntry(emptyEntry);
-                    }
-
-                    // update bin markers
-                    currentOffset = lastOffset;
-                    currentBin    = al.Bin;
-                    lastBin       = al.Bin;
-                    currentRefID  = al.RefID;
-                }
-
-                // otherwise, this is first pass
-                // be sure to write any empty references up to (but *NOT* including) current RefID
-                else {
-                    for ( int i = 0; i < al.RefID; ++i ) {
-                        BaiReferenceEntry emptyEntry(i);
-                        WriteReferenceEntry(emptyEntry);
-                    }
-                }
-
-                // update reference markers
-                // (lastBin is reset so the "changed to new BAI bin" check below always
-                //  fires for the first alignment on a new reference)
-                refEntry.ID = al.RefID;
-                lastRefID   = al.RefID;
-                lastBin     = defaultValue;
-            }
-
-            // if lastPosition greater than current alignment position - file not sorted properly
-            // NOTE(review): this early return (and the one further below) leaves the index
-            // file opened above without calling CloseFile() here - confirm a later
-            // OpenFile()/destructor is relied upon for cleanup
-            else if ( lastPosition > al.Position ) {
-                stringstream s("");
-                s << "BAM file is not properly sorted by coordinate" << endl
-                  << "Current alignment position: " << al.Position
-                  << " < previous alignment position: " << lastPosition
-                  << " on reference ID: " << al.RefID << endl;
-                SetErrorString("BamStandardIndex::Create", s.str());
-                return false;
-            }
-
-            // if alignment's ref ID is valid & its bin is not a 'leaf'
-            if ( (al.RefID >= 0) && (al.Bin < 4681) )
-                SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
-
-            // changed to new BAI bin
-            if ( al.Bin != lastBin ) {
-
-                // if not first bin on reference, save previous bin data
-                if ( currentBin != defaultValue )
-                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-
-                // update markers
-                currentOffset = lastOffset;
-                currentBin    = al.Bin;
-                lastBin       = al.Bin;
-                currentRefID  = al.RefID;
-
-                // if invalid RefID, break out
-                // NOTE(review): after this break currentRefID is negative, so the
-                // "currentRefID >= 0" block after the loop is skipped - including the loop
-                // that writes empty entries for trailing references. Confirm this is intended.
-                if ( currentRefID < 0 )
-                    break;
-            }
-
-            // make sure that current file pointer is beyond lastOffset
-            if ( m_reader->Tell() <= (int64_t)lastOffset ) {
-                SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
-                return false;
-            }
-
-            // update lastOffset & lastPosition
-            lastOffset   = m_reader->Tell();
-            lastPosition = al.Position;
-        }
-
-        // after finishing alignments, if any data was read, check:
-        if ( currentRefID >= 0 ) {
-
-            // store last alignment chunk to its bin, then write last reference entry with data
-            SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-            WriteReferenceEntry(refEntry);
-
-            // then write any empty references remaining at end of file
-            for ( int i = currentRefID+1; i < numReferences; ++i ) {
-                BaiReferenceEntry emptyEntry(i);
-                WriteReferenceEntry(emptyEntry);
-            }
-        }
-
-    } catch ( BamException& e) {
-        // any helper failure (open/write/etc.) is reported through the error string
-        m_errorString = e.what();
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamStandardIndex::Create", message);
-        return false;
-    }
-
-    // return success
-    return true;
-}
-
-// returns the filename extension used by this index format (BAI_EXTENSION)
-const string BamStandardIndex::Extension(void) {
-    const string extension = BamStandardIndex::BAI_EXTENSION;
-    return extension;
-}
-
-// Calculates the BAM file offset to jump to for @region.
-//
-//   region                - requested region; only its left bound (LeftRefID/LeftPosition)
-//                           is used directly here, AdjustRegion() derives begin/end
-//   offset                - [out] candidate offset to seek to; left UNMODIFIED when the
-//                           index yields no candidate offsets
-//   hasAlignmentsInRegion - [out] set to the result of the last LoadNextAlignment()
-//                           performed during the search; untouched if no candidates exist
-//
-// Side effect: seeks the BamReader while probing candidate offsets.
-// Throws BamException on an invalid reference ID or if the reader fails to seek.
-void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
-
-    // cannot calculate offsets if unknown/invalid reference ID requested
-    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
-        throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
-
-    // retrieve index summary for left bound reference
-    const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
-
-    // set up region boundaries based on actual BamReader data
-    uint32_t begin;
-    uint32_t end;
-    AdjustRegion(region, begin, end);
-
-    // retrieve all candidate bin IDs for region
-    set<uint16_t> candidateBins;
-    CalculateCandidateBins(begin, end, candidateBins);
-
-    // use reference's linear offsets to calculate the minimum offset
-    // that must be considered to find overlap
-    const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
-
-    // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
-    // no data should not be error, just bail
-    vector<int64_t> offsets;
-    CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
-    if ( offsets.empty() )
-        return;
-
-    // ensure that offsets are sorted before processing
-    sort( offsets.begin(), offsets.end() );
-
-    // binary search for an overlapping block (may not be first one though)
-    BamAlignment al;
-    typedef vector<int64_t>::const_iterator OffsetConstIterator;
-    OffsetConstIterator offsetFirst = offsets.begin();
-    OffsetConstIterator offsetIter  = offsetFirst;
-    OffsetConstIterator offsetLast  = offsets.end();
-    iterator_traits<OffsetConstIterator>::difference_type count = distance(offsetFirst, offsetLast);
-    iterator_traits<OffsetConstIterator>::difference_type step;
-    while ( count > 0 ) {
-        offsetIter = offsetFirst;
-        step = count/2;
-        advance(offsetIter, step);
-
-        // attempt seek to candidate offset
-        const int64_t& candidateOffset = (*offsetIter);
-        if ( !m_reader->Seek(candidateOffset) ) {
-            const string readerError = m_reader->GetErrorString();
-            const string message = "could not seek in BAM file: \n\t" + readerError;
-            // report under this class's name (was mislabeled as another index class)
-            throw BamException("BamStandardIndex::GetOffset", message);
-        }
-
-        // load first available alignment, setting flag to true if data exists
-        *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
-
-        // check alignment against region:
-        // alignment ends before region starts => keep searching in the upper half,
-        // otherwise narrow the search to the lower half
-        if ( al.GetEndPosition() <= region.LeftPosition ) {
-            offsetFirst = ++offsetIter;
-            count -= step+1;
-        } else count = step;
-    }
-
-    // step back to the offset before the 'current offset' (to make sure we cover overlaps)
-    // (this also pulls the iterator back into range if the search ran off the end)
-    if ( offsetIter != offsets.begin() )
-        --offsetIter;
-    offset = (*offsetIter);
-}
-
-// reports whether the loaded index summary records any alignments for @referenceID
-// (reference IDs outside the summary are reported as having none)
-bool BamStandardIndex::HasAlignments(const int& referenceID) const {
-
-    // reject IDs that fall outside the loaded summary
-    const int numReferences = (int)m_indexFileSummary.size();
-    const bool isKnownReference = ( referenceID >= 0 && referenceID < numReferences );
-    if ( !isKnownReference )
-        return false;
-
-    // a reference has alignments if its summary lists at least one bin
-    return ( m_indexFileSummary.at(referenceID).NumBins > 0 );
-}
-
-// returns true if an index file stream is currently held (IndexStream is non-null)
-bool BamStandardIndex::IsFileOpen(void) const {
-    const bool hasStream = ( Resources.IndexStream != 0 );
-    return hasStream;
-}
-
-// uses index data to position the reader for @region
-//   * return value reports success/failure of the operation itself
-//   * whether any alignments are available after the jump position is reported
-//     separately through @hasAlignmentsInRegion
-bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
-
-    // assume "no alignments" until the index lookup says otherwise
-    *hasAlignmentsInRegion = false;
-
-    // a valid, open reader is required
-    const bool readerIsUsable = ( m_reader != 0 && m_reader->IsOpen() );
-    if ( !readerIsUsable ) {
-        SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
-        return false;
-    }
-
-    // ask the index for the nearest offset; lookup failures become the error string
-    int64_t jumpOffset;
-    try {
-        GetOffset(region, jumpOffset, hasAlignmentsInRegion);
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // nothing to seek to - not an error, the flag already tells the caller
-    // (BamReader is expected to check the flag before trying to load data)
-    if ( !(*hasAlignmentsInRegion) )
-        return true;
-
-    // alignments exist: success now depends on the seek itself
-    return m_reader->Seek(jumpOffset);
-}
-
-// opens an existing index file, validates its magic number, & loads its summary into memory
-// returns false (with the error string set) if any of those steps throws a BamException
-bool BamStandardIndex::Load(const std::string& filename) {
-
-    try {
-        OpenFile(filename, "rb");   // read-only
-        CheckMagicNumber();         // validate format
-        SummarizeIndexFile();       // build in-memory summary of index data
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // all steps completed
-    return true;
-}
-
-// Reads the linear offset at position 'index' within a reference's linear offset list,
-// directly from the index file.
-// Throws BamException (via Seek/ReadLinearOffset) if seeking or reading fails.
-uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
-
-    // file position = start of this reference's linear offsets + (index * entry size)
-    int64_t filePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition;
-    filePosition += index*BamStandardIndex::SIZEOF_LINEAROFFSET;
-    Seek(filePosition, SEEK_SET);
-
-    // fetch the entry stored there
-    uint64_t result;
-    ReadLinearOffset(result);
-    return result;
-}
-
-// Collapses runs of adjacent chunks in-place.
-// A chunk is folded into its predecessor when the predecessor's Stop and the chunk's
-// Start are equal once their low 16 bits are discarded; the merged chunk then takes
-// over the later chunk's Stop.
-void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
-
-    // nothing to merge
-    if ( chunks.empty() )
-        return;
-
-    // seed the result with the first chunk
-    BaiAlignmentChunkVector merged;
-    merged.push_back( chunks.front() );
-
-    // walk the remaining chunks, always comparing against the most recently kept chunk
-    const size_t numChunks = chunks.size();
-    for ( size_t c = 1; c < numChunks; ++c ) {
-
-        const BaiAlignmentChunk& next = chunks[c];
-        BaiAlignmentChunk& tail = merged.back();
-
-        // tail ends where next starts => extend tail
-        if ( (tail.Stop >> 16) == (next.Start >> 16) )
-            tail.Stop = next.Stop;
-
-        // otherwise next becomes the new tail for subsequent comparisons
-        else
-            merged.push_back(next);
-    }
-
-    // hand merged result back through the parameter
-    chunks = merged;
-}
-
-// Closes any previously opened index file, then opens 'filename' using fopen() 'mode'.
-// Throws BamException if the file cannot be opened.
-void BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
-
-    // only one index file at a time
-    CloseFile();
-
-    // try to open; done if that worked
-    Resources.IndexStream = fopen(filename.c_str(), mode);
-    if ( IsFileOpen() )
-        return;
-
-    // report failure, naming the file
-    string errorMessage("could not open file: ");
-    errorMessage += filename;
-    throw BamException("BamStandardIndex::OpenFile", errorMessage);
-}
-
-// Reads one 32-bit bin ID from the index stream into 'binId'
-// (byte-swapped when m_isBigEndian is set).
-// Throws BamException if the value cannot be read; 'binId' is not byte-swapped in that case.
-void BamStandardIndex::ReadBinID(uint32_t& binId) {
-    const size_t elementsRead = fread(&binId, sizeof(binId), 1, Resources.IndexStream);
-
-    // validate the read first, so a failed read never byte-swaps a stale/indeterminate value
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
-    if ( m_isBigEndian ) SwapEndian_32(binId);
-}
-
-// Reads one bin from the index stream: its ID and chunk count are returned through the
-// parameters, and its raw alignment chunk data is loaded into the shared I/O buffer.
-// Throws BamException if any read fails or if the stored chunk count is negative.
-void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
-
-    // read bin header
-    ReadBinID(binId);
-    ReadNumAlignmentChunks(numAlignmentChunks);
-
-    // a negative count can only come from a corrupt file; reject it here rather than
-    // letting it turn into a huge unsigned byte request below
-    if ( numAlignmentChunks < 0 )
-        throw BamException("BamStandardIndex::ReadBinIntoBuffer", "invalid BAI chunk count");
-
-    // read bin contents
-    const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
-    ReadIntoBuffer(bytesRequested);
-}
-
-// Reads exactly 'bytesRequested' bytes from the index stream into the shared I/O buffer,
-// growing the buffer first if necessary.
-// Throws BamException on a short read (or if the buffer cannot be grown).
-void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
-
-    // grow the I/O buffer if this request would not fit
-    BamStandardIndex::CheckBufferSize(Resources.Buffer, m_bufferLength, bytesRequested);
-
-    // pull the bytes from the BAI stream; a complete read means we are done
-    const size_t bytesRead = fread( Resources.Buffer, sizeof(char), bytesRequested, Resources.IndexStream );
-    if ( bytesRead == (size_t)bytesRequested )
-        return;
-
-    // short read - report expected vs. actual byte counts
-    stringstream message("");
-    message << "expected to read: " << bytesRequested << " bytes, "
-            << "but instead read: " << bytesRead;
-    throw BamException("BamStandardIndex::ReadIntoBuffer", message.str());
-}
-
-// Reads one 64-bit linear offset from the index stream into 'linearOffset'
-// (byte-swapped when m_isBigEndian is set).
-// Throws BamException if the value cannot be read; 'linearOffset' is not byte-swapped in that case.
-void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
-    const size_t elementsRead = fread(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
-
-    // validate the read first, so a failed read never byte-swaps a stale/indeterminate value
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
-    if ( m_isBigEndian ) SwapEndian_64(linearOffset);
-}
-
-// Reads a bin's alignment chunk count from the index stream into 'numAlignmentChunks'
-// (byte-swapped when m_isBigEndian is set).
-// Throws BamException if the value cannot be read; the output is not byte-swapped in that case.
-void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
-    const size_t elementsRead = fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, Resources.IndexStream);
-
-    // validate the read first, so a failed read never byte-swaps a stale/indeterminate value
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
-    if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
-}
-
-// Reads a reference's bin count from the index stream into 'numBins'
-// (byte-swapped when m_isBigEndian is set).
-// Throws BamException if the value cannot be read; 'numBins' is not byte-swapped in that case.
-void BamStandardIndex::ReadNumBins(int& numBins) {
-    const size_t elementsRead = fread(&numBins, sizeof(numBins), 1, Resources.IndexStream);
-
-    // validate the read first, so a failed read never byte-swaps a stale/indeterminate value
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
-    if ( m_isBigEndian ) SwapEndian_32(numBins);
-}
-
-// Reads a reference's linear offset count from the index stream into 'numLinearOffsets'
-// (byte-swapped when m_isBigEndian is set).
-// Throws BamException if the value cannot be read; the output is not byte-swapped in that case.
-void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
-    const size_t elementsRead = fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, Resources.IndexStream);
-
-    // validate the read first, so a failed read never byte-swaps a stale/indeterminate value
-    // (exception context names this method; it previously named ReadNumAlignmentChunks)
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumLinearOffsets", "could not read BAI linear offset count");
-    if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
-}
-
-// Reads the number of references from the index stream into 'numReferences'
-// (byte-swapped when m_isBigEndian is set).
-// Throws BamException if the value cannot be read; the output is not byte-swapped in that case.
-void BamStandardIndex::ReadNumReferences(int& numReferences) {
-    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-
-    // validate the read first, so a failed read never byte-swaps a stale/indeterminate value
-    if ( elementsRead != 1 )
-        throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
-    if ( m_isBigEndian ) SwapEndian_32(numReferences);
-}
-
-// Replaces the in-memory file summary with 'numReferences' default (all-zero) summaries.
-void BamStandardIndex::ReserveForSummary(const int& numReferences) {
-    const BaiReferenceSummary emptySummary;
-    m_indexFileSummary.clear();
-    m_indexFileSummary.resize( numReferences, emptySummary );
-}
-
-// Appends the chunk [currentOffset, lastOffset] to the chunk list of bin 'currentBin',
-// creating that bin's entry in 'binMap' if it does not exist yet.
-void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
-                                               const uint32_t& currentBin,
-                                               const uint64_t& currentOffset,
-                                               const uint64_t& lastOffset)
-{
-    // operator[] yields the bin's existing chunk list, or inserts an empty one first
-    BaiAlignmentChunkVector& chunksForBin = binMap[currentBin];
-    chunksForBin.push_back( BaiAlignmentChunk(currentOffset, lastOffset) );
-}
-
-// Records, in the summary for reference 'refId', the bin count and the current
-// index file position (i.e. where that reference's bin data begins).
-void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
-    BaiReferenceSummary& summary = m_indexFileSummary.at(refId);
-    summary.FirstBinFilePosition = Tell();
-    summary.NumBins = numBins;
-}
-
-// Updates a reference's linear offsets for one alignment.
-// Start/stop positions are converted to window indices (position >> BAM_LIDX_SHIFT);
-// every still-unset (zero) window AFTER the alignment's first window, up to & including
-// its last window, is assigned 'lastOffset'. The vector is grown (zero-filled) as needed.
-void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
-                                             const int& alignmentStartPosition,
-                                             const int& alignmentStopPosition,
-                                             const uint64_t& lastOffset)
-{
-    // windows touched by the alignment (stop position is treated as exclusive)
-    const int firstWindow = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
-    const int lastWindow  = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
-
-    // make room for the last window, zero-filling any new slots
-    const int requiredSize = lastWindow + 1;
-    if ( (int)offsets.size() < requiredSize )
-        offsets.resize(requiredSize, 0);
-
-    // fill in windows that have no offset recorded yet
-    int window = firstWindow + 1;
-    while ( window <= lastWindow ) {
-        if ( offsets[window] == 0 )
-            offsets[window] = lastOffset;
-        ++window;
-    }
-}
-
-// Records, in the summary for reference 'refId', the linear offset count and the current
-// index file position (i.e. where that reference's linear offsets begin).
-void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
-    BaiReferenceSummary& summary = m_indexFileSummary.at(refId);
-    summary.FirstLinearOffsetFilePosition = Tell();
-    summary.NumLinearOffsets = numLinearOffsets;
-}
-
-// moves the index file stream to 'position', interpreted relative to 'origin'
-// throws BamException if the underlying seek reports failure
-void BamStandardIndex::Seek(const int64_t& position, const int& origin) {
-    const int seekResult = fseek64(Resources.IndexStream, position, origin);
-    if ( seekResult != 0 )
-        throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
-}
-
-// Advances the index stream past 'numBins' bins by reading (and discarding) each one.
-void BamStandardIndex::SkipBins(const int& numBins) {
-
-    // placeholders only - neither the values nor the buffer contents are used
-    uint32_t ignoredBinId;
-    int32_t ignoredChunkCount;
-
-    int binsLeft = numBins;
-    while ( binsLeft > 0 ) {
-        ReadBinIntoBuffer(ignoredBinId, ignoredChunkCount);
-        --binsLeft;
-    }
-}
-
-// Advances the index stream past 'numLinearOffsets' linear offset entries by reading
-// them into the shared I/O buffer (contents are ignored).
-// Throws BamException if the count is negative or the read fails.
-void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
-
-    // a negative count can only come from a corrupt file; reject it here rather than
-    // letting it wrap around into a huge unsigned byte request below
-    if ( numLinearOffsets < 0 )
-        throw BamException("BamStandardIndex::SkipLinearOffsets", "invalid BAI linear offset count");
-
-    const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
-    ReadIntoBuffer(bytesRequested);
-}
-
-// Sorts the linear offsets into ascending order, in place.
-void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
-    BaiLinearOffsetVector::iterator first = linearOffsets.begin();
-    BaiLinearOffsetVector::iterator last  = linearOffsets.end();
-    sort(first, last);
-}
-
-// Reads a reference's bin count from the index file, stores it (together with the file
-// position where the bin data starts) in 'refSummary', then skips over the bin data.
-void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
-
-    // bin count comes first in the file
-    int binCount;
-    ReadNumBins(binCount);
-
-    // remember how many bins there are & where they begin
-    refSummary.FirstBinFilePosition = Tell();
-    refSummary.NumBins = binCount;
-
-    // move past the bin data itself
-    SkipBins(binCount);
-}
-
-// Builds the in-memory summary of the index file: reads the reference count, then
-// summarizes each reference entry in file order.
-// Throws BamException if the reference count is negative or any read fails.
-void BamStandardIndex::SummarizeIndexFile(void) {
-
-    // load number of reference sequences
-    int numReferences;
-    ReadNumReferences(numReferences);
-
-    // a negative count can only come from a corrupt file; report it as a BamException
-    // (which Load() handles) instead of letting the container allocation below throw
-    if ( numReferences < 0 )
-        throw BamException("BamStandardIndex::SummarizeIndexFile", "invalid reference count in BAI file");
-
-    // initialize file summary data
-    ReserveForSummary(numReferences);
-
-    // iterate over reference entries
-    BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
-    BaiFileSummary::iterator summaryEnd  = m_indexFileSummary.end();
-    for ( ; summaryIter != summaryEnd; ++summaryIter )
-        SummarizeReference(*summaryIter);
-}
-
-// Reads a reference's linear offset count from the index file, stores it (together with
-// the file position where the offsets start) in 'refSummary', then skips over the offsets.
-void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
-
-    // offset count comes first in the file
-    int offsetCount;
-    ReadNumLinearOffsets(offsetCount);
-
-    // remember how many offsets there are & where they begin
-    refSummary.FirstLinearOffsetFilePosition = Tell();
-    refSummary.NumLinearOffsets = offsetCount;
-
-    // move past the offsets themselves
-    SkipLinearOffsets(offsetCount);
-}
-
-// Summarizes one reference entry from the index file.
-// Order matters: bins are read first, then linear offsets.
-void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary)
-{
-    SummarizeBins(refSummary);            // bin count + position, skips bin data
-    SummarizeLinearOffsets(refSummary);   // offset count + position, skips offsets
-}
-
-// reports the current position of the file pointer in the index file stream
-int64_t BamStandardIndex::Tell(void) const {
-    const int64_t currentPosition = ftell64(Resources.IndexStream);
-    return currentPosition;
-}
-
-// Writes one alignment chunk (Start, then Stop) to the index file.
-// Throws BamException unless both values are written.
-void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
-
-    // work on copies, so the caller's chunk is never byte-swapped
-    uint64_t chunkStart = chunk.Start;
-    uint64_t chunkStop  = chunk.Stop;
-    if ( m_isBigEndian ) {
-        SwapEndian_64(chunkStart);
-        SwapEndian_64(chunkStop);
-    }
-
-    // emit both values, then verify that each write succeeded
-    const size_t wroteStart = fwrite(&chunkStart, sizeof(chunkStart), 1, Resources.IndexStream);
-    const size_t wroteStop  = fwrite(&chunkStop,  sizeof(chunkStop),  1, Resources.IndexStream);
-    if ( wroteStart + wroteStop != 2 )
-        throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
-}
-
-// Merges adjacent chunks in 'chunks' (modifying the caller's vector), then writes the
-// resulting chunk count followed by each chunk to the index file.
-// Throws BamException if any write fails.
-void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
-
-    // simplify the chunk list before anything is written
-    MergeAlignmentChunks(chunks);
-
-    // chunk count goes first (byte-swapped copy when m_isBigEndian is set)
-    int32_t countToWrite = chunks.size();
-    if ( m_isBigEndian ) SwapEndian_32(countToWrite);
-    if ( fwrite(&countToWrite, sizeof(countToWrite), 1, Resources.IndexStream) != 1 )
-        throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
-
-    // followed by the chunks themselves, in order
-    const size_t numChunks = chunks.size();
-    for ( size_t c = 0; c < numChunks; ++c )
-        WriteAlignmentChunk( chunks[c] );
-}
-
-// Writes one bin to the index file: its ID, followed by its (merged) alignment chunks.
-// Throws BamException if any write fails.
-void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
-
-    // bin ID goes first (byte-swapped copy when m_isBigEndian is set)
-    uint32_t idToWrite = binId;
-    if ( m_isBigEndian ) SwapEndian_32(idToWrite);
-    if ( fwrite(&idToWrite, sizeof(idToWrite), 1, Resources.IndexStream) != 1 )
-        throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
-
-    // then the bin's chunk data
-    WriteAlignmentChunks(chunks);
-}
-
-// Writes all bins of reference 'refId' to the index file (bin count first, then each bin)
-// and records the reference's bin summary.
-// Throws BamException if any write fails.
-void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
-
-    // bin count goes first (byte-swapped copy when m_isBigEndian is set)
-    int32_t countToWrite = bins.size();
-    if ( m_isBigEndian ) SwapEndian_32(countToWrite);
-    if ( fwrite(&countToWrite, sizeof(countToWrite), 1, Resources.IndexStream) != 1 )
-        throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
-
-    // summary must be taken here: it captures the file position right after the count,
-    // which is where this reference's bin data begins
-    SaveBinsSummary(refId, bins.size());
-
-    // write out each bin, in map order
-    for ( BaiBinMap::iterator bin = bins.begin(); bin != bins.end(); ++bin )
-        WriteBin( bin->first, bin->second );
-}
-
-// Writes the index file header: the 4-byte BAI magic number followed by the number of
-// references (taken from the size of the in-memory summary).
-// Throws BamException unless all 5 elements (4 magic chars + 1 count) are written.
-void BamStandardIndex::WriteHeader(void) {
-
-    // magic number
-    const size_t magicWritten = fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, Resources.IndexStream);
-
-    // reference count (byte-swapped copy when m_isBigEndian is set)
-    int32_t referenceCount = m_indexFileSummary.size();
-    if ( m_isBigEndian ) SwapEndian_32(referenceCount);
-    const size_t countWritten = fwrite(&referenceCount, sizeof(referenceCount), 1, Resources.IndexStream);
-
-    // both writes must have completed in full
-    if ( magicWritten + countWritten != 5 )
-        throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
-}
-
-// Sorts the linear offsets of reference 'refId' (modifying the caller's vector), writes
-// their count followed by each offset to the index file, and records the reference's
-// linear offset summary.
-// Throws BamException unless the count and every offset are written.
-void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
-
-    // offsets are written in sorted order
-    SortLinearOffsets(linearOffsets);
-
-    // offset count goes first (byte-swapped copy when m_isBigEndian is set)
-    int32_t countToWrite = linearOffsets.size();
-    if ( m_isBigEndian ) SwapEndian_32(countToWrite);
-    size_t elementsWritten = fwrite(&countToWrite, sizeof(countToWrite), 1, Resources.IndexStream);
-
-    // summary must be taken here: it captures the file position right after the count,
-    // which is where this reference's linear offsets begin
-    SaveLinearOffsetsSummary(refId, linearOffsets.size());
-
-    // write each offset, tallying successful writes
-    const size_t numOffsets = linearOffsets.size();
-    for ( size_t i = 0; i < numOffsets; ++i ) {
-        uint64_t offsetToWrite = linearOffsets[i];
-        if ( m_isBigEndian ) SwapEndian_64(offsetToWrite);
-        elementsWritten += fwrite(&offsetToWrite, sizeof(offsetToWrite), 1, Resources.IndexStream);
-    }
-
-    // expected tally: one count + every offset
-    if ( elementsWritten != (numOffsets + 1) )
-        throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
-}
-
-// Writes one complete reference entry to the index file.
-// Order matters: bins are written first, then linear offsets.
-void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry)
-{
-    const int32_t& referenceId = refEntry.ID;
-    WriteBins(referenceId, refEntry.Bins);
-    WriteLinearOffsets(referenceId, refEntry.LinearOffsets);
-}
diff --git a/src/api/internal/BamStandardIndex_p.h b/src/api/internal/BamStandardIndex_p.h
deleted file mode 100644 (file)
index 03e0042..0000000
+++ /dev/null
@@ -1,236 +0,0 @@
-// ***************************************************************************
-// BamStandardIndex.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the standardized BAM index format (".bai")
-// ***************************************************************************
-
-#ifndef BAM_STANDARD_INDEX_FORMAT_H
-#define BAM_STANDARD_INDEX_FORMAT_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail.  This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-// -----------------------------------------------------------------------------
-// BamStandardIndex data structures
-
-// describes one contiguous run of alignments by the offsets where it starts & stops
-struct BaiAlignmentChunk {
-
-    // offsets marking the beginning & end of the run
-    uint64_t Start;
-    uint64_t Stop;
-
-    // ctor - either offset defaults to 0 when omitted
-    BaiAlignmentChunk(const uint64_t& start = 0,
-                      const uint64_t& stop = 0)
-        : Start(start), Stop(stop)
-    { }
-};
-
-// ordering for sorting: chunks compare by Start offset only (Stop is ignored)
-inline
-bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
-    const bool lhsStartsFirst = ( rhs.Start > lhs.Start );
-    return lhsStartsFirst;
-}
-
-// convenience typedef for a list of all alignment 'chunks' in a BAI bin
-typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
-
-// convenience typedef for a map of all BAI bins in a reference (ID => chunks)
-typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
-
-// convenience typedef for a list of all 'linear offsets' in a reference
-typedef std::vector<uint64_t> BaiLinearOffsetVector;
-
-// full BAI index data for a single reference:
-// everything needed to build, load, & write that reference's part of the index
-struct BaiReferenceEntry {
-
-    // reference ID, plus its bins (ID => chunks) & linear offsets
-    int32_t ID;
-    BaiBinMap Bins;
-    BaiLinearOffsetVector LinearOffsets;
-
-    // ctor - ID defaults to -1; bins & linear offsets start out empty
-    BaiReferenceEntry(const int32_t& id = -1) : ID(id) { }
-};
-
-// (persistent) summary of one reference's index data:
-// how many bins & linear offsets it has, and where each section begins in the index file
-struct BaiReferenceSummary {
-
-    // entry counts
-    int NumBins;
-    int NumLinearOffsets;
-
-    // index file positions of the first bin & the first linear offset
-    uint64_t FirstBinFilePosition;
-    uint64_t FirstLinearOffsetFilePosition;
-
-    // ctor - everything starts at 0
-    BaiReferenceSummary(void)
-        : NumBins(0), NumLinearOffsets(0)
-        , FirstBinFilePosition(0), FirstLinearOffsetFilePosition(0)
-    { }
-};
-
-// convenience typedef for describing a full BAI index file summary
-typedef std::vector<BaiReferenceSummary> BaiFileSummary;
-
-// end BamStandardIndex data structures
-// -----------------------------------------------------------------------------
-
-// BamIndex subclass for the BAI index format.
-// Only declarations live here; the member definitions are in the matching .cpp file.
-class BamStandardIndex : public BamIndex {
-
-    // ctor & dtor
-    public:
-        BamStandardIndex(Internal::BamReaderPrivate* reader);
-        ~BamStandardIndex(void);
-
-    // BamIndex implementation
-    public:
-        // builds index from associated BAM file & writes out to index file
-        bool Create(void);
-        // returns whether reference has alignments or no
-        bool HasAlignments(const int& referenceID) const;
-        // attempts to use index data to jump to @region, returns success/fail
-        // a "successful" jump indicates no error, but not whether this region has data
-        //   * thus, the method sets a flag to indicate whether there are alignments
-        //     available after the jump position
-        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
-        // loads existing data from file into memory
-        bool Load(const std::string& filename);
-    public:
-        // returns format's file extension
-        static const std::string Extension(void);
-
-    // internal methods
-    private:
-
-        // index file ops
-        void CheckMagicNumber(void);
-        void CloseFile(void);
-        bool IsFileOpen(void) const;
-        void OpenFile(const std::string& filename, const char* mode);
-        void Seek(const int64_t& position, const int& origin);
-        int64_t Tell(void) const;
-
-        // BAI index building methods
-        void ClearReferenceEntry(BaiReferenceEntry& refEntry);
-        void SaveAlignmentChunkToBin(BaiBinMap& binMap,
-                                     const uint32_t& currentBin,
-                                     const uint64_t& currentOffset,
-                                     const uint64_t& lastOffset);
-        void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
-                                   const int& alignmentStartPosition,
-                                   const int& alignmentStopPosition,
-                                   const uint64_t& lastOffset);
-
-        // random-access methods
-        void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
-        void CalculateCandidateBins(const uint32_t& begin,
-                                    const uint32_t& end,
-                                    std::set<uint16_t>& candidateBins);
-        void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
-                                       const uint64_t& minOffset,
-                                       std::set<uint16_t>& candidateBins,
-                                       std::vector<int64_t>& offsets);
-        uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
-        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
-        uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
-
-        // BAI summary (create/load) methods
-        void ReserveForSummary(const int& numReferences);
-        void SaveBinsSummary(const int& refId, const int& numBins);
-        void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
-        void SkipBins(const int& numBins);
-        void SkipLinearOffsets(const int& numLinearOffsets);
-        void SummarizeBins(BaiReferenceSummary& refSummary);
-        void SummarizeIndexFile(void);
-        void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
-        void SummarizeReference(BaiReferenceSummary& refSummary);
-
-        // BAI full index input methods
-        void ReadBinID(uint32_t& binId);
-        void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
-        void ReadIntoBuffer(const unsigned int& bytesRequested);
-        void ReadLinearOffset(uint64_t& linearOffset);
-        void ReadNumAlignmentChunks(int& numAlignmentChunks);
-        void ReadNumBins(int& numBins);
-        void ReadNumLinearOffsets(int& numLinearOffsets);
-        void ReadNumReferences(int& numReferences);
-
-        // BAI full index output methods
-        void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
-        void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
-        void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
-        void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
-        void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
-        void WriteBins(const int& refId, BaiBinMap& bins);
-        void WriteHeader(void);
-        void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
-        void WriteReferenceEntry(BaiReferenceEntry& refEntry);
-
-    // data members
-    private:
-        bool m_isBigEndian;
-        // one summary per reference (see BaiFileSummary typedef)
-        BaiFileSummary m_indexFileSummary;
-
-        // our input buffer
-        unsigned int m_bufferLength;
-
-        // groups the index file handle & the raw input buffer in one helper object
-        // NOTE(review): presumably its dtor releases both - the definition is not
-        //               in this header, confirm in the .cpp
-        struct RaiiWrapper {
-            FILE* IndexStream;
-            char* Buffer;
-            RaiiWrapper(void);
-            ~RaiiWrapper(void);
-        };
-        RaiiWrapper Resources;
-
-    // static methods
-    private:
-        // checks if the buffer is large enough to accommodate the requested size
-        static void CheckBufferSize(char*& buffer,
-                                    unsigned int& bufferLength,
-                                    const unsigned int& requestedBytes);
-        // checks if the buffer is large enough to accommodate the requested size
-        static void CheckBufferSize(unsigned char*& buffer,
-                                    unsigned int& bufferLength,
-                                    const unsigned int& requestedBytes);
-    // static constants
-    private:
-        static const int MAX_BIN;
-        static const int BAM_LIDX_SHIFT;
-        static const std::string BAI_EXTENSION;
-        static const char* const BAI_MAGIC;
-        static const int SIZEOF_ALIGNMENTCHUNK;
-        static const int SIZEOF_BINCORE;
-        static const int SIZEOF_LINEAROFFSET;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAM_STANDARD_INDEX_FORMAT_H
diff --git a/src/api/internal/BamToolsIndex_p.cpp b/src/api/internal/BamToolsIndex_p.cpp
deleted file mode 100644 (file)
index cdf3d10..0000000
+++ /dev/null
@@ -1,615 +0,0 @@
-// ***************************************************************************
-// BamToolsIndex.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the BamTools index format (".bti")
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-#include <iostream>
-#include <iterator>
-#include <map>
-using namespace std;
-
-// --------------------------------
-// static BamToolsIndex constants
-// --------------------------------
-
-// number of alignments that fill one BTI block, unless a loaded index header overrides it
-const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
-// file extension used for BTI index files
-const string BamToolsIndex::BTI_EXTENSION     = ".bti";
-// 4-byte signature found at the start of a BTI file
-const char* const BamToolsIndex::BTI_MAGIC    = "BTI\1";
-// bytes per serialized block: two 32-bit positions plus one 64-bit offset
-const int BamToolsIndex::SIZEOF_BLOCK         = sizeof(int32_t)*2 + sizeof(int64_t);
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-// starts with no index stream attached (null handle)
-BamToolsIndex::RaiiWrapper::RaiiWrapper(void)
-    : IndexStream(0)
-{ }
-
-// closes the index stream if one is still attached when the wrapper goes away
-BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
-    if ( IndexStream != 0 ) {
-        fclose(IndexStream);
-    }
-}
-
-// ------------------------------
-// BamToolsIndex implementation
-// ------------------------------
-
-// ctor
-// sets up an index bound to @reader: default block size, no input version read yet,
-// and the newest BTI version selected for any index file written later
-BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
-    : BamIndex(reader)
-    , m_isBigEndian(BamTools::SystemIsBigEndian())
-    , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
-    , m_inputVersion(0)
-    , m_outputVersion(BTI_2_0)
-{ }
-
-// dtor
-// closes the index file (if open) and drops the in-memory file summary
-BamToolsIndex::~BamToolsIndex(void) {
-    CloseFile();
-}
-
-// reads the next 4 bytes of the index stream and compares them to BTI_MAGIC
-// throws BamException if the bytes cannot be read or do not match
-void BamToolsIndex::CheckMagicNumber(void) {
-
-    // fetch the signature bytes
-    char signature[4];
-    const size_t numRead = fread(signature, sizeof(char), 4, Resources.IndexStream);
-    if ( numRead != 4 )
-        throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
-
-    // compare against the expected signature
-    const bool isBtiMagic = ( strncmp(signature, BamToolsIndex::BTI_MAGIC, 4) == 0 );
-    if ( !isBtiMagic )
-        throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
-}
-
-// check index file version, return true if OK
-void BamToolsIndex::CheckVersion(void) {
-
-    // read version from file
-    size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, Resources.IndexStream);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
-    if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
-
-    // if version is negative, or zero
-    if ( m_inputVersion <= 0 )
-        throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
-
-    // if version is newer than can be supported by this version of bamtools
-    else if ( m_inputVersion > m_outputVersion ) {
-        const string message = "unsupported format: this index was created by a newer version of BamTools. "
-                               "Update your local version of BamTools to use the index file.";
-        throw BamException("BamToolsIndex::CheckVersion", message);
-    }
-
-    // ------------------------------------------------------------------
-    // check for deprecated, unsupported versions
-    // (the format had to be modified to accomodate a particular bug fix)
-
-    // Version 2.0: introduced support for half-open intervals, instead of the old closed intervals
-    //   respondBy: throwing exception - we're not going to try to handle the old BTI files.
-    else if ( (Version)m_inputVersion < BamToolsIndex::BTI_2_0 ) {
-        const string message = "unsupported format: this version of the index may not properly handle "
-                               "coordinate intervals. Please run 'bamtools index -bti -in yourData.bam' "
-                               "to generate an up-to-date, fixed BTI file.";
-        throw BamException("BamToolsIndex::CheckVersion", message);
-    }
-}
-
-// returns @refEntry to its "unassigned" state: no blocks, ID of -1
-void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
-    refEntry.Blocks.clear();
-    refEntry.ID = -1;
-}
-
-// closes the index stream (when one is open) and discards the in-memory file summary
-void BamToolsIndex::CloseFile(void) {
-
-    // release the stream & mark it as closed
-    if ( Resources.IndexStream != 0 ) {
-        fclose(Resources.IndexStream);
-        Resources.IndexStream = 0;
-    }
-
-    // summary data is dropped whether or not a file was open
-    m_indexFileSummary.clear();
-}
-
-// builds index from associated BAM file & writes out to index file
-bool BamToolsIndex::Create(void) {
-
-    // skip if BamReader is invalid or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamToolsIndex::Create", message);
-        return false;
-    }
-
-    try {
-        // open new index file (read & write)
-        const string indexFilename = m_reader->Filename() + Extension();
-        OpenFile(indexFilename, "w+b");
-
-        // initialize BtiFileSummary with number of references
-        const int& numReferences = m_reader->GetReferenceCount();
-        InitializeFileSummary(numReferences);
-
-        // intialize output file header
-        WriteHeader();
-
-        // index building markers
-        uint32_t currentBlockCount      = 0;
-        int64_t currentAlignmentOffset  = m_reader->Tell();
-        int32_t blockRefId              = -1;
-        int32_t blockMaxEndPosition     = -1;
-        int64_t blockStartOffset        = currentAlignmentOffset;
-        int32_t blockStartPosition      = -1;
-
-        // plow through alignments, storing index entries
-        BamAlignment al;
-        BtiReferenceEntry refEntry;
-        while ( m_reader->LoadNextAlignment(al) ) {
-
-            // if moved to new reference
-            if ( al.RefID != blockRefId ) {
-
-                // if first pass, check:
-                if ( currentBlockCount == 0 ) {
-
-                    // write any empty references up to (but not including) al.RefID
-                    for ( int i = 0; i < al.RefID; ++i )
-                        WriteReferenceEntry( BtiReferenceEntry(i) );
-                }
-
-                // not first pass:
-                else {
-
-                    // store previous BTI block data in reference entry
-                    const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-                    refEntry.Blocks.push_back(block);
-
-                    // write reference entry, then clear
-                    WriteReferenceEntry(refEntry);
-                    ClearReferenceEntry(refEntry);
-
-                    // write any empty references between (but not including)
-                    // the last blockRefID and current al.RefID
-                    for ( int i = blockRefId+1; i < al.RefID; ++i )
-                        WriteReferenceEntry( BtiReferenceEntry(i) );
-
-                    // reset block count
-                    currentBlockCount = 0;
-                }
-
-                // set ID for new reference entry
-                refEntry.ID = al.RefID;
-            }
-
-            // if beginning of block, update counters
-            if ( currentBlockCount == 0 ) {
-                blockRefId          = al.RefID;
-                blockStartOffset    = currentAlignmentOffset;
-                blockStartPosition  = al.Position;
-                blockMaxEndPosition = al.GetEndPosition();
-            }
-
-            // increment block counter
-            ++currentBlockCount;
-
-            // check end position
-            const int32_t alignmentEndPosition = al.GetEndPosition();
-            if ( alignmentEndPosition > blockMaxEndPosition )
-                blockMaxEndPosition = alignmentEndPosition;
-
-            // if block is full, get offset for next block, reset currentBlockCount
-            if ( currentBlockCount == m_blockSize ) {
-
-                // store previous block data in reference entry
-                const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-                refEntry.Blocks.push_back(block);
-
-                // update markers
-                blockStartOffset  = m_reader->Tell();
-                currentBlockCount = 0;
-            }
-
-            // not the best name, but for the next iteration, this value will be the offset of the
-            // *current* alignment. this is necessary because we won't know if this next alignment
-            // is on a new reference until we actually read it
-            currentAlignmentOffset = m_reader->Tell();
-        }
-
-        // after finishing alignments, if any data was read, check:
-        if ( blockRefId >= 0 ) {
-
-            // store last BTI block data in reference entry
-            const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
-            refEntry.Blocks.push_back(block);
-
-            // write last reference entry, then clear
-            WriteReferenceEntry(refEntry);
-            ClearReferenceEntry(refEntry);
-
-            // then write any empty references remaining at end of file
-            for ( int i = blockRefId+1; i < numReferences; ++i )
-                WriteReferenceEntry( BtiReferenceEntry(i) );
-        }
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // rewind BamReader
-    if ( !m_reader->Rewind() ) {
-        const string readerError = m_reader->GetErrorString();
-        const string message = "could not create index: \n\t" + readerError;
-        SetErrorString("BamToolsIndex::Create", message);
-        return false;
-    }
-
-    // return success
-    return true;
-}
-
-// returns format's file extension (a copy of BTI_EXTENSION, i.e. ".bti")
-const std::string BamToolsIndex::Extension(void) {
-    return BamToolsIndex::BTI_EXTENSION;
-}
-
-// determines the BAM file offset to start reading from for @region
-//
-// @offset receives the StartOffset of the selected block; it is left untouched when
-// no block is selected (the reference has no blocks, or the search ran off the end)
-// @hasAlignmentsInRegion is set to true only when a block was selected
-//
-// throws BamException if region.LeftRefID is not a valid index into the file summary
-// (or if reading the reference's blocks fails)
-void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
-
-    // reject ref IDs that are not a valid index in file summary data
-    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
-        throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
-
-    // retrieve reference index data for left bound reference
-    BtiReferenceEntry refEntry(region.LeftRefID);
-    ReadReferenceEntry(refEntry);
-
-    // binary search for an overlapping block (may not be first one though)
-    bool found = false;
-    typedef BtiBlockVector::const_iterator BtiBlockConstIterator;
-    BtiBlockConstIterator blockFirst = refEntry.Blocks.begin();
-    BtiBlockConstIterator blockIter  = blockFirst;
-    BtiBlockConstIterator blockLast  = refEntry.Blocks.end();
-    iterator_traits<BtiBlockConstIterator>::difference_type count = distance(blockFirst, blockLast);
-    iterator_traits<BtiBlockConstIterator>::difference_type step;
-    while ( count > 0 ) {
-        // probe the middle of the remaining range
-        blockIter = blockFirst;
-        step = count/2;
-        advance(blockIter, step);
-
-        const BtiBlock& block = (*blockIter);
-        if ( block.StartPosition <= region.RightPosition ) {
-            // block starts before the region ends: it overlaps if it also ends after the region starts
-            if ( block.MaxEndPosition > region.LeftPosition ) {
-                offset = block.StartOffset;
-                break;
-            }
-            // block ends before the region starts: continue in the upper half
-            blockFirst = ++blockIter;
-            count -= step+1;
-        }
-        // block starts after the region ends: continue in the lower half
-        else count = step;
-    }
-
-    // if we didn't search "off the end" of the blocks
-    if ( blockIter != blockLast ) {
-
-        // "walk back" until we've gone too far
-        // (blockFirst here is the lower bound left by the search above, which may
-        //  no longer be the first block of the reference)
-        while ( blockIter != blockFirst ) {
-            const BtiBlock& currentBlock = (*blockIter);
-
-            --blockIter;
-            const BtiBlock& previousBlock = (*blockIter);
-            if ( previousBlock.MaxEndPosition <= region.LeftPosition ) {
-                offset = currentBlock.StartOffset;
-                found = true;
-                break;
-            }
-        }
-
-        // if we walked all the way to first block, just return that and let the reader's
-        // region overlap parsing do the rest
-        if ( blockIter == blockFirst ) {
-            const BtiBlock& block = (*blockIter);
-            offset = block.StartOffset;
-            found = true;
-        }
-    }
-
-
-    // sets to false if blocks container is empty, or if no matching block could be found
-    *hasAlignmentsInRegion = found;
-}
-
-// returns true if the file summary lists at least one block for @referenceID
-// (IDs outside the summary's range yield false)
-bool BamToolsIndex::HasAlignments(const int& referenceID) const {
-
-    // unknown reference: nothing to report
-    const int numSummaries = (int)m_indexFileSummary.size();
-    const bool isKnownReference = ( referenceID >= 0 && referenceID < numSummaries );
-    if ( !isKnownReference )
-        return false;
-
-    return ( m_indexFileSummary.at(referenceID).NumBlocks > 0 );
-}
-
-// resets the file summary so it holds one default (all-zero) entry per reference
-void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
-
-    // start from scratch
-    m_indexFileSummary.clear();
-
-    // append one blank summary per reference (none when @numReferences <= 0)
-    for ( int remaining = numReferences; remaining > 0; --remaining ) {
-        const BtiReferenceSummary blankSummary;
-        m_indexFileSummary.push_back(blankSummary);
-    }
-}
-
-// returns true if the index stream is open (i.e. the stored FILE handle is non-null)
-bool BamToolsIndex::IsFileOpen(void) const {
-    return ( Resources.IndexStream != 0 );
-}
-
-// attempts to use index data to jump to @region, returns success/fail
-// a "successful" jump indicates no error, but not whether this region has data
-//   * thus, the method sets a flag to indicate whether there are alignments
-//     available after the jump position
-bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) {
-
-    // clear flag
-    *hasAlignmentsInRegion = false;
-
-    // skip if invalid reader or not open
-    if ( m_reader == 0 || !m_reader->IsOpen() ) {
-        SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
-        return false;
-    }
-
-    // make sure left-bound position is valid
-    const RefVector& references = m_reader->GetReferenceData();
-    if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) {
-        SetErrorString("BamToolsIndex::Jump", "could not create index: invalid region requested");
-        return false;
-    }
-
-    // calculate nearest offset to jump to
-    int64_t offset;
-    try {
-        GetOffset(region, offset, hasAlignmentsInRegion);
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-
-    // return success/failure of seek
-    return m_reader->Seek(offset);
-}
-
-// loads existing data from file into memory
-bool BamToolsIndex::Load(const std::string& filename) {
-
-    try {
-
-        // attempt to open file (read-only)
-        OpenFile(filename, "rb");
-
-        // load metadata & generate in-memory summary
-        LoadHeader();
-        LoadFileSummary();
-
-        // return success
-        return true;
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-}
-
-// reads the reference count from the index stream, then fills in one summary
-// per reference, in file order
-void BamToolsIndex::LoadFileSummary(void) {
-
-    // how many references does the file describe?
-    int referenceCount;
-    LoadNumReferences(referenceCount);
-
-    // one blank summary per reference
-    InitializeFileSummary(referenceCount);
-
-    // populate each summary from the stream
-    const size_t numSummaries = m_indexFileSummary.size();
-    for ( size_t i = 0; i < numSummaries; ++i )
-        LoadReferenceSummary( m_indexFileSummary[i] );
-}
-
-// validates the BTI header (magic number & version) and adopts the block size stored
-// in the file. throws BamException if any part is missing or invalid; in that case
-// m_blockSize keeps its previous value.
-void BamToolsIndex::LoadHeader(void) {
-
-    // check BTI file metadata
-    CheckMagicNumber();
-    CheckVersion();
-
-    // read the file's BTI block size into a local first, so a failed or partial
-    // read can never leave a damaged value in the member
-    uint32_t blockSize = 0;
-    const size_t elementsRead = fread(&blockSize, sizeof(blockSize), 1, Resources.IndexStream);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
-
-    // only convert & store once we know the read succeeded
-    if ( m_isBigEndian ) SwapEndian_32(blockSize);
-    m_blockSize = blockSize;
-}
-
-// reads the number of blocks for the next reference from the index stream
-// @numBlocks is only assigned on success
-// throws BamException if the value cannot be read or is negative
-void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
-
-    // read into a local: callers pass an uninitialized int, which must not be
-    // byte-swapped (or otherwise used) if the read fails
-    int blockCount = 0;
-    const size_t elementsRead = fread(&blockCount, sizeof(blockCount), 1, Resources.IndexStream);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
-    if ( m_isBigEndian ) SwapEndian_32(blockCount);
-
-    // a negative count can only come from a damaged file; it would otherwise be used
-    // as a container size and as a (backwards) seek distance
-    if ( blockCount < 0 )
-        throw BamException("BamToolsIndex::LoadNumBlocks", "invalid number of BTI blocks");
-
-    numBlocks = blockCount;
-}
-
-// reads the number of references described by the index file
-// @numReferences is only assigned on success
-// throws BamException if the value cannot be read or is negative
-void BamToolsIndex::LoadNumReferences(int& numReferences) {
-
-    // read into a local: callers pass an uninitialized int, which must not be
-    // byte-swapped (or otherwise used) if the read fails
-    int referenceCount = 0;
-    const size_t elementsRead = fread(&referenceCount, sizeof(referenceCount), 1, Resources.IndexStream);
-    if ( elementsRead != 1 )
-        throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
-    if ( m_isBigEndian ) SwapEndian_32(referenceCount);
-
-    // a negative count can only come from a damaged file
-    if ( referenceCount < 0 )
-        throw BamException("BamToolsIndex::LoadNumReferences", "invalid number of references");
-
-    numReferences = referenceCount;
-}
-
-// fills @refSummary from the index stream: reads the reference's block count, notes
-// where its first block starts, then moves the stream past all of its blocks
-void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
-
-    // block count comes first in the file
-    int blockCount;
-    LoadNumBlocks(blockCount);
-    refSummary.NumBlocks = blockCount;
-
-    // the stream now sits on the first block - remember that position
-    refSummary.FirstBlockFilePosition = Tell();
-
-    // jump over the block data itself
-    SkipBlocks(blockCount);
-}
-
-// closes any index file currently open, then opens @filename using fopen @mode
-// throws BamException if the file cannot be opened
-void BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
-
-    // only one index file at a time
-    CloseFile();
-
-    // try to open the requested file
-    Resources.IndexStream = fopen(filename.c_str(), mode);
-    if ( IsFileOpen() )
-        return;
-
-    // open failed
-    const string message = string("could not open file: ") + filename;
-    throw BamException("BamToolsIndex::OpenFile", message);
-}
-
-void BamToolsIndex::ReadBlock(BtiBlock& block) {
-
-    // read in block data members
-    size_t elementsRead = 0;
-    elementsRead += fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, Resources.IndexStream);
-    elementsRead += fread(&block.StartOffset,    sizeof(block.StartOffset),    1, Resources.IndexStream);
-    elementsRead += fread(&block.StartPosition,  sizeof(block.StartPosition),  1, Resources.IndexStream);
-
-    // swap endian-ness if necessary
-    if ( m_isBigEndian ) {
-        SwapEndian_32(block.MaxEndPosition);
-        SwapEndian_64(block.StartOffset);
-        SwapEndian_32(block.StartPosition);
-    }
-
-    if ( elementsRead != 3 )
-        throw BamException("BamToolsIndex::ReadBlock", "could not read block");
-}
-
-void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
-
-    // prep blocks container
-    blocks.clear();
-    blocks.reserve(refSummary.NumBlocks);
-
-    // skip to first block entry
-    Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
-
-    // read & store block entries
-    BtiBlock block;
-    for ( int i = 0; i < refSummary.NumBlocks; ++i ) {
-        ReadBlock(block);
-        blocks.push_back(block);
-    }
-}
-
-// loads the blocks for the reference named by refEntry.ID into refEntry.Blocks
-// throws BamException if that ID has no entry in the file summary
-void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
-
-    // the ID must be a usable index into the summary
-    const int refId = refEntry.ID;
-    const bool isKnownReference = ( refId >= 0 && refId < (int)m_indexFileSummary.size() );
-    if ( !isKnownReference )
-        throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
-
-    // the summary says how many blocks to read & where they start
-    ReadBlocks( m_indexFileSummary.at(refId), refEntry.Blocks );
-}
-
-// moves the index stream to @position, interpreted relative to @origin
-// (SEEK_SET / SEEK_CUR, as passed by the callers in this file)
-// throws BamException if the seek fails
-void BamToolsIndex::Seek(const int64_t& position, const int& origin) {
-    // message names the BTI format: this class never touches BAI files
-    if ( fseek64(Resources.IndexStream, position, origin) != 0 )
-        throw BamException("BamToolsIndex::Seek", "could not seek in BTI file");
-}
-
-// advances the index stream past @numBlocks serialized blocks
-void BamToolsIndex::SkipBlocks(const int& numBlocks) {
-    // widen before multiplying, so the byte count is computed in 64 bits
-    // and cannot overflow int for large block counts
-    const int64_t numBytes = static_cast<int64_t>(numBlocks) * BamToolsIndex::SIZEOF_BLOCK;
-    Seek( numBytes, SEEK_CUR );
-}
-
-// returns the current position in the index stream, as reported by ftell64
-int64_t BamToolsIndex::Tell(void) const {
-    return ftell64(Resources.IndexStream);
-}
-
-void BamToolsIndex::WriteBlock(const BtiBlock& block) {
-
-    // copy entry data
-    int32_t maxEndPosition = block.MaxEndPosition;
-    int64_t startOffset    = block.StartOffset;
-    int32_t startPosition  = block.StartPosition;
-
-    // swap endian-ness if necessary
-    if ( m_isBigEndian ) {
-        SwapEndian_32(maxEndPosition);
-        SwapEndian_64(startOffset);
-        SwapEndian_32(startPosition);
-    }
-
-    // write the reference index entry
-    size_t elementsWritten = 0;
-    elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, Resources.IndexStream);
-    elementsWritten += fwrite(&startOffset,    sizeof(startOffset),    1, Resources.IndexStream);
-    elementsWritten += fwrite(&startPosition,  sizeof(startPosition),  1, Resources.IndexStream);
-    if ( elementsWritten != 3 )
-        throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
-}
-
-// writes every block in @blocks to the index stream, in container order
-void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
-    const size_t numBlocks = blocks.size();
-    for ( size_t i = 0; i < numBlocks; ++i )
-        WriteBlock( blocks[i] );
-}
-
-void BamToolsIndex::WriteHeader(void) {
-
-    size_t elementsWritten = 0;
-
-    // write BTI index format 'magic number'
-    elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, Resources.IndexStream);
-
-    // write BTI index format version
-    int32_t currentVersion = (int32_t)m_outputVersion;
-    if ( m_isBigEndian ) SwapEndian_32(currentVersion);
-    elementsWritten += fwrite(&currentVersion, sizeof(currentVersion), 1, Resources.IndexStream);
-
-    // write block size
-    uint32_t blockSize = m_blockSize;
-    if ( m_isBigEndian ) SwapEndian_32(blockSize);
-    elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, Resources.IndexStream);
-
-    // write number of references
-    int32_t numReferences = m_indexFileSummary.size();
-    if ( m_isBigEndian ) SwapEndian_32(numReferences);
-    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-
-    if ( elementsWritten != 7 )
-        throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
-}
-
-// writes one reference's index data: its block count (32-bit) followed by the blocks
-// throws BamException if the count (or any block) cannot be written
-void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
-
-    // block count first, in file byte order
-    uint32_t blockCount = refEntry.Blocks.size();
-    if ( m_isBigEndian )
-        SwapEndian_32(blockCount);
-
-    const size_t numWritten = fwrite(&blockCount, sizeof(blockCount), 1, Resources.IndexStream);
-    if ( numWritten != 1 )
-        throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
-
-    // then the blocks themselves
-    WriteBlocks(refEntry.Blocks);
-}
diff --git a/src/api/internal/BamToolsIndex_p.h b/src/api/internal/BamToolsIndex_p.h
deleted file mode 100644 (file)
index 7c1550b..0000000
+++ /dev/null
@@ -1,184 +0,0 @@
-// ***************************************************************************
-// BamToolsIndex.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the BamTools index format (".bti")
-// ***************************************************************************
-
-#ifndef BAMTOOLS_INDEX_FORMAT_H
-#define BAMTOOLS_INDEX_FORMAT_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail.  This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-#include <map>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-// contains data for each 'block' in a BTI index
-struct BtiBlock {
-
-    // data members
-    int32_t MaxEndPosition;
-    int64_t StartOffset;
-    int32_t StartPosition;
-
-    // ctor
-    BtiBlock(const int32_t& maxEndPosition = 0,
-             const int64_t& startOffset    = 0,
-             const int32_t& startPosition  = 0)
-        : MaxEndPosition(maxEndPosition)
-        , StartOffset(startOffset)
-        , StartPosition(startPosition)
-    { }
-};
-
-// convenience typedef for describing a list of BTI blocks on a reference
-typedef std::vector<BtiBlock> BtiBlockVector;
-
-// everything needed to build, load & write the full BTI index data
-// of a single reference
-struct BtiReferenceEntry {
-
-    // ID of the reference this entry belongs to (-1 when not assigned)
-    int32_t ID;
-    // the reference's BTI blocks
-    BtiBlockVector Blocks;
-
-    // creates an entry for reference @id with no blocks
-    BtiReferenceEntry(const int& id = -1) : ID(id) { }
-};
-
-// lightweight, persistent summary of one reference's BTI data:
-// how many blocks it has, and where the first one starts in the index file
-struct BtiReferenceSummary {
-
-    // number of blocks stored for the reference
-    int NumBlocks;
-    // index file position of the reference's first block
-    uint64_t FirstBlockFilePosition;
-
-    // both fields start out as zero
-    BtiReferenceSummary(void) : NumBlocks(0), FirstBlockFilePosition(0) { }
-};
-
-// convenience typedef for describing a full BTI index file summary
-typedef std::vector<BtiReferenceSummary> BtiFileSummary;
-
-// BamIndex subclass for the BamTools index format (".bti").
-// Only declarations live here; the member definitions are in the matching .cpp file.
-class BamToolsIndex : public BamIndex {
-
-    // keep a list of any supported versions here
-    // (might be useful later to handle any 'legacy' versions if the format changes)
-    // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
-    //
-    // so a change introduced in BTI_1_2 may be handled from then on by:
-    //
-    // if ( indexVersion >= BTI_1_2 )
-    //   do something new
-    // else
-    //   do the old thing
-    enum Version { BTI_1_0 = 1
-                 , BTI_1_1
-                 , BTI_1_2
-                 , BTI_2_0
-                 };
-
-    // ctor & dtor
-    public:
-        BamToolsIndex(Internal::BamReaderPrivate* reader);
-        ~BamToolsIndex(void);
-
-    // BamIndex implementation
-    public:
-        // builds index from associated BAM file & writes out to index file
-        bool Create(void);
-        // returns whether reference has alignments or no
-        bool HasAlignments(const int& referenceID) const;
-        // attempts to use index data to jump to @region, returns success/fail
-        // a "successful" jump indicates no error, but not whether this region has data
-        //   * thus, the method sets a flag to indicate whether there are alignments
-        //     available after the jump position
-        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
-        // loads existing data from file into memory
-        bool Load(const std::string& filename);
-    public:
-        // returns format's file extension
-        static const std::string Extension(void);
-
-    // internal methods
-    private:
-
-        // index file ops
-        void CheckMagicNumber(void);
-        void CheckVersion(void);
-        void CloseFile(void);
-        bool IsFileOpen(void) const;
-        void OpenFile(const std::string& filename, const char* mode);
-        void Seek(const int64_t& position, const int& origin);
-        int64_t Tell(void) const;
-
-        // index-creation methods
-        void ClearReferenceEntry(BtiReferenceEntry& refEntry);
-        void WriteBlock(const BtiBlock& block);
-        void WriteBlocks(const BtiBlockVector& blocks);
-        void WriteHeader(void);
-        void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
-
-        // random-access methods
-        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
-        void ReadBlock(BtiBlock& block);
-        void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
-        void ReadReferenceEntry(BtiReferenceEntry& refEntry);
-
-        // BTI summary data methods
-        void InitializeFileSummary(const int& numReferences);
-        void LoadFileSummary(void);
-        void LoadHeader(void);
-        void LoadNumBlocks(int& numBlocks);
-        void LoadNumReferences(int& numReferences);
-        void LoadReferenceSummary(BtiReferenceSummary& refSummary);
-        void SkipBlocks(const int& numBlocks);
-
-    // data members
-    private:
-        bool  m_isBigEndian;
-        // one summary per reference (see BtiFileSummary typedef)
-        BtiFileSummary m_indexFileSummary;
-        // number of alignments per block
-        uint32_t m_blockSize;
-        int32_t m_inputVersion; // Version is serialized as int
-        // format version used when writing a new index file
-        Version m_outputVersion;
-
-        // holds the index file handle in a helper object with its own ctor/dtor
-        struct RaiiWrapper {
-            FILE* IndexStream;
-            RaiiWrapper(void);
-            ~RaiiWrapper(void);
-        };
-        RaiiWrapper Resources;
-
-    // static constants
-    private:
-        static const uint32_t DEFAULT_BLOCK_LENGTH;
-        static const std::string BTI_EXTENSION;
-        static const char* const BTI_MAGIC;
-        static const int SIZEOF_BLOCK;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMTOOLS_INDEX_FORMAT_H
diff --git a/src/api/internal/BamWriter_p.cpp b/src/api/internal/BamWriter_p.cpp
deleted file mode 100644 (file)
index 1b1a3f2..0000000
+++ /dev/null
@@ -1,462 +0,0 @@
-// ***************************************************************************
-// BamWriter_p.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/BamConstants.h"
-#include "api/IBamIODevice.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamWriter_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdlib>
-#include <cstring>
-using namespace std;
-
-// ctor
-BamWriterPrivate::BamWriterPrivate(void)
-    : m_isBigEndian( BamTools::SystemIsBigEndian() )
-{ }
-
-// dtor
-BamWriterPrivate::~BamWriterPrivate(void) {
-    Close();
-}
-
-// calculates minimum bin for a BAM alignment interval [begin, end)
-uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
-    --end;
-    if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);
-    if ( (begin >> 17) == (end >> 17) ) return  585 + (begin >> 17);
-    if ( (begin >> 20) == (end >> 20) ) return   73 + (begin >> 20);
-    if ( (begin >> 23) == (end >> 23) ) return    9 + (begin >> 23);
-    if ( (begin >> 26) == (end >> 26) ) return    1 + (begin >> 26);
-    return 0;
-}
-
-// closes the alignment archive
-void BamWriterPrivate::Close(void) {
-
-    // skip if file not open
-    if ( !IsOpen() ) return;
-
-    // close output stream
-    try {
-        m_stream.Close();
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-    }
-}
-
-// creates a cigar string from the supplied alignment
-void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
-
-    // initialize
-    const size_t numCigarOperations = cigarOperations.size();
-    packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
-
-    // pack the cigar data into the string
-    unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();
-
-    // iterate over cigar operations
-    vector<CigarOp>::const_iterator coIter = cigarOperations.begin();
-    vector<CigarOp>::const_iterator coEnd  = cigarOperations.end();
-    for ( ; coIter != coEnd; ++coIter ) {
-
-        // store op in packedCigar
-        uint8_t cigarOp;
-        switch ( coIter->Type ) {
-            case (Constants::BAM_CIGAR_MATCH_CHAR)    : cigarOp = Constants::BAM_CIGAR_MATCH;    break;
-            case (Constants::BAM_CIGAR_INS_CHAR)      : cigarOp = Constants::BAM_CIGAR_INS;      break;
-            case (Constants::BAM_CIGAR_DEL_CHAR)      : cigarOp = Constants::BAM_CIGAR_DEL;      break;
-            case (Constants::BAM_CIGAR_REFSKIP_CHAR)  : cigarOp = Constants::BAM_CIGAR_REFSKIP;  break;
-            case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;
-            case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;
-            case (Constants::BAM_CIGAR_PAD_CHAR)      : cigarOp = Constants::BAM_CIGAR_PAD;      break;
-            case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
-            case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
-            default:
-                const string message = string("invalid CIGAR operation type") + coIter->Type;
-                throw BamException("BamWriter::CreatePackedCigar", message);
-        }
-
-        *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
-        pPackedCigar++;
-    }
-}
-
-// encodes the supplied query sequence into 4-bit notation
-void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
-
-    // prepare the encoded query string
-    const size_t queryLength = query.size();
-    const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
-    encodedQuery.resize(encodedQueryLength);
-    char* pEncodedQuery = (char*)encodedQuery.data();
-    const char* pQuery = (const char*)query.data();
-
-    // walk through original query sequence, encoding its bases
-    unsigned char nucleotideCode;
-    bool useHighWord = true;
-    while ( *pQuery ) {
-        switch ( *pQuery ) {
-            case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
-            case (Constants::BAM_DNA_A)     : nucleotideCode = Constants::BAM_BASECODE_A;     break;
-            case (Constants::BAM_DNA_C)     : nucleotideCode = Constants::BAM_BASECODE_C;     break;
-            case (Constants::BAM_DNA_M)     : nucleotideCode = Constants::BAM_BASECODE_M;     break;
-            case (Constants::BAM_DNA_G)     : nucleotideCode = Constants::BAM_BASECODE_G;     break;
-            case (Constants::BAM_DNA_R)     : nucleotideCode = Constants::BAM_BASECODE_R;     break;
-            case (Constants::BAM_DNA_S)     : nucleotideCode = Constants::BAM_BASECODE_S;     break;
-            case (Constants::BAM_DNA_V)     : nucleotideCode = Constants::BAM_BASECODE_V;     break;
-            case (Constants::BAM_DNA_T)     : nucleotideCode = Constants::BAM_BASECODE_T;     break;
-            case (Constants::BAM_DNA_W)     : nucleotideCode = Constants::BAM_BASECODE_W;     break;
-            case (Constants::BAM_DNA_Y)     : nucleotideCode = Constants::BAM_BASECODE_Y;     break;
-            case (Constants::BAM_DNA_H)     : nucleotideCode = Constants::BAM_BASECODE_H;     break;
-            case (Constants::BAM_DNA_K)     : nucleotideCode = Constants::BAM_BASECODE_K;     break;
-            case (Constants::BAM_DNA_D)     : nucleotideCode = Constants::BAM_BASECODE_D;     break;
-            case (Constants::BAM_DNA_B)     : nucleotideCode = Constants::BAM_BASECODE_B;     break;
-            case (Constants::BAM_DNA_N)     : nucleotideCode = Constants::BAM_BASECODE_N;     break;
-            default:
-                const string message = string("invalid base: ") + *pQuery;
-                throw BamException("BamWriter::EncodeQuerySequence", message);
-        }
-
-        // pack the nucleotide code
-        if ( useHighWord ) {
-            *pEncodedQuery = nucleotideCode << 4;
-            useHighWord = false;
-        } else {
-            *pEncodedQuery |= nucleotideCode;
-            ++pEncodedQuery;
-            useHighWord = true;
-        }
-
-        // increment the query position
-        ++pQuery;
-    }
-}
-
-// returns a description of the last error that occurred
-std::string BamWriterPrivate::GetErrorString(void) const {
-    return m_errorString;
-}
-
-// returns whether BAM file is open for writing or not
-bool BamWriterPrivate::IsOpen(void) const {
-    return m_stream.IsOpen();
-}
-
-// opens the alignment archive
-bool BamWriterPrivate::Open(const string& filename,
-                            const string& samHeaderText,
-                            const RefVector& referenceSequences)
-{
-    try {
-
-        // open the BGZF file for writing
-        m_stream.Open(filename, IBamIODevice::WriteOnly);
-
-        // write BAM file 'metadata' components
-        WriteMagicNumber();
-        WriteSamHeaderText(samHeaderText);
-        WriteReferences(referenceSequences);
-
-        // return success
-        return true;
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-}
-
-// saves the alignment to the alignment archive
-bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
-
-    try {
-
-        // if BamAlignment contains only the core data and a raw char data buffer
-        // (as a result of BamReader::GetNextAlignmentCore())
-        if ( al.SupportData.HasCoreOnly )
-            WriteCoreAlignment(al);
-
-        // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
-        // (resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code)
-        else WriteAlignment(al);
-
-        // if we get here, everything OK
-        return true;
-
-    } catch ( BamException& e ) {
-        m_errorString = e.what();
-        return false;
-    }
-}
-
-void BamWriterPrivate::SetWriteCompressed(bool ok) {
-    // modifying compression is not allowed if BAM file is open
-    if ( !IsOpen() )
-        m_stream.SetWriteCompressed(ok);
-}
-
-void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
-
-    // calculate char lengths
-    const unsigned int nameLength         = al.Name.size() + 1;
-    const unsigned int numCigarOperations = al.CigarData.size();
-    const unsigned int queryLength        = al.QueryBases.size();
-    const unsigned int tagDataLength      = al.TagData.size();
-
-    // no way to tell if alignment's bin is already defined (there is no default, invalid value)
-    // so we'll go ahead calculate its bin ID before storing
-    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
-    // create our packed cigar string
-    string packedCigar;
-    CreatePackedCigar(al.CigarData, packedCigar);
-    const unsigned int packedCigarLength = packedCigar.size();
-
-    // encode the query
-    string encodedQuery;
-    EncodeQuerySequence(al.QueryBases, encodedQuery);
-    const unsigned int encodedQueryLength = encodedQuery.size();
-
-    // write the block size
-    const unsigned int dataBlockSize = nameLength +
-                                       packedCigarLength +
-                                       encodedQueryLength +
-                                       queryLength +
-                                       tagDataLength;
-    unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-    // assign the BAM core data
-    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-    buffer[0] = al.RefID;
-    buffer[1] = al.Position;
-    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
-    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
-    buffer[4] = queryLength;
-    buffer[5] = al.MateRefID;
-    buffer[6] = al.MatePosition;
-    buffer[7] = al.InsertSize;
-
-    // swap BAM core endian-ness, if necessary
-    if ( m_isBigEndian ) {
-        for ( int i = 0; i < 8; ++i )
-            BamTools::SwapEndian_32(buffer[i]);
-    }
-
-    // write the BAM core
-    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
-    // write the query name
-    m_stream.Write(al.Name.c_str(), nameLength);
-
-    // write the packed cigar
-    if ( m_isBigEndian ) {
-        char* cigarData = new char[packedCigarLength]();
-        memcpy(cigarData, packedCigar.data(), packedCigarLength);
-        if ( m_isBigEndian ) {
-            for ( size_t i = 0; i < packedCigarLength; ++i )
-                BamTools::SwapEndian_32p(&cigarData[i]);
-        }
-        m_stream.Write(cigarData, packedCigarLength);
-        delete[] cigarData; // TODO: cleanup on Write exception thrown?
-    }
-    else
-        m_stream.Write(packedCigar.data(), packedCigarLength);
-
-    // write the encoded query sequence
-    m_stream.Write(encodedQuery.data(), encodedQueryLength);
-
-    // write the base qualities
-    char* pBaseQualities = (char*)al.Qualities.data();
-    for ( size_t i = 0; i < queryLength; ++i )
-        pBaseQualities[i] -= 33; // FASTQ conversion
-    m_stream.Write(pBaseQualities, queryLength);
-
-    // write the read group tag
-    if ( m_isBigEndian ) {
-
-        char* tagData = new char[tagDataLength]();
-        memcpy(tagData, al.TagData.data(), tagDataLength);
-
-        size_t i = 0;
-        while ( i < tagDataLength ) {
-
-            i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
-            const char type = tagData[i];     // get tag type at position i
-            ++i;
-
-            switch ( type ) {
-
-                case(Constants::BAM_TAG_TYPE_ASCII) :
-                case(Constants::BAM_TAG_TYPE_INT8)  :
-                case(Constants::BAM_TAG_TYPE_UINT8) :
-                    ++i;
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_INT16)  :
-                case(Constants::BAM_TAG_TYPE_UINT16) :
-                    BamTools::SwapEndian_16p(&tagData[i]);
-                    i += sizeof(uint16_t);
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_FLOAT)  :
-                case(Constants::BAM_TAG_TYPE_INT32)  :
-                case(Constants::BAM_TAG_TYPE_UINT32) :
-                    BamTools::SwapEndian_32p(&tagData[i]);
-                    i += sizeof(uint32_t);
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_HEX) :
-                case(Constants::BAM_TAG_TYPE_STRING) :
-                    // no endian swapping necessary for hex-string/string data
-                    while ( tagData[i] )
-                        ++i;
-                    // increment one more for null terminator
-                    ++i;
-                    break;
-
-                case(Constants::BAM_TAG_TYPE_ARRAY) :
-
-                {
-                    // read array type
-                    const char arrayType = tagData[i];
-                    ++i;
-
-                    // swap endian-ness of number of elements in place, then retrieve for loop
-                    BamTools::SwapEndian_32p(&tagData[i]);
-                    int32_t numElements;
-                    memcpy(&numElements, &tagData[i], sizeof(uint32_t));
-                    i += sizeof(uint32_t);
-
-                    // swap endian-ness of array elements
-                    for ( int j = 0; j < numElements; ++j ) {
-                        switch (arrayType) {
-                            case (Constants::BAM_TAG_TYPE_INT8)  :
-                            case (Constants::BAM_TAG_TYPE_UINT8) :
-                                // no endian-swapping necessary
-                                ++i;
-                                break;
-                            case (Constants::BAM_TAG_TYPE_INT16)  :
-                            case (Constants::BAM_TAG_TYPE_UINT16) :
-                                BamTools::SwapEndian_16p(&tagData[i]);
-                                i += sizeof(uint16_t);
-                                break;
-                            case (Constants::BAM_TAG_TYPE_FLOAT)  :
-                            case (Constants::BAM_TAG_TYPE_INT32)  :
-                            case (Constants::BAM_TAG_TYPE_UINT32) :
-                                BamTools::SwapEndian_32p(&tagData[i]);
-                                i += sizeof(uint32_t);
-                                break;
-                            default:
-                                delete[] tagData;
-                                const string message = string("invalid binary array type: ") + arrayType;
-                                throw BamException("BamWriter::SaveAlignment", message);
-                        }
-                    }
-
-                    break;
-                }
-
-                default :
-                    delete[] tagData;
-                    const string message = string("invalid tag type: ") + type;
-                    throw BamException("BamWriter::SaveAlignment", message);
-            }
-        }
-
-        m_stream.Write(tagData, tagDataLength);
-        delete[] tagData; // TODO: cleanup on Write exception thrown?
-    }
-    else
-        m_stream.Write(al.TagData.data(), tagDataLength);
-}
-
-void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
-
-    // write the block size
-    unsigned int blockSize = al.SupportData.BlockLength;
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-    // re-calculate bin (in case BamAlignment's position has been previously modified)
-    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
-    // assign the BAM core data
-    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-    buffer[0] = al.RefID;
-    buffer[1] = al.Position;
-    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
-    buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
-    buffer[4] = al.SupportData.QuerySequenceLength;
-    buffer[5] = al.MateRefID;
-    buffer[6] = al.MatePosition;
-    buffer[7] = al.InsertSize;
-
-    // swap BAM core endian-ness, if necessary
-    if ( m_isBigEndian ) {
-        for ( int i = 0; i < 8; ++i )
-            BamTools::SwapEndian_32(buffer[i]);
-    }
-
-    // write the BAM core
-    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
-    // write the raw char data
-    m_stream.Write((char*)al.SupportData.AllCharData.data(),
-                   al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
-}
-
-void BamWriterPrivate::WriteMagicNumber(void) {
-    // write BAM file 'magic number'
-    m_stream.Write(Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH);
-}
-
-void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {
-
-    // write the number of reference sequences
-    uint32_t numReferenceSequences = referenceSequences.size();
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);
-    m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);
-
-    // foreach reference sequence
-    RefVector::const_iterator rsIter = referenceSequences.begin();
-    RefVector::const_iterator rsEnd  = referenceSequences.end();
-    for ( ; rsIter != rsEnd; ++rsIter ) {
-
-        // write the reference sequence name length
-        uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceSequenceNameLen);
-        m_stream.Write((char*)&referenceSequenceNameLen, Constants::BAM_SIZEOF_INT);
-
-        // write the reference sequence name
-        m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
-
-        // write the reference sequence length
-        int32_t referenceLength = rsIter->RefLength;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);
-        m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);
-    }
-}
-
-void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {
-
-    // write the SAM header  text length
-    uint32_t samHeaderLen = samHeaderText.size();
-    if ( m_isBigEndian ) BamTools::SwapEndian_32(samHeaderLen);
-    m_stream.Write((char*)&samHeaderLen, Constants::BAM_SIZEOF_INT);
-
-    // write the SAM header text
-    if ( samHeaderLen > 0 )
-        m_stream.Write(samHeaderText.data(), samHeaderLen);
-}
diff --git a/src/api/internal/BamWriter_p.h b/src/api/internal/BamWriter_p.h
deleted file mode 100644 (file)
index cf10941..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-// ***************************************************************************
-// BamWriter_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-#ifndef BAMWRITER_P_H
-#define BAMWRITER_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail.  This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/internal/BgzfStream_p.h"
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class BamAlignment;
-
-namespace Internal {
-
-class BamWriterPrivate {
-
-    // ctor & dtor
-    public:
-        BamWriterPrivate(void);
-        ~BamWriterPrivate(void);
-
-    // interface methods
-    public:
-        void Close(void);
-        std::string GetErrorString(void) const;
-        bool IsOpen(void) const;
-        bool Open(const std::string& filename,
-                  const std::string& samHeaderText,
-                  const BamTools::RefVector& referenceSequences);
-        bool SaveAlignment(const BamAlignment& al);
-        void SetWriteCompressed(bool ok);
-
-    // 'internal' methods
-    public:
-        uint32_t CalculateMinimumBin(const int begin, int end) const;
-        void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
-        void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
-        void WriteAlignment(const BamAlignment& al);
-        void WriteCoreAlignment(const BamAlignment& al);
-        void WriteMagicNumber(void);
-        void WriteReferences(const BamTools::RefVector& referenceSequences);
-        void WriteSamHeaderText(const std::string& samHeaderText);
-
-    // data members
-    private:
-        BgzfStream m_stream;
-        bool m_isBigEndian;
-        std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMWRITER_P_H
diff --git a/src/api/internal/BgzfStream_p.cpp b/src/api/internal/BgzfStream_p.cpp
deleted file mode 100644 (file)
index 5891067..0000000
+++ /dev/null
@@ -1,460 +0,0 @@
-// ***************************************************************************
-// BgzfStream_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 11 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Based on BGZF routines developed at the Broad Institute.
-// Provides the basic functionality for reading & writing BGZF files
-// Replaces the old BGZF.* files to avoid clashing with other toolkits
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/BamConstants.h"
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include "zlib.h"
-
-#include <cstring>
-#include <algorithm>
-#include <iostream>
-#include <sstream>
-using namespace std;
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BgzfStream::RaiiWrapper::RaiiWrapper(void) {
-    CompressedBlock   = new char[Constants::BGZF_MAX_BLOCK_SIZE];
-    UncompressedBlock = new char[Constants::BGZF_DEFAULT_BLOCK_SIZE];
-}
-
-BgzfStream::RaiiWrapper::~RaiiWrapper(void) {
-
-    // clean up buffers
-    delete[] CompressedBlock;
-    delete[] UncompressedBlock;
-    CompressedBlock = 0;
-    UncompressedBlock = 0;
-}
-
-// ---------------------------
-// BgzfStream implementation
-// ---------------------------
-
-// constructor
-BgzfStream::BgzfStream(void)
-  : m_blockLength(0)
-  , m_blockOffset(0)
-  , m_blockAddress(0)
-  , m_isWriteCompressed(true)
-  , m_device(0)
-{ }
-
-// destructor
-BgzfStream::~BgzfStream(void) {
-    Close();
-}
-
-// checks BGZF block header
-bool BgzfStream::CheckBlockHeader(char* header) {
-    return (header[0] == Constants::GZIP_ID1 &&
-            header[1] == Constants::GZIP_ID2 &&
-            header[2] == Z_DEFLATED &&
-            (header[3] & Constants::FLG_FEXTRA) != 0 &&
-            BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
-            header[12] == Constants::BGZF_ID1 &&
-            header[13] == Constants::BGZF_ID2 &&
-            BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
-}
-
-// closes BGZF file
-void BgzfStream::Close(void) {
-
-    // skip if no device open
-    if ( m_device == 0 ) return;
-
-    // if writing to file, flush the current BGZF block,
-    // then write an empty block (as EOF marker)
-    if ( m_device->IsOpen() && (m_device->Mode() == IBamIODevice::WriteOnly) ) {
-        FlushBlock();
-        const size_t blockLength = DeflateBlock();
-        m_device->Write(Resources.CompressedBlock, blockLength);
-    }
-
-    // close device
-    m_device->Close();
-    delete m_device;
-    m_device = 0;
-
-    // reset state
-    m_blockLength = 0;
-    m_blockOffset = 0;
-    m_blockAddress = 0;
-    m_isWriteCompressed = true;
-}
-
-// compresses the current block
-size_t BgzfStream::DeflateBlock(void) {
-
-    // initialize the gzip header
-    char* buffer = Resources.CompressedBlock;
-    memset(buffer, 0, 18);
-    buffer[0]  = Constants::GZIP_ID1;
-    buffer[1]  = Constants::GZIP_ID2;
-    buffer[2]  = Constants::CM_DEFLATE;
-    buffer[3]  = Constants::FLG_FEXTRA;
-    buffer[9]  = Constants::OS_UNKNOWN;
-    buffer[10] = Constants::BGZF_XLEN;
-    buffer[12] = Constants::BGZF_ID1;
-    buffer[13] = Constants::BGZF_ID2;
-    buffer[14] = Constants::BGZF_LEN;
-
-    // set compression level
-    const int compressionLevel = ( m_isWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );
-
-    // loop to retry for blocks that do not compress enough
-    int inputLength = m_blockOffset;
-    size_t compressedLength = 0;
-    const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
-
-    while ( true ) {
-
-        // initialize zstream values
-        z_stream zs;
-        zs.zalloc    = NULL;
-        zs.zfree     = NULL;
-        zs.next_in   = (Bytef*)Resources.UncompressedBlock;
-        zs.avail_in  = inputLength;
-        zs.next_out  = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
-        zs.avail_out = bufferSize -
-                       Constants::BGZF_BLOCK_HEADER_LENGTH -
-                       Constants::BGZF_BLOCK_FOOTER_LENGTH;
-
-        // initialize the zlib compression algorithm
-        int status = deflateInit2(&zs,
-                                  compressionLevel,
-                                  Z_DEFLATED,
-                                  Constants::GZIP_WINDOW_BITS,
-                                  Constants::Z_DEFAULT_MEM_LEVEL,
-                                  Z_DEFAULT_STRATEGY);
-        if ( status != Z_OK )
-            throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
-
-        // compress the data
-        status = deflate(&zs, Z_FINISH);
-
-        // if not at stream end
-        if ( status != Z_STREAM_END ) {
-
-            deflateEnd(&zs);
-
-            // there was not enough space available in buffer
-            // try to reduce the input length & re-start loop
-            if ( status == Z_OK ) {
-                inputLength -= 1024;
-                if ( inputLength < 0 )
-                    throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
-                continue;
-            }
-
-            throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
-        }
-
-        // finalize the compression routine
-        status = deflateEnd(&zs);
-        if ( status != Z_OK )
-            throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
-
-        // update compressedLength
-        compressedLength = zs.total_out +
-                           Constants::BGZF_BLOCK_HEADER_LENGTH +
-                           Constants::BGZF_BLOCK_FOOTER_LENGTH;
-        if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
-            throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
-
-        // quit while loop
-        break;
-    }
-
-    // store the compressed length
-    BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
-
-    // store the CRC32 checksum
-    uint32_t crc = crc32(0, NULL, 0);
-    crc = crc32(crc, (Bytef*)Resources.UncompressedBlock, inputLength);
-    BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
-    BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
-
-    // ensure that we have less than a block of data left
-    int remaining = m_blockOffset - inputLength;
-    if ( remaining > 0 ) {
-        if ( remaining > inputLength )
-            throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
-        memcpy(Resources.UncompressedBlock, Resources.UncompressedBlock + inputLength, remaining);
-    }
-
-    // update block data
-    m_blockOffset = remaining;
-
-    // return result
-    return compressedLength;
-}
-
-// flushes the data in the BGZF block
-void BgzfStream::FlushBlock(void) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::FlushBlock() - attempting to flush to null device" );
-
-    // flush all of the remaining blocks
-    while ( m_blockOffset > 0 ) {
-
-        // compress the data block
-        const size_t blockLength = DeflateBlock();
-
-        // flush the data to our output device
-        const size_t numBytesWritten = m_device->Write(Resources.CompressedBlock, blockLength);
-        if ( numBytesWritten != blockLength ) {
-            stringstream s("");
-            s << "expected to write " << blockLength
-              << " bytes during flushing, but wrote " << numBytesWritten;
-            throw BamException("BgzfStream::FlushBlock", s.str());
-        }
-
-        // update block data
-        m_blockAddress += blockLength;
-    }
-}
-
-// decompresses the current block
-size_t BgzfStream::InflateBlock(const size_t& blockLength) {
-
-    // setup zlib stream object
-    z_stream zs;
-    zs.zalloc    = NULL;
-    zs.zfree     = NULL;
-    zs.next_in   = (Bytef*)Resources.CompressedBlock + 18;
-    zs.avail_in  = blockLength - 16;
-    zs.next_out  = (Bytef*)Resources.UncompressedBlock;
-    zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
-
-    // initialize
-    int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
-    if ( status != Z_OK )
-        throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
-
-    // decompress
-    status = inflate(&zs, Z_FINISH);
-    if ( status != Z_STREAM_END ) {
-        inflateEnd(&zs);
-        throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
-    }
-
-    // finalize
-    status = inflateEnd(&zs);
-    if ( status != Z_OK ) {
-        inflateEnd(&zs);
-        throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
-    }
-
-    // return result
-    return zs.total_out;
-}
-
-bool BgzfStream::IsOpen(void) const {
-    if ( m_device == 0 )
-        return false;
-    return m_device->IsOpen();
-}
-
-void BgzfStream::Open(const string& filename, const IBamIODevice::OpenMode mode) {
-
-    // close current device if necessary
-    Close();
-    BT_ASSERT_X( (m_device == 0), "BgzfStream::Open() - unable to properly close previous IO device" );
-
-    // retrieve new IO device depending on filename
-    m_device = BamDeviceFactory::CreateDevice(filename);
-    BT_ASSERT_X( m_device, "BgzfStream::Open() - unable to create IO device from filename" );
-
-    // if device fails to open
-    if ( !m_device->Open(mode) ) {
-        const string deviceError = m_device->GetErrorString();
-        const string message = string("could not open BGZF stream: \n\t") + deviceError;
-        throw BamException("BgzfStream::Open", message);
-    }
-}
-
-// reads BGZF data into a byte buffer
-size_t BgzfStream::Read(char* data, const size_t dataLength) {
-
-    if ( dataLength == 0 )
-        return 0;
-
-    // if stream not open for reading
-    BT_ASSERT_X( m_device, "BgzfStream::Read() - trying to read from null device");
-    if ( !m_device->IsOpen() || (m_device->Mode() != IBamIODevice::ReadOnly) )
-        return 0;
-
-    // read blocks as needed until desired data length is retrieved
-    char* output = data;
-    size_t numBytesRead = 0;
-    while ( numBytesRead < dataLength ) {
-
-        // determine bytes available in current block
-        int bytesAvailable = m_blockLength - m_blockOffset;
-
-        // read (and decompress) next block if needed
-        if ( bytesAvailable <= 0 ) {
-            ReadBlock();
-            bytesAvailable = m_blockLength - m_blockOffset;
-            if ( bytesAvailable <= 0 )
-                break;
-        }
-
-        // copy data from uncompressed source buffer into data destination buffer
-        const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable );
-        memcpy(output, Resources.UncompressedBlock + m_blockOffset, copyLength);
-
-        // update counters
-        m_blockOffset += copyLength;
-        output        += copyLength;
-        numBytesRead  += copyLength;
-    }
-
-    // update block data
-    if ( m_blockOffset == m_blockLength ) {
-        m_blockAddress = m_device->Tell();
-        m_blockOffset  = 0;
-        m_blockLength  = 0;
-
-    }
-
-    // return actual number of bytes read
-    return numBytesRead;
-}
-
-// reads a BGZF block
-void BgzfStream::ReadBlock(void) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::ReadBlock() - trying to read from null IO device");
-
-    // store block's starting address
-    int64_t blockAddress = m_device->Tell();
-
-    // read block header from file
-    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
-    size_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH);
-
-    // if block header empty
-    if ( numBytesRead == 0 ) {
-        m_blockLength = 0;
-        return;
-    }
-
-    // if block header invalid size
-    if ( numBytesRead != Constants::BGZF_BLOCK_HEADER_LENGTH )
-        throw BamException("BgzfStream::ReadBlock", "invalid block header size");
-
-    // validate block header contents
-    if ( !BgzfStream::CheckBlockHeader(header) )
-        throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
-
-    // copy header contents to compressed buffer
-    const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
-    memcpy(Resources.CompressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
-
-    // read remainder of block
-    const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
-    numBytesRead = m_device->Read(&Resources.CompressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining);
-    if ( numBytesRead != remaining )
-        throw BamException("BgzfStream::ReadBlock", "could not read data from block");
-
-    // decompress block data
-    numBytesRead = InflateBlock(blockLength);
-
-    // update block data
-    if ( m_blockLength != 0 )
-        m_blockOffset = 0;
-    m_blockAddress = blockAddress;
-    m_blockLength  = numBytesRead;
-}
-
-// seek to position in BGZF file
-void BgzfStream::Seek(const int64_t& position) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::Seek() - trying to seek on null IO device");
-
-    // skip if device is not open
-    if ( !IsOpen() ) return;
-
-    // determine adjusted offset & address
-    int     blockOffset  = (position & 0xFFFF);
-    int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
-
-    // attempt seek in file
-    if ( m_device->IsRandomAccess() && m_device->Seek(blockAddress) ) {
-
-        // update block data & return success
-        m_blockLength  = 0;
-        m_blockAddress = blockAddress;
-        m_blockOffset  = blockOffset;
-    }
-    else {
-        stringstream s("");
-        s << "unable to seek to position: " << position;
-        throw BamException("BgzfStream::Seek", s.str());
-    }
-}
-
-void BgzfStream::SetWriteCompressed(bool ok) {
-    m_isWriteCompressed = ok;
-}
-
-// get file position in BGZF file
-int64_t BgzfStream::Tell(void) const {
-    if ( !IsOpen() )
-        return 0;
-    return ( (m_blockAddress << 16) | (m_blockOffset & 0xFFFF) );
-}
-
-// writes the supplied data into the BGZF buffer
-size_t BgzfStream::Write(const char* data, const size_t dataLength) {
-
-    BT_ASSERT_X( m_device, "BgzfStream::Write() - trying to write to null IO device");
-    BT_ASSERT_X( (m_device->Mode() == IBamIODevice::WriteOnly),
-                 "BgzfStream::Write() - trying to write to non-writable IO device");
-
-    // skip if file not open for writing
-    if ( !IsOpen() )
-        return 0;
-
-    // write blocks as needed til all data is written
-    size_t numBytesWritten = 0;
-    const char* input = data;
-    const size_t blockLength = Constants::BGZF_DEFAULT_BLOCK_SIZE;
-    while ( numBytesWritten < dataLength ) {
-
-        // copy data contents to uncompressed output buffer
-        unsigned int copyLength = min(blockLength - m_blockOffset, dataLength - numBytesWritten);
-        char* buffer = Resources.UncompressedBlock;
-        memcpy(buffer + m_blockOffset, input, copyLength);
-
-        // update counter
-        m_blockOffset   += copyLength;
-        input           += copyLength;
-        numBytesWritten += copyLength;
-
-        // flush (& compress) output buffer when full
-        if ( m_blockOffset == blockLength )
-            FlushBlock();
-    }
-
-    // return actual number of bytes written
-    return numBytesWritten;
-}
diff --git a/src/api/internal/BgzfStream_p.h b/src/api/internal/BgzfStream_p.h
deleted file mode 100644 (file)
index 88d7472..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-// ***************************************************************************
-// BgzfStream_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Based on BGZF routines developed at the Broad Institute.
-// Provides the basic functionality for reading & writing BGZF files
-// Replaces the old BGZF.* files to avoid clashing with other toolkits
-// ***************************************************************************
-
-#ifndef BGZFSTREAM_P_H
-#define BGZFSTREAM_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/api_global.h"
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BgzfStream {
-
-    // constructor & destructor
-    public:
-        BgzfStream(void);
-        ~BgzfStream(void);
-
-    // main interface methods
-    public:
-        // closes BGZF file
-        void Close(void);
-        // returns true if BgzfStream open for IO
-        bool IsOpen(void) const;
-        // opens the BGZF file
-        void Open(const std::string& filename, const IBamIODevice::OpenMode mode);
-        // reads BGZF data into a byte buffer
-        size_t Read(char* data, const size_t dataLength);
-        // seek to position in BGZF file
-        void Seek(const int64_t& position);
-        // sets IO device (closes previous, if any, but does not attempt to open)
-        void SetIODevice(IBamIODevice* device);
-        // enable/disable compressed output
-        void SetWriteCompressed(bool ok);
-        // get file position in BGZF file
-        int64_t Tell(void) const;
-        // writes the supplied data into the BGZF buffer
-        size_t Write(const char* data, const size_t dataLength);
-
-    // internal methods
-    private:
-        // compresses the current block
-        size_t DeflateBlock(void);
-        // flushes the data in the BGZF block
-        void FlushBlock(void);
-        // de-compresses the current block
-        size_t InflateBlock(const size_t& blockLength);
-        // reads a BGZF block
-        void ReadBlock(void);
-
-    // static 'utility' methods
-    public:
-        // checks BGZF block header
-        static bool CheckBlockHeader(char* header);
-
-    // data members
-    public:
-        unsigned int m_blockLength;
-        unsigned int m_blockOffset;
-        uint64_t     m_blockAddress;
-
-        bool m_isWriteCompressed;
-        IBamIODevice* m_device;
-
-        struct RaiiWrapper {
-            RaiiWrapper(void);
-            ~RaiiWrapper(void);
-            char* UncompressedBlock;
-            char* CompressedBlock;
-        };
-        RaiiWrapper Resources;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BGZFSTREAM_P_H
diff --git a/src/api/internal/ILocalIODevice_p.cpp b/src/api/internal/ILocalIODevice_p.cpp
deleted file mode 100644 (file)
index 8730a91..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-// ***************************************************************************
-// ILocalIODevice_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides shared behavior for files & pipes
-// ***************************************************************************
-
-#include "api/internal/ILocalIODevice_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-using namespace std;
-
-ILocalIODevice::ILocalIODevice(void)
-    : IBamIODevice()
-    , m_stream(0)
-{ }
-
-ILocalIODevice::~ILocalIODevice(void) {
-    Close();
-}
-
-void ILocalIODevice::Close(void) {
-
-    // skip if not open
-    if ( !IsOpen() )
-        return;
-
-    // flush & close FILE*
-    fflush(m_stream);
-    fclose(m_stream);
-    m_stream = 0;
-
-    // reset other device state
-    m_mode = IBamIODevice::NotOpen;
-}
-
-size_t ILocalIODevice::Read(char* data, const unsigned int numBytes) {
-    BT_ASSERT_X( m_stream, "ILocalIODevice::Read: trying to read from null stream" );
-    BT_ASSERT_X( (m_mode == IBamIODevice::ReadOnly), "ILocalIODevice::Read: device not in read-only mode");
-    return fread(data, sizeof(char), numBytes, m_stream);
-}
-
-int64_t ILocalIODevice::Tell(void) const {
-    BT_ASSERT_X( m_stream, "ILocalIODevice::Tell: trying to get file position fromnull stream" );
-    return ftell64(m_stream);
-}
-
-size_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) {
-    BT_ASSERT_X( m_stream, "ILocalIODevice::Write: tryint to write to null stream" );
-    BT_ASSERT_X( (m_mode == IBamIODevice::WriteOnly), "ILocalIODevice::Write: device not in write-only mode" );
-    return fwrite(data, sizeof(char), numBytes, m_stream);
-}
diff --git a/src/api/internal/ILocalIODevice_p.h b/src/api/internal/ILocalIODevice_p.h
deleted file mode 100644 (file)
index a71f378..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-// ***************************************************************************
-// ILocalIODevice_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides shared behavior for files & pipes
-// ***************************************************************************
-
-#ifndef ILOCALIODEVICE_P_H
-#define ILOCALIODEVICE_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-
-namespace BamTools {
-namespace Internal {
-
-class ILocalIODevice : public IBamIODevice {
-
-    // ctor & dtor
-    public:
-        ILocalIODevice(void);
-        virtual ~ILocalIODevice(void);
-
-    // IBamIODevice implementation
-    public:
-        virtual void Close(void);
-        virtual size_t Read(char* data, const unsigned int numBytes);
-        virtual int64_t Tell(void) const;
-        virtual size_t Write(const char* data, const unsigned int numBytes);
-
-    // data members
-    protected:
-        FILE* m_stream;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // ILOCALIODEVICE_P_H
diff --git a/src/api/internal/IRemoteIODevice_p.cpp b/src/api/internal/IRemoteIODevice_p.cpp
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/api/internal/IRemoteIODevice_p.h b/src/api/internal/IRemoteIODevice_p.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/api/internal/SamFormatParser_p.cpp b/src/api/internal/SamFormatParser_p.cpp
deleted file mode 100644 (file)
index 195fdcd..0000000
+++ /dev/null
@@ -1,222 +0,0 @@
-// ***************************************************************************
-// SamFormatParser.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for parsing SAM header text into SamHeader object
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/SamFormatParser_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-SamFormatParser::SamFormatParser(SamHeader& header)
-    : m_header(header)
-{ }
-
-SamFormatParser::~SamFormatParser(void) { }
-
-void SamFormatParser::Parse(const string& headerText) {
-
-    // clear header's prior contents
-    m_header.Clear();
-
-    // empty header is OK, but skip processing
-    if ( headerText.empty() )
-        return;
-
-    // other wise parse SAM lines
-    istringstream headerStream(headerText);
-    string headerLine("");
-    while ( getline(headerStream, headerLine) )
-         ParseSamLine(headerLine);
-}
-
-void SamFormatParser::ParseSamLine(const string& line) {
-
-    // skip if line is not long enough to contain true values
-    if ( line.length() < 5 ) return;
-
-    // determine token at beginning of line
-    const string firstToken = line.substr(0,3);
-    string restOfLine = line.substr(4);
-    if      ( firstToken == Constants::SAM_HD_BEGIN_TOKEN) ParseHDLine(restOfLine);
-    else if ( firstToken == Constants::SAM_SQ_BEGIN_TOKEN) ParseSQLine(restOfLine);
-    else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);
-    else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);
-    else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
-    else {
-        const string message = string("unknown token: ") + firstToken;
-        throw BamException("SamFormatParser::ParseSamLine", message);
-    }
-}
-
-void SamFormatParser::ParseHDLine(const string& line) {
-
-    // split HD lines into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set header contents
-        if      ( tokenTag == Constants::SAM_HD_VERSION_TAG    ) m_header.Version    = tokenValue;
-        else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG  ) m_header.SortOrder  = tokenValue;
-        else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
-        else {
-            const string message = string("unknown HD tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParseHDLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !m_header.HasVersion() )
-        throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
-}
-
-void SamFormatParser::ParseSQLine(const string& line) {
-
-    SamSequence seq;
-
-    // split SQ line into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set sequence contents
-        if      ( tokenTag == Constants::SAM_SQ_NAME_TAG       ) seq.Name = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG     ) seq.Length = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG   ) seq.Checksum = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG    ) seq.Species = tokenValue;
-        else if ( tokenTag == Constants::SAM_SQ_URI_TAG        ) seq.URI = tokenValue;
-        else {
-            const string message = string("unknown SQ tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParseSQLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !seq.HasName() )
-        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
-    if ( !seq.HasLength() )
-        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
-
-    // store SAM sequence entry
-    m_header.Sequences.Add(seq);
-}
-
-void SamFormatParser::ParseRGLine(const string& line) {
-
-    SamReadGroup rg;
-
-    // split string into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get token tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set read group contents
-        if      ( tokenTag == Constants::SAM_RG_ID_TAG                  ) rg.ID = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG         ) rg.Description = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG           ) rg.FlowOrder = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG         ) rg.KeySequence = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG             ) rg.Library = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG        ) rg.PlatformUnit = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG      ) rg.ProductionDate = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG             ) rg.Program = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG              ) rg.Sample = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG           ) rg.SequencingCenter = tokenValue;
-        else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG       ) rg.SequencingTechnology = tokenValue;
-        else {
-            const string message = string("unknown RG tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParseRGLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !rg.HasID() )
-        throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
-
-    // store SAM read group entry
-    m_header.ReadGroups.Add(rg);
-}
-
-void SamFormatParser::ParsePGLine(const string& line) {
-
-    SamProgram pg;
-
-    // split string into tokens
-    vector<string> tokens = Split(line, Constants::SAM_TAB);
-
-    // iterate over tokens
-    vector<string>::const_iterator tokenIter = tokens.begin();
-    vector<string>::const_iterator tokenEnd  = tokens.end();
-    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
-        // get token tag/value
-        const string tokenTag = (*tokenIter).substr(0,2);
-        const string tokenValue = (*tokenIter).substr(3);
-
-        // set program record contents
-        if      ( tokenTag == Constants::SAM_PG_ID_TAG              ) pg.ID = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_NAME_TAG            ) pg.Name = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG     ) pg.CommandLine = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
-        else if ( tokenTag == Constants::SAM_PG_VERSION_TAG         ) pg.Version = tokenValue;
-        else {
-            const string message = string("unknown PG tag: ") + tokenTag;
-            throw BamException("SamFormatParser::ParsePGLine", message);
-        }
-    }
-
-    // check for required tags
-    if ( !pg.HasID() )
-        throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
-
-    // store SAM program entry
-    m_header.Programs.Add(pg);
-}
-
-void SamFormatParser::ParseCOLine(const string& line) {
-    // simply add line to comments list
-    m_header.Comments.push_back(line);
-}
-
-const vector<string> SamFormatParser::Split(const string& line, const char delim) {
-    vector<string> tokens;
-    stringstream lineStream(line);
-    string token;
-    while ( getline(lineStream, token, delim) )
-        tokens.push_back(token);
-    return tokens;
-}
diff --git a/src/api/internal/SamFormatParser_p.h b/src/api/internal/SamFormatParser_p.h
deleted file mode 100644 (file)
index cf6d54c..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-// ***************************************************************************
-// SamFormatParser.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for parsing SAM header text into SamHeader object
-// ***************************************************************************
-
-#ifndef SAM_FORMAT_PARSER_H
-#define SAM_FORMAT_PARSER_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class SamHeader;
-
-namespace Internal {
-
-class SamFormatParser {
-
-    // ctor & dtor
-    public:
-        SamFormatParser(BamTools::SamHeader& header);
-        ~SamFormatParser(void);
-
-    // parse text & populate header data
-    public:
-        void Parse(const std::string& headerText);
-
-    // internal methods
-    private:
-        void ParseSamLine(const std::string& line);
-        void ParseHDLine(const std::string& line);
-        void ParseSQLine(const std::string& line);
-        void ParseRGLine(const std::string& line);
-        void ParsePGLine(const std::string& line);
-        void ParseCOLine(const std::string& line);
-        const std::vector<std::string> Split(const std::string& line, const char delim);
-
-    // data members
-    private:
-        SamHeader& m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_FORMAT_PARSER_H
diff --git a/src/api/internal/SamFormatPrinter_p.cpp b/src/api/internal/SamFormatPrinter_p.cpp
deleted file mode 100644 (file)
index f9a118e..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-// ***************************************************************************
-// SamFormatPrinter.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for printing formatted SAM header to string
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/SamFormatPrinter_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// ------------------------
-
-static inline
-const string FormatTag(const string& tag, const string& value) {
-    return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value);
-}
-
-// ---------------------------------
-// SamFormatPrinter implementation
-// ---------------------------------
-
-SamFormatPrinter::SamFormatPrinter(const SamHeader& header)
-    : m_header(header)
-{ }
-
-SamFormatPrinter::~SamFormatPrinter(void) { }
-
-const string SamFormatPrinter::ToString(void) const {
-
-    // clear out stream
-    stringstream out("");
-
-    // generate formatted header text
-    PrintHD(out);
-    PrintSQ(out);
-    PrintRG(out);
-    PrintPG(out);
-    PrintCO(out);
-
-    // return result
-    return out.str();
-}
-
-void SamFormatPrinter::PrintHD(std::stringstream& out) const {
-
-    // if header has @HD data
-    if ( m_header.HasVersion() ) {
-
-        // @HD VN:<Version>
-        out << Constants::SAM_HD_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version);
-
-        // SO:<SortOrder>
-        if ( m_header.HasSortOrder() )
-            out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder);
-
-        // GO:<GroupOrder>
-        if ( m_header.HasGroupOrder() )
-            out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintSQ(std::stringstream& out) const {
-
-    // iterate over sequence entries
-    SamSequenceConstIterator seqIter = m_header.Sequences.ConstBegin();
-    SamSequenceConstIterator seqEnd  = m_header.Sequences.ConstEnd();
-    for ( ; seqIter != seqEnd; ++seqIter ) {
-        const SamSequence& seq = (*seqIter);
-
-        // @SQ SN:<Name> LN:<Length>
-        out << Constants::SAM_SQ_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_SQ_NAME_TAG, seq.Name)
-            << FormatTag(Constants::SAM_SQ_LENGTH_TAG, seq.Length);
-
-        // AS:<AssemblyID>
-        if ( seq.HasAssemblyID() )
-            out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, seq.AssemblyID);
-
-        // M5:<Checksum>
-        if ( seq.HasChecksum() )
-            out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, seq.Checksum);
-
-        // SP:<Species>
-        if ( seq.HasSpecies() )
-            out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, seq.Species);
-
-        // UR:<URI>
-        if ( seq.HasURI() )
-            out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintRG(std::stringstream& out) const {
-
-    // iterate over read group entries
-    SamReadGroupConstIterator rgIter = m_header.ReadGroups.ConstBegin();
-    SamReadGroupConstIterator rgEnd  = m_header.ReadGroups.ConstEnd();
-    for ( ; rgIter != rgEnd; ++rgIter ) {
-        const SamReadGroup& rg = (*rgIter);
-
-        // @RG ID:<ID>
-        out << Constants::SAM_RG_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID);
-
-        // CN:<SequencingCenter>
-        if ( rg.HasSequencingCenter() )
-            out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter);
-
-        // DS:<Description>
-        if ( rg.HasDescription() )
-            out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, rg.Description);
-
-        // DT:<ProductionDate>
-        if ( rg.HasProductionDate() )
-            out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate);
-
-        // FO:<FlowOrder>
-        if ( rg.HasFlowOrder() )
-            out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, rg.FlowOrder);
-
-        // KS:<KeySequence>
-        if ( rg.HasKeySequence() )
-            out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, rg.KeySequence);
-
-        // LB:<Library>
-        if ( rg.HasLibrary() )
-            out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library);
-
-        // PG:<Program>
-        if ( rg.HasProgram() )
-            out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, rg.Program);
-
-        // PI:<PredictedInsertSize>
-        if ( rg.HasPredictedInsertSize() )
-            out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, rg.PredictedInsertSize);
-
-        // PL:<SequencingTechnology>
-        if ( rg.HasSequencingTechnology() )
-            out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, rg.SequencingTechnology);
-
-        // PU:<PlatformUnit>
-        if ( rg.HasPlatformUnit() )
-            out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit);
-
-        // SM:<Sample>
-        if ( rg.HasSample() )
-            out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintPG(std::stringstream& out) const {
-
-    // iterate over program record entries
-    SamProgramConstIterator pgIter = m_header.Programs.ConstBegin();
-    SamProgramConstIterator pgEnd  = m_header.Programs.ConstEnd();
-    for ( ; pgIter != pgEnd; ++pgIter ) {
-        const SamProgram& pg = (*pgIter);
-
-        // @PG ID:<ID>
-        out << Constants::SAM_PG_BEGIN_TOKEN
-            << FormatTag(Constants::SAM_PG_ID_TAG, pg.ID);
-
-        // PN:<Name>
-        if ( pg.HasName() )
-            out << FormatTag(Constants::SAM_PG_NAME_TAG, pg.Name);
-
-        // CL:<CommandLine>
-        if ( pg.HasCommandLine() )
-            out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, pg.CommandLine);
-
-        // PP:<PreviousProgramID>
-        if ( pg.HasPreviousProgramID() )
-            out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, pg.PreviousProgramID);
-
-        // VN:<Version>
-        if ( pg.HasVersion() )
-            out << FormatTag(Constants::SAM_PG_VERSION_TAG, pg.Version);
-
-        // newline
-        out << endl;
-    }
-}
-
-void SamFormatPrinter::PrintCO(std::stringstream& out) const {
-
-    // iterate over comments
-    vector<string>::const_iterator commentIter = m_header.Comments.begin();
-    vector<string>::const_iterator commentEnd  = m_header.Comments.end();
-    for ( ; commentIter != commentEnd; ++commentIter ) {
-
-        // @CO <Comment>
-        out << Constants::SAM_CO_BEGIN_TOKEN
-            << Constants::SAM_TAB
-            << (*commentIter)
-            << endl;
-    }
-}
diff --git a/src/api/internal/SamFormatPrinter_p.h b/src/api/internal/SamFormatPrinter_p.h
deleted file mode 100644 (file)
index ea29181..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-// ***************************************************************************
-// SamFormatPrinter.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for printing formatted SAM header to string
-// ***************************************************************************
-
-#ifndef SAM_FORMAT_PRINTER_H
-#define SAM_FORMAT_PRINTER_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <sstream>
-#include <string>
-
-namespace BamTools {
-
-class SamHeader;
-
-namespace Internal {
-
-// Produces the SAM-formatted text of a SamHeader.
-// The printer keeps only a reference to the header (m_header), so the
-// referenced SamHeader has to stay alive for as long as the printer is used.
-class SamFormatPrinter {
-
-    // ctor & dtor
-    public:
-        // @header - header whose contents will be printed
-        SamFormatPrinter(const BamTools::SamHeader& header);
-        ~SamFormatPrinter(void);
-
-    // generates SAM-formatted string from header data
-    public:
-        const std::string ToString(void) const;
-
-    // internal methods
-    // each one appends a single kind of header record to @out
-    private:
-        void PrintHD(std::stringstream& out) const; // presumably the @HD line (implementation not in view)
-        void PrintSQ(std::stringstream& out) const; // presumably the @SQ lines (implementation not in view)
-        void PrintRG(std::stringstream& out) const; // read group lines
-        void PrintPG(std::stringstream& out) const; // one @PG line per program record
-        void PrintCO(std::stringstream& out) const; // one @CO line per comment
-
-    // data members
-    private:
-        const SamHeader& m_header; // header being printed (referenced, not copied)
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_FORMAT_PRINTER_H
diff --git a/src/api/internal/SamHeaderValidator_p.cpp b/src/api/internal/SamHeaderValidator_p.cpp
deleted file mode 100644 (file)
index c76fff9..0000000
+++ /dev/null
@@ -1,524 +0,0 @@
-// ***************************************************************************
-// SamHeaderValidator.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for validating SamHeader data
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/SamHeaderValidator_p.h"
-#include "api/internal/SamHeaderVersion_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cctype>
-#include <set>
-#include <sstream>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// -------------------------
-
-// Returns true if @lhs and @rhs have the same length and every pair of
-// characters is equal after toupper() conversion (current C locale,
-// byte-wise - no multi-byte/Unicode awareness).
-static
-bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
-
-    // can omit checking chars if lengths not equal
-    // (size_t avoids narrowing string::size_type to int)
-    const size_t length = lhs.length();
-    if ( length != rhs.length() )
-        return false;
-
-    // do *basic* toupper checks on each string char's
-    // toupper() is only defined for values representable as unsigned char
-    // (or EOF); a plain char may be negative, so convert before widening
-    for ( size_t i = 0; i < length; ++i ) {
-        const unsigned char lhsChar = static_cast<unsigned char>(lhs[i]);
-        const unsigned char rhsChar = static_cast<unsigned char>(rhs[i]);
-        if ( toupper(lhsChar) != toupper(rhsChar) )
-            return false;
-    }
-
-    // otherwise OK
-    return true;
-}
-
-// ------------------------------------------------------------------------
-// Allow validation rules to vary, as needed, between SAM header versions
-//
-// use SAM_VERSION_X_Y to tag important changes
-//
-// Together with the SamHeaderVersion comparison operators, these constants
-// will allow for comparisons like:
-// if ( m_version < SAM_VERSION_2_0 ) {
-//     // use some older rule
-// } else {
-//     // use rule introduced with version 2.0
-// }
-
-static const SamHeaderVersion SAM_VERSION_1_0 = SamHeaderVersion(1,0);
-static const SamHeaderVersion SAM_VERSION_1_1 = SamHeaderVersion(1,1);
-static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);
-static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);
-static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);
-
-// TODO: This functionality is currently unused.
-//       Make validation "version-aware."
-//
-// ------------------------------------------------------------------------
-
-// fixed text wrapped around every stored message (see AddError / AddWarning)
-const string SamHeaderValidator::ERROR_PREFIX = "ERROR: ";
-const string SamHeaderValidator::WARN_PREFIX  = "WARNING: ";
-const string SamHeaderValidator::NEWLINE      = "\n";
-
-// ctor - binds the validator to @header
-// m_header is a reference member, so @header is not copied and must remain
-// alive while the validator is in use
-SamHeaderValidator::SamHeaderValidator(const SamHeader& header)
-    : m_header(header)
-{ }
-
-// dtor (empty body)
-SamHeaderValidator::~SamHeaderValidator(void) { }
-
-// Stores @message in the error list, wrapped as ERROR_PREFIX + message + NEWLINE.
-void SamHeaderValidator::AddError(const string& message) {
-    string entry(ERROR_PREFIX);
-    entry += message;
-    entry += NEWLINE;
-    m_errorMessages.push_back(entry);
-}
-
-// Stores @message in the warning list, wrapped as WARN_PREFIX + message + NEWLINE.
-void SamHeaderValidator::AddWarning(const string& message) {
-    string entry(WARN_PREFIX);
-    entry += message;
-    entry += NEWLINE;
-    m_warningMessages.push_back(entry);
-}
-
-// Writes a summary line followed by every stored error message to @stream.
-// Writes nothing at all when no errors were recorded.
-void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
-
-    const size_t numErrors = m_errorMessages.size();
-    if ( numErrors > 0 ) {
-
-        // error header line
-        stream << "* SAM header has " << numErrors << " errors:" << endl;
-
-        // stored messages already end with a newline
-        for ( size_t i = 0; i < numErrors; ++i )
-            stream << m_errorMessages[i];
-    }
-}
-
-// Writes all stored messages to @stream: errors first, then warnings.
-// Each group is skipped entirely when it is empty.
-void SamHeaderValidator::PrintMessages(ostream& stream) {
-    PrintErrorMessages(stream);
-    PrintWarningMessages(stream);
-}
-
-// Writes a summary line followed by every stored warning message to @stream.
-// Writes nothing at all when no warnings were recorded.
-void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
-
-    const size_t numWarnings = m_warningMessages.size();
-    if ( numWarnings > 0 ) {
-
-        // warning header line
-        stream << "* SAM header has " << numWarnings << " warnings:" << endl;
-
-        // stored messages already end with a newline
-        for ( size_t i = 0; i < numWarnings; ++i )
-            stream << m_warningMessages[i];
-    }
-}
-
-// entry point for validation
-// Every stage is always run (no short-circuiting), so the messages of all
-// stages are collected even after an earlier stage has failed.
-// Returns true only if all stages passed.
-bool SamHeaderValidator::Validate(void) {
-    const bool metadataOk   = ValidateMetadata();
-    const bool sequencesOk  = ValidateSequenceDictionary();
-    const bool readGroupsOk = ValidateReadGroupDictionary();
-    const bool programsOk   = ValidateProgramChain();
-    return ( metadataOk && sequencesOk && readGroupsOk && programsOk );
-}
-
-// check all SAM header 'metadata' (version, sort order, group order)
-// All three checks always run; returns true only if all of them passed.
-bool SamHeaderValidator::ValidateMetadata(void) {
-    const bool versionOk    = ValidateVersion();
-    const bool sortOrderOk  = ValidateSortOrder();
-    const bool groupOrderOk = ValidateGroupOrder();
-    return ( versionOk && sortOrderOk && groupOrderOk );
-}
-
-// check SAM header version tag
-// A missing version only yields a warning (and counts as valid).
-// A present version must have the form <digits>.<digits>; anything else
-// records an error and returns false.
-bool SamHeaderValidator::ValidateVersion(void) {
-
-    const string& vn = m_header.Version;
-
-    // warn if version not present
-    if ( vn.empty() ) {
-        AddWarning("Version (VN) missing. Not required, but strongly recommended");
-        return true;
-    }
-
-    // syntax check: there must be a period, and both the major part (before
-    // it) and the minor part (after it) must be non-empty and all digits
-    const size_t dot = vn.find(Constants::SAM_PERIOD);
-    bool wellFormed = ( dot != string::npos );
-    if ( wellFormed ) {
-        const string majorPart = vn.substr(0, dot);
-        const string minorPart = vn.substr(dot + 1);
-        wellFormed = !majorPart.empty() && ContainsOnlyDigits(majorPart) &&
-                     !minorPart.empty() && ContainsOnlyDigits(minorPart);
-    }
-
-    if ( !wellFormed ) {
-        AddError("Invalid version (VN) format: " + vn);
-        return false;
-    }
-
-    // TODO: check if version is not just syntactically OK,
-    // but is also a valid SAM version ( 1.0 .. CURRENT )
-
-    // all checked out this far, then version is OK
-    return true;
-}
-
-// Returns true if @s contains no character outside Constants::SAM_DIGITS.
-// assumes non-empty input string (an empty string also yields true)
-bool SamHeaderValidator::ContainsOnlyDigits(const string& s) {
-    return s.find_first_not_of(Constants::SAM_DIGITS) == string::npos;
-}
-
-// validate SAM header sort order tag
-// A missing sort order only yields a warning. A present one must equal one
-// of the coordinate / queryname / unsorted keywords (exact match).
-bool SamHeaderValidator::ValidateSortOrder(void) {
-
-    const string& so = m_header.SortOrder;
-
-    // warn if sort order not present
-    if ( so.empty() ) {
-        AddWarning("Sort order (SO) missing. Not required, but strongly recommended");
-        return true;
-    }
-
-    // compare against the accepted keywords
-    const bool isKnownKeyword = ( so == Constants::SAM_HD_SORTORDER_COORDINATE ) ||
-                                ( so == Constants::SAM_HD_SORTORDER_QUERYNAME )  ||
-                                ( so == Constants::SAM_HD_SORTORDER_UNSORTED );
-    if ( !isKnownKeyword )
-        AddError("Invalid sort order (SO): " + so);
-
-    return isKnownKeyword;
-}
-
-// validate SAM header group order tag
-// A missing group order is fine (no message). A present one must equal one
-// of the none / query / reference keywords (exact match).
-bool SamHeaderValidator::ValidateGroupOrder(void) {
-
-    const string& go = m_header.GroupOrder;
-
-    // if no group order, no problem, just return OK
-    if ( go.empty() )
-        return true;
-
-    // compare against the accepted keywords
-    const bool isKnownKeyword = ( go == Constants::SAM_HD_GROUPORDER_NONE )  ||
-                                ( go == Constants::SAM_HD_GROUPORDER_QUERY ) ||
-                                ( go == Constants::SAM_HD_GROUPORDER_REFERENCE );
-    if ( !isKnownKeyword )
-        AddError("Invalid group order (GO): " + go);
-
-    return isKnownKeyword;
-}
-
-// validate SAM header sequence dictionary
-// Checks name uniqueness across the dictionary, then validates every entry.
-// All entries are always checked, so every problem gets reported.
-bool SamHeaderValidator::ValidateSequenceDictionary(void) {
-
-    // check for unique sequence names
-    bool allValid = ContainsUniqueSequenceNames();
-
-    // validate each sequence entry
-    const SamSequenceDictionary& dictionary = m_header.Sequences;
-    SamSequenceConstIterator current = dictionary.ConstBegin();
-    SamSequenceConstIterator stop    = dictionary.ConstEnd();
-    while ( current != stop ) {
-        if ( !ValidateSequence(*current) )
-            allValid = false;
-        ++current;
-    }
-
-    return allValid;
-}
-
-// make sure all SQ names are unique
-// Records one error for every entry whose name was already seen earlier in
-// the dictionary; returns false if any duplicate was found.
-bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
-
-    bool allUnique = true;
-    set<string> seenNames;
-
-    const SamSequenceDictionary& dictionary = m_header.Sequences;
-    SamSequenceConstIterator current = dictionary.ConstBegin();
-    SamSequenceConstIterator stop    = dictionary.ConstEnd();
-    while ( current != stop ) {
-        const string& name = (*current).Name;
-
-        // insert() reports (via .second) whether the name was new;
-        // if it was not, this entry duplicates an earlier one
-        const bool isNewName = seenNames.insert(name).second;
-        if ( !isNewName ) {
-            AddError("Sequence name (SN): " + name + " is not unique");
-            allUnique = false;
-        }
-        ++current;
-    }
-
-    return allUnique;
-}
-
-// validate SAM header sequence entry
-// Both the name and the length are always checked; returns true only if
-// both checks passed.
-bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
-    const bool nameOk   = CheckNameFormat(seq.Name);
-    const bool lengthOk = CheckLengthInRange(seq.Length);
-    return ( nameOk && lengthOk );
-}
-
-// check sequence name is valid format
-// The name must be non-empty and must not start with one of the reserved
-// characters (Constants::SAM_EQUAL, Constants::SAM_STAR).
-bool SamHeaderValidator::CheckNameFormat(const string& name) {
-
-    // invalid if name is empty
-    if ( name.empty() ) {
-        AddError("Sequence entry (@SQ) is missing SN tag");
-        return false;
-    }
-
-    // invalid if first character is a reserved char
-    const char leading = name.at(0);
-    const bool startsWithReserved = ( leading == Constants::SAM_EQUAL ) ||
-                                    ( leading == Constants::SAM_STAR );
-    if ( startsWithReserved )
-        AddError("Invalid sequence name (SN): " + name);
-
-    return !startsWithReserved;
-}
-
-// check that sequence length is within accepted range
-// @length is the textual LN value. It must be present, must parse as an
-// unsigned integer, and must lie within
-// [Constants::SAM_SQ_LENGTH_MIN, Constants::SAM_SQ_LENGTH_MAX].
-// Records an error and returns false otherwise.
-bool SamHeaderValidator::CheckLengthInRange(const string& length) {
-
-    // invalid if empty
-    if ( length.empty() ) {
-        AddError("Sequence entry (@SQ) is missing LN tag");
-        return false;
-    }
-
-    // convert string length to numeric
-    // initialize the target and check the extraction: if parsing fails the
-    // value must not be read uninitialized (pre-C++11 streams leave the
-    // target untouched on failure)
-    stringstream lengthStream(length);
-    unsigned int sequenceLength = 0;
-    const bool parsed = !(lengthStream >> sequenceLength).fail();
-
-    // invalid if not a number, or if length outside accepted range
-    if ( !parsed ||
-         sequenceLength < Constants::SAM_SQ_LENGTH_MIN ||
-         sequenceLength > Constants::SAM_SQ_LENGTH_MAX )
-    {
-        AddError("Sequence length (LN): " + length + " out of range");
-        return false;
-    }
-
-    // otherwise OK
-    return true;
-}
-
-// validate SAM header read group dictionary
-// Checks ID / platform unit uniqueness across the dictionary, then validates
-// every read group. All read groups are always checked.
-bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
-
-    // check for unique read group IDs & platform units
-    bool allValid = ContainsUniqueIDsAndPlatformUnits();
-
-    // validate each read group entry
-    const SamReadGroupDictionary& dictionary = m_header.ReadGroups;
-    SamReadGroupConstIterator current = dictionary.ConstBegin();
-    SamReadGroupConstIterator stop    = dictionary.ConstEnd();
-    while ( current != stop ) {
-        if ( !ValidateReadGroup(*current) )
-            allValid = false;
-        ++current;
-    }
-
-    return allValid;
-}
-
-// make sure RG IDs and platform units are unique
-// Records an error for every read group whose ID repeats an earlier ID, and
-// for every read group whose non-empty platform unit (PU) repeats an earlier
-// one. Returns false if any duplicate was found.
-bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
-
-    bool isValid = true;
-    set<string> readGroupIds;
-    set<string> platformUnits;
-
-    // iterate over read groups
-    const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
-    SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
-    SamReadGroupConstIterator rgEnd  = readGroups.ConstEnd();
-    for ( ; rgIter != rgEnd; ++rgIter ) {
-        const SamReadGroup& rg = (*rgIter);
-
-        // --------------------------------
-        // check for unique ID
-
-        // store read group ID; insert() reports (via .second) whether it
-        // was new - if not, this is a duplicate entry
-        const string& id = rg.ID;
-        if ( !readGroupIds.insert(id).second ) {
-            AddError("Read group ID (ID): " + id + " is not unique");
-            isValid = false;
-        }
-
-        // --------------------------------
-        // check for unique platform unit
-
-        // an empty PlatformUnit is treated as "PU tag not provided": read
-        // groups that simply omit PU must not be reported as duplicates of
-        // each other, so only non-empty values take part in the check
-        const string& pu = rg.PlatformUnit;
-        if ( pu.empty() )
-            continue;
-
-        // store platform unit, error if already seen (duplicate entry)
-        if ( !platformUnits.insert(pu).second ) {
-            AddError("Platform unit (PU): " + pu + " is not unique");
-            isValid = false;
-        }
-    }
-
-    // return validation state
-    return isValid;
-}
-
-// validate SAM header read group entry
-// Both the ID and the sequencing technology are always checked; returns
-// true only if both checks passed.
-bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
-    const bool idOk         = CheckReadGroupID(rg.ID);
-    const bool technologyOk = CheckSequencingTechnology(rg.SequencingTechnology);
-    return ( idOk && technologyOk );
-}
-
-// make sure RG ID exists
-// Records an error and returns false when @id is empty.
-bool SamHeaderValidator::CheckReadGroupID(const string& id) {
-
-    const bool hasId = !id.empty();
-    if ( !hasId )
-        AddError("Read group entry (@RG) is missing ID tag");
-
-    return hasId;
-}
-
-// make sure RG sequencing tech is one of the accepted keywords
-// An empty @technology is accepted. Otherwise it must match one of the
-// accepted keywords, compared case-insensitively.
-bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
-
-    // if no technology provided, no problem, just return OK
-    if ( technology.empty() )
-        return true;
-
-    // accepted keywords
-    const string acceptedKeywords[] = {
-        Constants::SAM_RG_SEQTECHNOLOGY_CAPILLARY,
-        Constants::SAM_RG_SEQTECHNOLOGY_HELICOS,
-        Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA,
-        Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT,
-        Constants::SAM_RG_SEQTECHNOLOGY_LS454,
-        Constants::SAM_RG_SEQTECHNOLOGY_PACBIO,
-        Constants::SAM_RG_SEQTECHNOLOGY_SOLID
-    };
-    const size_t numKeywords = sizeof(acceptedKeywords) / sizeof(acceptedKeywords[0]);
-
-    // valid as soon as any keyword matches
-    for ( size_t i = 0; i < numKeywords; ++i ) {
-        if ( caseInsensitiveCompare(technology, acceptedKeywords[i]) )
-            return true;
-    }
-
-    // otherwise
-    AddError("Invalid read group sequencing platform (PL): " + technology);
-    return false;
-}
-
-// validate the SAM header "program chain"
-// Both checks always run; returns true only if program IDs are unique and
-// every PP tag refers to a known program ID.
-bool SamHeaderValidator::ValidateProgramChain(void) {
-    const bool idsUnique     = ContainsUniqueProgramIds();
-    const bool previousIdsOk = ValidatePreviousProgramIds();
-    return ( idsUnique && previousIdsOk );
-}
-
-// make sure all PG IDs are unique
-// Records one error for every program record whose ID was already seen
-// earlier in the chain; returns false if any duplicate was found.
-bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
-
-    bool allUnique = true;
-    set<string> seenIds;
-
-    const SamProgramChain& chain = m_header.Programs;
-    SamProgramConstIterator current = chain.ConstBegin();
-    SamProgramConstIterator stop    = chain.ConstEnd();
-    while ( current != stop ) {
-        const string& pgId = (*current).ID;
-
-        // insert() reports (via .second) whether the ID was new;
-        // if it was not, this record duplicates an earlier one
-        const bool isNewId = seenIds.insert(pgId).second;
-        if ( !isNewId ) {
-            AddError("Program ID (ID): " + pgId + " is not unique");
-            allUnique = false;
-        }
-        ++current;
-    }
-
-    return allUnique;
-}
-
-// make sure that any PP tags present point to existing @PG IDs
-// Records without a PreviousProgramID are ignored. For the others, the
-// program chain must contain an entry for the referenced ID.
-bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
-
-    bool allKnown = true;
-
-    const SamProgramChain& chain = m_header.Programs;
-    SamProgramConstIterator current = chain.ConstBegin();
-    SamProgramConstIterator stop    = chain.ConstEnd();
-    while ( current != stop ) {
-        const string& ppId = (*current).PreviousProgramID;
-
-        // only a non-empty PP value needs to resolve to a known ID
-        if ( !ppId.empty() && !chain.Contains(ppId) ) {
-            AddError("PreviousProgramID (PP): " + ppId + " is not a known ID");
-            allKnown = false;
-        }
-        ++current;
-    }
-
-    return allKnown;
-}
diff --git a/src/api/internal/SamHeaderValidator_p.h b/src/api/internal/SamHeaderValidator_p.h
deleted file mode 100644 (file)
index 7d0c60a..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-// ***************************************************************************
-// SamHeaderValidator.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for validating SamHeader data
-// ***************************************************************************
-
-#ifndef SAM_HEADER_VALIDATOR_P_H
-#define SAM_HEADER_VALIDATOR_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class SamHeader;
-class SamReadGroup;
-class SamSequence;
-
-namespace Internal {
-
-// Validates the data of a SamHeader and collects human-readable error and
-// warning messages along the way.
-// Typical use: construct with a header, call Validate(), then PrintMessages().
-// The validator keeps only a reference to the header (m_header), so the
-// header must remain alive while the validator is in use.
-class SamHeaderValidator {
-
-    // ctor & dtor
-    public:
-        SamHeaderValidator(const SamHeader& header);
-        ~SamHeaderValidator(void);
-
-    // SamHeaderValidator interface
-    public:
-
-        // prints error & warning messages
-        // (errors first, then warnings; an empty group prints nothing)
-        void PrintMessages(std::ostream& stream);
-
-        // validates SamHeader data, returns true/false accordingly
-        // (warnings alone do not make the header invalid)
-        bool Validate(void);
-
-    // internal methods
-    // each returns true if its check passed; failures are recorded via AddError
-    private:
-
-        // validate header metadata
-        bool ValidateMetadata(void);
-        bool ValidateVersion(void);
-        bool ContainsOnlyDigits(const std::string& s);
-        bool ValidateSortOrder(void);
-        bool ValidateGroupOrder(void);
-
-        // validate sequence dictionary
-        bool ValidateSequenceDictionary(void);
-        bool ContainsUniqueSequenceNames(void);
-        bool CheckNameFormat(const std::string& name);
-        bool ValidateSequence(const SamSequence& seq);
-        bool CheckLengthInRange(const std::string& length);
-
-        // validate read group dictionary
-        bool ValidateReadGroupDictionary(void);
-        bool ContainsUniqueIDsAndPlatformUnits(void);
-        bool ValidateReadGroup(const SamReadGroup& rg);
-        bool CheckReadGroupID(const std::string& id);
-        bool CheckSequencingTechnology(const std::string& technology);
-
-        // validate program data
-        bool ValidateProgramChain(void);
-        bool ContainsUniqueProgramIds(void);
-        bool ValidatePreviousProgramIds(void);
-
-        // error reporting
-        void AddError(const std::string& message);
-        void AddWarning(const std::string& message);
-        void PrintErrorMessages(std::ostream& stream);
-        void PrintWarningMessages(std::ostream& stream);
-
-    // data members
-    private:
-
-        // SamHeader being validated (referenced, not copied)
-        const SamHeader& m_header;
-
-        // error reporting helpers
-        // (text added before / after each stored message)
-        static const std::string ERROR_PREFIX;
-        static const std::string WARN_PREFIX;
-        static const std::string NEWLINE;
-
-        // error reporting messages
-        // (already formatted, in the order they were recorded)
-        std::vector<std::string> m_errorMessages;
-        std::vector<std::string> m_warningMessages;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_HEADER_VALIDATOR_P_H
diff --git a/src/api/internal/SamHeaderVersion_p.h b/src/api/internal/SamHeaderVersion_p.h
deleted file mode 100644 (file)
index 4f85df0..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-// ***************************************************************************
-// SamHeaderVersion.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for comparing SAM header versions
-// *************************************************************************
-
-#ifndef SAM_HEADERVERSION_P_H
-#define SAM_HEADERVERSION_P_H
-
-//  -------------
-//  W A R N I N G
-//  -------------
-//
-// This file is not part of the BamTools API.  It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamConstants.h"
-#include <sstream>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-// Holds a SAM header version as a (major, minor) pair of unsigned integers,
-// so that versions can be compared numerically rather than as text.
-class SamHeaderVersion {
-
-    // ctors & dtor
-    public:
-        // default: version 0.0
-        SamHeaderVersion(void)
-            : m_majorVersion(0)
-            , m_minorVersion(0)
-        { }
-
-        // parses @version ("<major>.<minor>") via SetVersion();
-        // starts from 0.0, so parts that cannot be parsed stay 0
-        explicit SamHeaderVersion(const std::string& version)
-            : m_majorVersion(0)
-            , m_minorVersion(0)
-        {
-            SetVersion(version);
-        }
-
-        // stores the given numbers directly
-        SamHeaderVersion(const unsigned int& major, const unsigned int& minor)
-            : m_majorVersion(major)
-            , m_minorVersion(minor)
-        { }
-
-        // resets both fields to 0; no other cleanup is performed
-        ~SamHeaderVersion(void) {
-            m_majorVersion = 0;
-            m_minorVersion = 0;
-        }
-    
-    // access data
-    public:
-        unsigned int MajorVersion(void) const { return m_majorVersion; }
-        unsigned int MinorVersion(void) const { return m_minorVersion; }
-
-        // parses a "<major>.<minor>" string into the two fields
-        void SetVersion(const std::string& version);
-        // formats the version as "<major><period><minor>"
-        std::string ToString(void) const;
-
-    // data members
-    private:
-        unsigned int m_majorVersion;
-        unsigned int m_minorVersion;
-};
-
-// Parses @version, expected as "<major>.<minor>", into the two version
-// fields. Each field is updated independently, and only if its text is
-// non-empty and consists solely of digits; otherwise that field keeps its
-// current value. Nothing is changed if @version is empty or has no period.
-inline
-void SamHeaderVersion::SetVersion(const std::string& version) {
-
-    // do nothing if version is empty or period not found
-    // (find() on an empty string also yields npos)
-    const size_t periodFound = version.find(Constants::SAM_PERIOD);
-    if ( periodFound == std::string::npos )
-        return;
-
-    // NOTE: each field gets its own stream. Extracting a number that runs up
-    // to the end of the buffer sets eofbit, and stringstream::str() does not
-    // reset the stream state - so re-using one stream for both fields made
-    // the second extraction fail, leaving the minor version unparsed.
-
-    // store major version if non-empty and contains only digits
-    const std::string majorVersion = version.substr(0, periodFound);
-    if ( !majorVersion.empty() &&
-         majorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
-    {
-        std::stringstream majorStream(majorVersion);
-        majorStream >> m_majorVersion;
-    }
-
-    // store minor version if non-empty and contains only digits
-    const std::string minorVersion = version.substr(periodFound + 1);
-    if ( !minorVersion.empty() &&
-         minorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
-    {
-        std::stringstream minorStream(minorVersion);
-        minorStream >> m_minorVersion;
-    }
-}
-
-// -----------------------------------------------------
-// printing
-
-// Returns the version formatted as <major>, Constants::SAM_PERIOD, <minor>.
-inline std::string SamHeaderVersion::ToString(void) const {
-    std::stringstream formatted;
-    formatted << m_majorVersion;
-    formatted << Constants::SAM_PERIOD;
-    formatted << m_minorVersion;
-    return formatted.str();
-}
-
-// -----------------------------------------------------
-// comparison operators
-
-// Two versions are equal when both their major and minor numbers match.
-inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
-    if ( lhs.MajorVersion() != rhs.MajorVersion() )
-        return false;
-    return lhs.MinorVersion() == rhs.MinorVersion();
-}
-
-// Orders versions by major number first; the minor number only decides
-// when the major numbers are equal.
-inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
-    if ( lhs.MajorVersion() != rhs.MajorVersion() )
-        return lhs.MajorVersion() < rhs.MajorVersion();
-    return lhs.MinorVersion() < rhs.MinorVersion();
-}
-
-// remaining comparisons, all expressed in terms of operator<
-inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
-    return ( rhs < lhs );
-}
-inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
-    return !( rhs < lhs );
-}
-inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
-    return !( lhs < rhs );
-}
-
-} // namespace Internal 
-} // namespace BamTools
-
-#endif // SAM_HEADERVERSION_P_H
diff --git a/src/api/internal/bam/BamHeader_p.cpp b/src/api/internal/bam/BamHeader_p.cpp
new file mode 100644 (file)
index 0000000..02c0a25
--- /dev/null
@@ -0,0 +1,120 @@
+// ***************************************************************************
+// BamHeader_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/BamConstants.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+// Returns true if the first Constants::BAM_HEADER_MAGIC_LENGTH characters
+// of @buffer compare equal (strncmp) to Constants::BAM_HEADER_MAGIC.
+static inline
+bool isValidMagicNumber(const char* buffer) {
+    const int comparison = strncmp(buffer,
+                                   Constants::BAM_HEADER_MAGIC,
+                                   Constants::BAM_HEADER_MAGIC_LENGTH);
+    return ( comparison == 0 );
+}
+
+// --------------------------
+// BamHeader implementation
+// --------------------------
+
+// ctor (empty body; m_header is default-constructed)
+BamHeader::BamHeader(void) { }
+
+// dtor (empty body)
+BamHeader::~BamHeader(void) { }
+
+// reads magic number from BGZF stream and checks it
+// returns normally if the magic number is valid; throws BamException if it
+// could not be read completely or does not match Constants::BAM_HEADER_MAGIC
+void BamHeader::CheckMagicNumber(BgzfStream* stream) {
+
+    // try to read magic number
+    // (expected size is kept as size_t so it is compared with the byte count
+    //  in the same unsigned type, as ReadHeaderLength does)
+    const size_t magicLength = static_cast<size_t>(Constants::BAM_HEADER_MAGIC_LENGTH);
+    char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
+    const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
+    if ( numBytesRead != magicLength )
+        throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
+
+    // validate magic number
+    if ( !isValidMagicNumber(buffer) )
+        throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
+}
+
+// clear SamHeader data (forwards to SamHeader::Clear on the stored header)
+void BamHeader::Clear(void) {
+    m_header.Clear();
+}
+
+// return true if SamHeader data is valid
+// (forwards to SamHeader::IsValid on the stored header)
+bool BamHeader::IsValid(void) const {
+    return m_header.IsValid();
+}
+
+// load BAM header ('magic number' and SAM header text) from BGZF stream
+// Each step reports failure by throwing BamException, so a normal return
+// means all three parts were read.
+void BamHeader::Load(BgzfStream* stream) {
+
+    // the magic number comes first
+    CheckMagicNumber(stream);
+
+    // then the text length, followed by that many bytes of SAM header text
+    uint32_t textLength = 0;
+    ReadHeaderLength(stream, textLength);
+    ReadHeaderText(stream, textLength);
+}
+
+// reads SAM header text length from BGZF stream, stores it in @length
+// throws BamException if fewer than sizeof(uint32_t) bytes could be read
+void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
+
+    // read the raw bytes of the length field
+    const size_t fieldSize = sizeof(uint32_t);
+    char rawLength[sizeof(uint32_t)];
+    const size_t numBytesRead = stream->Read(rawLength, sizeof(uint32_t));
+    if ( numBytesRead != fieldSize )
+        throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
+
+    // convert raw bytes to length, byte-swapping on big-endian systems
+    length = BamTools::UnpackUnsignedInt(rawLength);
+    if ( BamTools::SystemIsBigEndian() ) {
+        BamTools::SwapEndian_32(length);
+    }
+}
+
+// reads SAM header text from BGZF stream, stores in SamHeader object
+// @length is the number of text bytes to read.
+// throws BamException if fewer than @length bytes could be read.
+void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
+
+    // zero-filled buffer with room for a terminating null character
+    // - owned by a string, so it is released on every exit path (including
+    //   an exception thrown by Read) and an allocation failure surfaces as
+    //   an exception instead of a null pointer being written to
+    // - size computed in size_t so that length + 1 is not truncated to
+    //   32-bit arithmetic
+    string headerText(static_cast<size_t>(length) + 1, '\0');
+    const size_t bytesRead = stream->Read(&headerText[0], length);
+
+    // if error reading, throw
+    if ( bytesRead != length )
+        throw BamException("BamHeader::ReadHeaderText", "could not read header text");
+
+    // otherwise, text was read OK
+    // store the text up to the first null character
+    m_header.SetHeaderText( string(headerText.c_str()) );
+}
+
+// returns *copy* of SamHeader data object
+// (returned by value, so changes to the result do not affect this BamHeader)
+SamHeader BamHeader::ToSamHeader(void) const {
+    return m_header;
+}
+
+// returns SAM-formatted string of header data
+// (forwards to SamHeader::ToString on the stored header)
+string BamHeader::ToString(void) const {
+    return m_header.ToString();
+}
diff --git a/src/api/internal/bam/BamHeader_p.h b/src/api/internal/bam/BamHeader_p.h
new file mode 100644 (file)
index 0000000..499ad96
--- /dev/null
@@ -0,0 +1,69 @@
+// ***************************************************************************
+// BamHeader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#ifndef BAMHEADER_P_H
+#define BAMHEADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamHeader.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BgzfStream;
+
+class BamHeader {
+    // wraps a SamHeader and knows how to populate it from a BGZF stream
+    // ctor & dtor
+    public:
+        BamHeader(void);
+        ~BamHeader(void);
+
+    // BamHeader interface
+    public:
+        // clear SamHeader data
+        void Clear(void);
+        // return true if SamHeader data is valid
+        bool IsValid(void) const;
+        // load BAM header ('magic number' and SAM header text) from BGZF stream
+        // (no return value; read failures are reported by throwing BamException)
+        void Load(BgzfStream* stream);
+        // returns (editable) copy of SamHeader data object
+        SamHeader ToSamHeader(void) const;
+        // returns SAM-formatted string of header data
+        std::string ToString(void) const;
+
+    // internal methods
+    private:
+        // reads magic number from BGZF stream
+        void CheckMagicNumber(BgzfStream* stream);
+        // reads SAM header length from BGZF stream, stores it in @length
+        void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
+        // reads SAM header text from BGZF stream, stores in SamHeader object
+        void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
+
+    // data members
+    private:
+        SamHeader m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMHEADER_P_H
diff --git a/src/api/internal/bam/BamMultiMerger_p.h b/src/api/internal/bam/BamMultiMerger_p.h
new file mode 100644 (file)
index 0000000..3000097
--- /dev/null
@@ -0,0 +1,266 @@
+// ***************************************************************************
+// BamMultiMerger_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides merging functionality for BamMultiReader.  At this point, supports
+// sorting results by (refId, position) or by read name.
+// ***************************************************************************
+
+#ifndef BAMMULTIMERGER_P_H
+#define BAMMULTIMERGER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAlignment.h"
+#include "api/BamReader.h"
+#include "api/algorithms/Sort.h"
+#include <deque>
+#include <functional>
+#include <set>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+struct MergeItem {
+    // couples a BamReader with an alignment slot; the dtor is empty, so neither pointer is freed here
+    // data members
+    BamReader*    Reader;
+    BamAlignment* Alignment;
+
+    // ctors & dtor
+    MergeItem(BamReader* reader = 0,
+              BamAlignment* alignment = 0)
+        : Reader(reader)
+        , Alignment(alignment)
+    { }
+    // shallow copy: both items then refer to the same reader & alignment
+    MergeItem(const MergeItem& other)
+        : Reader(other.Reader)
+        , Alignment(other.Alignment)
+    { }
+
+    ~MergeItem(void) { }
+};
+
+template<typename Compare>
+struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool> {
+
+    public:
+        MergeItemSorter(const Compare& comp = Compare())
+            : m_comp(comp)
+        { }
+
+        bool operator()(const MergeItem& lhs, const MergeItem& rhs) {
+            const BamAlignment& l = *lhs.Alignment;
+            const BamAlignment& r = *rhs.Alignment;
+            return m_comp(l,r);
+        }
+
+    private:
+        Compare m_comp;
+};
+
+// pure ABC so we can just work polymorphically with any specific merger implementation
+class IMultiMerger {
+    // N.B. - dtor is virtual, so concrete mergers can be deleted through an IMultiMerger*
+    public:
+        IMultiMerger(void) { }
+        virtual ~IMultiMerger(void) { }
+    public:
+        virtual void Add(MergeItem item) =0;            // store @item
+        virtual void Clear(void) =0;                    // discard all stored items
+        virtual const MergeItem& First(void) const =0;  // peek at the next item
+        virtual bool IsEmpty(void) const =0;            // true if nothing is stored
+        virtual void Remove(BamReader* reader) =0;      // drop the stored item matching @reader
+        virtual int Size(void) const =0;                // number of stored items
+        virtual MergeItem TakeFirst(void) =0;           // remove & return the next item
+};
+
+// general merger - keeps its MergeItems in a std::multiset ordered by Compare (via MergeItemSorter)
+template<typename Compare>
+class MultiMerger : public IMultiMerger {
+    // N.B. - Add() also expects Compare to provide a static UsesCharData()
+    public:
+        typedef Compare                      CompareType;
+        typedef MergeItemSorter<CompareType> MergeType;
+
+    public:
+        explicit MultiMerger(const Compare& comp = Compare())
+            : IMultiMerger()
+            , m_data( MergeType(comp) )
+        { }
+        ~MultiMerger(void) { }
+
+    public:
+        void Add(MergeItem item);
+        void Clear(void);
+        const MergeItem& First(void) const;
+        bool IsEmpty(void) const;
+        void Remove(BamReader* reader);
+        int Size(void) const;
+        MergeItem TakeFirst(void);
+
+    private:
+        typedef MergeItem                              ValueType;
+        typedef std::multiset<ValueType, MergeType>    ContainerType;
+        typedef typename ContainerType::iterator       DataIterator;
+        typedef typename ContainerType::const_iterator DataConstIterator;
+        ContainerType m_data;
+};
+
+template <typename Compare>
+inline void MultiMerger<Compare>::Add(MergeItem item) {
+    // inserts @item into the ordered set; item.Alignment must be non-null (it is dereferenced)
+    // N.B. - any future custom Compare types must define this method
+    //        see algorithms/Sort.h
+    // ("this method" = the static UsesCharData() queried just below)
+    if ( CompareType::UsesCharData() )
+        item.Alignment->BuildCharData();
+    m_data.insert(item);
+}
+
+template <typename Compare>
+inline void MultiMerger<Compare>::Clear(void) {
+    m_data.clear(); // drops the stored MergeItems only; MergeItem's dtor frees nothing
+}
+
+template <typename Compare>
+inline const MergeItem& MultiMerger<Compare>::First(void) const {
+    // front of the ordered set; the merger must not be empty when this is called
+    return *m_data.begin();
+}
+
+template <typename Compare>
+inline bool MultiMerger<Compare>::IsEmpty(void) const {
+    return m_data.empty(); // true when no MergeItems are stored
+}
+template <typename Compare>
+inline void MultiMerger<Compare>::Remove(BamReader* reader) {
+
+    if ( reader == 0 ) return;
+    const std::string& filenameToRemove = reader->GetFilename();
+
+    // iterate over readers in cache
+    DataIterator dataIter = m_data.begin();
+    DataIterator dataEnd  = m_data.end();
+    for ( ; dataIter != dataEnd; ++dataIter ) {
+        const MergeItem& item = (*dataIter);
+        const BamReader* itemReader = item.Reader;
+        if ( itemReader == 0 ) continue;
+
+        // remove iterator on match
+        if ( itemReader->GetFilename() == filenameToRemove ) {
+            m_data.erase(dataIter);
+            return;
+        }
+    }
+}
+template <typename Compare>
+inline int MultiMerger<Compare>::Size(void) const {
+    return static_cast<int>( m_data.size() ); // container size, narrowed to the interface's int
+}
+
+template <typename Compare>
+inline MergeItem MultiMerger<Compare>::TakeFirst(void) {
+    // copy out the front entry, then drop it (the merger must not be empty)
+    MergeItem taken( *m_data.begin() );
+    m_data.erase( m_data.begin() );
+    return taken;
+}
+
+// unsorted "merger" - a plain std::deque used as a queue; no comparison is performed
+template<>
+class MultiMerger<Algorithms::Sort::Unsorted> : public IMultiMerger {
+    // N.B. - @comp mirrors the general template's ctor signature; it is not used here
+    public:
+        explicit MultiMerger(const Algorithms::Sort::Unsorted& comp = Algorithms::Sort::Unsorted())
+            : IMultiMerger()
+        { }
+        ~MultiMerger(void) { }
+
+    public:
+        void Add(MergeItem item);
+        void Clear(void);
+        const MergeItem& First(void) const;
+        bool IsEmpty(void) const;
+        void Remove(BamReader* reader);
+        int Size(void) const;
+        MergeItem TakeFirst(void);
+
+    private:
+        typedef MergeItem                     ValueType;
+        typedef std::deque<ValueType>         ContainerType;
+        typedef ContainerType::iterator       DataIterator;
+        typedef ContainerType::const_iterator DataConstIterator;
+        ContainerType m_data;
+};
+
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Add(MergeItem item) {
+    m_data.push_back(item); // appended at the back; TakeFirst() pops from the front
+}
+
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Clear(void) {
+    m_data.clear(); // drops the stored MergeItems only; MergeItem's dtor frees nothing
+}
+
+inline
+const MergeItem& MultiMerger<Algorithms::Sort::Unsorted>::First(void) const {
+    return m_data.front(); // oldest stored item; the queue must not be empty
+}
+
+inline
+bool MultiMerger<Algorithms::Sort::Unsorted>::IsEmpty(void) const {
+    return m_data.empty(); // true when no MergeItems are stored
+}
+
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Remove(BamReader* reader) {
+
+    if ( reader == 0 ) return;
+    const std::string filenameToRemove = reader->GetFilename();
+
+    // iterate over readers in cache
+    DataIterator dataIter = m_data.begin();
+    DataIterator dataEnd  = m_data.end();
+    for ( ; dataIter != dataEnd; ++dataIter ) {
+        const MergeItem& item = (*dataIter);
+        const BamReader* itemReader = item.Reader;
+        if ( itemReader == 0 ) continue;
+
+        // remove iterator on match
+        if ( itemReader->GetFilename() == filenameToRemove ) {
+            m_data.erase(dataIter);
+            return;
+        }
+    }
+}
+
+inline
+int MultiMerger<Algorithms::Sort::Unsorted>::Size(void) const {
+    return static_cast<int>( m_data.size() ); // container size, narrowed to the interface's int
+}
+
+inline
+MergeItem MultiMerger<Algorithms::Sort::Unsorted>::TakeFirst(void) {
+    MergeItem taken( m_data.front() ); // copy the head before popping (queue must not be empty)
+    m_data.pop_front();
+    return taken;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIMERGER_P_H
diff --git a/src/api/internal/bam/BamMultiReader_p.cpp b/src/api/internal/bam/BamMultiReader_p.cpp
new file mode 100644 (file)
index 0000000..d3f2b15
--- /dev/null
@@ -0,0 +1,799 @@
+// ***************************************************************************
+// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/BamMultiReader.h"
+#include "api/SamConstants.h"
+#include "api/algorithms/Sort.h"
+#include "api/internal/bam/BamMultiReader_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+using namespace std;
+
+// ctor - starts with no alignment cache; UpdateAlignmentCache() creates it when first needed
+BamMultiReaderPrivate::BamMultiReaderPrivate(void)
+    : m_alignmentCache(0)
+{ }
+
+// dtor - closes whatever BAM files are still open (the result of Close() is ignored)
+BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {
+    Close();
+}
+
+// closes all BAM files reported by Filenames()
+bool BamMultiReaderPrivate::Close(void) {
+
+    m_errorString.clear();
+    // success path: nothing more to report
+    const bool allClosed = CloseFiles( Filenames() );
+    if ( allClosed )
+        return true;
+
+    // failure path: wrap the per-reader errors gathered by CloseFiles()
+    const string details = m_errorString;
+    SetErrorString("BamMultiReader::Close", string("error encountered while closing all files: \n\t") + details);
+    return false;
+}
+
+// close requested BAM file
+bool BamMultiReaderPrivate::CloseFile(const string& filename) {
+
+    m_errorString.clear();
+
+    vector<string> filenames(1, filename);
+    if ( CloseFiles(filenames) )
+        return true;
+    else {
+        const string currentError = m_errorString;
+        const string message = string("error while closing file: ") + filename + "\n" + currentError;
+        SetErrorString("BamMultiReader::CloseFile", message);
+        return false;
+    }
+}
+
+// close requested BAM files
+bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over filenames
+    vector<string>::const_iterator filesIter = filenames.begin();
+    vector<string>::const_iterator filesEnd  = filenames.end();
+    for ( ; filesIter != filesEnd; ++filesIter ) {
+        const string& filename = (*filesIter);
+        if ( filename.empty() ) continue;
+
+        // iterate over readers
+        vector<MergeItem>::iterator readerIter = m_readers.begin();
+        vector<MergeItem>::iterator readerEnd  = m_readers.end();
+        for ( ; readerIter != readerEnd; ++readerIter ) {
+            MergeItem& item = (*readerIter);
+            BamReader* reader = item.Reader;
+            if ( reader == 0 ) continue;
+
+            // if reader matches requested filename
+            if ( reader->GetFilename() == filename ) {
+
+                // remove reader's entry from alignment cache
+                m_alignmentCache->Remove(reader);
+
+                // clean up reader & its alignment
+                if ( !reader->Close() ) {
+                    m_errorString.append(1, '\t');
+                    m_errorString.append(reader->GetErrorString());
+                    m_errorString.append(1, '\n');
+                    errorsEncountered = true;
+                }
+                delete reader;
+                reader = 0;
+
+                // delete reader's alignment entry
+                BamAlignment* alignment = item.Alignment;
+                delete alignment;
+                alignment = 0;
+
+                // remove reader from reader list
+                m_readers.erase(readerIter);
+
+                // on match, just go on to next filename
+                // (no need to keep looking and item iterator is invalid now anyway)
+                break;
+            }
+        }
+    }
+
+    // make sure alignment cache is cleaned up if all readers closed
+    if ( m_readers.empty() && m_alignmentCache ) {
+        m_alignmentCache->Clear();
+        delete m_alignmentCache;
+        m_alignmentCache = 0;
+    }
+
+    // return whether all readers closed OK
+    return !errorsEncountered;
+}
+
+// creates index files for BAM files that don't have them
+bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over readers
+    vector<MergeItem>::iterator itemIter = m_readers.begin();
+    vector<MergeItem>::iterator itemEnd  = m_readers.end();
+    for ( ; itemIter != itemEnd; ++itemIter ) {
+        MergeItem& item = (*itemIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // if reader doesn't have an index, create one
+        if ( !reader->HasIndex() ) {
+            if ( !reader->CreateIndex(type) ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
+    }
+
+    // check for errors encountered before returning success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("error while creating index files: ") + "\n" + currentError;
+        SetErrorString("BamMultiReader::CreateIndexes", message);
+        return false;
+    } else
+        return true;
+}
+
+IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
+
+    // the merger flavor follows the sort order declared in the merged header
+    const SamHeader mergedHeader = GetHeader();
+
+    // coordinate-sorted input: merge by position
+    if ( mergedHeader.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
+        return new MultiMerger<Algorithms::Sort::ByPosition>();
+
+    // queryname-sorted input: merge by read name
+    if ( mergedHeader.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
+        return new MultiMerger<Algorithms::Sort::ByName>();
+
+    // anything else ("unknown" or "unsorted"): unsorted merger, alignments kept as read in
+    return new MultiMerger<Algorithms::Sort::Unsorted>();
+}
+
+const vector<string> BamMultiReaderPrivate::Filenames(void) const {
+
+    // init filename container
+    vector<string> filenames;
+    filenames.reserve( m_readers.size() );
+
+    // iterate over readers
+    vector<MergeItem>::const_iterator itemIter = m_readers.begin();
+    vector<MergeItem>::const_iterator itemEnd  = m_readers.end();
+    for ( ; itemIter != itemEnd; ++itemIter ) {
+        const MergeItem& item = (*itemIter);
+        const BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // store filename if not empty
+        const string& filename = reader->GetFilename();
+        if ( !filename.empty() )
+            filenames.push_back(filename);
+    }
+
+    // return result
+    return filenames;
+}
+
+string BamMultiReaderPrivate::GetErrorString(void) const {
+    return m_errorString; // copy of the current error text (many operations clear it on entry)
+}
+
+SamHeader BamMultiReaderPrivate::GetHeader(void) const {
+    // build a SamHeader object from the merged header text
+    return SamHeader( GetHeaderText() );
+}
+
+// makes a virtual, unified header for all the bam files in the multireader
+string BamMultiReaderPrivate::GetHeaderText(void) const {
+
+    // N.B. - right now, simply copies all header data from first BAM,
+    //        and then appends RG's from other BAM files
+    // TODO: make this more intelligent wrt other header lines/fields
+
+    // if no readers open
+    const size_t numReaders = m_readers.size();
+    if ( numReaders == 0 ) return string();
+
+    // retrieve first reader's header
+    const MergeItem& firstItem = m_readers.front();
+    const BamReader* reader = firstItem.Reader;
+    if ( reader == 0 ) return string();
+    SamHeader mergedHeader = reader->GetHeader();
+
+    // iterate over any remaining readers (skipping the first)
+    for ( size_t i = 1; i < numReaders; ++i ) {
+        const MergeItem& item = m_readers.at(i);
+        const BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // retrieve current reader's header
+        const SamHeader currentHeader = reader->GetHeader();
+
+        // append current reader's RG entries to merged header
+        // N.B. - SamReadGroupDictionary handles duplicate-checking
+        mergedHeader.ReadGroups.Add(currentHeader.ReadGroups);
+
+        // TODO: merge anything else??
+    }
+
+    // return stringified header
+    return mergedHeader.ToString();
+}
+
+// get next alignment among all files (char data is built before the alignment is returned)
+bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
+    return PopNextCachedAlignment(al, true);
+}
+
+// get next alignment among all files without requesting that its character data be parsed
+bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
+    return PopNextCachedAlignment(al, false);
+}
+
+// ---------------------------------------------------------------------------------------
+//
+// NB: The following GetReferenceX() functions assume that we have identical
+// references for all BAM files.  We enforce this by invoking the
+// ValidateReaders() method to verify that our reference data is the same
+// across all files on Open - so we will not encounter a situation in which
+// there is a mismatch and we are still live.
+//
+// ---------------------------------------------------------------------------------------
+
+// returns the number of reference sequences (taken from the first reader)
+int BamMultiReaderPrivate::GetReferenceCount(void) const {
+
+    // an empty multireader has no references
+    if ( m_readers.empty() )
+        return 0;
+
+    // defer to the first reader, unless its slot is null
+    const BamReader* firstReader = m_readers.front().Reader;
+    if ( firstReader )
+        return firstReader->GetReferenceCount();
+    return 0;
+}
+
+// returns vector of reference objects (taken from the first reader)
+const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
+
+    // an empty multireader has no reference data
+    if ( m_readers.empty() )
+        return RefVector();
+
+    // defer to the first reader, unless its slot is null
+    const BamReader* firstReader = m_readers.front().Reader;
+    if ( firstReader )
+        return firstReader->GetReferenceData();
+    return RefVector();
+}
+
+// returns refID from reference name (looked up in the first reader; -1 if unavailable)
+int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
+
+    // an empty multireader cannot resolve any name
+    if ( m_readers.empty() )
+        return -1;
+
+    // defer to the first reader, unless its slot is null
+    const BamReader* firstReader = m_readers.front().Reader;
+    if ( firstReader )
+        return firstReader->GetReferenceID(refName);
+    return -1;
+}
+// ---------------------------------------------------------------------------------------
+
+// returns true if all readers have index data available
+// this is useful to indicate whether Jump() or SetRegion() are possible
+bool BamMultiReaderPrivate::HasIndexes(void) const {
+
+    // handle empty multireader
+    if ( m_readers.empty() )
+        return false;
+
+    bool result = true;
+
+    // iterate over readers
+    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
+    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        const MergeItem& item = (*readerIter);
+        const BamReader* reader = item.Reader;
+        if ( reader  == 0 ) continue;
+
+        // see if current reader has index data
+        result &= reader->HasIndex();
+    }
+
+    return result;
+}
+
+// returns true if multireader has open readers
+bool BamMultiReaderPrivate::HasOpenReaders(void) {
+
+    // iterate over readers
+    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
+    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        const MergeItem& item = (*readerIter);
+        const BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // return true whenever an open reader is found
+        if ( reader->IsOpen() ) return true;
+    }
+
+    // no readers open
+    return false;
+}
+
+// performs random-access jump using (refID, position) as a left-bound
+bool BamMultiReaderPrivate::Jump(int refID, int position) {
+
+    // NB: While it may make sense to track readers in which we can
+    // successfully Jump, in practice a failure of Jump means "no
+    // alignments here."  It makes sense to simply accept the failure,
+    // UpdateAlignments(), and continue.
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // jump in each BamReader to position of interest
+        reader->Jump(refID, position);
+    }
+
+    // returns status of cache update
+    return UpdateAlignmentCache();
+}
+
+// locate (& load) index files for BAM readers that don't already have one loaded
+bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // if reader has no index, try to locate one
+        if ( !reader->HasIndex() ) {
+            if ( !reader->LocateIndex(preferredType) ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
+    }
+
+    // check for errors encountered before returning success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("error while locating index files: ") + "\n" + currentError;
+        SetErrorString("BamMultiReader::LocatingIndexes", message);
+        return false;
+    } else
+        return true;
+}
+
+// opens BAM files, adding them to any readers this multireader already holds
+bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
+    // NOTE(review): on a false return, files that did open remain in m_readers
+    m_errorString.clear();
+
+    // put all current readers back at beginning (refreshes alignment cache)
+    if ( !Rewind() ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to rewind existing readers: \n\t") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // iterate over filenames
+    bool errorsEncountered = false;
+    vector<string>::const_iterator filenameIter = filenames.begin();
+    vector<string>::const_iterator filenameEnd  = filenames.end();
+    for ( ; filenameIter != filenameEnd; ++filenameIter ) {
+        const string& filename = (*filenameIter);
+        if ( filename.empty() ) continue;
+
+        // attempt to open BamReader
+        BamReader* reader = new BamReader;
+        const bool readerOpened = reader->Open(filename);
+
+        // if opened OK, store it
+        if ( readerOpened )
+            m_readers.push_back( MergeItem(reader, new BamAlignment) );
+
+        // otherwise store error & clean up invalid reader
+        else {
+            m_errorString.append(1, '\t');
+            m_errorString += string("unable to open file: ") + filename;
+            m_errorString.append(1, '\n');
+            errorsEncountered = true;
+            // reader never made it into m_readers, so release it here
+            delete reader;
+            reader = 0;
+        }
+    }
+
+    // check for errors while opening
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to open all files: \t\n") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // check for BAM file consistency
+    if ( !ValidateReaders() ) {
+        const string currentError = m_errorString;
+        const string message = string("unable to open inconsistent files: \t\n") + currentError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // update alignment cache
+    return UpdateAlignmentCache();
+}
+
+bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
+    vector<string> filenames(1, filename);
+    if ( Open(filenames) )
+        return true;
+    else {
+        const string currentError = m_errorString;
+        const string message = string("could not open file: ") + filename + "\n\t" + currentError;
+        SetErrorString("BamMultiReader::OpenFile", message);
+        return false;
+    }
+}
+
+bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
+
+    // TODO: This needs to be cleaner - should not assume same order.
+    //       And either way, shouldn't start at first reader.  Should start at
+    //       first reader without an index?
+
+    // make sure same number of index filenames as readers
+    if ( m_readers.size() != indexFilenames.size() ) {
+        const string message("size of index file list does not match current BAM file count");
+        SetErrorString("BamMultiReader::OpenIndexes", message);
+        return false;
+    }
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over BamReaders
+    vector<string>::const_iterator indexFilenameIter = indexFilenames.begin();
+    vector<string>::const_iterator indexFilenameEnd  = indexFilenames.end();
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+
+        // open index filename on reader
+        if ( reader ) {
+            const string& indexFilename = (*indexFilenameIter);
+            if ( !reader->OpenIndex(indexFilename) ) {
+                m_errorString.append(1, '\t');
+                m_errorString += reader->GetErrorString();
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
+
+        // increment filename iterator, skip if no more index files to open
+        if ( ++indexFilenameIter == indexFilenameEnd )
+            break;
+    }
+
+    // return success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("could not open all index files: \n\t") + currentError;
+        SetErrorString("BamMultiReader::OpenIndexes", message);
+        return false;
+    } else
+        return true;
+}
+
+bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
+
+    // skip if no alignments available
+    if ( m_alignmentCache == 0 || m_alignmentCache->IsEmpty() )
+        return false;
+
+    // pop next merge item entry from cache
+    MergeItem item = m_alignmentCache->TakeFirst();
+    BamReader* reader = item.Reader;
+    BamAlignment* alignment = item.Alignment;
+    if ( reader == 0 || alignment == 0 )
+        return false;
+
+    // set char data if requested
+    if ( needCharData ) {
+        alignment->BuildCharData();
+        alignment->Filename = reader->GetFilename();
+    }
+
+    // store cached alignment into destination parameter (by copy)
+    al = *alignment;
+
+    // load next alignment from reader & store in cache
+    SaveNextAlignment(reader, alignment);
+    return true;
+}
+
+// returns BAM file pointers to beginning of alignment data & resets alignment cache
+bool BamMultiReaderPrivate::Rewind(void) {
+
+    // skip if no readers open
+    if ( m_readers.empty() )
+        return true;
+
+    // attempt to rewind files
+    if ( !RewindReaders() ) {
+        const string currentError = m_errorString;
+        const string message = string("could not rewind readers: \n\t") + currentError;
+        SetErrorString("BamMultiReader::Rewind", message);
+        return false;
+    }
+
+    // return status of cache update
+    return UpdateAlignmentCache();
+}
+
+// returns BAM file pointers to beginning of alignment data
+bool BamMultiReaderPrivate::RewindReaders(void) {
+
+    m_errorString.clear();
+    bool errorsEncountered = false;
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // attempt rewind on BamReader
+        if ( !reader->Rewind() ) {
+            m_errorString.append(1, '\t');
+            m_errorString.append( reader->GetErrorString() );
+            m_errorString.append(1, '\n');
+            errorsEncountered = true;
+        }
+    }
+
+    return !errorsEncountered;
+}
+
+void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
+    // reads @reader's next alignment into @alignment and, on success, caches the pair
+    // (on a failed read nothing is added to the cache for this reader)
+    //
+    // N.B. - lazy building of alignment's char data - populated only:
+    //        automatically by alignment cache to maintain its sorting OR
+    //        on demand, by a later client call to GetNextAlignment()
+
+    if ( reader->GetNextAlignmentCore(*alignment) )
+        m_alignmentCache->Add( MergeItem(reader, alignment) );
+}
+
+void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
+    static const string SEPARATOR = ": ";
+    m_errorString = where + SEPARATOR + what;
+}
+
+bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
+
+    // NB: While it may make sense to track readers in which we can
+    // successfully SetRegion, In practice a failure of SetRegion means "no
+    // alignments here."  It makes sense to simply accept the failure,
+    // UpdateAlignments(), and continue.
+
+    // iterate over alignments
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // set region of interest
+        reader->SetRegion(region);
+    }
+
+    // return status of cache update
+    return UpdateAlignmentCache();
+}
+
+// updates our alignment cache
+bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
+
+    // create alignment cache if not created yet
+    if ( m_alignmentCache == 0 ) {
+        m_alignmentCache = CreateAlignmentCache();
+        if ( m_alignmentCache == 0 ) {
+            SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
+            return false;
+        }
+    }
+
+    // clear any prior cache data
+    m_alignmentCache->Clear();
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        BamAlignment* alignment = item.Alignment;
+        if ( reader == 0 || alignment == 0 ) continue;
+
+        // save next alignment from each reader in cache
+        SaveNextAlignment(reader, alignment);
+    }
+
+    // if we get here, ok
+    return true;
+}
+
+// ValidateReaders checks that all the readers point to BAM files representing
+// alignments against the same set of reference sequences, and that the
+// sequences are identically ordered.  If these checks fail the operation of
+// the multireader is undefined, so we force program exit.
+bool BamMultiReaderPrivate::ValidateReaders(void) const {
+
+    m_errorString.clear();
+
+    // skip if 0 or 1 readers opened
+    if ( m_readers.empty() || (m_readers.size() == 1) )
+        return true;
+
+    // retrieve first reader
+    const MergeItem& firstItem = m_readers.front();
+    const BamReader* firstReader = firstItem.Reader;
+    if ( firstReader == 0 ) return false;
+
+    // retrieve first reader's header data
+    const SamHeader& firstReaderHeader = firstReader->GetHeader();
+    const string& firstReaderSortOrder = firstReaderHeader.SortOrder;
+
+    // retrieve first reader's reference data
+    const RefVector& firstReaderRefData = firstReader->GetReferenceData();
+    const int firstReaderRefCount = firstReader->GetReferenceCount();
+    const int firstReaderRefSize = firstReaderRefData.size();
+
+    // iterate over all readers
+    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
+    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        const MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // get current reader's header data
+        const SamHeader& currentReaderHeader = reader->GetHeader();
+        const string& currentReaderSortOrder = currentReaderHeader.SortOrder;
+
+        // check compatible sort order
+        if ( currentReaderSortOrder != firstReaderSortOrder ) {
+            const string message = string("mismatched sort order in ") + reader->GetFilename() +
+                                   ", expected " + firstReaderSortOrder +
+                                   ", but found " + currentReaderSortOrder;
+            SetErrorString("BamMultiReader::ValidateReaders", message);
+            return false;
+        }
+
+        // get current reader's reference data
+        const RefVector currentReaderRefData = reader->GetReferenceData();
+        const int currentReaderRefCount = reader->GetReferenceCount();
+        const int currentReaderRefSize  = currentReaderRefData.size();
+
+        // init reference data iterators
+        RefVector::const_iterator firstRefIter   = firstReaderRefData.begin();
+        RefVector::const_iterator firstRefEnd    = firstReaderRefData.end();
+        RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
+
+        // compare reference counts from BamReader ( & container size, in case of BR error)
+        if ( (currentReaderRefCount != firstReaderRefCount) ||
+             (firstReaderRefSize    != currentReaderRefSize) )
+        {
+            stringstream s("");
+            s << "mismatched reference count in " << reader->GetFilename()
+              << ", expected " << firstReaderRefCount
+              << ", but found " << currentReaderRefCount;
+            SetErrorString("BamMultiReader::ValidateReaders", s.str());
+            return false;
+        }
+
+        // this will be ok; we just checked above that we have identically-sized sets of references
+        // here we simply check if they are all, in fact, equal in content
+        while ( firstRefIter != firstRefEnd ) {
+            const RefData& firstRef   = (*firstRefIter);
+            const RefData& currentRef = (*currentRefIter);
+
+            // compare reference name & length
+            if ( (firstRef.RefName   != currentRef.RefName) ||
+                 (firstRef.RefLength != currentRef.RefLength) )
+            {
+                stringstream s("");
+                s << "mismatched references found in" << reader->GetFilename()
+                  << "expected: " << endl;
+
+                // print first reader's reference data
+                RefVector::const_iterator refIter = firstReaderRefData.begin();
+                RefVector::const_iterator refEnd  = firstReaderRefData.end();
+                for ( ; refIter != refEnd; ++refIter ) {
+                    const RefData& entry = (*refIter);
+                    stringstream s("");
+                    s << entry.RefName << " " << endl;
+                }
+
+                s << "but found: " << endl;
+
+                // print current reader's reference data
+                refIter = currentReaderRefData.begin();
+                refEnd  = currentReaderRefData.end();
+                for ( ; refIter != refEnd; ++refIter ) {
+                    const RefData& entry = (*refIter);
+                    s << entry.RefName << " " << entry.RefLength << endl;
+                }
+
+                SetErrorString("BamMultiReader::ValidateReaders", s.str());
+                return false;
+            }
+
+            // update iterators
+            ++firstRefIter;
+            ++currentRefIter;
+        }
+    }
+
+    // if we get here, everything checks out
+    return true;
+}
diff --git a/src/api/internal/bam/BamMultiReader_p.h b/src/api/internal/bam/BamMultiReader_p.h
new file mode 100644 (file)
index 0000000..9d7c39a
--- /dev/null
@@ -0,0 +1,99 @@
+// ***************************************************************************
+// BamMultiReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#ifndef BAMMULTIREADER_P_H
+#define BAMMULTIREADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamHeader.h"
+#include "api/BamMultiReader.h"
+#include "api/internal/bam/BamMultiMerger_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+class BamMultiReaderPrivate {
+
+    // typedefs
+    public:
+        typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
+
+    // constructor / destructor
+    public:
+        BamMultiReaderPrivate(void);
+        ~BamMultiReaderPrivate(void);
+
+    // public interface
+    public:
+
+        // file operations
+        bool Close(void);
+        bool CloseFile(const std::string& filename);
+        const std::vector<std::string> Filenames(void) const;
+        bool Jump(int refID, int position = 0);
+        bool Open(const std::vector<std::string>& filenames);
+        bool OpenFile(const std::string& filename);
+        bool Rewind(void);                        // rewinds all readers, then refreshes the alignment cache
+        bool SetRegion(const BamRegion& region);  // sets region on all readers, then refreshes the alignment cache
+
+        // access alignment data
+        bool GetNextAlignment(BamAlignment& al);
+        bool GetNextAlignmentCore(BamAlignment& al);
+        bool HasOpenReaders(void);
+
+        // access auxiliary data
+        SamHeader GetHeader(void) const;
+        std::string GetHeaderText(void) const;
+        int GetReferenceCount(void) const;
+        const BamTools::RefVector GetReferenceData(void) const;
+        int GetReferenceID(const std::string& refName) const;
+
+        // BAM index operations
+        bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);
+        bool HasIndexes(void) const;
+        bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
+        bool OpenIndexes(const std::vector<std::string>& indexFilenames);
+
+        // error handling
+        std::string GetErrorString(void) const;
+
+    // 'internal' methods (implementation helpers, although declared public)
+    public:
+
+        bool CloseFiles(const std::vector<std::string>& filenames);
+        IMultiMerger* CreateAlignmentCache(void) const;
+        bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
+        bool RewindReaders(void);  // rewinds each reader; collects per-reader errors in m_errorString
+        void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);  // reads next core alignment & adds it to the cache
+        void SetErrorString(const std::string& where, const std::string& what) const; // stores "<where>: <what>" in m_errorString
+        bool UpdateAlignmentCache(void);   // creates cache if needed, clears it, reloads one alignment per reader
+        bool ValidateReaders(void) const;  // checks sort order & reference data match across readers
+
+    // data members
+    public:
+        std::vector<MergeItem> m_readers;    // one entry per reader; entries carry Reader & Alignment pointers
+        IMultiMerger* m_alignmentCache;      // created on first call to UpdateAlignmentCache()
+        mutable std::string m_errorString;   // mutable: written by const methods (SetErrorString, ValidateReaders)
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIREADER_P_H
diff --git a/src/api/internal/bam/BamRandomAccessController_p.cpp b/src/api/internal/bam/BamRandomAccessController_p.cpp
new file mode 100644 (file)
index 0000000..848fafd
--- /dev/null
@@ -0,0 +1,289 @@
+// ***************************************************************************
+// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Manages random access operations in a BAM file
+// **************************************************************************
+
+#include "api/BamIndex.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamIndexFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cassert>
+#include <sstream>
+using namespace std;
+
+// starts out with no index & the 'region has alignments' flag at its default (true)
+BamRandomAccessController::BamRandomAccessController(void)
+    : m_index(0),
+      m_hasAlignmentsInRegion(true)
+{
+}
+
+// releases the owned index & resets region state via Close()
+BamRandomAccessController::~BamRandomAccessController(void)
+{
+    Close();
+}
+
+void BamRandomAccessController::AdjustRegion(const int& referenceCount) {
+
+    // skip if no index available
+    if ( m_index == 0 )
+        return;
+
+    // see if any references in region have alignments
+    m_hasAlignmentsInRegion = false;
+    int currentId = m_region.LeftRefID;
+    const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? m_region.RightRefID : referenceCount - 1 );
+    while ( currentId <= rightBoundRefId ) {
+        m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);
+        if ( m_hasAlignmentsInRegion ) break;
+        ++currentId;
+    }
+
+    // if no data found on any reference in region
+    if ( !m_hasAlignmentsInRegion )
+        return;
+
+    // if left bound of desired region had no data, use first reference that had data
+    // otherwise, leave requested region as-is
+    if ( currentId != m_region.LeftRefID ) {
+        m_region.LeftRefID = currentId;
+        m_region.LeftPosition = 0;
+    }
+}
+
+// returns alignments' "RegionState": { Before|Overlaps|After } current region
+BamRandomAccessController::RegionState
+BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {
+
+    // if region has no left bound at all
+    if ( !m_region.isLeftBoundSpecified() )
+        return OverlapsRegion;
+
+    // handle unmapped reads - return AFTER region to halt processing
+    if ( alignment.RefID == -1 )
+        return AfterRegion;
+
+    // if alignment is on any reference before left bound reference
+    if ( alignment.RefID < m_region.LeftRefID )
+        return BeforeRegion;
+
+    // if alignment is on left bound reference
+    else if ( alignment.RefID == m_region.LeftRefID ) {
+
+        // if alignment starts at or after left bound position
+        if ( alignment.Position >= m_region.LeftPosition) {
+
+            if ( m_region.isRightBoundSpecified() &&             // right bound is specified AND
+                 m_region.LeftRefID == m_region.RightRefID &&    // left & right bounds on same reference AND
+                 alignment.Position >= m_region.RightPosition )  // alignment starts on or after right bound position
+                return AfterRegion;
+
+            // otherwise, alignment overlaps region
+            else return OverlapsRegion;
+        }
+
+        // alignment starts before left bound position
+        else {
+
+            // if alignment overlaps left bound position
+            if ( alignment.GetEndPosition() > m_region.LeftPosition )
+                return OverlapsRegion;
+            else
+                return BeforeRegion;
+        }
+    }
+
+    // otherwise alignment is on a reference after left bound reference
+    else {
+
+        // if region has a right bound
+        if ( m_region.isRightBoundSpecified() ) {
+
+            // alignment is on any reference between boundaries
+            if ( alignment.RefID < m_region.RightRefID )
+                return OverlapsRegion;
+
+            // alignment is on any reference after right boundary
+            else if ( alignment.RefID > m_region.RightRefID )
+                return AfterRegion;
+
+            // alignment is on right bound reference
+            else {
+
+                // if alignment starts before right bound position
+                if ( alignment.Position < m_region.RightPosition )
+                    return OverlapsRegion;
+                else
+                    return AfterRegion;
+            }
+        }
+
+        // otherwise, alignment starts after left bound and there is no right bound given
+        else return OverlapsRegion;
+    }
+}
+
+// drops the index (if any), then resets region state
+void BamRandomAccessController::Close(void)
+{
+    ClearIndex();
+    ClearRegion();
+}
+
+// deletes the owned index, leaving the controller without one
+void BamRandomAccessController::ClearIndex(void) {
+    // deleting a null pointer is a no-op, so no explicit check is needed
+    delete m_index;
+    m_index = 0;
+}
+
+// clears the stored region; the 'has alignments' flag returns to its default (true)
+void BamRandomAccessController::ClearRegion(void)
+{
+    m_region.clear();
+    m_hasAlignmentsInRegion = true;
+}
+
+bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
+                                            const BamIndex::IndexType& type)
+{
+    // skip if reader is invalid
+    assert(reader);
+    if ( !reader->IsOpen() ) {
+        SetErrorString("BamRandomAccessController::CreateIndex",
+                       "cannot create index for unopened reader");
+        return false;
+    }
+
+    // create new index of requested type
+    BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
+    if ( newIndex == 0 ) {
+        stringstream s("");
+        s << "could not create index of type: " << type;
+        SetErrorString("BamRandomAccessController::CreateIndex", s.str());
+        return false;
+    }
+
+    // attempt to build index from current BamReader file
+    if ( !newIndex->Create() ) {
+        const string indexError = newIndex->GetErrorString();
+        const string message = "could not create index: \n\t" + indexError;
+        SetErrorString("BamRandomAccessController::CreateIndex", message);
+        return false;
+    }
+
+    // save new index & return success
+    SetIndex(newIndex);
+    return true;
+}
+
+string BamRandomAccessController::GetErrorString(void) const {
+    return m_errorString;
+}
+
+// true if an index is currently held
+bool BamRandomAccessController::HasIndex(void) const
+{
+    return m_index != 0;
+}
+
+// true if the stored region is not null
+bool BamRandomAccessController::HasRegion(void) const
+{
+    const bool regionIsNull = m_region.isNull();
+    return !regionIsNull;
+}
+
+// Asks the index whether reference 'refId' has any alignments.
+// Returns false when no index is loaded (previously this dereferenced a null pointer).
+bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {
+    if ( m_index == 0 )
+        return false;
+    return m_index->HasAlignments(refId);
+}
+
+bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,
+                                            const BamIndex::IndexType& preferredType)
+{
+    // look up index filename, deferring to preferredType if possible
+    assert(reader);
+    const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
+
+    // if no index file found (of any type)
+    if ( indexFilename.empty() ) {
+        const string message = string("could not find index file for:") + reader->Filename();
+        SetErrorString("BamRandomAccessController::LocateIndex", message);
+        return false;
+    }
+
+    // otherwise open & use index file that was found
+    return OpenIndex(indexFilename, reader);
+}
+
+bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {
+
+    // attempt create new index of type based on filename
+    BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
+    if ( index == 0 ) {
+        const string message = string("could not open index file: ") + indexFilename;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+
+    // attempt to load data from index file
+    if ( !index->Load(indexFilename) ) {
+        const string indexError = index->GetErrorString();
+        const string message = string("could not load index data from file: ") + indexFilename +
+                               "\n\t" + indexError;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+
+    // save new index & return success
+    SetIndex(index);
+    return true;
+}
+
+// reports the flag maintained by AdjustRegion() / SetRegion() / ClearRegion()
+bool BamRandomAccessController::RegionHasAlignments(void) const
+{
+    return m_hasAlignmentsInRegion;
+}
+
+void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
+    m_errorString = where + ": " + what;
+}
+
+// takes ownership of 'index', deleting any index currently held
+void BamRandomAccessController::SetIndex(BamIndex* index) {
+    ClearIndex();   // does nothing observable when no index is held
+    m_index = index;
+}
+
+bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
+
+    // store region
+    m_region = region;
+
+    // cannot jump when no index is available
+    if ( !HasIndex() ) {
+        SetErrorString("BamRandomAccessController", "cannot jump if no index data available");
+        return false;
+    }
+
+    // adjust region as necessary to reflect where data actually begins
+    AdjustRegion(referenceCount);
+
+    // if no data present, return true
+    //   * Not an error, but future attempts to access alignments in this region will not return data
+    //     Returning true is useful in a BamMultiReader setting where some BAM files may
+    //     lack alignments in regions where other files still have data available.
+    if ( !m_hasAlignmentsInRegion )
+        return true;
+
+    // return success/failure of jump to specified region,
+    //
+    //  * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag
+    //    This covers 'corner case' where a region is requested that lies beyond the last
+    //    alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
+    //    will not return data. BamMultiReader will still be able to successfully pull alignments
+    //    from a region from other files even if this one has no data.
+    if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
+        const string indexError = m_index->GetErrorString();
+        const string message = string("could not set region\n\t") + indexError;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+    else
+        return true;
+}
diff --git a/src/api/internal/bam/BamRandomAccessController_p.h b/src/api/internal/bam/BamRandomAccessController_p.h
new file mode 100644 (file)
index 0000000..9262a61
--- /dev/null
@@ -0,0 +1,94 @@
+// ***************************************************************************
+// BamRandomAccessController_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Manages random access operations in a BAM file
+// ***************************************************************************
+
+#ifndef BAMRACONTROLLER_P_H
+#define BAMRACONTROLLER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+class BamReaderPrivate;
+
+class BamRandomAccessController {
+
+    // enums - position of an alignment relative to the current region
+    public: enum RegionState { BeforeRegion = 0
+                             , OverlapsRegion
+                             , AfterRegion
+                             };
+
+    // ctor & dtor
+    public:
+        BamRandomAccessController(void);
+        ~BamRandomAccessController(void);   // calls Close()
+
+    // BamRandomAccessController interface
+    public:
+
+        // index methods
+        void ClearIndex(void);   // deletes the held index (if any)
+        bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);  // builds a new index from reader's BAM file
+        bool HasIndex(void) const;
+        bool IndexHasAlignmentsForReference(const int& refId);  // forwards to the index's HasAlignments()
+        bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType);  // finds an index file, then OpenIndex()
+        bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader);  // loads index data from file
+        void SetIndex(BamIndex* index);   // takes ownership of 'index', deleting any previous one
+
+        // region methods
+        void ClearRegion(void);
+        bool HasRegion(void) const;
+        RegionState AlignmentState(const BamAlignment& alignment) const;
+        bool RegionHasAlignments(void) const;
+        bool SetRegion(const BamRegion& region, const int& referenceCount);  // requires an index
+
+        // general methods
+        void Close(void);   // ClearIndex() followed by ClearRegion()
+        std::string GetErrorString(void) const;
+
+    // internal methods
+    private:
+        // adjusts requested region if necessary (depending on where data actually begins)
+        void AdjustRegion(const int& referenceCount);
+        // error-string handling
+        void SetErrorString(const std::string& where, const std::string& what);  // stores "<where>: <what>"
+
+    // data members
+    private:
+
+        // index data
+        BamIndex* m_index;  // owns the index, not a copy - responsible for deleting
+
+        // region data
+        BamRegion m_region;
+        bool m_hasAlignmentsInRegion;  // defaults to true; updated by AdjustRegion() and by the index's Jump()
+
+        // general data
+        std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMRACONTROLLER_P_H
diff --git a/src/api/internal/bam/BamReader_p.cpp b/src/api/internal/bam/BamReader_p.cpp
new file mode 100644 (file)
index 0000000..6484a10
--- /dev/null
@@ -0,0 +1,466 @@
+// ***************************************************************************
+// BamReader_p.cpp (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#include "api/BamConstants.h"
+#include "api/BamReader.h"
+#include "api/IBamIODevice.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <iterator>
+#include <vector>
+using namespace std;
+
+// constructor - also records whether the host system is big-endian
+BamReaderPrivate::BamReaderPrivate(BamReader* parent)
+    : m_alignmentsBeginOffset(0),
+      m_parent(parent)
+{
+    const bool hostIsBigEndian = BamTools::SystemIsBigEndian();
+    m_isBigEndian = hostIsBigEndian;
+}
+
+// destructor - closes the BAM file (Close() result is ignored here)
+BamReaderPrivate::~BamReaderPrivate(void)
+{
+    Close();
+}
+
+// closes the BAM file
+bool BamReaderPrivate::Close(void) {
+
+    // clear BAM metadata
+    m_references.clear();
+    m_header.Clear();
+
+    // clear filename
+    m_filename.clear();
+
+    // close random access controller
+    m_randomAccessController.Close();
+
+    // if stream is open, attempt close
+    if ( IsOpen() ) {
+        try {
+            m_stream.Close();
+        } catch ( BamException& e ) {
+            const string streamError = e.what();
+            const string message = string("encountered error closing BAM file: \n\t") + streamError;
+            SetErrorString("BamReader::Close", message);
+            return false;
+        }
+    }
+
+    // return success
+    return true;
+}
+
+// creates an index file of requested type on current BAM file
+bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
+
+    // skip if BAM file not open
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
+        return false;
+    }
+
+    // attempt to create index
+    if ( m_randomAccessController.CreateIndex(this, type) )
+        return true;
+    else {
+        const string bracError = m_randomAccessController.GetErrorString();
+        const string message = string("could not create index: \n\t") + bracError;
+        SetErrorString("BamReader::CreateIndex", message);
+        return false;
+    }
+}
+
+// returns a copy of the stored BAM filename (cleared by Close())
+const string BamReaderPrivate::Filename(void) const
+{
+    return m_filename;
+}
+
+string BamReaderPrivate::GetErrorString(void) const {
+    return m_errorString;
+}
+
+// returns the header's text form, as produced by m_header.ToString()
+string BamReaderPrivate::GetHeaderText(void) const
+{
+    return m_header.ToString();
+}
+
+// returns the header converted to a SamHeader object (by value)
+SamHeader BamReaderPrivate::GetSamHeader(void) const
+{
+    return m_header.ToSamHeader();
+}
+
+// get next alignment (with character data fully parsed)
+bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
+
+    // if valid alignment found
+    if ( GetNextAlignmentCore(alignment) ) {
+
+        // store alignment's "source" filename
+        alignment.Filename = m_filename;
+
+        // return success/failure of parsing char data
+        if ( alignment.BuildCharData() )
+            return true;
+        else {
+            const string alError = alignment.GetErrorString();
+            const string message = string("could not populate alignment data: \n\t") + alError;
+            SetErrorString("BamReader::GetNextAlignment", message);
+            return false;
+        }
+    }
+
+    // no valid alignment found
+    return false;
+}
+
+// retrieves next available alignment core data (returns success/fail)
+// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)
+//    these can be accessed, if necessary, from the supportData
+// useful for operations requiring ONLY positional or other alignment-related information
+bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
+
+    // skip if stream not opened
+    if ( !m_stream.IsOpen() )
+        return false;
+
+    try {
+
+        // skip if region is set but has no alignments
+        if ( m_randomAccessController.HasRegion() &&
+             !m_randomAccessController.RegionHasAlignments() )
+        {
+            return false;
+        }
+
+        // if can't read next alignment
+        if ( !LoadNextAlignment(alignment) )
+            return false;
+
+        // check alignment's region-overlap state
+        BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
+
+        // if alignment starts after region, no need to keep reading
+        if ( state == BamRandomAccessController::AfterRegion )
+            return false;
+
+        // read until overlap is found
+        while ( state != BamRandomAccessController::OverlapsRegion ) {
+
+            // if can't read next alignment
+            if ( !LoadNextAlignment(alignment) )
+                return false;
+
+            // check alignment's region-overlap state
+            state = m_randomAccessController.AlignmentState(alignment);
+
+            // if alignment starts after region, no need to keep reading
+            if ( state == BamRandomAccessController::AfterRegion )
+                return false;
+        }
+
+        // if we get here, we found the next 'valid' alignment
+        // (e.g. overlaps current region if one was set, simply the next alignment if not)
+        alignment.SupportData.HasCoreOnly = true;
+        return true;
+
+    } catch ( BamException& e ) {
+        const string streamError = e.what();
+        const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
+        SetErrorString("BamReader::GetNextAlignmentCore", message);
+        return false;
+    }
+}
+
+// number of entries in the stored reference data
+int BamReaderPrivate::GetReferenceCount(void) const
+{
+    return m_references.size();
+}
+
+// returns the stored reference data by const reference (no copy is made)
+const RefVector& BamReaderPrivate::GetReferenceData(void) const
+{
+    return m_references;
+}
+
+// returns RefID for given RefName (returns -1 if not found)
+//
+// The RefID is the zero-based position of the first entry in the reference
+// data whose RefName compares equal to 'refName'.
+int BamReaderPrivate::GetReferenceID(const string& refName) const {
+
+    // scan the reference entries in order; the first match wins
+    // (searching in place avoids copying every name into a temporary vector)
+    const RefVector::const_iterator refBegin = m_references.begin();
+    const RefVector::const_iterator refEnd   = m_references.end();
+    for ( RefVector::const_iterator refIter = refBegin; refIter != refEnd; ++refIter ) {
+        if ( refIter->RefName == refName )
+            return distance(refBegin, refIter);
+    }
+
+    // no reference carries this name
+    return -1;
+}
+
+// reports whether the random-access controller currently has an index
+bool BamReaderPrivate::HasIndex(void) const {
+    const bool indexAvailable = m_randomAccessController.HasIndex();
+    return indexAvailable;
+}
+
+// reports whether the underlying BGZF stream is open
+bool BamReaderPrivate::IsOpen(void) const {
+    const bool streamOpen = m_stream.IsOpen();
+    return streamOpen;
+}
+
+// asks the header component to load itself from this reader's BGZF stream
+void BamReaderPrivate::LoadHeaderData(void) {
+    BgzfStream* const stream = &m_stream;
+    m_header.Load(stream);
+}
+
+// populates BamAlignment with alignment data under file pointer, returns success/fail
+//
+// Reads one record from the stream: the 4-byte block length, the fixed-size
+// core, then the remaining character data (stored raw in
+// SupportData.AllCharData). Only the core fields and CigarData are decoded here.
+//
+// Returns false when the block length is zero or smaller than the core, when a
+// read comes up short, or when the record's CIGAR section would extend past the
+// end of its character data.
+bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
+
+    // read in the 'block length' value, make sure it's not zero
+    // (the buffer is zero-filled up front: the result of this Read() is not checked,
+    //  so a short read must not leave stale stack bytes posing as a length)
+    char buffer[sizeof(uint32_t)] = { 0 };
+    m_stream.Read(buffer, sizeof(uint32_t));
+    alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
+    if ( alignment.SupportData.BlockLength == 0 )
+        return false;
+
+    // a block can never be smaller than its fixed-size core; rejecting it here keeps
+    // the unsigned 'dataLength' subtraction below from wrapping around
+    if ( alignment.SupportData.BlockLength < Constants::BAM_CORE_SIZE )
+        return false;
+
+    // read in core alignment data, make sure the right size of data was read
+    char x[Constants::BAM_CORE_SIZE];
+    if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
+        return false;
+
+    // swap core endian-ness if necessary (the core is a run of 32-bit words)
+    if ( m_isBigEndian ) {
+        for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
+            BamTools::SwapEndian_32p(&x[i]);
+    }
+
+    // set BamAlignment 'core' and 'support' data
+    alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
+    alignment.Position = BamTools::UnpackSignedInt(&x[4]);
+
+    // word at offset 8 packs: bin (high 16 bits) | map quality (8 bits) | name length (low 8 bits)
+    unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
+    alignment.Bin        = tempValue >> 16;
+    alignment.MapQuality = tempValue >> 8 & 0xff;
+    alignment.SupportData.QueryNameLength = tempValue & 0xff;
+
+    // word at offset 12 packs: flag (high 16 bits) | number of CIGAR ops (low 16 bits)
+    tempValue = BamTools::UnpackUnsignedInt(&x[12]);
+    alignment.AlignmentFlag = tempValue >> 16;
+    alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
+
+    alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
+    alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
+    alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
+    alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);
+
+    // set BamAlignment length
+    alignment.Length = alignment.SupportData.QuerySequenceLength;
+
+    // read in character data - make sure proper data size was read
+    const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
+    RaiiBuffer allCharData(dataLength);
+    if ( m_stream.Read(allCharData.Buffer, dataLength) != dataLength )
+        return false;
+
+    // the CIGAR ops sit right after the query name; refuse records whose declared
+    // name length + CIGAR count would run past the character data just read
+    const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
+    const unsigned int cigarDataLength = alignment.SupportData.NumCigarOperations * sizeof(uint32_t);
+    if ( cigarDataOffset + cigarDataLength > dataLength )
+        return false;
+
+    // store 'allCharData' in supportData structure
+    alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
+
+    // save CIGAR ops
+    // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
+    // even when GetNextAlignmentCore() is called
+    uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
+    CigarOp op;
+    alignment.CigarData.clear();
+    alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
+    for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
+
+        // swap endian-ness if necessary
+        // (done in the scratch buffer only; AllCharData above keeps the bytes as read)
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);
+
+        // build CigarOp structure
+        op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
+        op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];
+
+        // save CigarOp
+        alignment.CigarData.push_back(op);
+    }
+
+    // return success
+    return true;
+}
+
+// loads reference data from BAM file
+//
+// Reads the reference count, then for each reference its name length, name and
+// sequence length, appending one RefData entry per reference to m_references.
+// Always returns true.
+bool BamReaderPrivate::LoadReferenceData(void) {
+
+    // get number of reference sequences
+    // (length buffers are zero-filled: the Read() results are not checked here, so
+    //  a short read must not turn stale stack bytes into a count or length)
+    char countBuffer[sizeof(uint32_t)] = { 0 };
+    m_stream.Read(countBuffer, sizeof(uint32_t));
+    uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(countBuffer);
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
+    m_references.reserve((int)numberRefSeqs);
+
+    // iterate over all references in header
+    for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {
+
+        // get length of reference name
+        char nameLengthBuffer[sizeof(uint32_t)] = { 0 };
+        m_stream.Read(nameLengthBuffer, sizeof(uint32_t));
+        uint32_t refNameLength = BamTools::UnpackUnsignedInt(nameLengthBuffer);
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
+        RaiiBuffer refName(refNameLength);
+
+        // get reference name and reference sequence length
+        m_stream.Read(refName.Buffer, refNameLength);
+        char refLengthBuffer[sizeof(int32_t)] = { 0 };
+        m_stream.Read(refLengthBuffer, sizeof(int32_t));
+        int32_t refLength = BamTools::UnpackSignedInt(refLengthBuffer);
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
+
+        // the name ends at its first NUL, but never look beyond the bytes that were
+        // allocated for it (guards against a missing terminator or a zero length)
+        const char* nameBegin = (const char*)refName.Buffer;
+        const char* nameEnd   = find(nameBegin, nameBegin + refNameLength, '\0');
+
+        // store data for reference
+        RefData aReference;
+        aReference.RefName.assign(nameBegin, nameEnd);
+        aReference.RefLength = refLength;
+        m_references.push_back(aReference);
+    }
+
+    // return success
+    return true;
+}
+
+// asks the random-access controller to locate an index for this reader;
+// on failure, records the controller's error text and returns false
+bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
+
+    if ( !m_randomAccessController.LocateIndex(this, preferredType) ) {
+        string message("could not locate index: \n\t");
+        message += m_randomAccessController.GetErrorString();
+        SetErrorString("BamReader::LocateIndex", message);
+        return false;
+    }
+
+    return true;
+}
+
+// opens BAM file for reading and loads its header & reference data
+// (no index is opened by this method)
+// returns false, with the error string set, if a BamException is raised
+bool BamReaderPrivate::Open(const string& filename) {
+
+    try {
+
+        // drop whatever a previous Open() left behind
+        Close();
+
+        // attach the BGZF stream to the requested file, read-only
+        m_stream.Open(filename, IBamIODevice::ReadOnly);
+        assert(m_stream);
+
+        // pull in the header, then the reference data
+        LoadHeaderData();
+        LoadReferenceData();
+
+        // remember which file this is and the stream position once the metadata is consumed
+        m_filename = filename;
+        m_alignmentsBeginOffset = m_stream.Tell();
+        return true;
+    }
+    catch ( BamException& e ) {
+        const string reason = e.what();
+        string message("could not open file: ");
+        message += filename;
+        message += "\n\t";
+        message += reason;
+        SetErrorString("BamReader::Open", message);
+        return false;
+    }
+}
+
+// asks the random-access controller to open the named index file for this reader;
+// on failure, records the controller's error text and returns false
+bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
+
+    if ( !m_randomAccessController.OpenIndex(indexFilename, this) ) {
+        string message("could not open index: \n\t");
+        message += m_randomAccessController.GetErrorString();
+        SetErrorString("BamReader::OpenIndex", message);
+        return false;
+    }
+
+    return true;
+}
+
+// clears any region and seeks back to the offset stored in m_alignmentsBeginOffset
+// returns success/failure of the seek
+bool BamReaderPrivate::Rewind(void) {
+
+    // a rewound reader is no longer restricted to a region
+    m_randomAccessController.ClearRegion();
+
+    if ( !Seek(m_alignmentsBeginOffset) ) {
+        // Seek() has already stored its own error; wrap it with rewind context
+        const string seekError = m_errorString;
+        SetErrorString("BamReader::Rewind", string("could not rewind: \n\t") + seekError);
+        return false;
+    }
+
+    return true;
+}
+
+// moves the BGZF stream to 'position'
+// returns false (with the error string set) if the file is not open
+// or if the stream raises a BamException
+bool BamReaderPrivate::Seek(const int64_t& position) {
+
+    // seeking only makes sense on an open file
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
+        return false;
+    }
+
+    bool seekOK = true;
+    try {
+        m_stream.Seek(position);
+    }
+    catch ( BamException& e ) {
+        const string reason = e.what();
+        SetErrorString("BamReader::Seek", string("could not seek in BAM file: \n\t") + reason);
+        seekOK = false;
+    }
+    return seekOK;
+}
+
+// stores the last error as "<where>: <what>"
+void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
+    // build the full text first, so an argument that refers to m_errorString is read before it is overwritten
+    const string combined = where + ": " + what;
+    m_errorString = combined;
+}
+
+// hands the given index to the random-access controller
+void BamReaderPrivate::SetIndex(BamIndex* index)
+{
+    m_randomAccessController.SetIndex(index);
+}
+
+// passes the requested region (and the current reference count) to the
+// random-access controller
+// returns success/failure; on failure the controller's error text is recorded
+bool BamReaderPrivate::SetRegion(const BamRegion& region) {
+
+    if ( !m_randomAccessController.SetRegion(region, m_references.size()) ) {
+        string message("could not set region: \n\t");
+        message += m_randomAccessController.GetErrorString();
+        SetErrorString("BamReader::SetRegion", message);
+        return false;
+    }
+
+    return true;
+}
+
+// returns the position reported by the BGZF stream
+int64_t BamReaderPrivate::Tell(void) const {
+    const int64_t streamPosition = m_stream.Tell();
+    return streamPosition;
+}
diff --git a/src/api/internal/bam/BamReader_p.h b/src/api/internal/bam/BamReader_p.h
new file mode 100644 (file)
index 0000000..e8db646
--- /dev/null
@@ -0,0 +1,118 @@
+// ***************************************************************************
+// BamReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#ifndef BAMREADER_P_H
+#define BAMREADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAlignment.h"
+#include "api/BamIndex.h"
+#include "api/BamReader.h"
+#include "api/SamHeader.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Implementation class behind BamTools::BamReader.
+// Combines a BGZF input stream, the BAM header, the reference data and a
+// random-access controller (index & region handling), and keeps the text of
+// the most recent error.
+class BamReaderPrivate {
+
+    // ctor & dtor
+    public:
+        BamReaderPrivate(BamReader* parent);
+        ~BamReaderPrivate(void);
+
+    // BamReader interface
+    public:
+
+        // file operations
+        bool Close(void);
+        const std::string Filename(void) const;
+        bool IsOpen(void) const;
+        bool Open(const std::string& filename);
+        bool Rewind(void);
+        bool SetRegion(const BamRegion& region);
+
+        // access alignment data
+        bool GetNextAlignment(BamAlignment& alignment);
+        bool GetNextAlignmentCore(BamAlignment& alignment);
+
+        // access auxiliary data
+        std::string GetHeaderText(void) const;
+        SamHeader GetSamHeader(void) const;
+        int GetReferenceCount(void) const;
+        const RefVector& GetReferenceData(void) const;
+        // returns -1 when refName is not found
+        int GetReferenceID(const std::string& refName) const;
+
+        // index operations
+        bool CreateIndex(const BamIndex::IndexType& type);
+        bool HasIndex(void) const;
+        bool LocateIndex(const BamIndex::IndexType& preferredType);
+        bool OpenIndex(const std::string& indexFilename);
+        void SetIndex(BamIndex* index);
+
+        // error handling
+        std::string GetErrorString(void) const;
+        // stores the error text as "<where>: <what>"
+        void SetErrorString(const std::string& where, const std::string& what);
+
+    // internal methods, but available as a BamReaderPrivate 'interface'
+    //
+    // these methods should only be used by BamTools::Internal classes
+    // (currently only used by the BamIndex subclasses)
+    public:
+        // retrieves header text from BAM file
+        void LoadHeaderData(void);
+        // retrieves BAM alignment under file pointer
+        // (does no overlap checking or character data parsing)
+        bool LoadNextAlignment(BamAlignment& alignment);
+        // builds reference data structure from BAM file
+        bool LoadReferenceData(void);
+        // seek reader to file position
+        bool Seek(const int64_t& position);
+        // return reader's file position
+        int64_t Tell(void) const;
+
+    // data members
+    public:
+
+        // general BAM file data
+        // (Open() stores the stream position reached after loading the header &
+        //  reference data in m_alignmentsBeginOffset; Rewind() seeks back to it)
+        int64_t     m_alignmentsBeginOffset;
+        std::string m_filename;
+        RefVector   m_references;
+
+        // system data
+        // (when true, values read from the file are byte-swapped before use)
+        bool m_isBigEndian;
+
+        // parent BamReader
+        BamReader* m_parent;
+
+        // BamReaderPrivate components
+        BamHeader m_header;
+        BamRandomAccessController m_randomAccessController;
+        BgzfStream m_stream;
+
+        // error handling
+        // (text of the most recent error, written by SetErrorString())
+        std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMREADER_P_H
diff --git a/src/api/internal/bam/BamWriter_p.cpp b/src/api/internal/bam/BamWriter_p.cpp
new file mode 100644 (file)
index 0000000..ba4989f
--- /dev/null
@@ -0,0 +1,462 @@
+// ***************************************************************************
+// BamWriter_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/BamConstants.h"
+#include "api/IBamIODevice.h"
+#include "api/internal/bam/BamWriter_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// ctor - records the value of BamTools::SystemIsBigEndian() for later endian swapping
+BamWriterPrivate::BamWriterPrivate(void)
+    : m_isBigEndian(BamTools::SystemIsBigEndian())
+{
+}
+
+// dtor - closes the output stream if it is still open
+BamWriterPrivate::~BamWriterPrivate(void)
+{
+    Close();
+}
+
+// calculates minimum bin for a BAM alignment interval [begin, end)
+//
+// Walks the bin levels from the narrowest window (2^14 bases) to the widest
+// (2^26 bases) and returns the bin of the first level at which 'begin' and the
+// last covered position fall into the same window; returns 0 if none does.
+uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
+
+    // per level: window size as a shift, and the ID of the first bin on that level
+    static const int LEVEL_COUNT     = 5;
+    static const int LEVEL_SHIFTS[]  = {   14,  17, 20, 23, 26 };
+    static const int LEVEL_OFFSETS[] = { 4681, 585, 73,  9,  1 };
+
+    // 'end' is exclusive, so the last covered position is one before it
+    const int last = end - 1;
+
+    for ( int level = 0; level < LEVEL_COUNT; ++level ) {
+        const int shift = LEVEL_SHIFTS[level];
+        if ( (begin >> shift) == (last >> shift) )
+            return LEVEL_OFFSETS[level] + (begin >> shift);
+    }
+    return 0;
+}
+
+// closes the output stream (no-op when nothing is open)
+// a BamException raised while closing is not propagated; its text is kept in m_errorString
+void BamWriterPrivate::Close(void) {
+
+    if ( IsOpen() ) {
+        try {
+            m_stream.Close();
+        }
+        catch ( BamException& e ) {
+            m_errorString = e.what();
+        }
+    }
+}
+
+// packs the supplied CIGAR operations into 'packedCigar', one 32-bit word per operation
+// (length shifted left by BAM_CIGAR_SHIFT, OR-ed with the numeric operation code)
+// throws BamException when an operation's type character is not recognized
+void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
+
+    // size the output for one word per operation, then view it as words
+    const size_t opCount = cigarOperations.size();
+    packedCigar.resize(opCount * Constants::BAM_SIZEOF_INT);
+    unsigned int* packedWords = (unsigned int*)packedCigar.data();
+
+    for ( size_t opIndex = 0; opIndex < opCount; ++opIndex ) {
+        const CigarOp& op = cigarOperations[opIndex];
+
+        // translate the type character into its numeric code
+        uint8_t opCode;
+        switch ( op.Type ) {
+            case (Constants::BAM_CIGAR_MATCH_CHAR)    : opCode = Constants::BAM_CIGAR_MATCH;    break;
+            case (Constants::BAM_CIGAR_INS_CHAR)      : opCode = Constants::BAM_CIGAR_INS;      break;
+            case (Constants::BAM_CIGAR_DEL_CHAR)      : opCode = Constants::BAM_CIGAR_DEL;      break;
+            case (Constants::BAM_CIGAR_REFSKIP_CHAR)  : opCode = Constants::BAM_CIGAR_REFSKIP;  break;
+            case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : opCode = Constants::BAM_CIGAR_SOFTCLIP; break;
+            case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : opCode = Constants::BAM_CIGAR_HARDCLIP; break;
+            case (Constants::BAM_CIGAR_PAD_CHAR)      : opCode = Constants::BAM_CIGAR_PAD;      break;
+            case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : opCode = Constants::BAM_CIGAR_SEQMATCH; break;
+            case (Constants::BAM_CIGAR_MISMATCH_CHAR) : opCode = Constants::BAM_CIGAR_MISMATCH; break;
+            default: {
+                const string message = string("invalid CIGAR operation type") + op.Type;
+                throw BamException("BamWriter::CreatePackedCigar", message);
+            }
+        }
+
+        // store the packed word in this operation's slot
+        packedWords[opIndex] = op.Length << Constants::BAM_CIGAR_SHIFT | opCode;
+    }
+}
+
+// encodes the supplied query sequence into 4-bit notation
+//
+// 'encodedQuery' is resized to (query.size()+1)/2 bytes. Bases are packed two per
+// byte: even-indexed bases go into the high nibble, odd-indexed bases into the
+// low nibble (for an odd-length query the final low nibble is left at zero).
+// Throws BamException on any character that is not a recognized base code.
+void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
+
+    // prepare the encoded query string
+    const size_t queryLength = query.size();
+    const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
+    encodedQuery.resize(encodedQueryLength);
+
+    // walk through original query sequence, encoding its bases
+    // (bounded by the string's length rather than by a terminating NUL, which
+    //  std::string::data() is not required to provide before C++11)
+    for ( size_t i = 0; i < queryLength; ++i ) {
+
+        const char base = query[i];
+        unsigned char nucleotideCode;
+        switch ( base ) {
+            case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
+            case (Constants::BAM_DNA_A)     : nucleotideCode = Constants::BAM_BASECODE_A;     break;
+            case (Constants::BAM_DNA_C)     : nucleotideCode = Constants::BAM_BASECODE_C;     break;
+            case (Constants::BAM_DNA_M)     : nucleotideCode = Constants::BAM_BASECODE_M;     break;
+            case (Constants::BAM_DNA_G)     : nucleotideCode = Constants::BAM_BASECODE_G;     break;
+            case (Constants::BAM_DNA_R)     : nucleotideCode = Constants::BAM_BASECODE_R;     break;
+            case (Constants::BAM_DNA_S)     : nucleotideCode = Constants::BAM_BASECODE_S;     break;
+            case (Constants::BAM_DNA_V)     : nucleotideCode = Constants::BAM_BASECODE_V;     break;
+            case (Constants::BAM_DNA_T)     : nucleotideCode = Constants::BAM_BASECODE_T;     break;
+            case (Constants::BAM_DNA_W)     : nucleotideCode = Constants::BAM_BASECODE_W;     break;
+            case (Constants::BAM_DNA_Y)     : nucleotideCode = Constants::BAM_BASECODE_Y;     break;
+            case (Constants::BAM_DNA_H)     : nucleotideCode = Constants::BAM_BASECODE_H;     break;
+            case (Constants::BAM_DNA_K)     : nucleotideCode = Constants::BAM_BASECODE_K;     break;
+            case (Constants::BAM_DNA_D)     : nucleotideCode = Constants::BAM_BASECODE_D;     break;
+            case (Constants::BAM_DNA_B)     : nucleotideCode = Constants::BAM_BASECODE_B;     break;
+            case (Constants::BAM_DNA_N)     : nucleotideCode = Constants::BAM_BASECODE_N;     break;
+            default: {
+                const string message = string("invalid base: ") + base;
+                throw BamException("BamWriter::EncodeQuerySequence", message);
+            }
+        }
+
+        // pack the nucleotide code: an even index starts a new byte (high nibble),
+        // an odd index completes it (low nibble)
+        const size_t byteIndex = i / 2;
+        if ( i % 2 == 0 )
+            encodedQuery[byteIndex] = nucleotideCode << 4;
+        else
+            encodedQuery[byteIndex] |= nucleotideCode;
+    }
+}
+
+// returns a copy of the stored error text
+std::string BamWriterPrivate::GetErrorString(void) const
+{
+    return m_errorString;
+}
+
+// reports whether the output stream is currently open
+bool BamWriterPrivate::IsOpen(void) const {
+    const bool streamOpen = m_stream.IsOpen();
+    return streamOpen;
+}
+
+// opens 'filename' for writing and emits the leading BAM metadata:
+// magic number, SAM header text, then the reference sequence entries
+// returns false, with the exception text stored in m_errorString, if a BamException is raised
+bool BamWriterPrivate::Open(const string& filename,
+                            const string& samHeaderText,
+                            const RefVector& referenceSequences)
+{
+    bool opened = false;
+    try {
+
+        // attach the BGZF stream to the output file, write-only
+        m_stream.Open(filename, IBamIODevice::WriteOnly);
+
+        // metadata goes out in this fixed order
+        WriteMagicNumber();
+        WriteSamHeaderText(samHeaderText);
+        WriteReferences(referenceSequences);
+
+        opened = true;
+    }
+    catch ( BamException& e ) {
+        m_errorString = e.what();
+    }
+    return opened;
+}
+
+// writes one alignment to the output stream
+// returns false, with the exception text stored in m_errorString, if a BamException is raised
+bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
+
+    bool saved = false;
+    try {
+
+        // fully-populated alignment: character data lives in the standard fields (Name, QueryBases, etc),
+        // either from BamReader::GetNextAlignment() or generated directly by client code
+        if ( !al.SupportData.HasCoreOnly )
+            WriteAlignment(al);
+
+        // core-only alignment (as produced by BamReader::GetNextAlignmentCore()):
+        // only the core data and a raw char data buffer are available
+        else
+            WriteCoreAlignment(al);
+
+        saved = true;
+    }
+    catch ( BamException& e ) {
+        m_errorString = e.what();
+    }
+    return saved;
+}
+
+// forwards the compression setting to the stream, but only while no file is open
+// (the request is silently ignored once the BAM file is open)
+void BamWriterPrivate::SetWriteCompressed(bool ok) {
+    if ( IsOpen() )
+        return;
+    m_stream.SetWriteCompressed(ok);
+}
+
+// writes a fully-populated alignment (Name, QueryBases, Qualities, CigarData, TagData)
+//
+// Output order: block size, BAM core, query name (with NUL), packed CIGAR,
+// 4-bit encoded query, base qualities, tag data. On big-endian hosts every
+// multi-byte value is byte-swapped before it is written.
+//
+// The alignment itself is never modified: all conversions are done on local
+// copies, which are also released automatically if a write throws.
+// Throws BamException on an unrecognized CIGAR op, base, tag type or array type
+// (tag types are only inspected on big-endian hosts).
+void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
+
+    // calculate char lengths
+    const unsigned int nameLength         = al.Name.size() + 1;
+    const unsigned int numCigarOperations = al.CigarData.size();
+    const unsigned int queryLength        = al.QueryBases.size();
+    const unsigned int tagDataLength      = al.TagData.size();
+
+    // no way to tell if alignment's bin is already defined (there is no default, invalid value)
+    // so we'll go ahead calculate its bin ID before storing
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // create our packed cigar string
+    string packedCigar;
+    CreatePackedCigar(al.CigarData, packedCigar);
+    const unsigned int packedCigarLength = packedCigar.size();
+
+    // encode the query
+    string encodedQuery;
+    EncodeQuerySequence(al.QueryBases, encodedQuery);
+    const unsigned int encodedQueryLength = encodedQuery.size();
+
+    // write the block size
+    const unsigned int dataBlockSize = nameLength +
+                                       packedCigarLength +
+                                       encodedQueryLength +
+                                       queryLength +
+                                       tagDataLength;
+    unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+    buffer[4] = queryLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < Constants::BAM_CORE_BUFFER_SIZE; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
+    }
+
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+
+    // write the query name
+    m_stream.Write(al.Name.c_str(), nameLength);
+
+    // write the packed cigar
+    if ( m_isBigEndian ) {
+
+        // swap a private copy, one whole 32-bit operation at a time
+        string swappedCigar(packedCigar);
+        for ( size_t i = 0; i < packedCigarLength; i += sizeof(uint32_t) )
+            BamTools::SwapEndian_32p(&swappedCigar[i]);
+        m_stream.Write(swappedCigar.data(), packedCigarLength);
+    }
+    else
+        m_stream.Write(packedCigar.data(), packedCigarLength);
+
+    // write the encoded query sequence
+    m_stream.Write(encodedQuery.data(), encodedQueryLength);
+
+    // write the base qualities
+    // converted into a local buffer, so the caller's alignment keeps its FASTQ-style
+    // qualities and can safely be written again; positions without a supplied quality
+    // are written as 0xFF
+    string baseQualities(queryLength, (char)0xFF);
+    const size_t numQualities = ( al.Qualities.size() < queryLength ? al.Qualities.size()
+                                                                    : queryLength );
+    for ( size_t i = 0; i < numQualities; ++i )
+        baseQualities[i] = al.Qualities[i] - 33; // FASTQ conversion
+    m_stream.Write(baseQualities.data(), queryLength);
+
+    // write the tag data
+    if ( m_isBigEndian ) {
+
+        // walk a private copy of the tag data, swapping each multi-byte value in place
+        string tagData(al.TagData);
+
+        size_t i = 0;
+        while ( i < tagDataLength ) {
+
+            i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
+            const char type = tagData[i];     // get tag type at position i
+            ++i;
+
+            switch ( type ) {
+
+                case(Constants::BAM_TAG_TYPE_ASCII) :
+                case(Constants::BAM_TAG_TYPE_INT8)  :
+                case(Constants::BAM_TAG_TYPE_UINT8) :
+                    // single byte - nothing to swap
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_INT16)  :
+                case(Constants::BAM_TAG_TYPE_UINT16) :
+                    BamTools::SwapEndian_16p(&tagData[i]);
+                    i += sizeof(uint16_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_FLOAT)  :
+                case(Constants::BAM_TAG_TYPE_INT32)  :
+                case(Constants::BAM_TAG_TYPE_UINT32) :
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    i += sizeof(uint32_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_HEX) :
+                case(Constants::BAM_TAG_TYPE_STRING) :
+                    // no endian swapping necessary for hex-string/string data
+                    while ( tagData[i] )
+                        ++i;
+                    // increment one more for null terminator
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+                {
+                    // read array type
+                    const char arrayType = tagData[i];
+                    ++i;
+
+                    // swap endian-ness of number of elements in place, then retrieve for loop
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    int32_t numElements;
+                    memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+                    i += sizeof(uint32_t);
+
+                    // swap endian-ness of array elements
+                    for ( int j = 0; j < numElements; ++j ) {
+                        switch (arrayType) {
+                            case (Constants::BAM_TAG_TYPE_INT8)  :
+                            case (Constants::BAM_TAG_TYPE_UINT8) :
+                                // no endian-swapping necessary
+                                ++i;
+                                break;
+                            case (Constants::BAM_TAG_TYPE_INT16)  :
+                            case (Constants::BAM_TAG_TYPE_UINT16) :
+                                BamTools::SwapEndian_16p(&tagData[i]);
+                                i += sizeof(uint16_t);
+                                break;
+                            case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                            case (Constants::BAM_TAG_TYPE_INT32)  :
+                            case (Constants::BAM_TAG_TYPE_UINT32) :
+                                BamTools::SwapEndian_32p(&tagData[i]);
+                                i += sizeof(uint32_t);
+                                break;
+                            default:
+                                const string message = string("invalid binary array type: ") + arrayType;
+                                throw BamException("BamWriter::SaveAlignment", message);
+                        }
+                    }
+
+                    break;
+                }
+
+                default :
+                    const string message = string("invalid tag type: ") + type;
+                    throw BamException("BamWriter::SaveAlignment", message);
+            }
+        }
+
+        m_stream.Write(tagData.data(), tagDataLength);
+    }
+    else
+        m_stream.Write(al.TagData.data(), tagDataLength);
+}
+
+// writes a core-only alignment: block size, a freshly assembled BAM core, then
+// the raw character data buffer (SupportData.AllCharData) as-is
+void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
+
+    // block size goes out first (byte-swapped on big-endian hosts)
+    unsigned int packedBlockSize = al.SupportData.BlockLength;
+    if ( m_isBigEndian )
+        BamTools::SwapEndian_32(packedBlockSize);
+    m_stream.Write((char*)&packedBlockSize, Constants::BAM_SIZEOF_INT);
+
+    // the bin is always recomputed, since the alignment's position may have been edited
+    const uint32_t bin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // assemble the eight 32-bit words of the BAM core
+    uint32_t core[Constants::BAM_CORE_BUFFER_SIZE];
+    core[0] = al.RefID;
+    core[1] = al.Position;
+    core[2] = (bin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+    core[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+    core[4] = al.SupportData.QuerySequenceLength;
+    core[5] = al.MateRefID;
+    core[6] = al.MatePosition;
+    core[7] = al.InsertSize;
+
+    // byte-swap each core word on big-endian hosts
+    if ( m_isBigEndian ) {
+        for ( int word = 0; word < 8; ++word )
+            BamTools::SwapEndian_32(core[word]);
+    }
+    m_stream.Write((char*)&core, Constants::BAM_CORE_SIZE);
+
+    // everything after the core is passed through untouched
+    const unsigned int charDataLength = al.SupportData.BlockLength-Constants::BAM_CORE_SIZE;
+    m_stream.Write((char*)al.SupportData.AllCharData.data(), charDataLength);
+}
+
+// emits the BAM_HEADER_MAGIC bytes that open the output
+void BamWriterPrivate::WriteMagicNumber(void)
+{
+    m_stream.Write(Constants::BAM_HEADER_MAGIC,
+                   Constants::BAM_HEADER_MAGIC_LENGTH);
+}
+
+// writes the reference sequence entries: the number of references, then for each
+// one its name length (including the NUL), its NUL-terminated name, and its length
+//
+// Every length is kept in host byte order for use as a byte count; only a separate
+// copy is byte-swapped (on big-endian hosts) for writing to the file.
+void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {
+
+    // write the number of reference sequences
+    uint32_t numReferenceSequences = referenceSequences.size();
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);
+    m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);
+
+    // foreach reference sequence
+    RefVector::const_iterator rsIter = referenceSequences.begin();
+    RefVector::const_iterator rsEnd  = referenceSequences.end();
+    for ( ; rsIter != rsEnd; ++rsIter ) {
+
+        // write the reference sequence name length
+        const uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
+        uint32_t packedNameLen = referenceSequenceNameLen;
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(packedNameLen);
+        m_stream.Write((char*)&packedNameLen, Constants::BAM_SIZEOF_INT);
+
+        // write the reference sequence name
+        // (byte count is the host-order length, never the swapped copy)
+        m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
+
+        // write the reference sequence length
+        int32_t referenceLength = rsIter->RefLength;
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);
+        m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);
+    }
+}
+
+// writes the SAM header text length followed by the text itself (if any)
+//
+// The length is kept in host byte order for the emptiness test and as the byte
+// count; only a separate copy is byte-swapped (on big-endian hosts) for writing.
+void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {
+
+    // write the SAM header  text length
+    const uint32_t samHeaderLen = samHeaderText.size();
+    uint32_t packedHeaderLen = samHeaderLen;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(packedHeaderLen);
+    m_stream.Write((char*)&packedHeaderLen, Constants::BAM_SIZEOF_INT);
+
+    // write the SAM header text
+    if ( samHeaderLen > 0 )
+        m_stream.Write(samHeaderText.data(), samHeaderLen);
+}
diff --git a/src/api/internal/bam/BamWriter_p.h b/src/api/internal/bam/BamWriter_p.h
new file mode 100644 (file)
index 0000000..d5bbe8d
--- /dev/null
@@ -0,0 +1,73 @@
+// ***************************************************************************
+// BamWriter_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_P_H
+#define BAMWRITER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+// implementation class providing the basic functionality for producing BAM files
+// (internal - not part of the public BamTools API)
+class BamWriterPrivate {
+
+    // ctor & dtor
+    public:
+        BamWriterPrivate(void);
+        ~BamWriterPrivate(void);
+
+    // interface methods
+    public:
+        void Close(void);
+        std::string GetErrorString(void) const;
+        bool IsOpen(void) const;
+        // NOTE(review): presumably writes @samHeaderText & @referenceSequences as the
+        // file's header once @filename is opened - confirm in BamWriter_p.cpp
+        bool Open(const std::string& filename,
+                  const std::string& samHeaderText,
+                  const BamTools::RefVector& referenceSequences);
+        bool SaveAlignment(const BamAlignment& al);
+        void SetWriteCompressed(bool ok);
+
+    // 'internal' methods
+    public:
+        uint32_t CalculateMinimumBin(const int begin, int end) const;
+        void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
+        void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
+        void WriteAlignment(const BamAlignment& al);
+        void WriteCoreAlignment(const BamAlignment& al);
+        // writes the BAM 'magic number' constant to m_stream
+        void WriteMagicNumber(void);
+        // writes reference count, then each reference's name length, name, & length to m_stream
+        void WriteReferences(const BamTools::RefVector& referenceSequences);
+        // writes SAM header text length, then the text itself (if any) to m_stream
+        void WriteSamHeaderText(const std::string& samHeaderText);
+
+    // data members
+    private:
+        // stream that the Write* methods output to
+        BgzfStream m_stream;
+        // if true, integer fields are byte-swapped before being written
+        bool m_isBigEndian;
+        // NOTE(review): presumably the message returned by GetErrorString() - confirm
+        std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMWRITER_P_H
diff --git a/src/api/internal/index/BamIndexFactory_p.cpp b/src/api/internal/index/BamIndexFactory_p.cpp
new file mode 100644 (file)
index 0000000..3afcbb9
--- /dev/null
@@ -0,0 +1,112 @@
+// ***************************************************************************
+// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/internal/index/BamIndexFactory_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+// generates index filename from BAM filename (depending on requested type)
+// if type is unknown, returns empty string
+const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
+                                                  const BamIndex::IndexType& type)
+{
+    switch ( type ) {
+        case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
+        case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
+        default :
+            return string();
+    }
+}
+
+// creates a new BamIndex object, depending on extension of @indexFilename
+BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) {
+
+    // if file doesn't exist, return null index
+    if ( !BamTools::FileExists(indexFilename) )
+        return 0;
+
+    // get file extension from index filename, including dot (".EXT")
+    // if can't get file extension, return null index
+    const string extension = FileExtension(indexFilename);
+    if ( extension.empty() )
+        return 0;
+
+    // create index based on extension
+    if      ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader);
+    else if ( extension == BamToolsIndex::Extension()    ) return new BamToolsIndex(reader);
+    else
+        return 0;
+}
+
+// creates a new BamIndex, object of requested @type
+BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
+                                             BamReaderPrivate* reader)
+{
+    switch ( type ) {
+        case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader);
+        case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader);
+        default :
+            return 0;
+    }
+}
+
+// extracts the extension of @filename, starting at (and including) its last '.'
+// yields an empty string if @filename is 4 characters or shorter, or contains no '.'
+const string BamIndexFactory::FileExtension(const string& filename) {
+
+    // too short to hold a valid path + extension (this covers the empty string as well)
+    if ( filename.length() <= 4 )
+        return string();
+
+    // everything from the last dot onward is the extension; no dot, no extension
+    const size_t dotPosition = filename.rfind('.');
+    return ( dotPosition == string::npos ) ? string() : filename.substr(dotPosition);
+}
+
+// returns name of existing index file that corresponds to @bamFilename
+// will defer to @preferredType if possible, if not will attempt to load any supported type
+// returns empty string if not found
+const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
+                                                const BamIndex::IndexType& preferredType)
+{
+    // skip if BAM filename provided is empty
+    if ( bamFilename.empty() )
+        return string();
+
+    // try to find index of preferred type first
+    // return index filename if found
+    string indexFilename = CreateIndexFilename(bamFilename, preferredType);
+    if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
+        return indexFilename;
+
+    // couldn't find preferred type, try the other supported types
+    // return index filename if found
+    if ( preferredType != BamIndex::STANDARD ) {
+        indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD);
+        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
+            return indexFilename;
+    }
+    if ( preferredType != BamIndex::BAMTOOLS ) {
+        indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS);
+        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
+            return indexFilename;
+    }
+
+    // otherwise couldn't find any index matching this filename
+    return string();
+}
diff --git a/src/api/internal/index/BamIndexFactory_p.h b/src/api/internal/index/BamIndexFactory_p.h
new file mode 100644 (file)
index 0000000..4e4f1cf
--- /dev/null
@@ -0,0 +1,49 @@
+// ***************************************************************************
+// BamIndexFactory_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#ifndef BAMINDEX_FACTORY_P_H
+#define BAMINDEX_FACTORY_P_H
+
+#include "api/BamIndex.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// collection of static helpers for locating index files & creating the matching
+// BamIndex implementation (BamStandardIndex or BamToolsIndex)
+class BamIndexFactory {
+
+    // static interface methods
+    public:
+        // creates a new BamIndex object, depending on extension of @indexFilename
+        // returns null if the file does not exist or its extension is not a supported one
+        // NOTE(review): object is allocated with 'new'; ownership presumably passes to caller - confirm
+        static BamIndex* CreateIndexFromFilename(const std::string& indexFilename,
+                                                 BamReaderPrivate* reader);
+        // creates a new BamIndex object, of requested @type
+        // returns null if @type is not a supported index type
+        static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type,
+                                           BamReaderPrivate* reader);
+        // returns name of existing index file that corresponds to @bamFilename
+        // will defer to @preferredType if possible
+        // if @preferredType not found, will attempt to load any supported index type
+        // returns empty string if no index file (of any type) is found
+        static const std::string FindIndexFilename(const std::string& bamFilename,
+                                                   const BamIndex::IndexType& preferredType);
+
+    // internal methods
+    public:
+        // generates index filename from BAM filename (depending on requested type)
+        // if type is unknown, returns empty string
+        static const std::string CreateIndexFilename(const std::string& bamFilename,
+                                                     const BamIndex::IndexType& type);
+        // retrieves file extension (including '.')
+        // returns empty string if @filename is 4 characters or shorter, or contains no '.'
+        static const std::string FileExtension(const std::string& filename);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMINDEX_FACTORY_P_H
diff --git a/src/api/internal/index/BamStandardIndex_p.cpp b/src/api/internal/index/BamStandardIndex_p.cpp
new file mode 100644 (file)
index 0000000..706c7c1
--- /dev/null
@@ -0,0 +1,954 @@
+// ***************************************************************************
+// BamStandardIndex_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+#include <sstream>
+using namespace std;
+
+// -----------------------------------
+// static BamStandardIndex constants
+// -----------------------------------
+
+// bin ID limit of the BAI binning scheme
+const int BamStandardIndex::MAX_BIN               = 37450;  // =(8^6-1)/7+1
+// a position shifted right by this many bits gives its linear offset index (see CalculateMinOffset)
+const int BamStandardIndex::BAM_LIDX_SHIFT        = 14;
+// file extension of this index format (returned by Extension())
+const string BamStandardIndex::BAI_EXTENSION      = ".bai";
+// first 4 bytes expected in a BAI file (verified by CheckMagicNumber())
+const char* const BamStandardIndex::BAI_MAGIC     = "BAI\1";
+// bytes per alignment chunk: 64-bit start offset + 64-bit stop offset
+const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
+// bytes per bin header: 32-bit bin ID + 32-bit alignment chunk count
+const int BamStandardIndex::SIZEOF_BINCORE        = sizeof(uint32_t) + sizeof(int32_t);
+// bytes per linear offset entry
+const int BamStandardIndex::SIZEOF_LINEAROFFSET   = sizeof(uint64_t);
+
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+// starts out holding no resources: null index stream & null I/O buffer
+BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
+    : IndexStream(0)
+    , Buffer(0)
+{ }
+
+// releases whatever is still held: closes the index stream & frees the I/O buffer
+BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+    // close index file, if one is open
+    if ( IndexStream != 0 )
+        fclose(IndexStream);
+    IndexStream = 0;
+
+    // free I/O buffer (delete[] on a null pointer is a no-op)
+    delete[] Buffer;
+    Buffer = 0;
+}
+
+// ---------------------------------
+// BamStandardIndex implementation
+// ---------------------------------
+
+// ctor
+// passes @reader on to the BamIndex base class, starts with a buffer length of 0,
+// & stores whether the host system is big-endian
+BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
+    : BamIndex(reader)
+    , m_bufferLength(0)
+{
+     m_isBigEndian = BamTools::SystemIsBigEndian();
+}
+
+// dtor
+// closes the index file (if open) & releases summary data and I/O buffer, via CloseFile()
+BamStandardIndex::~BamStandardIndex(void) {
+    CloseFile();
+}
+
+void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
+
+    // retrieve references from reader
+    const RefVector& references = m_reader->GetReferenceData();
+
+    // LeftPosition cannot be greater than or equal to reference length
+    if ( region.LeftPosition >= references.at(region.LeftRefID).RefLength )
+        throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
+
+    // set region 'begin'
+    begin = (unsigned int)region.LeftPosition;
+
+    // if right bound specified AND left&right bounds are on same reference
+    // OK to use right bound position as region 'end'
+    if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )
+        end = (unsigned int)region.RightPosition;
+
+    // otherwise, set region 'end' to last reference base
+    else end = (unsigned int)references.at(region.LeftRefID).RefLength;
+}
+
+// [begin, end)
+void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
+                                              const uint32_t& end,
+                                              set<uint16_t>& candidateBins)
+{
+    // initialize list, bin '0' is always a valid bin
+    candidateBins.insert(0);
+
+    // get rest of bins that contain this region
+    unsigned int k;
+    for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { candidateBins.insert(k); }
+    for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { candidateBins.insert(k); }
+    for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { candidateBins.insert(k); }
+    for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { candidateBins.insert(k); }
+    for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
+}
+
+void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+                                                 const uint64_t& minOffset,
+                                                 set<uint16_t>& candidateBins,
+                                                 vector<int64_t>& offsets)
+{
+    // seek to first bin
+    Seek(refSummary.FirstBinFilePosition, SEEK_SET);
+
+    // iterate over reference bins
+    uint32_t binId;
+    int32_t numAlignmentChunks;
+    set<uint16_t>::iterator candidateBinIter;
+    for ( int i = 0; i < refSummary.NumBins; ++i ) {
+
+        // read bin contents (if successful, alignment chunks are now in m_buffer)
+        ReadBinIntoBuffer(binId, numAlignmentChunks);
+
+        // see if bin is a 'candidate bin'
+        candidateBinIter = candidateBins.find(binId);
+
+        // if not, move on to next bin
+        if ( candidateBinIter == candidateBins.end() )
+            continue;
+
+        // otherwise, check bin's contents against for overlap
+        else {
+
+            size_t offset = 0;
+            uint64_t chunkStart;
+            uint64_t chunkStop;
+
+            // iterate over alignment chunks
+            for ( int j = 0; j < numAlignmentChunks; ++j ) {
+
+                // read chunk start & stop from buffer
+                memcpy((char*)&chunkStart, Resources.Buffer+offset, sizeof(uint64_t));
+                offset += sizeof(uint64_t);
+                memcpy((char*)&chunkStop, Resources.Buffer+offset, sizeof(uint64_t));
+                offset += sizeof(uint64_t);
+
+                // swap endian-ness if necessary
+                if ( m_isBigEndian ) {
+                    SwapEndian_64(chunkStart);
+                    SwapEndian_64(chunkStop);
+                }
+
+                // store alignment chunk's start offset
+                // if its stop offset is larger than our 'minOffset'
+                if ( chunkStop >= minOffset )
+                    offsets.push_back(chunkStart);
+            }
+
+            // 'pop' bin ID from candidate bins set
+            candidateBins.erase(candidateBinIter);
+
+            // quit if no more candidates
+            if ( candidateBins.empty() )
+                break;
+        }
+    }
+}
+
+uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
+                                              const uint32_t& begin)
+{
+    // if no linear offsets exist, return 0
+    if ( refSummary.NumLinearOffsets == 0 )
+        return 0;
+
+    // if 'begin' starts beyond last linear offset, use the last linear offset as minimum
+    // else use the offset corresponding to the requested start position
+    const int shiftedBegin = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
+    if ( shiftedBegin >= refSummary.NumLinearOffsets )
+        return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
+    else
+        return LookupLinearOffset( refSummary, shiftedBegin );
+}
+
+void BamStandardIndex::CheckBufferSize(char*& buffer,
+                                       unsigned int& bufferLength,
+                                       const unsigned int& requestedBytes)
+{
+    try {
+        if ( requestedBytes > bufferLength ) {
+            bufferLength = requestedBytes + 10;
+            delete[] buffer;
+            buffer = new char[bufferLength];
+        }
+    } catch ( std::bad_alloc&  ) {
+        stringstream s("");
+        s << "out of memory when allocating " << requestedBytes << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
+    }
+}
+
+void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
+                                       unsigned int& bufferLength,
+                                       const unsigned int& requestedBytes)
+{
+    try {
+        if ( requestedBytes > bufferLength ) {
+            bufferLength = requestedBytes + 10;
+            delete[] buffer;
+            buffer = new unsigned char[bufferLength];
+        }
+    } catch ( std::bad_alloc& ) {
+        stringstream s("");
+        s << "out of memory when allocating " << requestedBytes << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
+    }
+}
+
+// reads the next 4 bytes from the index file & verifies that they match the BAI 'magic number'
+// throws BamException if the bytes cannot be read or do not match
+void BamStandardIndex::CheckMagicNumber(void) {
+
+    // read 'magic number' from index file
+    char fileMagic[4];
+    const size_t numRead = fread(fileMagic, sizeof(char), sizeof(fileMagic), Resources.IndexStream);
+    if ( numRead != sizeof(fileMagic) )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
+
+    // anything other than the expected value means this is not a BAI index
+    const bool isBaiMagic = ( strncmp(fileMagic, BamStandardIndex::BAI_MAGIC, sizeof(fileMagic)) == 0 );
+    if ( !isBaiMagic )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
+}
+
+// resets @refEntry to an empty state: no bins, no linear offsets, & an ID of -1
+void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
+
+    // drop stored index data
+    refEntry.Bins.clear();
+    refEntry.LinearOffsets.clear();
+
+    // mark entry as not belonging to any reference
+    refEntry.ID = -1;
+}
+
+// closes the index file (if open) & discards everything associated with it:
+// the in-memory index file summary and the I/O buffer
+void BamStandardIndex::CloseFile(void) {
+
+    // close file stream, if there is one
+    if ( IsFileOpen() )
+        fclose(Resources.IndexStream);
+    Resources.IndexStream = 0;
+
+    // release I/O buffer (delete[] on a null pointer is a no-op)
+    delete[] Resources.Buffer;
+    Resources.Buffer = 0;
+    m_bufferLength   = 0;
+
+    // summary data described the file that was just closed
+    m_indexFileSummary.clear();
+}
+
+// builds index from associated BAM file & writes out to index file
+//
+// rewinds the reader, walks through every alignment, and writes one entry per reference
+// (an empty entry for references without alignments) to the file named
+// <reader filename> + Extension(); the reader is rewound again when finished
+//
+// returns true on success; on failure, returns false & stores an error message
+// fails if the reader is not open or cannot be rewound, if an alignment's position is
+// lower than that of the previous alignment on the same reference, if the reader's file
+// position does not advance, or if any step inside the try block throws a BamException
+bool BamStandardIndex::Create(void) {
+
+    // skip if BamReader is invalid or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
+        return false;
+    }
+
+    try {
+
+        // open new index file (read & write)
+        string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, "w+b");
+
+        // initialize BaiFileSummary with number of references
+        const int& numReferences = m_reader->GetReferenceCount();
+        ReserveForSummary(numReferences);
+
+        // initialize output file
+        WriteHeader();
+
+        // set up bin, ID, offset, & coordinate markers
+        // (defaultValue serves as the 'not set yet' marker; the int32_t markers are later
+        //  compared against (int32_t)defaultValue)
+        const uint32_t defaultValue = 0xffffffffu;
+        uint32_t currentBin    = defaultValue;
+        uint32_t lastBin       = defaultValue;
+        int32_t  currentRefID  = defaultValue;
+        int32_t  lastRefID     = defaultValue;
+        uint64_t currentOffset = (uint64_t)m_reader->Tell();
+        uint64_t lastOffset    = currentOffset;
+        int32_t  lastPosition  = defaultValue;
+
+        // iterate through alignments in BAM file
+        BamAlignment al;
+        BaiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
+
+            // changed to new reference
+            if ( lastRefID != al.RefID ) {
+
+                // if not first reference, save previous reference data
+                if ( lastRefID != (int32_t)defaultValue ) {
+
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+                    WriteReferenceEntry(refEntry);
+                    ClearReferenceEntry(refEntry);
+
+                    // write any empty references between (but *NOT* including) lastRefID & al.RefID
+                    for ( int i = lastRefID+1; i < al.RefID; ++i ) {
+                        BaiReferenceEntry emptyEntry(i);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+
+                    // update bin markers
+                    currentOffset = lastOffset;
+                    currentBin    = al.Bin;
+                    lastBin       = al.Bin;
+                    currentRefID  = al.RefID;
+                }
+
+                // otherwise, this is first pass
+                // be sure to write any empty references up to (but *NOT* including) current RefID
+                else {
+                    for ( int i = 0; i < al.RefID; ++i ) {
+                        BaiReferenceEntry emptyEntry(i);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+                }
+
+                // update reference markers
+                // (lastBin is reset, so the 'changed to new BAI bin' branch below always
+                //  runs for the first alignment of a reference)
+                refEntry.ID = al.RefID;
+                lastRefID   = al.RefID;
+                lastBin     = defaultValue;
+            }
+
+            // if lastPosition greater than current alignment position - file not sorted properly
+            // NOTE(review): the index file opened above is not closed on this path - confirm
+            // that cleanup of the partially-written file is handled elsewhere
+            else if ( lastPosition > al.Position ) {
+                stringstream s("");
+                s << "BAM file is not properly sorted by coordinate" << endl
+                  << "Current alignment position: " << al.Position
+                  << " < previous alignment position: " << lastPosition
+                  << " on reference ID: " << al.RefID << endl;
+                SetErrorString("BamStandardIndex::Create", s.str());
+                return false;
+            }
+
+            // if alignment's ref ID is valid & its bin is not a 'leaf'
+            if ( (al.RefID >= 0) && (al.Bin < 4681) )
+                SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
+
+            // changed to new BAI bin
+            if ( al.Bin != lastBin ) {
+
+                // if not first bin on reference, save previous bin data
+                if ( currentBin != defaultValue )
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+
+                // update markers
+                currentOffset = lastOffset;
+                currentBin    = al.Bin;
+                lastBin       = al.Bin;
+                currentRefID  = al.RefID;
+
+                // if invalid RefID, break out
+                // (presumably alignments without a reference, placed after all others - confirm)
+                if ( currentRefID < 0 )
+                    break;
+            }
+
+            // make sure that current file pointer is beyond lastOffset
+            // NOTE(review): as with the sort check above, the index file is not closed on this path
+            if ( m_reader->Tell() <= (int64_t)lastOffset ) {
+                SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
+                return false;
+            }
+
+            // update lastOffset & lastPosition
+            lastOffset   = m_reader->Tell();
+            lastPosition = al.Position;
+        }
+
+        // after finishing alignments, if any data was read, check:
+        if ( currentRefID >= 0 ) {
+
+            // store last alignment chunk to its bin, then write last reference entry with data
+            SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+            WriteReferenceEntry(refEntry);
+
+            // then write any empty references remaining at end of file
+            for ( int i = currentRefID+1; i < numReferences; ++i ) {
+                BaiReferenceEntry emptyEntry(i);
+                WriteReferenceEntry(emptyEntry);
+            }
+        }
+
+    } catch ( BamException& e) {
+        // any failure reported by the helpers above ends index creation
+        m_errorString = e.what();
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// returns format's file extension (BAI_EXTENSION, which includes the leading '.')
+const string BamStandardIndex::Extension(void) {
+    return BamStandardIndex::BAI_EXTENSION;
+}
+
+void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+
+    // cannot calculate offsets if unknown/invalid reference ID requested
+    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
+        throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
+
+    // retrieve index summary for left bound reference
+    const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
+
+    // set up region boundaries based on actual BamReader data
+    uint32_t begin;
+    uint32_t end;
+    AdjustRegion(region, begin, end);
+
+    // retrieve all candidate bin IDs for region
+    set<uint16_t> candidateBins;
+    CalculateCandidateBins(begin, end, candidateBins);
+
+    // use reference's linear offsets to calculate the minimum offset
+    // that must be considered to find overlap
+    const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
+
+    // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
+    // no data should not be error, just bail
+    vector<int64_t> offsets;
+    CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
+    if ( offsets.empty() )
+        return;
+    
+    // ensure that offsets are sorted before processing
+    sort( offsets.begin(), offsets.end() );
+
+    // binary search for an overlapping block (may not be first one though)
+    BamAlignment al;
+    typedef vector<int64_t>::const_iterator OffsetConstIterator;
+    OffsetConstIterator offsetFirst = offsets.begin();
+    OffsetConstIterator offsetIter  = offsetFirst;
+    OffsetConstIterator offsetLast  = offsets.end();
+    iterator_traits<OffsetConstIterator>::difference_type count = distance(offsetFirst, offsetLast);
+    iterator_traits<OffsetConstIterator>::difference_type step;
+    while ( count > 0 ) {
+        offsetIter = offsetFirst;
+        step = count/2;
+        advance(offsetIter, step);
+
+        // attempt seek to candidate offset
+        const int64_t& candidateOffset = (*offsetIter);
+        if ( !m_reader->Seek(candidateOffset) ) {
+            const string readerError = m_reader->GetErrorString();
+            const string message = "could not seek in BAM file: \n\t" + readerError;
+            throw BamException("BamToolsIndex::GetOffset", message);
+        }
+
+        // load first available alignment, setting flag to true if data exists
+        *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
+
+        // check alignment against region
+        if ( al.GetEndPosition() <= region.LeftPosition ) {
+            offsetFirst = ++offsetIter;
+            count -= step+1;
+        } else count = step;
+    }
+
+    // step back to the offset before the 'current offset' (to make sure we cover overlaps)
+    if ( offsetIter != offsets.begin() )
+        --offsetIter;
+    offset = (*offsetIter);
+}
+
+// returns true if the index summary for @referenceID lists at least one bin
+// returns false for reference IDs outside the range of the index summary
+bool BamStandardIndex::HasAlignments(const int& referenceID) const {
+    const bool isKnownReference = ( referenceID >= 0 && referenceID < (int)m_indexFileSummary.size() );
+    return isKnownReference && ( m_indexFileSummary.at(referenceID).NumBins > 0 );
+}
+
+// returns true if an index file stream is currently held (i.e. stream pointer is not null)
+bool BamStandardIndex::IsFileOpen(void) const {
+    return ( Resources.IndexStream != 0 );
+}
+
+// attempts to use index data to jump to @region, returns success/fail
+// a "successful" jump indicates no error, but not whether this region has data
+//   * thus, the method sets a flag to indicate whether there are alignments
+//     available after the jump position
+bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // clear out flag
+    *hasAlignmentsInRegion = false;
+
+    // skip if invalid reader or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
+        return false;
+    }
+
+    // calculate nearest offset to jump to
+    int64_t offset;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // if region has alignments, return success/fail of seeking there
+    if ( *hasAlignmentsInRegion )
+        return m_reader->Seek(offset);
+
+    // otherwise, simply return true (but hasAlignmentsInRegion flag has been set to false)
+    // (this is OK, BamReader will check this flag before trying to load data)
+    return true;
+}
+
+// loads existing data from file into memory
+bool BamStandardIndex::Load(const std::string& filename) {
+
+    try {
+
+        // attempt to open file (read-only)
+        OpenFile(filename, "rb");
+
+        // validate format
+        CheckMagicNumber();
+
+        // load in-memory summary of index data
+        SummarizeIndexFile();
+
+        // return success
+        return true;
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+}
+
+uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
+
+    // attempt seek to proper index file position
+    const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
+                                             index*BamStandardIndex::SIZEOF_LINEAROFFSET;
+    Seek(linearOffsetFilePosition, SEEK_SET);
+
+    // read linear offset from BAI file
+    uint64_t linearOffset;
+    ReadLinearOffset(linearOffset);
+    return linearOffset;
+}
+
+// Simplifies 'chunks' in place: walking the chunks in order, a chunk whose
+// Start matches the previous (merged) chunk's Stop - compared after shifting
+// both right by 16 bits - is folded into that previous chunk instead of being
+// kept as a separate entry.
+void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+
+    // nothing to merge
+    if ( chunks.empty() )
+        return;
+
+    // seed the merged list with the first chunk
+    BaiAlignmentChunkVector merged;
+    merged.push_back( chunks.front() );
+
+    // compare every remaining chunk against the most recently merged one
+    for ( size_t n = 1; n < chunks.size(); ++n ) {
+
+        const BaiAlignmentChunk& next = chunks[n];
+        BaiAlignmentChunk& last = merged.back();
+
+        // 'last' ends where 'next' starts: extend 'last' to cover both
+        if ( last.Stop>>16 == next.Start>>16 )
+            last.Stop = next.Stop;
+
+        // otherwise 'next' becomes the new merge candidate
+        else
+            merged.push_back(next);
+    }
+
+    // hand the merged chunks back through the (in/out) parameter
+    chunks = merged;
+}
+
+// Opens 'filename' using the given fopen() mode, after first closing any
+// index file that was already open.
+// Throws BamException if the file could not be opened.
+void BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
+
+    // make sure any previous index file is closed
+    CloseFile();
+
+    // attempt to open file; done if that worked
+    Resources.IndexStream = fopen(filename.c_str(), mode);
+    if ( IsFileOpen() )
+        return;
+
+    // open failed - report which file
+    const string message = string("could not open file: ") + filename;
+    throw BamException("BamStandardIndex::OpenFile", message);
+}
+
+// Reads one BAI bin ID from the current index file position into 'binId',
+// converting it to host byte order on big-endian systems.
+// Throws BamException if the value could not be read.
+void BamStandardIndex::ReadBinID(uint32_t& binId) {
+    const size_t elementsRead = fread(&binId, sizeof(binId), 1, Resources.IndexStream);
+
+    // check the read before touching the value: after a failed fread() the
+    // contents of 'binId' are not meaningful and must not be byte-swapped
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
+
+    if ( m_isBigEndian ) SwapEndian_32(binId);
+}
+
+// Reads one bin from the current index file position: the bin ID and chunk
+// count are returned through 'binId' and 'numAlignmentChunks', and the bin's
+// alignment chunks are read into the internal buffer.
+// Throws BamException if any read fails or the chunk count is negative.
+void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
+
+    // read bin header
+    ReadBinID(binId);
+    ReadNumAlignmentChunks(numAlignmentChunks);
+
+    // a negative count (corrupt index data) would otherwise be converted to a
+    // huge unsigned byte request below - reject it up front
+    if ( numAlignmentChunks < 0 )
+        throw BamException("BamStandardIndex::ReadBinIntoBuffer", "invalid BAI chunk count");
+
+    // read bin contents
+    const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
+    ReadIntoBuffer(bytesRequested);
+}
+
+// Reads exactly 'bytesRequested' bytes from the index file into the internal
+// buffer, growing the buffer first if necessary.
+// Throws BamException (reporting requested vs. actual byte counts) on a short read.
+void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
+
+    // ensure that our buffer is big enough for request
+    BamStandardIndex::CheckBufferSize(Resources.Buffer, m_bufferLength, bytesRequested);
+
+    // read from BAI file stream; done if we got everything we asked for
+    const size_t bytesRead = fread( Resources.Buffer, sizeof(char), bytesRequested, Resources.IndexStream );
+    if ( bytesRead == (size_t)bytesRequested )
+        return;
+
+    // short read - build error message
+    stringstream message("");
+    message << "expected to read: " << bytesRequested << " bytes, ";
+    message << "but instead read: " << bytesRead;
+    throw BamException("BamStandardIndex::ReadIntoBuffer", message.str());
+}
+
+// Reads one linear offset from the current index file position into
+// 'linearOffset', converting it to host byte order on big-endian systems.
+// Throws BamException if the value could not be read.
+void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
+    const size_t elementsRead = fread(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
+
+    // check the read before byte-swapping: after a failed fread() the
+    // contents of 'linearOffset' are not meaningful
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
+
+    if ( m_isBigEndian ) SwapEndian_64(linearOffset);
+}
+
+// Reads a bin's alignment chunk count from the current index file position
+// into 'numAlignmentChunks', converting it to host byte order on big-endian systems.
+// Throws BamException if the value could not be read.
+void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
+    const size_t elementsRead = fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, Resources.IndexStream);
+
+    // check the read before byte-swapping: after a failed fread() the
+    // contents of 'numAlignmentChunks' are not meaningful
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
+
+    if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
+}
+
+// Reads a reference's bin count from the current index file position into
+// 'numBins', converting it to host byte order on big-endian systems.
+// Throws BamException if the value could not be read.
+void BamStandardIndex::ReadNumBins(int& numBins) {
+    const size_t elementsRead = fread(&numBins, sizeof(numBins), 1, Resources.IndexStream);
+
+    // check the read before byte-swapping: after a failed fread() the
+    // contents of 'numBins' are not meaningful
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
+
+    if ( m_isBigEndian ) SwapEndian_32(numBins);
+}
+
+// Reads a reference's linear offset count from the current index file
+// position into 'numLinearOffsets', converting it to host byte order on
+// big-endian systems.
+// Throws BamException if the value could not be read.
+void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
+    const size_t elementsRead = fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, Resources.IndexStream);
+
+    // check the read before byte-swapping: after a failed fread() the
+    // contents of 'numLinearOffsets' are not meaningful
+    // (the exception names this method as its origin; it previously
+    //  reported ReadNumAlignmentChunks)
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumLinearOffsets", "could not read BAI linear offset count");
+
+    if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
+}
+
+// Reads the number of reference sequences from the current index file
+// position into 'numReferences', converting it to host byte order on
+// big-endian systems.
+// Throws BamException if the value could not be read.
+void BamStandardIndex::ReadNumReferences(int& numReferences) {
+    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
+
+    // check the read before byte-swapping: after a failed fread() the
+    // contents of 'numReferences' are not meaningful
+    if ( elementsRead != 1 )
+        throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
+
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+}
+
+// Resets the index file summary so that it holds 'numReferences'
+// default-constructed (empty) reference summaries.
+// Throws BamException if 'numReferences' is negative.
+void BamStandardIndex::ReserveForSummary(const int& numReferences) {
+
+    // drop any previous summary, even if the new count turns out to be invalid
+    m_indexFileSummary.clear();
+
+    // a negative count (e.g. read from a corrupt index file) would be converted
+    // to a huge unsigned size by assign() and fail with a standard-library
+    // exception rather than a BamException - report it as an index error instead
+    if ( numReferences < 0 )
+        throw BamException("BamStandardIndex::ReserveForSummary", "invalid reference count");
+
+    m_indexFileSummary.assign( numReferences, BaiReferenceSummary() );
+}
+
+// Appends a new alignment chunk [currentOffset, lastOffset] to the chunk list
+// of bin 'currentBin' in 'binMap', creating the bin's entry if it does not
+// exist yet.
+void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
+                                               const uint32_t& currentBin,
+                                               const uint64_t& currentOffset,
+                                               const uint64_t& lastOffset)
+{
+    // operator[] yields the bin's existing chunk list, or inserts an empty
+    // one for a bin seen for the first time
+    BaiAlignmentChunkVector& binChunks = binMap[currentBin];
+    binChunks.push_back( BaiAlignmentChunk(currentOffset, lastOffset) );
+}
+
+// Records, for reference 'refId', the number of bins and the current index
+// file position (where that reference's first bin begins).
+// m_indexFileSummary.at() throws std::out_of_range for an invalid 'refId'.
+void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
+    BaiReferenceSummary& summary = m_indexFileSummary.at(refId);
+    summary.FirstBinFilePosition = Tell();
+    summary.NumBins = numBins;
+}
+
+// Updates the linear offsets covered by an alignment spanning
+// [alignmentStartPosition, alignmentStopPosition): every window after the one
+// containing the start, up to and including the one containing the last base,
+// is set to 'lastOffset' unless it already holds a (non-zero) offset.
+// 'offsets' is grown, zero-filled, as needed.
+void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+                                             const int& alignmentStartPosition,
+                                             const int& alignmentStopPosition,
+                                             const uint64_t& lastOffset)
+{
+    // convert alignment coordinates to linear-offset window indices
+    const int firstWindow = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
+    const int lastWindow  = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
+
+    // grow vector (zero = "no offset stored yet") so that lastWindow is a valid index
+    const int requiredSize = lastWindow + 1;
+    const int currentSize  = offsets.size();
+    if ( requiredSize > currentSize )
+        offsets.resize(requiredSize, 0);
+
+    // fill windows that have no offset yet
+    int window = firstWindow + 1;
+    while ( window <= lastWindow ) {
+        if ( offsets[window] == 0 )
+            offsets[window] = lastOffset;
+        ++window;
+    }
+}
+
+// Records, for reference 'refId', the number of linear offsets and the current
+// index file position (where that reference's first linear offset begins).
+// m_indexFileSummary.at() throws std::out_of_range for an invalid 'refId'.
+void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
+    BaiReferenceSummary& summary = m_indexFileSummary.at(refId);
+    summary.FirstLinearOffsetFilePosition = Tell();
+    summary.NumLinearOffsets = numLinearOffsets;
+}
+
+// moves the index file stream to 'position', interpreted relative to 'origin'
+// (throws BamException if the seek fails)
+void BamStandardIndex::Seek(const int64_t& position, const int& origin) {
+    const bool seekFailed = ( fseek64(Resources.IndexStream, position, origin) != 0 );
+    if ( seekFailed )
+        throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
+}
+
+// Advances the index file position past 'numBins' bins by reading each one
+// (the bin data itself is discarded).
+void BamStandardIndex::SkipBins(const int& numBins) {
+
+    // placeholders for the values read; never inspected
+    uint32_t ignoredBinId;
+    int32_t ignoredChunkCount;
+
+    int binsSkipped = 0;
+    while ( binsSkipped < numBins ) {
+        ReadBinIntoBuffer(ignoredBinId, ignoredChunkCount);
+        ++binsSkipped;
+    }
+}
+
+// Advances the index file position past 'numLinearOffsets' linear offsets by
+// reading them into the internal buffer (the data itself is discarded).
+// Throws BamException if the count is negative or the read fails.
+void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
+
+    // a negative count (corrupt index data) would otherwise be converted to a
+    // huge unsigned byte request below - reject it up front
+    if ( numLinearOffsets < 0 )
+        throw BamException("BamStandardIndex::SkipLinearOffsets", "invalid BAI linear offset count");
+
+    const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
+    ReadIntoBuffer(bytesRequested);
+}
+
+// Sorts 'linearOffsets' in place into ascending order.
+// Note that any zero ("no offset stored") entries therefore end up at the front.
+void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
+    std::sort( linearOffsets.begin(), linearOffsets.end() );
+}
+
+// Reads a reference's bin count from the index file, stores it - together
+// with the file position of the first bin - in 'refSummary', and then moves
+// past the reference's bins.
+void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
+
+    // load number of bins
+    int binCount;
+    ReadNumBins(binCount);
+
+    // file pointer now sits on the first bin - remember where that is
+    refSummary.FirstBinFilePosition = Tell();
+    refSummary.NumBins = binCount;
+
+    // skip this reference's bins
+    SkipBins(binCount);
+}
+
+// Builds the in-memory summary for the whole index file: reads the number of
+// references, sizes the summary accordingly, then summarizes each reference
+// in file order.
+void BamStandardIndex::SummarizeIndexFile(void) {
+
+    // load number of reference sequences
+    int referenceCount;
+    ReadNumReferences(referenceCount);
+
+    // initialize file summary data
+    ReserveForSummary(referenceCount);
+
+    // summarize each reference entry in turn
+    for ( size_t n = 0; n < m_indexFileSummary.size(); ++n )
+        SummarizeReference( m_indexFileSummary[n] );
+}
+
+// Reads a reference's linear offset count from the index file, stores it -
+// together with the file position of the first linear offset - in
+// 'refSummary', and then moves past the reference's linear offsets.
+void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
+
+    // load number of linear offsets
+    int offsetCount;
+    ReadNumLinearOffsets(offsetCount);
+
+    // file pointer now sits on the first linear offset - remember where that is
+    refSummary.FirstLinearOffsetFilePosition = Tell();
+    refSummary.NumLinearOffsets = offsetCount;
+
+    // skip linear offsets in index file
+    SkipLinearOffsets(offsetCount);
+}
+
+// Summarizes one reference's index data, starting at the current index file
+// position: first its bins, then its linear offsets (the order they are read in).
+void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
+
+    // bins come first in the file...
+    SummarizeBins(refSummary);
+
+    // ...followed by the linear offsets
+    SummarizeLinearOffsets(refSummary);
+}
+
+// reports the current position of the file pointer in the index file stream
+int64_t BamStandardIndex::Tell(void) const {
+    const int64_t currentPosition = ftell64(Resources.IndexStream);
+    return currentPosition;
+}
+
+// Writes one alignment chunk (its Start offset, then its Stop offset) to the
+// index file, byte-swapping both values on big-endian systems.
+// Throws BamException if either value could not be written.
+void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
+
+    // work on local copies, so the caller's chunk is never byte-swapped
+    uint64_t startOffset = chunk.Start;
+    uint64_t stopOffset  = chunk.Stop;
+    if ( m_isBigEndian ) {
+        SwapEndian_64(startOffset);
+        SwapEndian_64(stopOffset);
+    }
+
+    // write both values, then verify that both writes succeeded
+    const size_t startWritten = fwrite(&startOffset, sizeof(startOffset), 1, Resources.IndexStream);
+    const size_t stopWritten  = fwrite(&stopOffset,  sizeof(stopOffset),  1, Resources.IndexStream);
+    if ( startWritten + stopWritten != 2 )
+        throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
+}
+
+// Merges 'chunks' (in place), then writes the resulting chunk count followed
+// by each chunk to the index file.
+// Throws BamException if any write fails.
+void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+
+    // make sure chunks are merged (simplified) before writing & saving summary
+    MergeAlignmentChunks(chunks);
+
+    // write chunk count (byte-swapped copy on big-endian systems)
+    int32_t numChunks = chunks.size();
+    if ( m_isBigEndian ) SwapEndian_32(numChunks);
+    if ( fwrite(&numChunks, sizeof(numChunks), 1, Resources.IndexStream) != 1 )
+        throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
+
+    // write the chunks themselves, in order
+    for ( size_t n = 0; n < chunks.size(); ++n )
+        WriteAlignmentChunk( chunks[n] );
+}
+
+// Writes one bin to the index file: its ID, followed by its (merged)
+// alignment chunks.
+// Throws BamException if any write fails.
+void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
+
+    // write BAM bin ID (byte-swapped copy on big-endian systems)
+    uint32_t outputId = binId;
+    if ( m_isBigEndian ) SwapEndian_32(outputId);
+    if ( fwrite(&outputId, sizeof(outputId), 1, Resources.IndexStream) != 1 )
+        throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
+
+    // write bin's alignment chunks
+    WriteAlignmentChunks(chunks);
+}
+
+// Writes all bins of reference 'refId' to the index file: the bin count,
+// followed by each bin in map order. The reference's bin summary (count &
+// file position of the first bin) is saved right after the count is written.
+// Throws BamException if any write fails.
+void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
+
+    // write number of bins (byte-swapped copy on big-endian systems)
+    int32_t numBins = bins.size();
+    if ( m_isBigEndian ) SwapEndian_32(numBins);
+    if ( fwrite(&numBins, sizeof(numBins), 1, Resources.IndexStream) != 1 )
+        throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
+
+    // save summary for reference's bins
+    SaveBinsSummary(refId, bins.size());
+
+    // write each bin (ID => chunks)
+    for ( BaiBinMap::iterator bin = bins.begin(); bin != bins.end(); ++bin )
+        WriteBin( bin->first, bin->second );
+}
+
+// Writes the index file header: the 4-character BAI magic number followed by
+// the number of reference sequences (taken from the current file summary).
+// Throws BamException if the header could not be written completely.
+void BamStandardIndex::WriteHeader(void) {
+
+    // write magic number (4 one-byte elements)
+    const size_t magicWritten = fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, Resources.IndexStream);
+
+    // write number of reference sequences (byte-swapped copy on big-endian systems)
+    int32_t referenceCount = m_indexFileSummary.size();
+    if ( m_isBigEndian ) SwapEndian_32(referenceCount);
+    const size_t countWritten = fwrite(&referenceCount, sizeof(referenceCount), 1, Resources.IndexStream);
+
+    // expect 4 magic characters + 1 reference count
+    if ( magicWritten + countWritten != 5 )
+        throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
+}
+
+// Sorts the linear offsets of reference 'refId' (in place), then writes the
+// offset count followed by each offset to the index file. The reference's
+// linear offset summary (count & file position of the first offset) is saved
+// right after the count is written.
+// Throws BamException if the count or any offset could not be written.
+void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
+
+    // make sure linear offsets are sorted before writing & saving summary
+    SortLinearOffsets(linearOffsets);
+
+    // write number of linear offsets (byte-swapped copy on big-endian systems)
+    int32_t numOffsets = linearOffsets.size();
+    if ( m_isBigEndian ) SwapEndian_32(numOffsets);
+    size_t totalWritten = fwrite(&numOffsets, sizeof(numOffsets), 1, Resources.IndexStream);
+
+    // save summary for reference's linear offsets
+    SaveLinearOffsetsSummary(refId, linearOffsets.size());
+
+    // write each linear offset, keeping a running count of successful writes
+    for ( size_t n = 0; n < linearOffsets.size(); ++n ) {
+        uint64_t outputOffset = linearOffsets[n];
+        if ( m_isBigEndian ) SwapEndian_64(outputOffset);
+        totalWritten += fwrite(&outputOffset, sizeof(outputOffset), 1, Resources.IndexStream);
+    }
+
+    // expect 1 count + one element per linear offset
+    if ( totalWritten != (linearOffsets.size() + 1) )
+        throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
+}
+
+// Writes one reference's complete index data to the index file: its bins
+// first, then its linear offsets.
+void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
+
+    // bins section
+    WriteBins(refEntry.ID, refEntry.Bins);
+
+    // linear offsets section
+    WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
+}
diff --git a/src/api/internal/index/BamStandardIndex_p.h b/src/api/internal/index/BamStandardIndex_p.h
new file mode 100644 (file)
index 0000000..03e0042
--- /dev/null
@@ -0,0 +1,236 @@
+// ***************************************************************************
+// BamStandardIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#ifndef BAM_STANDARD_INDEX_FORMAT_H
+#define BAM_STANDARD_INDEX_FORMAT_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// -----------------------------------------------------------------------------
+// BamStandardIndex data structures
+
+// describes one contiguous run of alignments by its start & stop offsets
+struct BaiAlignmentChunk {
+
+    uint64_t Start;  // offset where the run begins
+    uint64_t Stop;   // offset where the run ends
+
+    // both offsets default to 0
+    BaiAlignmentChunk(const uint64_t& start = 0, const uint64_t& stop = 0)
+        : Start(start), Stop(stop)
+    { }
+};
+
+// orders alignment chunks by their Start offset only (used for sorting)
+inline
+bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
+    return rhs.Start > lhs.Start;
+}
+
+// convenience typedef for a list of all alignment 'chunks' in a BAI bin
+// (kept in the order the chunks were added)
+typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
+
+// convenience typedef for a map of all BAI bins in a reference (bin ID => chunks);
+// being a std::map, iteration visits bins in ascending bin ID order
+typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
+
+// convenience typedef for a list of all 'linear offsets' in a reference
+typedef std::vector<uint64_t> BaiLinearOffsetVector;
+
+// holds the full BAI index data for a single reference - everything needed
+// for building, loading, & writing that reference's part of the index
+struct BaiReferenceEntry {
+
+    int32_t ID;                           // reference ID (-1 unless given)
+    BaiBinMap Bins;                       // bin ID => alignment chunks
+    BaiLinearOffsetVector LinearOffsets;  // linear offsets for this reference
+
+    // containers start out empty
+    BaiReferenceEntry(const int32_t& id = -1) : ID(id) { }
+};
+
+// (persistent) summary of a BaiReferenceEntry's index data: how many bins &
+// linear offsets the reference has, and where each section starts in the index file
+struct BaiReferenceSummary {
+
+    int NumBins;                             // number of bins
+    int NumLinearOffsets;                    // number of linear offsets
+    uint64_t FirstBinFilePosition;           // index file position of first bin
+    uint64_t FirstLinearOffsetFilePosition;  // index file position of first linear offset
+
+    // everything starts out zeroed
+    BaiReferenceSummary(void)
+        : NumBins(0), NumLinearOffsets(0)
+        , FirstBinFilePosition(0), FirstLinearOffsetFilePosition(0)
+    { }
+};
+
+// full BAI index file summary: one BaiReferenceSummary per reference
+typedef std::vector<BaiReferenceSummary> BaiFileSummary;
+
+// end BamStandardIndex data structures
+// -----------------------------------------------------------------------------
+
+// BamIndex implementation for the standardized BAM index format (".bai").
+//
+// The private helpers below report failure by throwing BamException; the
+// public Load() converts such an exception into a 'false' return value plus
+// an error string.
+class BamStandardIndex : public BamIndex {
+
+    // ctor & dtor
+    public:
+        BamStandardIndex(Internal::BamReaderPrivate* reader);
+        ~BamStandardIndex(void);
+
+    // BamIndex implementation
+    public:
+        // builds index from associated BAM file & writes out to index file
+        bool Create(void);
+        // returns whether reference has alignments or not
+        bool HasAlignments(const int& referenceID) const;
+        // attempts to use index data to jump to @region, returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets a flag to indicate whether there are alignments
+        //     available after the jump position
+        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+        // loads existing data from file into memory
+        bool Load(const std::string& filename);
+    public:
+        // returns format's file extension
+        static const std::string Extension(void);
+
+    // internal methods
+    private:
+
+        // index file ops
+        void CheckMagicNumber(void);
+        void CloseFile(void);
+        bool IsFileOpen(void) const;
+        // closes any previously opened index file, then opens @filename with fopen() @mode
+        void OpenFile(const std::string& filename, const char* mode);
+        // seeks to @position (relative to @origin) in the index file stream
+        void Seek(const int64_t& position, const int& origin);
+        // returns current position in the index file stream
+        int64_t Tell(void) const;
+
+        // BAI index building methods
+        void ClearReferenceEntry(BaiReferenceEntry& refEntry);
+        void SaveAlignmentChunkToBin(BaiBinMap& binMap,
+                                     const uint32_t& currentBin,
+                                     const uint64_t& currentOffset,
+                                     const uint64_t& lastOffset);
+        void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+                                   const int& alignmentStartPosition,
+                                   const int& alignmentStopPosition,
+                                   const uint64_t& lastOffset);
+
+        // random-access methods
+        void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+        void CalculateCandidateBins(const uint32_t& begin,
+                                    const uint32_t& end,
+                                    std::set<uint16_t>& candidateBins);
+        void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+                                       const uint64_t& minOffset,
+                                       std::set<uint16_t>& candidateBins,
+                                       std::vector<int64_t>& offsets);
+        uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
+        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+        // reads the linear offset at @index of the reference's linear offset table
+        uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
+
+        // BAI summary (create/load) methods
+        void ReserveForSummary(const int& numReferences);
+        void SaveBinsSummary(const int& refId, const int& numBins);
+        void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
+        void SkipBins(const int& numBins);
+        void SkipLinearOffsets(const int& numLinearOffsets);
+        void SummarizeBins(BaiReferenceSummary& refSummary);
+        void SummarizeIndexFile(void);
+        void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+        void SummarizeReference(BaiReferenceSummary& refSummary);
+
+        // BAI full index input methods
+        // (each reads from the current index file position)
+        void ReadBinID(uint32_t& binId);
+        void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+        void ReadIntoBuffer(const unsigned int& bytesRequested);
+        void ReadLinearOffset(uint64_t& linearOffset);
+        void ReadNumAlignmentChunks(int& numAlignmentChunks);
+        void ReadNumBins(int& numBins);
+        void ReadNumLinearOffsets(int& numLinearOffsets);
+        void ReadNumReferences(int& numReferences);
+
+        // BAI full index output methods
+        // (each writes at the current index file position)
+        void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
+        void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
+        void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+        void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+        void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+        void WriteBins(const int& refId, BaiBinMap& bins);
+        void WriteHeader(void);
+        void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+        void WriteReferenceEntry(BaiReferenceEntry& refEntry);
+
+    // data members
+    private:
+        // whether values must be byte-swapped when reading/writing the index file
+        bool m_isBigEndian;
+        // one summary entry per reference
+        BaiFileSummary m_indexFileSummary;
+
+        // our input buffer
+        unsigned int m_bufferLength;
+
+        // groups the index file stream & input buffer
+        // NOTE(review): the destructor is presumably what releases both - it is
+        //               defined in the .cpp, confirm there
+        // NOTE(review): FILE is used here, but this header does not include <cstdio>
+        //               itself - presumably it arrives via one of the api headers; verify
+        struct RaiiWrapper {
+            FILE* IndexStream;
+            char* Buffer;
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+        };
+        RaiiWrapper Resources;
+
+    // static methods
+    private:
+        // checks if the buffer is large enough to accommodate the requested size
+        static void CheckBufferSize(char*& buffer,
+                                    unsigned int& bufferLength,
+                                    const unsigned int& requestedBytes);
+        // checks if the buffer is large enough to accommodate the requested size
+        static void CheckBufferSize(unsigned char*& buffer,
+                                    unsigned int& bufferLength,
+                                    const unsigned int& requestedBytes);
+    // static constants
+    private:
+        static const int MAX_BIN;
+        static const int BAM_LIDX_SHIFT;
+        static const std::string BAI_EXTENSION;
+        static const char* const BAI_MAGIC;
+        static const int SIZEOF_ALIGNMENTCHUNK;
+        static const int SIZEOF_BINCORE;
+        static const int SIZEOF_LINEAROFFSET;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAM_STANDARD_INDEX_FORMAT_H
diff --git a/src/api/internal/index/BamToolsIndex_p.cpp b/src/api/internal/index/BamToolsIndex_p.cpp
new file mode 100644 (file)
index 0000000..af0a684
--- /dev/null
@@ -0,0 +1,615 @@
+// ***************************************************************************
+// BamToolsIndex.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <map>
+using namespace std;
+
+// --------------------------------
+// static BamToolsIndex constants
+// --------------------------------
+
+// default number of alignments grouped into one index block
+// (the constructor uses this to initialize m_blockSize)
+const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
+// file extension & magic number identifying the BTI format
+const string BamToolsIndex::BTI_EXTENSION     = ".bti";
+const char* const BamToolsIndex::BTI_MAGIC    = "BTI\1";
+// two 32-bit values plus one 64-bit value
+// NOTE(review): presumably the on-disk size of one BTI block - confirm against the block read/write code
+const int BamToolsIndex::SIZEOF_BLOCK         = sizeof(int32_t)*2 + sizeof(int64_t);
+
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+// starts out with no index file stream attached
+BamToolsIndex::RaiiWrapper::RaiiWrapper(void) : IndexStream(0) { }
+
+// closes the index file stream, if one is still attached
+BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+    // nothing attached, nothing to close
+    if ( IndexStream == 0 )
+        return;
+
+    fclose(IndexStream);
+}
+
+// ------------------------------
+// BamToolsIndex implementation
+// ------------------------------
+
+// ctor
+//   * block size starts at the format's default block length
+//   * no input version is known until an index file has been read
+//   * new index files are always written using the latest format version
+BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
+    : BamIndex(reader)
+    , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
+    , m_inputVersion(0)
+    , m_outputVersion(BTI_2_0)
+{
+    // determine once whether index values need byte-swapping on this system
+    m_isBigEndian = BamTools::SystemIsBigEndian();
+}
+
+// dtor - makes sure the index file is closed (and the file summary cleared)
+BamToolsIndex::~BamToolsIndex(void) {
+    CloseFile();
+}
+
+// Reads 4 characters from the current index file position and verifies that
+// they match the BTI magic number.
+// Throws BamException if they could not be read or do not match.
+void BamToolsIndex::CheckMagicNumber(void) {
+
+    // read magic number
+    char magic[4];
+    const size_t charsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
+    if ( charsRead != 4 )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
+
+    // validate expected magic number
+    const bool magicMatches = ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) == 0 );
+    if ( !magicMatches )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
+}
+
+// Reads the index file's format version (into m_inputVersion) and verifies
+// that this version of BamTools can use it.
+// Throws BamException if the version cannot be read, is not positive, is
+// newer than the version this build writes, or is older than BTI 2.0.
+void BamToolsIndex::CheckVersion(void) {
+
+    // read version from file
+    const size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, Resources.IndexStream);
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
+    if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
+
+    // version must be a positive number
+    if ( m_inputVersion <= 0 )
+        throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
+
+    // version is newer than can be supported by this version of bamtools
+    if ( m_inputVersion > m_outputVersion ) {
+        const string message = "unsupported format: this index was created by a newer version of BamTools. "
+                               "Update your local version of BamTools to use the index file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
+    }
+
+    // ------------------------------------------------------------------
+    // deprecated, unsupported versions
+    // (the format had to be modified to accommodate a particular bug fix)
+
+    // Version 2.0 introduced half-open intervals in place of the old closed
+    // intervals. Older BTI files are rejected rather than converted.
+    if ( (Version)m_inputVersion < BamToolsIndex::BTI_2_0 ) {
+        const string message = "unsupported format: this version of the index may not properly handle "
+                               "coordinate intervals. Please run 'bamtools index -bti -in yourData.bam' "
+                               "to generate an up-to-date, fixed BTI file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
+    }
+}
+
+// Resets 'refEntry' for re-use: removes all of its blocks and sets its ID
+// back to -1.
+void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
+    refEntry.Blocks.clear();
+    refEntry.ID = -1;
+}
+
+// Closes the index file (if one is open) and discards the in-memory file
+// summary. Safe to call when no file is open.
+void BamToolsIndex::CloseFile(void) {
+
+    // close stream & mark it as closed, so it cannot be closed a second time
+    if ( IsFileOpen() ) {
+        fclose(Resources.IndexStream);
+        Resources.IndexStream = 0;
+    }
+
+    // the summary described the file we just closed
+    m_indexFileSummary.clear();
+}
+
+// builds index from associated BAM file & writes out to index file
+//
+// Alignments are read sequentially from the beginning of the BAM file and
+// grouped into blocks of m_blockSize alignments. For each block, its maximum
+// end position, the file offset of its first alignment, and its start position
+// are stored. One reference entry is written for every reference, in order -
+// references without alignments get an empty entry.
+//
+// Returns true on success. On failure, an error string is set and false is
+// returned. The reader is rewound both before and after indexing.
+//
+// NOTE(review): if no alignment with a valid reference ID is encountered, only
+//               the header is written (no reference entries) - same as before
+//               this revision; confirm that the BTI loading code accepts this.
+bool BamToolsIndex::Create(void) {
+
+    // skip if BamReader is invalid or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
+        return false;
+    }
+
+    try {
+        // open new index file (read & write)
+        const string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, "w+b");
+
+        // initialize BtiFileSummary with number of references
+        const int& numReferences = m_reader->GetReferenceCount();
+        InitializeFileSummary(numReferences);
+
+        // initialize output file header
+        WriteHeader();
+
+        // index building markers
+        //   * blockRefId stays -1 until the first alignment has been processed;
+        //     from then on it is the reference ID of the current (or most
+        //     recently completed) block
+        uint32_t currentBlockCount      = 0;
+        int64_t currentAlignmentOffset  = m_reader->Tell();
+        int32_t blockRefId              = -1;
+        int32_t blockMaxEndPosition     = -1;
+        int64_t blockStartOffset        = currentAlignmentOffset;
+        int32_t blockStartPosition      = -1;
+
+        // plow through alignments, storing index entries
+        BamAlignment al;
+        BtiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
+
+            // alignments without a reference cannot be indexed, so stop here
+            // and let the code after the loop close out the last reference &
+            // write the remaining empty ones
+            // (previously, such an alignment reset blockRefId to -1, which made
+            //  the closing step below get skipped altogether)
+            // NOTE(review): assumes a coordinate-sorted BAM file, where alignments
+            //               without a reference follow all others
+            if ( al.RefID < 0 )
+                break;
+
+            // if moved to new reference
+            if ( al.RefID != blockRefId ) {
+
+                // if this is the very first alignment:
+                // (decided by blockRefId rather than by an empty block counter -
+                //  the counter is also 0 right after a full block has been stored,
+                //  which previously caused a reference holding an exact multiple
+                //  of m_blockSize alignments to be mistaken for "no data yet")
+                if ( blockRefId < 0 ) {
+
+                    // write any empty references up to (but not including) al.RefID
+                    for ( int i = 0; i < al.RefID; ++i )
+                        WriteReferenceEntry( BtiReferenceEntry(i) );
+                }
+
+                // otherwise, close out the previous reference:
+                else {
+
+                    // store previous BTI block data in reference entry
+                    // (unless the block was already stored upon becoming full)
+                    if ( currentBlockCount > 0 ) {
+                        const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                        refEntry.Blocks.push_back(block);
+                    }
+
+                    // write reference entry, then clear
+                    WriteReferenceEntry(refEntry);
+                    ClearReferenceEntry(refEntry);
+
+                    // write any empty references between (but not including)
+                    // the last blockRefID and current al.RefID
+                    for ( int i = blockRefId+1; i < al.RefID; ++i )
+                        WriteReferenceEntry( BtiReferenceEntry(i) );
+
+                    // reset block count
+                    currentBlockCount = 0;
+                }
+
+                // set ID for new reference entry
+                refEntry.ID = al.RefID;
+            }
+
+            // if beginning of block, update counters
+            if ( currentBlockCount == 0 ) {
+                blockRefId          = al.RefID;
+                blockStartOffset    = currentAlignmentOffset;
+                blockStartPosition  = al.Position;
+                blockMaxEndPosition = al.GetEndPosition();
+            }
+
+            // increment block counter
+            ++currentBlockCount;
+
+            // check end position
+            const int32_t alignmentEndPosition = al.GetEndPosition();
+            if ( alignmentEndPosition > blockMaxEndPosition )
+                blockMaxEndPosition = alignmentEndPosition;
+
+            // if block is full, get offset for next block, reset currentBlockCount
+            if ( currentBlockCount == m_blockSize ) {
+
+                // store previous block data in reference entry
+                const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                refEntry.Blocks.push_back(block);
+
+                // update markers
+                blockStartOffset  = m_reader->Tell();
+                currentBlockCount = 0;
+            }
+
+            // not the best name, but for the next iteration, this value will be the offset of the
+            // *current* alignment. this is necessary because we won't know if this next alignment
+            // is on a new reference until we actually read it
+            currentAlignmentOffset = m_reader->Tell();
+        }
+
+        // after finishing alignments, if any data was read, check:
+        if ( blockRefId >= 0 ) {
+
+            // store last BTI block data in reference entry
+            // (unless the block was already stored upon becoming full - storing
+            //  it again would add a duplicate block with a stale start offset)
+            if ( currentBlockCount > 0 ) {
+                const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                refEntry.Blocks.push_back(block);
+            }
+
+            // write last reference entry, then clear
+            WriteReferenceEntry(refEntry);
+            ClearReferenceEntry(refEntry);
+
+            // then write any empty references remaining at end of file
+            for ( int i = blockRefId+1; i < numReferences; ++i )
+                WriteReferenceEntry( BtiReferenceEntry(i) );
+        }
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// file extension used for index files of this format
+const std::string BamToolsIndex::Extension(void) {
+    const std::string& extension = BamToolsIndex::BTI_EXTENSION;
+    return extension;
+}
+
+// Selects the BAM file offset to jump to for @region, using the BTI blocks stored for the
+// region's left-bound reference.
+//
+//   region                - requested region (LeftRefID, LeftPosition & RightPosition are consulted)
+//   offset                - (out) receives the StartOffset of the selected block; it is only
+//                           assigned when a block is selected
+//   hasAlignmentsInRegion - (out) true if a block was selected, false otherwise
+//
+// Throws BamException if region.LeftRefID is not a valid index into the file summary data;
+// exceptions raised while reading the reference's blocks are not caught here.
+void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+
+    // throw if ref ID is not a valid index in file summary data
+    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
+        throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
+
+    // retrieve reference index data for left bound reference
+    BtiReferenceEntry refEntry(region.LeftRefID);
+    ReadReferenceEntry(refEntry);
+
+    // binary search for an overlapping block (may not be first one though)
+    // [blockFirst, blockFirst+count) is the range still under consideration
+    bool found = false;
+    typedef BtiBlockVector::const_iterator BtiBlockConstIterator;
+    BtiBlockConstIterator blockFirst = refEntry.Blocks.begin();
+    BtiBlockConstIterator blockIter  = blockFirst;
+    BtiBlockConstIterator blockLast  = refEntry.Blocks.end();
+    iterator_traits<BtiBlockConstIterator>::difference_type count = distance(blockFirst, blockLast);
+    iterator_traits<BtiBlockConstIterator>::difference_type step;
+    while ( count > 0 ) {
+
+        // probe the middle of the remaining range
+        blockIter = blockFirst;
+        step = count/2;
+        advance(blockIter, step);
+
+        const BtiBlock& block = (*blockIter);
+
+        // block starts at or before the region's right bound
+        if ( block.StartPosition <= region.RightPosition ) {
+
+            // ... and extends past the region's left bound: overlap, stop searching
+            if ( block.MaxEndPosition > region.LeftPosition ) {
+                offset = block.StartOffset;
+                break;
+            }
+
+            // block ends at or before the left bound: continue in the upper half
+            // (note that this advances blockFirst away from Blocks.begin())
+            blockFirst = ++blockIter;
+            count -= step+1;
+        }
+        else count = step; // block starts beyond the right bound: continue in the lower half
+    }
+
+    // if we didn't search "off the end" of the blocks
+    // (also false when the reference has no blocks at all, since begin() == end())
+    if ( blockIter != blockLast ) {
+
+        // "walk back" until we've gone too far
+        // the walk stops at blockFirst, i.e. the lower bound left behind by the search above
+        while ( blockIter != blockFirst ) {
+            const BtiBlock& currentBlock = (*blockIter);
+
+            // previous block ends at or before the left bound, so currentBlock is
+            // the earliest block that can still matter
+            --blockIter;
+            const BtiBlock& previousBlock = (*blockIter);
+            if ( previousBlock.MaxEndPosition <= region.LeftPosition ) {
+                offset = currentBlock.StartOffset;
+                found = true;
+                break;
+            }
+        }
+
+        // if we walked all the way to first block, just return that and let the reader's
+        // region overlap parsing do the rest
+        // NOTE(review): this also fires when the loop above broke with blockIter on
+        // blockFirst, replacing the offset chosen there with blockFirst's offset
+        if ( blockIter == blockFirst ) {
+            const BtiBlock& block = (*blockIter);
+            offset = block.StartOffset;
+            found = true;
+        }
+    }
+
+
+    // sets to false if blocks container is empty, or if no matching block could be found
+    *hasAlignmentsInRegion = found;
+}
+
+// reports whether the file summary lists at least one BTI block for @referenceID
+// (an ID outside the summary yields false)
+bool BamToolsIndex::HasAlignments(const int& referenceID) const {
+
+    // only IDs covered by the file summary can be looked up
+    const int numSummaries = (int)m_indexFileSummary.size();
+    const bool isKnownReference = ( referenceID >= 0 && referenceID < numSummaries );
+    if ( !isKnownReference )
+        return false;
+
+    return ( m_indexFileSummary.at(referenceID).NumBlocks > 0 );
+}
+
+// resets the file summary so that it holds one default-constructed entry per reference
+void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
+    m_indexFileSummary.clear();
+
+    int remaining = numReferences;
+    while ( remaining > 0 ) {
+        m_indexFileSummary.push_back( BtiReferenceSummary() );
+        --remaining;
+    }
+}
+
+// true while an index file stream is held
+bool BamToolsIndex::IsFileOpen(void) const {
+    const bool hasStream = ( Resources.IndexStream != 0 );
+    return hasStream;
+}
+
+// attempts to use index data to jump to @region, returns success/fail
+// a "successful" jump indicates no error, but not whether this region has data
+//   * thus, the method sets a flag to indicate whether there are alignments
+//     available after the jump position
+//
+// on failure the error string is set & false is returned
+// if the index has no block for the region, the reader is not moved & true is returned
+bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // clear flag
+    *hasAlignmentsInRegion = false;
+
+    // skip if invalid reader or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
+        return false;
+    }
+
+    // make sure left-bound reference is valid before indexing into the reference data
+    // (vector::at() would otherwise throw std::out_of_range, which nothing here catches)
+    const RefVector& references = m_reader->GetReferenceData();
+    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)references.size() ) {
+        SetErrorString("BamToolsIndex::Jump", "could not jump: invalid region requested");
+        return false;
+    }
+
+    // make sure left-bound position is valid
+    if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) {
+        SetErrorString("BamToolsIndex::Jump", "could not jump: invalid region requested");
+        return false;
+    }
+
+    // calculate nearest offset to jump to
+    int64_t offset = 0;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // GetOffset() only assigns @offset when it selects a block; with nothing to jump to
+    // there is no meaningful position to seek to, so report "no error" & leave the reader alone
+    if ( !*hasAlignmentsInRegion )
+        return true;
+
+    // return success/failure of seek
+    return m_reader->Seek(offset);
+}
+
+// opens @filename read-only, then reads its header & per-reference summary
+// returns false (with the error string set) if any step throws a BamException
+bool BamToolsIndex::Load(const std::string& filename) {
+
+    bool loaded = false;
+    try {
+
+        // open, then pull in metadata & the in-memory summary
+        OpenFile(filename, "rb");
+        LoadHeader();
+        LoadFileSummary();
+        loaded = true;
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+    }
+
+    return loaded;
+}
+
+// reads the reference count, sizes the file summary to match & fills in each entry
+void BamToolsIndex::LoadFileSummary(void) {
+
+    // how many references does the index describe?
+    int referenceCount;
+    LoadNumReferences(referenceCount);
+
+    // one (empty) summary per reference
+    InitializeFileSummary(referenceCount);
+
+    // fill in the summaries, in file order
+    const size_t summaryCount = m_indexFileSummary.size();
+    for ( size_t i = 0; i < summaryCount; ++i )
+        LoadReferenceSummary( m_indexFileSummary[i] );
+}
+
+// validates the BTI magic number & version, then reads the file's block size into m_blockSize
+// throws BamException if the block size cannot be read
+void BamToolsIndex::LoadHeader(void) {
+
+    // check BTI file metadata
+    CheckMagicNumber();
+    CheckVersion();
+
+    // use file's BTI block size to set member variable
+    // (verify the read first, so a failed read never byte-swaps the existing member value)
+    const size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, Resources.IndexStream);
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
+    if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
+}
+
+// reads a reference's block count from the index stream into @numBlocks
+// throws BamException if the value cannot be read or is negative
+void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
+
+    // verify the read before touching the value: on a short read @numBlocks was never filled in
+    const size_t elementsRead = fread(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
+    if ( m_isBigEndian ) SwapEndian_32(numBlocks);
+
+    // a negative count can only come from a damaged file; it would later be used to
+    // size the block container & to seek past the blocks
+    if ( numBlocks < 0 )
+        throw BamException("BamToolsIndex::LoadNumBlocks", "invalid number of BTI blocks");
+}
+
+// reads the number of references from the index stream into @numReferences
+// throws BamException if the value cannot be read or is negative
+void BamToolsIndex::LoadNumReferences(int& numReferences) {
+
+    // verify the read before touching the value: on a short read @numReferences was never filled in
+    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
+    if ( elementsRead != 1 )
+        throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+
+    // a negative count can only come from a damaged file
+    if ( numReferences < 0 )
+        throw BamException("BamToolsIndex::LoadNumReferences", "invalid number of references");
+}
+
+// records how many blocks the next reference has & where they start in the index file,
+// then moves the stream past those blocks
+void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
+
+    int blockCount;
+    LoadNumBlocks(blockCount);
+
+    // the stream now sits right after the count, i.e. on the reference's first block
+    refSummary.FirstBlockFilePosition = Tell();
+    refSummary.NumBlocks = blockCount;
+
+    // jump over the block data itself
+    SkipBlocks(blockCount);
+}
+
+// closes any previously opened index file, then opens @filename using @mode
+// throws BamException if the file cannot be opened
+void BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
+
+    // drop whatever index file was open before
+    CloseFile();
+
+    Resources.IndexStream = fopen(filename.c_str(), mode);
+    if ( IsFileOpen() )
+        return;
+
+    // no stream: report which file failed
+    string message("could not open file: ");
+    message += filename;
+    throw BamException("BamToolsIndex::OpenFile", message);
+}
+
+// reads one BTI block (max end position, start offset, start position - in that order)
+// from the index stream into @block; throws BamException if any field could not be read
+void BamToolsIndex::ReadBlock(BtiBlock& block) {
+
+    FILE* stream = Resources.IndexStream;
+
+    // all three reads are attempted; success is judged afterwards
+    const size_t endRead    = fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, stream);
+    const size_t offsetRead = fread(&block.StartOffset,    sizeof(block.StartOffset),    1, stream);
+    const size_t startRead  = fread(&block.StartPosition,  sizeof(block.StartPosition),  1, stream);
+
+    // byte-swap the fields when m_isBigEndian is set
+    if ( m_isBigEndian ) {
+        SwapEndian_32(block.MaxEndPosition);
+        SwapEndian_64(block.StartOffset);
+        SwapEndian_32(block.StartPosition);
+    }
+
+    const bool allFieldsRead = ( (endRead + offsetRead + startRead) == 3 );
+    if ( !allFieldsRead )
+        throw BamException("BamToolsIndex::ReadBlock", "could not read block");
+}
+
+// replaces the contents of @blocks with the blocks described by @refSummary, read
+// from the index file starting at the summary's first-block position
+void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
+
+    const int blockCount = refSummary.NumBlocks;
+
+    // start from an empty container with room for every block
+    blocks.clear();
+    blocks.reserve(blockCount);
+
+    // position the stream on the reference's first block
+    Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
+
+    // pull the blocks in, one after another
+    int blocksRead = 0;
+    while ( blocksRead < blockCount ) {
+        BtiBlock block;
+        ReadBlock(block);
+        blocks.push_back(block);
+        ++blocksRead;
+    }
+}
+
+// fills @refEntry.Blocks with the BTI blocks stored for reference @refEntry.ID
+// throws BamException if that ID is not covered by the file summary
+void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
+
+    // an ID outside the file summary cannot be looked up
+    const int numSummaries = (int)m_indexFileSummary.size();
+    const bool idInRange = ( refEntry.ID >= 0 && refEntry.ID < numSummaries );
+    if ( !idInRange )
+        throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
+
+    // the summary tells us how many blocks to read & where they start
+    ReadBlocks( m_indexFileSummary.at(refEntry.ID), refEntry.Blocks );
+}
+
+// moves the index file stream to @position, interpreted relative to @origin
+// (callers in this file pass SEEK_SET or SEEK_CUR); throws BamException if the seek fails
+void BamToolsIndex::Seek(const int64_t& position, const int& origin) {
+    // this class handles the BTI format, so name that format in the error text
+    if ( fseek64(Resources.IndexStream, position, origin) != 0 )
+        throw BamException("BamToolsIndex::Seek", "could not seek in BTI file");
+}
+
+// advances the index file stream past @numBlocks block entries
+void BamToolsIndex::SkipBlocks(const int& numBlocks) {
+    // widen before multiplying, so a large block count cannot overflow int arithmetic
+    const int64_t bytesToSkip = static_cast<int64_t>(numBlocks) * BamToolsIndex::SIZEOF_BLOCK;
+    Seek( bytesToSkip, SEEK_CUR );
+}
+
+// current position within the index file stream, as reported by ftell64
+int64_t BamToolsIndex::Tell(void) const {
+    const int64_t currentPosition = ftell64(Resources.IndexStream);
+    return currentPosition;
+}
+
+// writes one BTI block (max end position, start offset, start position - in that order)
+// to the index stream; throws BamException if any field could not be written
+void BamToolsIndex::WriteBlock(const BtiBlock& block) {
+
+    // work on a copy, so the caller's block is never byte-swapped
+    BtiBlock outBlock(block.MaxEndPosition, block.StartOffset, block.StartPosition);
+    if ( m_isBigEndian ) {
+        SwapEndian_32(outBlock.MaxEndPosition);
+        SwapEndian_64(outBlock.StartOffset);
+        SwapEndian_32(outBlock.StartPosition);
+    }
+
+    // all three writes are attempted; success is judged afterwards
+    FILE* stream = Resources.IndexStream;
+    const size_t endWritten    = fwrite(&outBlock.MaxEndPosition, sizeof(outBlock.MaxEndPosition), 1, stream);
+    const size_t offsetWritten = fwrite(&outBlock.StartOffset,    sizeof(outBlock.StartOffset),    1, stream);
+    const size_t startWritten  = fwrite(&outBlock.StartPosition,  sizeof(outBlock.StartPosition),  1, stream);
+
+    const bool allFieldsWritten = ( (endWritten + offsetWritten + startWritten) == 3 );
+    if ( !allFieldsWritten )
+        throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
+}
+
+// writes every block in @blocks to the index stream, in container order
+void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
+    const size_t blockCount = blocks.size();
+    for ( size_t i = 0; i < blockCount; ++i )
+        WriteBlock( blocks[i] );
+}
+
+// writes the BTI header: magic number, format version, block size & number of references
+// throws BamException if anything was written incompletely
+void BamToolsIndex::WriteHeader(void) {
+
+    FILE* stream = Resources.IndexStream;
+
+    // 'magic number' - four single bytes
+    const size_t magicWritten = fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, stream);
+
+    // format version
+    int32_t version = (int32_t)m_outputVersion;
+    if ( m_isBigEndian )
+        SwapEndian_32(version);
+    const size_t versionWritten = fwrite(&version, sizeof(version), 1, stream);
+
+    // block size
+    uint32_t blockSize = m_blockSize;
+    if ( m_isBigEndian )
+        SwapEndian_32(blockSize);
+    const size_t blockSizeWritten = fwrite(&blockSize, sizeof(blockSize), 1, stream);
+
+    // number of references
+    int32_t referenceCount = m_indexFileSummary.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(referenceCount);
+    const size_t referencesWritten = fwrite(&referenceCount, sizeof(referenceCount), 1, stream);
+
+    // expect 4 magic bytes plus one element for each of the three integers
+    const size_t totalWritten = magicWritten + versionWritten + blockSizeWritten + referencesWritten;
+    if ( totalWritten != 7 )
+        throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
+}
+
+// writes a reference's block count followed by each of its blocks
+// throws BamException if the count could not be written
+void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
+
+    const BtiBlockVector& blocks = refEntry.Blocks;
+
+    // block count comes first
+    uint32_t blockCount = blocks.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(blockCount);
+    if ( fwrite(&blockCount, sizeof(blockCount), 1, Resources.IndexStream) != 1 )
+        throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
+
+    // ... then the block entries themselves
+    WriteBlocks(blocks);
+}
diff --git a/src/api/internal/index/BamToolsIndex_p.h b/src/api/internal/index/BamToolsIndex_p.h
new file mode 100644 (file)
index 0000000..7c1550b
--- /dev/null
@@ -0,0 +1,184 @@
+// ***************************************************************************
+// BamToolsIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#ifndef BAMTOOLS_INDEX_FORMAT_H
+#define BAMTOOLS_INDEX_FORMAT_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// one 'block' of a BTI index: the maximum end position, the starting file offset &
+// the starting position recorded for that block
+struct BtiBlock {
+
+    // data members
+    int32_t MaxEndPosition;
+    int64_t StartOffset;
+    int32_t StartPosition;
+
+    // ctor - every field defaults to 0
+    BtiBlock(const int32_t& maxEndPosition = 0,
+             const int64_t& startOffset    = 0,
+             const int32_t& startPosition  = 0)
+    {
+        MaxEndPosition = maxEndPosition;
+        StartOffset    = startOffset;
+        StartPosition  = startPosition;
+    }
+};
+
+// convenience typedef for describing a list of BTI blocks on a reference
+typedef std::vector<BtiBlock> BtiBlockVector;
+
+// full BTI index data for a single reference: its ID plus the list of its blocks
+// (used when building, loading & writing the index)
+struct BtiReferenceEntry {
+
+    // data members
+    int32_t ID;
+    BtiBlockVector Blocks;
+
+    // ctor - ID defaults to -1, Blocks starts out empty
+    BtiReferenceEntry(const int& id = -1)
+    {
+        ID = id;
+    }
+};
+
+// (persistent) summary of one reference's index data: how many blocks it has &
+// the index-file position of the first one
+struct BtiReferenceSummary {
+
+    // data members
+    int NumBlocks;
+    uint64_t FirstBlockFilePosition;
+
+    // ctor - starts out with no blocks, at position 0
+    BtiReferenceSummary(void)
+    {
+        NumBlocks = 0;
+        FirstBlockFilePosition = 0;
+    }
+};
+
+// convenience typedef for describing a full BTI index file summary
+typedef std::vector<BtiReferenceSummary> BtiFileSummary;
+
+// BamIndex implementation for the BamTools index format (".bti")
+// builds, writes, loads & queries the index through a single FILE* stream
+class BamToolsIndex : public BamIndex {
+
+    // keep a list of any supported versions here
+    // (might be useful later to handle any 'legacy' versions if the format changes)
+    // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
+    //
+    // so a change introduced in BTI_1_2 may be handled from then on by:
+    //
+    // if ( indexVersion >= BTI_1_2 )
+    //   do something new
+    // else
+    //   do the old thing
+    //
+    // (declared before any access specifier, so the enum is private to the class)
+    enum Version { BTI_1_0 = 1
+                 , BTI_1_1
+                 , BTI_1_2
+                 , BTI_2_0
+                 };
+
+    // ctor & dtor
+    public:
+        BamToolsIndex(Internal::BamReaderPrivate* reader);
+        ~BamToolsIndex(void);
+
+    // BamIndex implementation
+    public:
+        // builds index from associated BAM file & writes out to index file
+        bool Create(void);
+        // returns whether reference has alignments or no
+        bool HasAlignments(const int& referenceID) const;
+        // attempts to use index data to jump to @region, returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets a flag to indicate whether there are alignments
+        //     available after the jump position
+        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+        // loads existing data from file into memory
+        bool Load(const std::string& filename);
+    public:
+        // returns format's file extension
+        static const std::string Extension(void);
+
+    // internal methods
+    // (the void methods among these report failure by throwing BamException)
+    private:
+
+        // index file ops
+        void CheckMagicNumber(void);
+        void CheckVersion(void);
+        void CloseFile(void);
+        bool IsFileOpen(void) const;
+        void OpenFile(const std::string& filename, const char* mode);
+        void Seek(const int64_t& position, const int& origin);
+        int64_t Tell(void) const;
+
+        // index-creation methods
+        void ClearReferenceEntry(BtiReferenceEntry& refEntry);
+        void WriteBlock(const BtiBlock& block);
+        void WriteBlocks(const BtiBlockVector& blocks);
+        void WriteHeader(void);
+        void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+
+        // random-access methods
+        void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+        void ReadBlock(BtiBlock& block);
+        void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
+        void ReadReferenceEntry(BtiReferenceEntry& refEntry);
+
+        // BTI summary data methods
+        void InitializeFileSummary(const int& numReferences);
+        void LoadFileSummary(void);
+        void LoadHeader(void);
+        void LoadNumBlocks(int& numBlocks);
+        void LoadNumReferences(int& numReferences);
+        void LoadReferenceSummary(BtiReferenceSummary& refSummary);
+        void SkipBlocks(const int& numBlocks);
+
+    // data members
+    private:
+        // when set, values are byte-swapped as they are read from / written to the index file
+        bool  m_isBigEndian;
+        // one summary (block count + first-block file position) per reference
+        BtiFileSummary m_indexFileSummary;
+        // number of alignments that make up a full block
+        uint32_t m_blockSize;
+        int32_t m_inputVersion; // Version is serialized as int
+        Version m_outputVersion;
+
+        // holds the index file stream
+        // NOTE(review): ctor/dtor are defined in the .cpp - presumably the dtor closes
+        // the stream; confirm there
+        struct RaiiWrapper {
+            FILE* IndexStream;
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+        };
+        RaiiWrapper Resources;
+
+    // static constants
+    private:
+        static const uint32_t DEFAULT_BLOCK_LENGTH;
+        static const std::string BTI_EXTENSION;
+        // 4 bytes of this are written at the start of the header
+        static const char* const BTI_MAGIC;
+        // bytes occupied by one block entry in the index file (used to skip over blocks)
+        static const int SIZEOF_BLOCK;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMTOOLS_INDEX_FORMAT_H
diff --git a/src/api/internal/io/BamDeviceFactory_p.cpp b/src/api/internal/io/BamDeviceFactory_p.cpp
new file mode 100644 (file)
index 0000000..f9c7694
--- /dev/null
@@ -0,0 +1,37 @@
+// ***************************************************************************
+// BamDeviceFactory_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 September 2011 (DB)
+// ---------------------------------------------------------------------------
+// Creates built-in concrete implementations of IBamIODevices
+// ***************************************************************************
+
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BamFile_p.h"
+#include "api/internal/io/BamFtp_p.h"
+#include "api/internal/io/BamHttp_p.h"
+#include "api/internal/io/BamPipe_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+using namespace std;
+
+// picks the IO device implementation that matches @source:
+//   "-", "stdin" or "stdout" -> BamPipe
+//   "http://..."             -> BamHttp
+//   "ftp://..."              -> BamFtp
+//   anything else            -> BamFile
+// the device is allocated with new & returned as a raw pointer
+IBamIODevice* BamDeviceFactory::CreateDevice(const string& source) {
+
+    // pipe requested?
+    const bool isPipe = ( source == "-" || source == "stdin" || source == "stdout" );
+    if ( isPipe )
+        return new BamPipe;
+
+    // remote source? (match on the leading characters only)
+    const string httpPrefix("http://");
+    if ( source.compare(0, httpPrefix.size(), httpPrefix) == 0 )
+        return new BamHttp(source);
+
+    const string ftpPrefix("ftp://");
+    if ( source.compare(0, ftpPrefix.size(), ftpPrefix) == 0 )
+        return new BamFtp(source);
+
+    // otherwise assume a "normal" file
+    return new BamFile(source);
+}
diff --git a/src/api/internal/io/BamDeviceFactory_p.h b/src/api/internal/io/BamDeviceFactory_p.h
new file mode 100644 (file)
index 0000000..1d48533
--- /dev/null
@@ -0,0 +1,37 @@
+// ***************************************************************************
+// BamDeviceFactory_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Creates built-in concrete implementations of IBamIODevices
+// ***************************************************************************
+
+#ifndef BAMDEVICEFACTORY_P_H
+#define BAMDEVICEFACTORY_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// factory for the built-in IBamIODevice implementations
+class BamDeviceFactory {
+    public:
+        // returns a newly allocated device chosen by inspecting @source
+        // (pipe names, "http://" / "ftp://" prefixes, otherwise a regular file)
+        static IBamIODevice* CreateDevice(const std::string& source);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMDEVICEFACTORY_P_H
diff --git a/src/api/internal/io/BamFile_p.cpp b/src/api/internal/io/BamFile_p.cpp
new file mode 100644 (file)
index 0000000..94c919e
--- /dev/null
@@ -0,0 +1,67 @@
+// ***************************************************************************
+// BamFile_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM file-specific IO behavior
+// ***************************************************************************
+
+#include "api/internal/io/BamFile_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <iostream>
+using namespace std;
+
+// remembers @filename; nothing is opened until Open() is called
+BamFile::BamFile(const string& filename)
+    : ILocalIODevice(), m_filename(filename)
+{
+}
+
+// nothing to release in this class itself
+BamFile::~BamFile(void)
+{
+}
+
+// closes the device if it is open; does nothing otherwise
+// NOTE(review): the stored filename is cleared as well, so a later Open() on this
+// object has no filename to use - confirm that is intended
+void BamFile::Close(void) {
+
+    if ( !IsOpen() )
+        return;
+
+    m_filename.clear();
+    ILocalIODevice::Close();
+}
+
+// this device always reports random access as available
+bool BamFile::IsRandomAccess(void) const {
+    const bool supportsRandomAccess = true;
+    return supportsRandomAccess;
+}
+
+// opens the file named at construction, for reading ("rb") or writing ("wb")
+// returns false (with the error string set) for any other mode or if fopen() fails
+bool BamFile::Open(const IBamIODevice::OpenMode mode) {
+
+    // make sure we're starting with a fresh file stream
+    // (go through the base-class Close() here: BamFile::Close() also clears m_filename,
+    //  which would leave nothing to open below when Open() is called on an open device)
+    if ( IsOpen() )
+        ILocalIODevice::Close();
+
+    // attempt to open FILE* depending on requested openmode
+    if ( mode == IBamIODevice::ReadOnly )
+        m_stream = fopen(m_filename.c_str(), "rb");
+    else if ( mode == IBamIODevice::WriteOnly )
+        m_stream = fopen(m_filename.c_str(), "wb");
+    else {
+        SetErrorString("BamFile::Open", "unknown open mode requested");
+        return false;
+    }
+
+    // check that we obtained a valid FILE*
+    if ( m_stream == 0 ) {
+        const string message_base = string("could not open file handle for ");
+        const string message = message_base + ( (m_filename.empty()) ? "empty filename" : m_filename );
+        SetErrorString("BamFile::Open", message);
+        return false;
+    }
+
+    // store current IO mode & return success
+    m_mode = mode;
+    return true;
+}
+
+// moves the stream to @position, measured from the start of the file
+// returns true if the underlying seek reports success
+bool BamFile::Seek(const int64_t& position) {
+    BT_ASSERT_X( m_stream, "BamFile::Seek() - null stream" );
+    const int seekResult = fseek64(m_stream, position, SEEK_SET);
+    return ( seekResult == 0 );
+}
diff --git a/src/api/internal/io/BamFile_p.h b/src/api/internal/io/BamFile_p.h
new file mode 100644 (file)
index 0000000..bd7d64b
--- /dev/null
@@ -0,0 +1,51 @@
+// ***************************************************************************
+// BamFile_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM file-specific IO behavior
+// ***************************************************************************
+
+#ifndef BAMFILE_P_H
+#define BAMFILE_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/ILocalIODevice_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// ILocalIODevice for a regular file on disk, identified by filename
+class BamFile : public ILocalIODevice {
+
+    // ctor & dtor
+    public:
+        BamFile(const std::string& filename);
+        ~BamFile(void);
+
+    // ILocalIODevice implementation
+    public:
+        void Close(void);
+        bool IsRandomAccess(void) const;
+        bool Open(const IBamIODevice::OpenMode mode);
+        bool Seek(const int64_t& position);
+
+    // data members
+    private:
+        // name of the file to open; set by the ctor
+        std::string m_filename;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMFILE_P_H
diff --git a/src/api/internal/io/BamFtp_p.cpp b/src/api/internal/io/BamFtp_p.cpp
new file mode 100644 (file)
index 0000000..f94d4ac
--- /dev/null
@@ -0,0 +1,56 @@
+// ***************************************************************************
+// BamFtp_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on FTP server
+// ***************************************************************************
+
+#include "api/internal/io/BamFtp_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+using namespace std;
+
+// placeholder ctor - FTP support is not implemented yet, which is flagged via BT_ASSERT_X
+BamFtp::BamFtp(const string& url)
+    : IBamIODevice()
+{
+    (void)url;
+    BT_ASSERT_X(false, "BamFtp not yet implemented");
+}
+
+// nothing to release
+BamFtp::~BamFtp(void)
+{
+}
+
+// placeholder - there is nothing to close
+void BamFtp::Close(void) {
+}
+
+// placeholder - always reports random access as available
+bool BamFtp::IsRandomAccess(void) const {
+    const bool supportsRandomAccess = true;
+    return supportsRandomAccess;
+}
+
+// placeholder - ignores @mode & reports success without opening anything
+bool BamFtp::Open(const IBamIODevice::OpenMode mode) {
+    (void)mode;
+    const bool opened = true;
+    return opened;
+}
+
+// placeholder - reads nothing & reports 0 bytes
+size_t BamFtp::Read(char* data, const unsigned int numBytes) {
+    (void)numBytes;
+    (void)data;
+    const size_t bytesRead = 0;
+    return bytesRead;
+}
+
+// placeholder - ignores @position & reports success
+bool BamFtp::Seek(const int64_t& position) {
+    (void)position;
+    const bool moved = true;
+    return moved;
+}
+
+// placeholder - no position is tracked, so -1 is returned
+int64_t BamFtp::Tell(void) const {
+    const int64_t noPosition = -1;
+    return noPosition;
+}
+
+// placeholder - writes nothing & reports 0 bytes
+size_t BamFtp::Write(const char* data, const unsigned int numBytes) {
+    (void)numBytes;
+    (void)data;
+    const size_t bytesWritten = 0;
+    return bytesWritten;
+}
diff --git a/src/api/internal/io/BamFtp_p.h b/src/api/internal/io/BamFtp_p.h
new file mode 100644 (file)
index 0000000..1f5ee0f
--- /dev/null
@@ -0,0 +1,56 @@
+// ***************************************************************************
+// BamFtp_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on FTP server
+// ***************************************************************************
+
+#ifndef BAMFTP_P_H
+#define BAMFTP_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// IBamIODevice intended for BAM files on an FTP server
+// declares the full device interface but no helper methods or data members yet
+class BamFtp : public IBamIODevice {
+
+    // ctor & dtor
+    public:
+        BamFtp(const std::string& url);
+        ~BamFtp(void);
+
+    // IBamIODevice implementation
+    public:
+        void Close(void);
+        bool IsRandomAccess(void) const;
+        bool Open(const IBamIODevice::OpenMode mode);
+        size_t Read(char* data, const unsigned int numBytes);
+        bool Seek(const int64_t& position);
+        int64_t Tell(void) const;
+        size_t Write(const char* data, const unsigned int numBytes);
+
+    // internal methods
+    private:
+
+    // data members
+    private:
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMFTP_P_H
diff --git a/src/api/internal/io/BamHttp_p.cpp b/src/api/internal/io/BamHttp_p.cpp
new file mode 100644 (file)
index 0000000..2892829
--- /dev/null
@@ -0,0 +1,56 @@
+// ***************************************************************************
+// BamHttp_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on HTTP server
+// ***************************************************************************
+
+#include "api/internal/io/BamHttp_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+using namespace std;
+
+// placeholder ctor - HTTP support is not implemented yet, which is flagged via BT_ASSERT_X
+BamHttp::BamHttp(const string& url)
+    : IBamIODevice()
+{
+    (void)url;
+    BT_ASSERT_X(false, "BamHttp not yet implemented");
+}
+
+// nothing to release
+BamHttp::~BamHttp(void)
+{
+}
+
+// placeholder - there is nothing to close
+void BamHttp::Close(void) {
+}
+
+// placeholder - always reports random access as available
+bool BamHttp::IsRandomAccess(void) const {
+    const bool supportsRandomAccess = true;
+    return supportsRandomAccess;
+}
+
+// placeholder - ignores @mode & reports success without opening anything
+bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
+    (void)mode;
+    const bool opened = true;
+    return opened;
+}
+
+// placeholder - reads nothing & reports 0 bytes
+size_t BamHttp::Read(char* data, const unsigned int numBytes) {
+    (void)numBytes;
+    (void)data;
+    const size_t bytesRead = 0;
+    return bytesRead;
+}
+
+// placeholder - ignores @position & reports success
+bool BamHttp::Seek(const int64_t& position) {
+    (void)position;
+    const bool moved = true;
+    return moved;
+}
+
+// placeholder - no position is tracked, so -1 is returned
+int64_t BamHttp::Tell(void) const {
+    const int64_t noPosition = -1;
+    return noPosition;
+}
+
+// placeholder - writes nothing & reports 0 bytes
+size_t BamHttp::Write(const char* data, const unsigned int numBytes) {
+    (void)numBytes;
+    (void)data;
+    const size_t bytesWritten = 0;
+    return bytesWritten;
+}
diff --git a/src/api/internal/io/BamHttp_p.h b/src/api/internal/io/BamHttp_p.h
new file mode 100644 (file)
index 0000000..38e94b7
--- /dev/null
@@ -0,0 +1,56 @@
+// ***************************************************************************
+// BamHttp_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on HTTP server
+// ***************************************************************************
+
+#ifndef BAMHTTP_P_H
+#define BAMHTTP_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// IO device intended for reading/writing BAM files on an HTTP server.
+// The class declares no data members and no internal methods yet; it only
+// lays out the IBamIODevice interface for the HTTP case.
+class BamHttp : public IBamIODevice {
+
+    // ctor & dtor
+    public:
+        // url: location of the remote BAM file
+        BamHttp(const std::string& url);
+        ~BamHttp(void);
+
+    // IBamIODevice implementation
+    public:
+        void Close(void);
+        bool IsRandomAccess(void) const;
+        bool Open(const IBamIODevice::OpenMode mode);
+        size_t Read(char* data, const unsigned int numBytes);
+        bool Seek(const int64_t& position);
+        int64_t Tell(void) const;
+        size_t Write(const char* data, const unsigned int numBytes);
+
+    // internal methods
+    private:
+        // (none yet)
+
+    // data members
+    private:
+        // (none yet)
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMHTTP_P_H
diff --git a/src/api/internal/io/BamPipe_p.cpp b/src/api/internal/io/BamPipe_p.cpp
new file mode 100644 (file)
index 0000000..40f1e10
--- /dev/null
@@ -0,0 +1,57 @@
+// ***************************************************************************
+// BamPipe_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM pipe-specific IO behavior
+// ***************************************************************************
+
+#include "api/internal/io/BamPipe_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <iostream>
+using namespace std;
+
+// constructor - all state lives in the ILocalIODevice base
+BamPipe::BamPipe(void)
+    : ILocalIODevice()
+{ }
+
+// destructor - BamPipe itself has nothing extra to release
+BamPipe::~BamPipe(void)
+{ }
+
+// a pipe can only be consumed sequentially (see BamPipe::Seek)
+bool BamPipe::IsRandomAccess(void) const
+{
+    const bool supportsSeeking = false;
+    return supportsSeeking;
+}
+
+// Attaches the device to stdin (ReadOnly) or stdout (WriteOnly), re-opened in
+// binary mode. Returns true on success; on failure returns false and records a
+// message via SetErrorString(). Any other mode is rejected.
+bool BamPipe::Open(const IBamIODevice::OpenMode mode) {
+
+    // discard whatever stream a previous Open() may have left behind
+    Close();
+
+    // only the two single-direction modes map onto a standard stream
+    const bool wantsRead  = ( mode == IBamIODevice::ReadOnly );
+    const bool wantsWrite = ( mode == IBamIODevice::WriteOnly );
+    if ( !wantsRead && !wantsWrite ) {
+        SetErrorString("BamPipe::Open", "unknown open mode requested");
+        return false;
+    }
+
+    // re-open the matching standard stream
+    m_stream = ( wantsRead ? freopen(0, "rb", stdin)
+                           : freopen(0, "wb", stdout) );
+
+    // freopen() yields a null FILE* on failure
+    if ( m_stream == 0 ) {
+        string message("could not open handle on ");
+        message += ( wantsRead ? "stdin" : "stdout" );
+        SetErrorString("BamPipe::Open", message);
+        return false;
+    }
+
+    // remember the mode we were opened with
+    m_mode = mode;
+    return true;
+}
+
+// seeking is never possible on a pipe: records an error and reports failure
+// (the requested position is deliberately left unnamed & unused)
+bool BamPipe::Seek(const int64_t& )
+{
+    SetErrorString("BamPipe::Seek", "random access not allowed in FIFO pipe");
+    return false;
+}
diff --git a/src/api/internal/io/BamPipe_p.h b/src/api/internal/io/BamPipe_p.h
new file mode 100644 (file)
index 0000000..8e4e4c4
--- /dev/null
@@ -0,0 +1,46 @@
+// ***************************************************************************
+// BamPipe_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM pipe-specific IO behavior
+// ***************************************************************************
+
+#ifndef BAMPIPE_P_H
+#define BAMPIPE_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/ILocalIODevice_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Pipe-specific IO device. Close/Read/Tell/Write come from ILocalIODevice;
+// this class only supplies the methods declared below.
+class BamPipe : public ILocalIODevice {
+
+    // ctor & dtor
+    public:
+        BamPipe(void);
+        ~BamPipe(void);
+
+    // IBamIODevice implementation
+    public:
+        bool IsRandomAccess(void) const;
+        bool Open(const IBamIODevice::OpenMode mode);
+        bool Seek(const int64_t& position);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMPIPE_P_H
diff --git a/src/api/internal/io/BgzfStream_p.cpp b/src/api/internal/io/BgzfStream_p.cpp
new file mode 100644 (file)
index 0000000..8b1aff6
--- /dev/null
@@ -0,0 +1,460 @@
+// ***************************************************************************
+// BgzfStream_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Based on BGZF routines developed at the Broad Institute.
+// Provides the basic functionality for reading & writing BGZF files
+// Replaces the old BGZF.* files to avoid clashing with other toolkits
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/BamConstants.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include "zlib.h"
+
+#include <cstring>
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+using namespace std;
+
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+// Allocates the compressed & uncompressed block buffers.
+// If the second allocation throws, the destructor will not run (the object was
+// never fully constructed), so the first buffer is released here before the
+// exception is passed on.
+BgzfStream::RaiiWrapper::RaiiWrapper(void)
+    : UncompressedBlock(0)
+    , CompressedBlock(0)
+{
+    CompressedBlock = new char[Constants::BGZF_MAX_BLOCK_SIZE];
+    try {
+        UncompressedBlock = new char[Constants::BGZF_DEFAULT_BLOCK_SIZE];
+    } catch ( ... ) {
+        delete[] CompressedBlock;
+        CompressedBlock = 0;
+        throw;
+    }
+}
+
+// releases both block buffers and nulls the pointers
+BgzfStream::RaiiWrapper::~RaiiWrapper(void) {
+
+    delete[] CompressedBlock;
+    CompressedBlock = 0;
+
+    delete[] UncompressedBlock;
+    UncompressedBlock = 0;
+}
+
+// ---------------------------
+// BgzfStream implementation
+// ---------------------------
+
+// constructor
+// Starts with no device attached, an empty block, and compressed output enabled.
+BgzfStream::BgzfStream(void)
+    : m_blockLength(0),
+      m_blockOffset(0),
+      m_blockAddress(0),
+      m_isWriteCompressed(true),
+      m_device(0)
+{
+}
+
+// destructor
+// destructor
+// Closes the stream, which flushes any pending output. Close() can throw
+// (FlushBlock() and DeflateBlock() raise BamException); an exception escaping
+// a destructor during stack unwinding terminates the program, so failures are
+// swallowed here. Call Close() explicitly beforehand to observe write errors.
+BgzfStream::~BgzfStream(void) {
+    try {
+        Close();
+    } catch ( ... ) {
+        // Close() did not finish: make sure the device is still released
+        // (deleting a null pointer is a no-op if Close() already did so)
+        delete m_device;
+        m_device = 0;
+    }
+}
+
+// checks BGZF block header
+// checks BGZF block header
+// Returns true only when every checked header field has its expected value;
+// fields are tested in ascending byte order and the first mismatch ends the check.
+bool BgzfStream::CheckBlockHeader(char* header) {
+
+    // gzip magic bytes & compression method
+    if ( header[0] != Constants::GZIP_ID1 ) return false;
+    if ( header[1] != Constants::GZIP_ID2 ) return false;
+    if ( header[2] != Z_DEFLATED )          return false;
+
+    // the 'extra field' flag must be set
+    if ( (header[3] & Constants::FLG_FEXTRA) == 0 ) return false;
+
+    // extra-field length, then the BGZF subfield identifiers
+    if ( BamTools::UnpackUnsignedShort(&header[10]) != Constants::BGZF_XLEN ) return false;
+    if ( header[12] != Constants::BGZF_ID1 ) return false;
+    if ( header[13] != Constants::BGZF_ID2 ) return false;
+
+    // finally, the BGZF subfield length
+    return ( BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
+}
+
+// closes BGZF file
+// closes BGZF file
+// For a device open in WriteOnly mode, pending data is flushed and an empty
+// block is written as EOF marker before the device is closed. The device is
+// always closed & deleted and the block state reset, even when flushing
+// throws; in that case the exception is re-thrown after the cleanup.
+void BgzfStream::Close(void) {
+
+    // skip if no device open
+    if ( m_device == 0 ) return;
+
+    // if writing to file, flush the current BGZF block,
+    // then write an empty block (as EOF marker)
+    try {
+        if ( m_device->IsOpen() && (m_device->Mode() == IBamIODevice::WriteOnly) ) {
+            FlushBlock();
+            const size_t blockLength = DeflateBlock();
+            // NOTE(review): the result of this write is not checked, unlike in FlushBlock()
+            m_device->Write(Resources.CompressedBlock, blockLength);
+        }
+    } catch ( ... ) {
+
+        // flushing failed - still release the device & reset state, so the
+        // stream is not left holding a half-written device, then report the error
+        m_device->Close();
+        delete m_device;
+        m_device = 0;
+        m_blockLength = 0;
+        m_blockOffset = 0;
+        m_blockAddress = 0;
+        m_isWriteCompressed = true;
+        throw;
+    }
+
+    // close device
+    m_device->Close();
+    delete m_device;
+    m_device = 0;
+
+    // reset state
+    m_blockLength = 0;
+    m_blockOffset = 0;
+    m_blockAddress = 0;
+    m_isWriteCompressed = true;
+}
+
+// compresses the current block
+// compresses the current block
+// Compresses the first m_blockOffset bytes of Resources.UncompressedBlock into
+// Resources.CompressedBlock (header + deflate data + footer) and returns the
+// total size of that compressed block. If the output does not fit, the input
+// length is reduced in 1024-byte steps and compression is retried; any input
+// left unconsumed is moved to the front of the uncompressed buffer and
+// m_blockOffset is set to its length.
+// Throws BamException on any zlib failure or size inconsistency.
+size_t BgzfStream::DeflateBlock(void) {
+
+    // initialize the gzip header
+    char* buffer = Resources.CompressedBlock;
+    memset(buffer, 0, 18);
+    buffer[0]  = Constants::GZIP_ID1;
+    buffer[1]  = Constants::GZIP_ID2;
+    buffer[2]  = Constants::CM_DEFLATE;
+    buffer[3]  = Constants::FLG_FEXTRA;
+    buffer[9]  = Constants::OS_UNKNOWN;
+    buffer[10] = Constants::BGZF_XLEN;
+    buffer[12] = Constants::BGZF_ID1;
+    buffer[13] = Constants::BGZF_ID2;
+    buffer[14] = Constants::BGZF_LEN;
+
+    // set compression level (level 0 stores the data without compressing it)
+    const int compressionLevel = ( m_isWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );
+
+    // loop to retry for blocks that do not compress enough
+    int inputLength = m_blockOffset;
+    size_t compressedLength = 0;
+    const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
+
+    while ( true ) {
+
+        // initialize zstream values
+        // (zlib requires zalloc, zfree AND opaque to be set before deflateInit2)
+        z_stream zs;
+        zs.zalloc    = NULL;
+        zs.zfree     = NULL;
+        zs.opaque    = NULL;
+        zs.next_in   = (Bytef*)Resources.UncompressedBlock;
+        zs.avail_in  = inputLength;
+        zs.next_out  = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
+        zs.avail_out = bufferSize -
+                       Constants::BGZF_BLOCK_HEADER_LENGTH -
+                       Constants::BGZF_BLOCK_FOOTER_LENGTH;
+
+        // initialize the zlib compression algorithm
+        int status = deflateInit2(&zs,
+                                  compressionLevel,
+                                  Z_DEFLATED,
+                                  Constants::GZIP_WINDOW_BITS,
+                                  Constants::Z_DEFAULT_MEM_LEVEL,
+                                  Z_DEFAULT_STRATEGY);
+        if ( status != Z_OK )
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
+
+        // compress the data
+        status = deflate(&zs, Z_FINISH);
+
+        // if not at stream end
+        if ( status != Z_STREAM_END ) {
+
+            deflateEnd(&zs);
+
+            // there was not enough space available in buffer
+            // try to reduce the input length & re-start loop
+            if ( status == Z_OK ) {
+                inputLength -= 1024;
+                if ( inputLength < 0 )
+                    throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
+                continue;
+            }
+
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
+        }
+
+        // finalize the compression routine
+        status = deflateEnd(&zs);
+        if ( status != Z_OK )
+            throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
+
+        // update compressedLength
+        compressedLength = zs.total_out +
+                           Constants::BGZF_BLOCK_HEADER_LENGTH +
+                           Constants::BGZF_BLOCK_FOOTER_LENGTH;
+        if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
+            throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
+
+        // quit while loop
+        break;
+    }
+
+    // store the compressed length (written as total block size minus one)
+    BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
+
+    // store the CRC32 checksum, followed by the uncompressed input size,
+    // in the last 8 bytes of the block
+    uint32_t crc = crc32(0, NULL, 0);
+    crc = crc32(crc, (Bytef*)Resources.UncompressedBlock, inputLength);
+    BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
+    BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
+
+    // ensure that we have less than a block of data left
+    int remaining = m_blockOffset - inputLength;
+    if ( remaining > 0 ) {
+        if ( remaining > inputLength )
+            throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
+        // remaining <= inputLength here, so source & destination cannot overlap
+        memcpy(Resources.UncompressedBlock, Resources.UncompressedBlock + inputLength, remaining);
+    }
+
+    // update block data
+    m_blockOffset = remaining;
+
+    // return result
+    return compressedLength;
+}
+
+// flushes the data in the BGZF block
+// flushes the data in the BGZF block
+// Compresses & writes blocks until the uncompressed buffer is empty.
+// Throws BamException if the device writes fewer bytes than requested.
+void BgzfStream::FlushBlock(void) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::FlushBlock() - attempting to flush to null device" );
+
+    // DeflateBlock() lowers m_blockOffset by the amount of input it consumed,
+    // so keep going until nothing is left in the buffer
+    while ( m_blockOffset > 0 ) {
+
+        const size_t compressedSize = DeflateBlock();
+        const size_t writtenSize    = m_device->Write(Resources.CompressedBlock, compressedSize);
+
+        // complete write: advance the block address past the block just written
+        if ( writtenSize == compressedSize ) {
+            m_blockAddress += compressedSize;
+            continue;
+        }
+
+        // short write: report how much actually made it out
+        stringstream report("");
+        report << "expected to write " << compressedSize
+               << " bytes during flushing, but wrote " << writtenSize;
+        throw BamException("BgzfStream::FlushBlock", report.str());
+    }
+}
+
+// decompresses the current block
+// decompresses the current block
+// Inflates the block held in Resources.CompressedBlock into
+// Resources.UncompressedBlock and returns the number of uncompressed bytes.
+// blockLength is the total size of the compressed block.
+// Throws BamException if any zlib call fails.
+size_t BgzfStream::InflateBlock(const size_t& blockLength) {
+
+    // setup zlib stream object
+    // (zlib requires zalloc, zfree AND opaque to be set before inflateInit2)
+    z_stream zs;
+    zs.zalloc    = NULL;
+    zs.zfree     = NULL;
+    zs.opaque    = NULL;
+    // input starts 18 bytes in - presumably just past the block header; TODO confirm
+    zs.next_in   = (Bytef*)Resources.CompressedBlock + 18;
+    zs.avail_in  = blockLength - 16;
+    zs.next_out  = (Bytef*)Resources.UncompressedBlock;
+    zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+
+    // initialize
+    int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
+    if ( status != Z_OK )
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
+
+    // decompress
+    status = inflate(&zs, Z_FINISH);
+    if ( status != Z_STREAM_END ) {
+        inflateEnd(&zs);
+        throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
+    }
+
+    // finalize (inflateEnd has already run at this point, so it is not called again on failure)
+    status = inflateEnd(&zs);
+    if ( status != Z_OK )
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
+
+    // return result
+    return zs.total_out;
+}
+
+// returns true only when a device is attached and that device reports itself open
+bool BgzfStream::IsOpen(void) const {
+    return ( m_device != 0 ) && m_device->IsOpen();
+}
+
+// opens the BGZF file
+// Closes any current device, asks BamDeviceFactory for a device matching
+// filename, and opens it in the requested mode.
+// Throws BamException (carrying the device's error string) if the open fails.
+void BgzfStream::Open(const string& filename, const IBamIODevice::OpenMode mode) {
+
+    // start from a clean slate
+    Close();
+    BT_ASSERT_X( (m_device == 0), "BgzfStream::Open() - unable to properly close previous IO device" );
+
+    // let the factory pick the device for this filename
+    m_device = BamDeviceFactory::CreateDevice(filename);
+    BT_ASSERT_X( m_device, "BgzfStream::Open() - unable to create IO device from filename" );
+
+    // nothing more to do once the device opens successfully
+    if ( m_device->Open(mode) )
+        return;
+
+    // otherwise surface the device's own error text
+    string message("could not open BGZF stream: \n\t");
+    message += m_device->GetErrorString();
+    throw BamException("BgzfStream::Open", message);
+}
+
+// reads BGZF data into a byte buffer
+// reads BGZF data into a byte buffer
+// Copies up to dataLength uncompressed bytes into data, loading & inflating
+// further blocks as needed. Returns the number of bytes actually copied, which
+// is less than dataLength when no more block data is available, and 0 when
+// dataLength is 0 or the device is not open in ReadOnly mode.
+// Exceptions thrown by ReadBlock() propagate to the caller.
+size_t BgzfStream::Read(char* data, const size_t dataLength) {
+
+    if ( dataLength == 0 )
+        return 0;
+
+    // if stream not open for reading
+    BT_ASSERT_X( m_device, "BgzfStream::Read() - trying to read from null device");
+    if ( !m_device->IsOpen() || (m_device->Mode() != IBamIODevice::ReadOnly) )
+        return 0;
+
+    // read blocks as needed until desired data length is retrieved
+    char* output = data;
+    size_t numBytesRead = 0;
+    while ( numBytesRead < dataLength ) {
+
+        // determine bytes available in current block
+        // (both members are unsigned; right after Seek() m_blockLength is 0 while
+        //  m_blockOffset may be non-zero, so the difference is stored in a signed
+        //  int where it shows up as a value <= 0 and triggers a block read)
+        int bytesAvailable = m_blockLength - m_blockOffset;
+
+        // read (and decompress) next block if needed
+        if ( bytesAvailable <= 0 ) {
+            ReadBlock();
+            bytesAvailable = m_blockLength - m_blockOffset;
+            // still nothing available: stop and return what we have so far
+            if ( bytesAvailable <= 0 )
+                break;
+        }
+
+        // copy data from uncompressed source buffer into data destination buffer
+        const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable );
+        memcpy(output, Resources.UncompressedBlock + m_blockOffset, copyLength);
+
+        // update counters
+        m_blockOffset += copyLength;
+        output        += copyLength;
+        numBytesRead  += copyLength;
+    }
+
+    // update block data
+    // (current block fully consumed: record the device position as the next
+    //  block's address and mark the buffer empty)
+    if ( m_blockOffset == m_blockLength ) {
+        m_blockAddress = m_device->Tell();
+        m_blockOffset  = 0;
+        m_blockLength  = 0;
+
+    }
+
+    // return actual number of bytes read
+    return numBytesRead;
+}
+
+// reads a BGZF block
+// reads a BGZF block
+// Reads the next block header from the device, validates it, reads the rest of
+// the block and inflates it into the uncompressed buffer. If the device has no
+// more data, m_blockLength is set to 0 and the function returns normally.
+// Throws BamException on a truncated or invalid header, an implausible block
+// size, a short read, or a decompression failure.
+void BgzfStream::ReadBlock(void) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::ReadBlock() - trying to read from null IO device");
+
+    // store block's starting address
+    int64_t blockAddress = m_device->Tell();
+
+    // read block header from file
+    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
+    size_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH);
+
+    // if block header empty
+    if ( numBytesRead == 0 ) {
+        m_blockLength = 0;
+        return;
+    }
+
+    // if block header invalid size
+    if ( numBytesRead != Constants::BGZF_BLOCK_HEADER_LENGTH )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header size");
+
+    // validate block header contents
+    if ( !BgzfStream::CheckBlockHeader(header) )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
+
+    // total block size is stored in the header as (size - 1)
+    const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
+
+    // reject sizes that cannot hold a header + footer, or that exceed the
+    // compressed buffer; otherwise the subtraction below would wrap around and
+    // the device read would run past the end of Resources.CompressedBlock
+    const size_t minBlockLength = static_cast<size_t>(Constants::BGZF_BLOCK_HEADER_LENGTH) +
+                                  static_cast<size_t>(Constants::BGZF_BLOCK_FOOTER_LENGTH);
+    const size_t maxBlockLength = static_cast<size_t>(Constants::BGZF_MAX_BLOCK_SIZE);
+    if ( blockLength < minBlockLength || blockLength > maxBlockLength )
+        throw BamException("BgzfStream::ReadBlock", "invalid block size in header");
+
+    // copy header contents to compressed buffer
+    memcpy(Resources.CompressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
+
+    // read remainder of block
+    const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
+    numBytesRead = m_device->Read(&Resources.CompressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining);
+    if ( numBytesRead != remaining )
+        throw BamException("BgzfStream::ReadBlock", "could not read data from block");
+
+    // decompress block data
+    numBytesRead = InflateBlock(blockLength);
+
+    // update block data
+    // (the offset is only reset when a block was already loaded; after Seek()
+    //  m_blockLength is 0 and m_blockOffset holds the requested in-block offset,
+    //  which must survive this call)
+    if ( m_blockLength != 0 )
+        m_blockOffset = 0;
+    m_blockAddress = blockAddress;
+    m_blockLength  = numBytesRead;
+}
+
+// seek to position in BGZF file
+// seek to position in BGZF file
+// position packs two values: the low 16 bits are the offset inside the
+// uncompressed block, the bits above them are the block's address on the device.
+// Does nothing if the stream is not open; throws BamException if the device is
+// not random-access or its own seek fails.
+void BgzfStream::Seek(const int64_t& position) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::Seek() - trying to seek on null IO device");
+
+    // nothing to do on a closed stream
+    if ( !IsOpen() )
+        return;
+
+    // split the packed position into its two parts
+    const int     offsetInBlock = (position & 0xFFFF);
+    const int64_t addressOfBlock = (position >> 16) & 0xFFFFFFFFFFFFLL;
+
+    // bail out if the device cannot (or fails to) reposition itself
+    if ( !m_device->IsRandomAccess() || !m_device->Seek(addressOfBlock) ) {
+        stringstream report("");
+        report << "unable to seek to position: " << position;
+        throw BamException("BgzfStream::Seek", report.str());
+    }
+
+    // success: forget the loaded block & remember where the next read starts
+    m_blockLength  = 0;
+    m_blockAddress = addressOfBlock;
+    m_blockOffset  = offsetInBlock;
+}
+
+// enable/disable compressed output (consulted by DeflateBlock() when picking the zlib level)
+void BgzfStream::SetWriteCompressed(bool ok)
+{
+    m_isWriteCompressed = ok;
+}
+
+// get file position in BGZF file
+// get file position in BGZF file
+// Packs the block address (upper bits) and the offset inside the block
+// (low 16 bits) into one value; returns 0 when the stream is not open.
+int64_t BgzfStream::Tell(void) const {
+    if ( IsOpen() )
+        return ( (m_blockAddress << 16) | (m_blockOffset & 0xFFFF) );
+    return 0;
+}
+
+// writes the supplied data into the BGZF buffer
+// writes the supplied data into the BGZF buffer
+// Appends data to the uncompressed block buffer, flushing the buffer each time
+// it fills up. Returns the number of bytes accepted, or 0 if the stream is
+// not open.
+size_t BgzfStream::Write(const char* data, const size_t dataLength) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::Write() - trying to write to null IO device");
+    BT_ASSERT_X( (m_device->Mode() == IBamIODevice::WriteOnly),
+                 "BgzfStream::Write() - trying to write to non-writable IO device");
+
+    // skip if file not open for writing
+    if ( !IsOpen() )
+        return 0;
+
+    const size_t blockCapacity = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+    char* const  blockBuffer   = Resources.UncompressedBlock;
+
+    // feed the caller's data into the block buffer, one chunk at a time
+    size_t numBytesWritten = 0;
+    while ( numBytesWritten < dataLength ) {
+
+        // take as much as still fits into the current block
+        const size_t roomInBlock = blockCapacity - m_blockOffset;
+        const size_t bytesLeft   = dataLength - numBytesWritten;
+        unsigned int chunkLength = min(roomInBlock, bytesLeft);
+        memcpy(blockBuffer + m_blockOffset, data + numBytesWritten, chunkLength);
+
+        // account for the chunk just copied
+        m_blockOffset   += chunkLength;
+        numBytesWritten += chunkLength;
+
+        // flush (& compress) output buffer when full
+        if ( m_blockOffset == blockCapacity )
+            FlushBlock();
+    }
+
+    // return actual number of bytes written
+    return numBytesWritten;
+}
diff --git a/src/api/internal/io/BgzfStream_p.h b/src/api/internal/io/BgzfStream_p.h
new file mode 100644 (file)
index 0000000..88d7472
--- /dev/null
@@ -0,0 +1,97 @@
+// ***************************************************************************
+// BgzfStream_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Based on BGZF routines developed at the Broad Institute.
+// Provides the basic functionality for reading & writing BGZF files
+// Replaces the old BGZF.* files to avoid clashing with other toolkits
+// ***************************************************************************
+
+#ifndef BGZFSTREAM_P_H
+#define BGZFSTREAM_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Reads & writes BGZF data through an IBamIODevice.
+// The stream owns its IO device (Close() deletes it) and, via Resources, the
+// two block buffers, so copying is disabled to prevent double deletion.
+class BgzfStream {
+
+    // constructor & destructor
+    public:
+        BgzfStream(void);
+        ~BgzfStream(void);
+
+    // main interface methods
+    public:
+        // closes BGZF file
+        void Close(void);
+        // returns true if BgzfStream open for IO
+        bool IsOpen(void) const;
+        // opens the BGZF file
+        void Open(const std::string& filename, const IBamIODevice::OpenMode mode);
+        // reads BGZF data into a byte buffer
+        size_t Read(char* data, const size_t dataLength);
+        // seek to position in BGZF file
+        void Seek(const int64_t& position);
+        // sets IO device (closes previous, if any, but does not attempt to open)
+        // NOTE(review): no definition appears alongside the other methods in BgzfStream_p.cpp - confirm it exists
+        void SetIODevice(IBamIODevice* device);
+        // enable/disable compressed output
+        void SetWriteCompressed(bool ok);
+        // get file position in BGZF file
+        int64_t Tell(void) const;
+        // writes the supplied data into the BGZF buffer
+        size_t Write(const char* data, const size_t dataLength);
+
+    // internal methods
+    private:
+        // compresses the current block
+        size_t DeflateBlock(void);
+        // flushes the data in the BGZF block
+        void FlushBlock(void);
+        // de-compresses the current block
+        size_t InflateBlock(const size_t& blockLength);
+        // reads a BGZF block
+        void ReadBlock(void);
+
+    // static 'utility' methods
+    public:
+        // checks BGZF block header
+        static bool CheckBlockHeader(char* header);
+
+    // data members
+    public:
+        // number of valid bytes in the uncompressed buffer (when reading)
+        unsigned int m_blockLength;
+        // current read/write offset inside the uncompressed buffer
+        unsigned int m_blockOffset;
+        // device address of the current block
+        uint64_t     m_blockAddress;
+
+        bool m_isWriteCompressed;
+        // owned IO device; 0 when no device is attached
+        IBamIODevice* m_device;
+
+        // owns the compressed & uncompressed block buffers
+        struct RaiiWrapper {
+            RaiiWrapper(void);
+            ~RaiiWrapper(void);
+            char* UncompressedBlock;
+            char* CompressedBlock;
+
+            // not copyable (declared, never defined)
+            private:
+                RaiiWrapper(const RaiiWrapper&);
+                RaiiWrapper& operator=(const RaiiWrapper&);
+        };
+        RaiiWrapper Resources;
+
+    // not copyable (declared, never defined)
+    private:
+        BgzfStream(const BgzfStream&);
+        BgzfStream& operator=(const BgzfStream&);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BGZFSTREAM_P_H
diff --git a/src/api/internal/io/ILocalIODevice_p.cpp b/src/api/internal/io/ILocalIODevice_p.cpp
new file mode 100644 (file)
index 0000000..63a3bee
--- /dev/null
@@ -0,0 +1,56 @@
+// ***************************************************************************
+// ILocalIODevice_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides shared behavior for files & pipes
+// ***************************************************************************
+
+#include "api/internal/io/ILocalIODevice_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+using namespace std;
+
+// starts out with no FILE* attached
+ILocalIODevice::ILocalIODevice(void)
+    : IBamIODevice(),
+      m_stream(0)
+{
+}
+
+// releases the stream (if any) when the device goes away
+ILocalIODevice::~ILocalIODevice(void)
+{
+    Close();
+}
+
+// Flushes & closes the underlying FILE* and marks the device as not open.
+// Does nothing when the device is not open.
+void ILocalIODevice::Close(void) {
+
+    // only an open device has a stream to release
+    if ( IsOpen() ) {
+
+        // flush & close FILE*
+        fflush(m_stream);
+        fclose(m_stream);
+        m_stream = 0;
+
+        // the device is no longer usable until re-opened
+        m_mode = IBamIODevice::NotOpen;
+    }
+}
+
+// Reads up to numBytes bytes from the stream into data.
+// Returns the count reported by fread().
+size_t ILocalIODevice::Read(char* data, const unsigned int numBytes) {
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Read: trying to read from null stream" );
+    BT_ASSERT_X( (m_mode == IBamIODevice::ReadOnly), "ILocalIODevice::Read: device not in read-only mode");
+
+    const size_t bytesObtained = fread(data, sizeof(char), numBytes, m_stream);
+    return bytesObtained;
+}
+
+// Returns the current position in the stream, as reported by ftell64().
+int64_t ILocalIODevice::Tell(void) const {
+    // assertion text corrected: the original read "fromnull stream"
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Tell: trying to get file position from null stream" );
+    return ftell64(m_stream);
+}
+
+// Writes numBytes bytes from data to the stream.
+// Returns the count reported by fwrite().
+size_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) {
+    // assertion text corrected: the original read "tryint to write"
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Write: trying to write to null stream" );
+    BT_ASSERT_X( (m_mode == IBamIODevice::WriteOnly), "ILocalIODevice::Write: device not in write-only mode" );
+    return fwrite(data, sizeof(char), numBytes, m_stream);
+}
diff --git a/src/api/internal/io/ILocalIODevice_p.h b/src/api/internal/io/ILocalIODevice_p.h
new file mode 100644 (file)
index 0000000..a71f378
--- /dev/null
@@ -0,0 +1,50 @@
+// ***************************************************************************
+// ILocalIODevice_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides shared behavior for files & pipes
+// ***************************************************************************
+
+#ifndef ILOCALIODEVICE_P_H
+#define ILOCALIODEVICE_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+
+namespace BamTools {
+namespace Internal {
+
+// Shared behavior for devices backed by a C FILE* (files & pipes).
+// Implements Close/Read/Tell/Write on top of m_stream; the methods not
+// declared here (e.g. Open, Seek) are left to subclasses such as BamPipe.
+class ILocalIODevice : public IBamIODevice {
+
+    // ctor & dtor
+    public:
+        ILocalIODevice(void);
+        // closes the stream if it is still open
+        virtual ~ILocalIODevice(void);
+
+    // IBamIODevice implementation
+    public:
+        // flushes & closes m_stream, resets the mode to NotOpen
+        virtual void Close(void);
+        // reads up to numBytes bytes into data; returns the fread() count
+        virtual size_t Read(char* data, const unsigned int numBytes);
+        // returns the current stream position
+        virtual int64_t Tell(void) const;
+        // writes numBytes bytes from data; returns the fwrite() count
+        virtual size_t Write(const char* data, const unsigned int numBytes);
+
+    // data members
+    protected:
+        // underlying stream; 0 until a subclass assigns it, and again after Close() on an open device
+        FILE* m_stream;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // ILOCALIODEVICE_P_H
diff --git a/src/api/internal/io/IRemoteIODevice_p.cpp b/src/api/internal/io/IRemoteIODevice_p.cpp
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/api/internal/io/IRemoteIODevice_p.h b/src/api/internal/io/IRemoteIODevice_p.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/api/internal/sam/SamFormatParser_p.cpp b/src/api/internal/sam/SamFormatParser_p.cpp
new file mode 100644 (file)
index 0000000..74c1fed
--- /dev/null
@@ -0,0 +1,222 @@
+// ***************************************************************************
+// SamFormatParser.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for parsing SAM header text into SamHeader object
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamFormatParser_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// binds the parser to the SamHeader it will populate
+SamFormatParser::SamFormatParser(SamHeader& header) : m_header(header) { }
+
+// nothing to release: the header is held by reference
+SamFormatParser::~SamFormatParser() { }
+
+// Clears the target header, then parses 'headerText' one line at a time.
+void SamFormatParser::Parse(const string& headerText) {
+
+    // always start from an empty header
+    m_header.Clear();
+
+    // empty text is acceptable - there is simply nothing to parse
+    if ( !headerText.empty() ) {
+
+        // otherwise hand each line of text to the SAM line parser
+        istringstream textStream(headerText);
+        for ( string currentLine; getline(textStream, currentLine); )
+            ParseSamLine(currentLine);
+    }
+}
+
+// Dispatches one header line to the handler matching its leading record token.
+// Lines shorter than 5 characters are ignored; an unrecognized token raises
+// a BamException.
+void SamFormatParser::ParseSamLine(const string& line) {
+
+    // too short to hold a record token plus any data - ignore
+    if ( line.length() < 5 )
+        return;
+
+    // first 3 chars hold the record token, data starts at index 4
+    const string recordToken = line.substr(0,3);
+    const string payload     = line.substr(4);
+
+    // hand the payload to the matching record parser
+    if ( recordToken == Constants::SAM_HD_BEGIN_TOKEN ) { ParseHDLine(payload); return; }
+    if ( recordToken == Constants::SAM_SQ_BEGIN_TOKEN ) { ParseSQLine(payload); return; }
+    if ( recordToken == Constants::SAM_RG_BEGIN_TOKEN ) { ParseRGLine(payload); return; }
+    if ( recordToken == Constants::SAM_PG_BEGIN_TOKEN ) { ParsePGLine(payload); return; }
+    if ( recordToken == Constants::SAM_CO_BEGIN_TOKEN ) { ParseCOLine(payload); return; }
+
+    // no known record token matched
+    const string message = string("unknown token: ") + recordToken;
+    throw BamException("SamFormatParser::ParseSamLine", message);
+}
+
+// Parses the tab-delimited fields of an @HD line into the header's
+// Version / SortOrder / GroupOrder members.
+// Throws BamException on a malformed field, an unknown tag, or a missing VN tag.
+void SamFormatParser::ParseHDLine(const string& line) {
+
+    // split HD lines into tokens
+    const vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field is a 2-char tag, a separator, then the value; anything shorter
+        // (e.g. an empty field between two tabs) would make substr(3) throw
+        // std::out_of_range, so report it as a parse error instead
+        if ( token.length() < 3 ) {
+            const string message = string("malformed HD field: ") + token;
+            throw BamException("SamFormatParser::ParseHDLine", message);
+        }
+
+        // get tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set header contents
+        if      ( tokenTag == Constants::SAM_HD_VERSION_TAG    ) m_header.Version    = tokenValue;
+        else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG  ) m_header.SortOrder  = tokenValue;
+        else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
+        else {
+            const string message = string("unknown HD tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseHDLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !m_header.HasVersion() )
+        throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
+}
+
+// Parses the tab-delimited fields of an @SQ line into a SamSequence and
+// adds it to the header's sequence dictionary.
+// Throws BamException on a malformed field, an unknown tag, or a missing
+// SN / LN tag.
+void SamFormatParser::ParseSQLine(const string& line) {
+
+    SamSequence seq;
+
+    // split SQ line into tokens
+    const vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field is a 2-char tag, a separator, then the value; anything shorter
+        // (e.g. an empty field between two tabs) would make substr(3) throw
+        // std::out_of_range, so report it as a parse error instead
+        if ( token.length() < 3 ) {
+            const string message = string("malformed SQ field: ") + token;
+            throw BamException("SamFormatParser::ParseSQLine", message);
+        }
+
+        // get tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set sequence contents
+        if      ( tokenTag == Constants::SAM_SQ_NAME_TAG       ) seq.Name = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG     ) seq.Length = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG   ) seq.Checksum = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG    ) seq.Species = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_URI_TAG        ) seq.URI = tokenValue;
+        else {
+            const string message = string("unknown SQ tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseSQLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !seq.HasName() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
+    if ( !seq.HasLength() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
+
+    // store SAM sequence entry
+    m_header.Sequences.Add(seq);
+}
+
+// Parses the tab-delimited fields of an @RG line into a SamReadGroup and
+// adds it to the header's read group dictionary.
+// Throws BamException on a malformed field, an unknown tag, or a missing ID tag.
+void SamFormatParser::ParseRGLine(const string& line) {
+
+    SamReadGroup rg;
+
+    // split string into tokens
+    const vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field is a 2-char tag, a separator, then the value; anything shorter
+        // (e.g. an empty field between two tabs) would make substr(3) throw
+        // std::out_of_range, so report it as a parse error instead
+        if ( token.length() < 3 ) {
+            const string message = string("malformed RG field: ") + token;
+            throw BamException("SamFormatParser::ParseRGLine", message);
+        }
+
+        // get token tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set read group contents
+        if      ( tokenTag == Constants::SAM_RG_ID_TAG                  ) rg.ID = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG         ) rg.Description = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG           ) rg.FlowOrder = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG         ) rg.KeySequence = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG             ) rg.Library = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG        ) rg.PlatformUnit = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG      ) rg.ProductionDate = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG             ) rg.Program = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG              ) rg.Sample = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG           ) rg.SequencingCenter = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG       ) rg.SequencingTechnology = tokenValue;
+        else {
+            const string message = string("unknown RG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseRGLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !rg.HasID() )
+        throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
+
+    // store SAM read group entry
+    m_header.ReadGroups.Add(rg);
+}
+
+// Parses the tab-delimited fields of a @PG line into a SamProgram and
+// adds it to the header's program records.
+// Throws BamException on a malformed field, an unknown tag, or a missing ID tag.
+void SamFormatParser::ParsePGLine(const string& line) {
+
+    SamProgram pg;
+
+    // split string into tokens
+    const vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field is a 2-char tag, a separator, then the value; anything shorter
+        // (e.g. an empty field between two tabs) would make substr(3) throw
+        // std::out_of_range, so report it as a parse error instead
+        if ( token.length() < 3 ) {
+            const string message = string("malformed PG field: ") + token;
+            throw BamException("SamFormatParser::ParsePGLine", message);
+        }
+
+        // get token tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set program record contents
+        if      ( tokenTag == Constants::SAM_PG_ID_TAG              ) pg.ID = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_NAME_TAG            ) pg.Name = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG     ) pg.CommandLine = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_VERSION_TAG         ) pg.Version = tokenValue;
+        else {
+            const string message = string("unknown PG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParsePGLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !pg.HasID() )
+        throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
+
+    // store SAM program entry
+    m_header.Programs.Add(pg);
+}
+
+// @CO payload is free text: append it unchanged to the header's comments
+void SamFormatParser::ParseCOLine(const string& line) {
+    m_header.Comments.insert(m_header.Comments.end(), line);
+}
+
+// Breaks 'line' into the fields separated by 'delim', in order of appearance.
+const vector<string> SamFormatParser::Split(const string& line, const char delim) {
+
+    vector<string> fields;
+
+    // pull delimiter-terminated fields off the stream until it is exhausted
+    stringstream fieldStream(line);
+    for ( string field; getline(fieldStream, field, delim); )
+        fields.push_back(field);
+
+    return fields;
+}
diff --git a/src/api/internal/sam/SamFormatParser_p.h b/src/api/internal/sam/SamFormatParser_p.h
new file mode 100644 (file)
index 0000000..cf6d54c
--- /dev/null
@@ -0,0 +1,61 @@
+// ***************************************************************************
+// SamFormatParser.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 23 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for parsing SAM header text into SamHeader object
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PARSER_H
+#define SAM_FORMAT_PARSER_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+// Populates a SamHeader from SAM-formatted header text.
+class SamFormatParser {
+
+    public:
+        // ctor & dtor
+        SamFormatParser(BamTools::SamHeader& header);
+        ~SamFormatParser();
+
+        // parse text & populate header data
+        void Parse(const std::string& headerText);
+
+    private:
+        // line-level dispatch, then one handler per record type
+        void ParseSamLine(const std::string& line);
+        void ParseHDLine(const std::string& line);
+        void ParseSQLine(const std::string& line);
+        void ParseRGLine(const std::string& line);
+        void ParsePGLine(const std::string& line);
+        void ParseCOLine(const std::string& line);
+
+        // splits 'line' into fields on 'delim'
+        const std::vector<std::string> Split(const std::string& line, const char delim);
+
+        // header being populated (held by reference)
+        SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PARSER_H
diff --git a/src/api/internal/sam/SamFormatPrinter_p.cpp b/src/api/internal/sam/SamFormatPrinter_p.cpp
new file mode 100644 (file)
index 0000000..5a51a2f
--- /dev/null
@@ -0,0 +1,219 @@
+// ***************************************************************************
+// SamFormatPrinter.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamFormatPrinter_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+// Builds one header field: <TAB><tag><COLON><value>
+static inline
+const string FormatTag(const string& tag, const string& value) {
+    string field;
+    field += Constants::SAM_TAB;
+    field += tag;
+    field += Constants::SAM_COLON;
+    field += value;
+    return field;
+}
+
+// ---------------------------------
+// SamFormatPrinter implementation
+// ---------------------------------
+
+// binds the printer to the (read-only) header it will format
+SamFormatPrinter::SamFormatPrinter(const SamHeader& header) : m_header(header) { }
+
+// nothing to release: the header is held by reference
+SamFormatPrinter::~SamFormatPrinter() { }
+
+// Returns the header as SAM-formatted text, with record types written in the
+// order @HD, @SQ, @RG, @PG, @CO.
+const string SamFormatPrinter::ToString(void) const {
+
+    // start from an empty buffer
+    stringstream formatted("");
+
+    // each helper appends the lines for one record type
+    PrintHD(formatted);
+    PrintSQ(formatted);
+    PrintRG(formatted);
+    PrintPG(formatted);
+    PrintCO(formatted);
+
+    return formatted.str();
+}
+
+// Writes the @HD line: VN first, then SO and GO when present.
+// Writes nothing when the header has no version.
+void SamFormatPrinter::PrintHD(std::stringstream& out) const {
+
+    // no version means there is no @HD data to print
+    if ( !m_header.HasVersion() )
+        return;
+
+    // @HD VN:<Version>
+    out << Constants::SAM_HD_BEGIN_TOKEN;
+    out << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version);
+
+    // SO:<SortOrder>
+    if ( m_header.HasSortOrder() ) {
+        out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder);
+    }
+
+    // GO:<GroupOrder>
+    if ( m_header.HasGroupOrder() ) {
+        out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder);
+    }
+
+    // terminate the line
+    out << endl;
+}
+
+// Writes one @SQ line per sequence entry: SN and LN always, then AS, M5, SP
+// and UR when present.
+void SamFormatPrinter::PrintSQ(std::stringstream& out) const {
+
+    // walk the sequence dictionary
+    SamSequenceConstIterator current = m_header.Sequences.ConstBegin();
+    const SamSequenceConstIterator last = m_header.Sequences.ConstEnd();
+    while ( current != last ) {
+        const SamSequence& entry = (*current);
+
+        // @SQ SN:<Name> LN:<Length>
+        out << Constants::SAM_SQ_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_SQ_NAME_TAG, entry.Name);
+        out << FormatTag(Constants::SAM_SQ_LENGTH_TAG, entry.Length);
+
+        // AS:<AssemblyID>
+        if ( entry.HasAssemblyID() ) out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, entry.AssemblyID);
+
+        // M5:<Checksum>
+        if ( entry.HasChecksum() )   out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, entry.Checksum);
+
+        // SP:<Species>
+        if ( entry.HasSpecies() )    out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, entry.Species);
+
+        // UR:<URI>
+        if ( entry.HasURI() )        out << FormatTag(Constants::SAM_SQ_URI_TAG, entry.URI);
+
+        // terminate the line
+        out << endl;
+
+        ++current;
+    }
+}
+
+// Writes one @RG line per read group: ID always, then each optional tag that
+// is present, in the order CN, DS, DT, FO, KS, LB, PG, PI, PL, PU, SM.
+void SamFormatPrinter::PrintRG(std::stringstream& out) const {
+
+    // walk the read group dictionary
+    SamReadGroupConstIterator current = m_header.ReadGroups.ConstBegin();
+    const SamReadGroupConstIterator last = m_header.ReadGroups.ConstEnd();
+    while ( current != last ) {
+        const SamReadGroup& group = (*current);
+
+        // @RG ID:<ID>
+        out << Constants::SAM_RG_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_RG_ID_TAG, group.ID);
+
+        // CN:<SequencingCenter>
+        if ( group.HasSequencingCenter() )     out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, group.SequencingCenter);
+
+        // DS:<Description>
+        if ( group.HasDescription() )          out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, group.Description);
+
+        // DT:<ProductionDate>
+        if ( group.HasProductionDate() )       out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, group.ProductionDate);
+
+        // FO:<FlowOrder>
+        if ( group.HasFlowOrder() )            out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, group.FlowOrder);
+
+        // KS:<KeySequence>
+        if ( group.HasKeySequence() )          out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, group.KeySequence);
+
+        // LB:<Library>
+        if ( group.HasLibrary() )              out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, group.Library);
+
+        // PG:<Program>
+        if ( group.HasProgram() )              out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, group.Program);
+
+        // PI:<PredictedInsertSize>
+        if ( group.HasPredictedInsertSize() )  out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, group.PredictedInsertSize);
+
+        // PL:<SequencingTechnology>
+        if ( group.HasSequencingTechnology() ) out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, group.SequencingTechnology);
+
+        // PU:<PlatformUnit>
+        if ( group.HasPlatformUnit() )         out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, group.PlatformUnit);
+
+        // SM:<Sample>
+        if ( group.HasSample() )               out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, group.Sample);
+
+        // terminate the line
+        out << endl;
+
+        ++current;
+    }
+}
+
+// Writes one @PG line per program record: ID always, then PN, CL, PP and VN
+// when present.
+void SamFormatPrinter::PrintPG(std::stringstream& out) const {
+
+    // walk the program records
+    SamProgramConstIterator current = m_header.Programs.ConstBegin();
+    const SamProgramConstIterator last = m_header.Programs.ConstEnd();
+    while ( current != last ) {
+        const SamProgram& program = (*current);
+
+        // @PG ID:<ID>
+        out << Constants::SAM_PG_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_PG_ID_TAG, program.ID);
+
+        // PN:<Name>
+        if ( program.HasName() )              out << FormatTag(Constants::SAM_PG_NAME_TAG, program.Name);
+
+        // CL:<CommandLine>
+        if ( program.HasCommandLine() )       out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, program.CommandLine);
+
+        // PP:<PreviousProgramID>
+        if ( program.HasPreviousProgramID() ) out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, program.PreviousProgramID);
+
+        // VN:<Version>
+        if ( program.HasVersion() )           out << FormatTag(Constants::SAM_PG_VERSION_TAG, program.Version);
+
+        // terminate the line
+        out << endl;
+
+        ++current;
+    }
+}
+
+// Writes one "@CO<TAB><comment>" line per stored header comment.
+void SamFormatPrinter::PrintCO(std::stringstream& out) const {
+
+    // walk the comment list
+    vector<string>::const_iterator current = m_header.Comments.begin();
+    const vector<string>::const_iterator last = m_header.Comments.end();
+    while ( current != last ) {
+
+        // @CO <Comment>
+        out << Constants::SAM_CO_BEGIN_TOKEN << Constants::SAM_TAB;
+        out << (*current) << endl;
+
+        ++current;
+    }
+}
diff --git a/src/api/internal/sam/SamFormatPrinter_p.h b/src/api/internal/sam/SamFormatPrinter_p.h
new file mode 100644 (file)
index 0000000..ea29181
--- /dev/null
@@ -0,0 +1,59 @@
+// ***************************************************************************
+// SamFormatPrinter.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PRINTER_H
+#define SAM_FORMAT_PRINTER_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+// Formats the contents of a SamHeader as SAM header text.
+class SamFormatPrinter {
+
+    public:
+        // ctor & dtor
+        SamFormatPrinter(const BamTools::SamHeader& header);
+        ~SamFormatPrinter();
+
+        // generates SAM-formatted string from header data
+        const std::string ToString() const;
+
+    private:
+        // one writer per record type, each appending to 'out'
+        void PrintHD(std::stringstream& out) const;
+        void PrintSQ(std::stringstream& out) const;
+        void PrintRG(std::stringstream& out) const;
+        void PrintPG(std::stringstream& out) const;
+        void PrintCO(std::stringstream& out) const;
+
+        // header being printed (read-only, held by reference)
+        const SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PRINTER_H
diff --git a/src/api/internal/sam/SamHeaderValidator_p.cpp b/src/api/internal/sam/SamHeaderValidator_p.cpp
new file mode 100644 (file)
index 0000000..6bcb8a9
--- /dev/null
@@ -0,0 +1,524 @@
+// ***************************************************************************
+// SamHeaderValidator.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for validating SamHeader data
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamHeaderValidator_p.h"
+#include "api/internal/sam/SamHeaderVersion_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cctype>
+#include <set>
+#include <sstream>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// -------------------------
+
+// Returns true when 'lhs' and 'rhs' have the same length and every pair of
+// corresponding characters is equal after toupper() conversion.
+static
+bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
+
+    // can omit checking chars if lengths not equal
+    const string::size_type length = lhs.length();
+    if ( length != rhs.length() )
+        return false;
+
+    // do *basic* toupper checks on each string char's
+    // (toupper() is only defined for values representable as unsigned char,
+    //  so convert first - a plain char with the high bit set may be negative)
+    for ( string::size_type i = 0; i < length; ++i ) {
+        const unsigned char lhsChar = static_cast<unsigned char>(lhs[i]);
+        const unsigned char rhsChar = static_cast<unsigned char>(rhs[i]);
+        if ( toupper(lhsChar) != toupper(rhsChar) )
+            return false;
+    }
+
+    // otherwise OK
+    return true;
+}
+
+// ------------------------------------------------------------------------
+// Allow validation rules to vary, as needed, between SAM header versions
+//
+// use SAM_VERSION_X_Y to tag important changes
+//
+// Together, they will allow for comparisons like:
+// if ( m_version < SAM_VERSION_2_0 ) {
+//     // use some older rule
+// } else {
+//     // use rule introduced with version 2.0
+// }
+
+// one file-local constant per SAM header version, 1.0 through 1.4
+static const SamHeaderVersion SAM_VERSION_1_0(1,0);
+static const SamHeaderVersion SAM_VERSION_1_1(1,1);
+static const SamHeaderVersion SAM_VERSION_1_2(1,2);
+static const SamHeaderVersion SAM_VERSION_1_3(1,3);
+static const SamHeaderVersion SAM_VERSION_1_4(1,4);
+
+// TODO: This functionality is currently unused.
+//       Make validation "version-aware."
+//
+// ------------------------------------------------------------------------
+
+// text wrapped around every stored validation message
+const string SamHeaderValidator::ERROR_PREFIX("ERROR: ");
+const string SamHeaderValidator::WARN_PREFIX("WARNING: ");
+const string SamHeaderValidator::NEWLINE("\n");
+
+// binds the validator to the (read-only) header it will check
+SamHeaderValidator::SamHeaderValidator(const SamHeader& header) : m_header(header) { }
+
+// nothing to release explicitly
+SamHeaderValidator::~SamHeaderValidator() { }
+
+// stores 'message' as an error, wrapped in the error prefix and a newline
+void SamHeaderValidator::AddError(const string& message) {
+    string entry(ERROR_PREFIX);
+    entry += message;
+    entry += NEWLINE;
+    m_errorMessages.push_back(entry);
+}
+
+// stores 'message' as a warning, wrapped in the warning prefix and a newline
+void SamHeaderValidator::AddWarning(const string& message) {
+    string entry(WARN_PREFIX);
+    entry += message;
+    entry += NEWLINE;
+    m_warningMessages.push_back(entry);
+}
+
+// Writes a summary line followed by every stored error message to 'stream'.
+// Produces no output when no errors have been recorded.
+void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
+
+    // only print when there is something to report
+    if ( !m_errorMessages.empty() ) {
+
+        // summary line
+        stream << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
+
+        // individual messages (each was stored with its own trailing newline)
+        vector<string>::const_iterator current = m_errorMessages.begin();
+        const vector<string>::const_iterator last = m_errorMessages.end();
+        while ( current != last ) {
+            stream << (*current);
+            ++current;
+        }
+    }
+}
+
+// writes all stored messages to 'stream': errors first, then warnings
+void SamHeaderValidator::PrintMessages(ostream& stream) {
+    this->PrintErrorMessages(stream);
+    this->PrintWarningMessages(stream);
+}
+
+// Writes a summary line followed by every stored warning message to 'stream'.
+// Produces no output when no warnings have been recorded.
+void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
+
+    // only print when there is something to report
+    if ( !m_warningMessages.empty() ) {
+
+        // summary line
+        stream << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;
+
+        // individual messages (each was stored with its own trailing newline)
+        vector<string>::const_iterator current = m_warningMessages.begin();
+        const vector<string>::const_iterator last = m_warningMessages.end();
+        while ( current != last ) {
+            stream << (*current);
+            ++current;
+        }
+    }
+}
+
+// Entry point for validation. Every section is always checked (no early
+// exit), so messages are collected for all of them; returns true only if
+// all sections are valid.
+bool SamHeaderValidator::Validate(void) {
+    const bool metadataOk   = ValidateMetadata();
+    const bool sequencesOk  = ValidateSequenceDictionary();
+    const bool readGroupsOk = ValidateReadGroupDictionary();
+    const bool programsOk   = ValidateProgramChain();
+    return ( metadataOk && sequencesOk && readGroupsOk && programsOk );
+}
+
+// Checks the header 'metadata' fields (version, sort order, group order).
+// All three checks always run; returns true only if every one passes.
+bool SamHeaderValidator::ValidateMetadata(void) {
+    const bool versionOk    = ValidateVersion();
+    const bool sortOrderOk  = ValidateSortOrder();
+    const bool groupOrderOk = ValidateGroupOrder();
+    return ( versionOk && sortOrderOk && groupOrderOk );
+}
+
+// Checks the header version (VN) tag.
+// A missing version only produces a warning (returns true). Otherwise the
+// value must look like <digits><period><digits>; anything else adds an error
+// and returns false.
+bool SamHeaderValidator::ValidateVersion(void) {
+
+    const string& version = m_header.Version;
+
+    // absent version: warn, but do not treat as invalid
+    if ( version.empty() ) {
+        AddWarning("Version (VN) missing. Not required, but strongly recommended");
+        return true;
+    }
+
+    // a period must separate the major & minor parts
+    const string::size_type periodPosition = version.find(Constants::SAM_PERIOD);
+    bool isWellFormed = ( periodPosition != string::npos );
+
+    // both major & minor parts must be non-empty and all digits
+    if ( isWellFormed ) {
+        const string majorPart = version.substr(0, periodPosition);
+        const string minorPart = version.substr(periodPosition + 1);
+        isWellFormed = !majorPart.empty() && ContainsOnlyDigits(majorPart) &&
+                       !minorPart.empty() && ContainsOnlyDigits(minorPart);
+    }
+
+    // any failed check is reported with the same message
+    if ( !isWellFormed ) {
+        AddError("Invalid version (VN) format: " + version);
+        return false;
+    }
+
+    // TODO: check if version is not just syntactically OK,
+    // but is also a valid SAM version ( 1.0 .. CURRENT )
+
+    // syntactically OK
+    return true;
+}
+
+// Returns true when no character of 's' falls outside Constants::SAM_DIGITS.
+// An empty string also yields true, so callers check for emptiness first.
+bool SamHeaderValidator::ContainsOnlyDigits(const string& s) {
+    return ( s.find_first_not_of(Constants::SAM_DIGITS) == string::npos );
+}
+
+// Checks the header sort order (SO) tag.
+// A missing value only produces a warning (returns true); a present value
+// must match one of the known sort order keywords, otherwise an error is added.
+bool SamHeaderValidator::ValidateSortOrder(void) {
+
+    const string& sortOrder = m_header.SortOrder;
+
+    // absent sort order: warn, but do not treat as invalid
+    if ( sortOrder.empty() ) {
+        AddWarning("Sort order (SO) missing. Not required, but strongly recommended");
+        return true;
+    }
+
+    // compare against the accepted keywords
+    const bool isKnownKeyword = ( sortOrder == Constants::SAM_HD_SORTORDER_COORDINATE ||
+                                  sortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME  ||
+                                  sortOrder == Constants::SAM_HD_SORTORDER_UNSORTED );
+
+    // anything else is an error
+    if ( !isKnownKeyword )
+        AddError("Invalid sort order (SO): " + sortOrder);
+    return isKnownKeyword;
+}
+
+// Checks the header group order (GO) tag.
+// A missing value is fine (returns true, no message); a present value must
+// match one of the known group order keywords, otherwise an error is added.
+bool SamHeaderValidator::ValidateGroupOrder(void) {
+
+    const string& groupOrder = m_header.GroupOrder;
+
+    // nothing to check when the tag is absent
+    if ( groupOrder.empty() )
+        return true;
+
+    // compare against the accepted keywords
+    const bool isKnownKeyword = ( groupOrder == Constants::SAM_HD_GROUPORDER_NONE  ||
+                                  groupOrder == Constants::SAM_HD_GROUPORDER_QUERY ||
+                                  groupOrder == Constants::SAM_HD_GROUPORDER_REFERENCE );
+
+    // anything else is an error
+    if ( !isKnownKeyword )
+        AddError("Invalid group order (GO): " + groupOrder);
+    return isKnownKeyword;
+}
+
+// Validates the sequence dictionary: names must be unique and every entry
+// must pass ValidateSequence(). All entries are checked even after a failure.
+bool SamHeaderValidator::ValidateSequenceDictionary(void) {
+
+    // uniqueness of sequence names across the whole dictionary
+    bool allValid = ContainsUniqueSequenceNames();
+
+    // per-entry checks
+    const SamSequenceDictionary& dictionary = m_header.Sequences;
+    SamSequenceConstIterator current = dictionary.ConstBegin();
+    const SamSequenceConstIterator last = dictionary.ConstEnd();
+    while ( current != last ) {
+        // validate first, so a prior failure never skips the call
+        const bool entryValid = ValidateSequence(*current);
+        allValid = allValid && entryValid;
+        ++current;
+    }
+
+    return allValid;
+}
+
+// Checks that no sequence name (SN) occurs more than once.
+// Adds one error per repeated occurrence; returns false if any was found.
+bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
+
+    bool allUnique = true;
+    set<string> seenNames;
+
+    // walk the sequence dictionary
+    const SamSequenceDictionary& dictionary = m_header.Sequences;
+    SamSequenceConstIterator current = dictionary.ConstBegin();
+    const SamSequenceConstIterator last = dictionary.ConstEnd();
+    while ( current != last ) {
+        const string& name = (*current).Name;
+
+        // insert() reports via .second whether the name was newly added;
+        // false means it was already there (duplicate entry)
+        if ( !seenNames.insert(name).second ) {
+            AddError("Sequence name (SN): " + name + " is not unique");
+            allUnique = false;
+        }
+
+        ++current;
+    }
+
+    return allUnique;
+}
+
+// Validates a single sequence entry: name format and length range.
+// Both checks always run; returns true only if both pass.
+bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
+    const bool nameOk   = CheckNameFormat(seq.Name);
+    const bool lengthOk = CheckLengthInRange(seq.Length);
+    return ( nameOk && lengthOk );
+}
+
+// Checks a sequence name (SN): it must be non-empty and must not begin with
+// one of the reserved characters (Constants::SAM_EQUAL, Constants::SAM_STAR).
+// Adds an error and returns false otherwise.
+bool SamHeaderValidator::CheckNameFormat(const string& name) {
+
+    // an empty name means the SN tag was never set
+    if ( name.empty() ) {
+        AddError("Sequence entry (@SQ) is missing SN tag");
+        return false;
+    }
+
+    // reject names that start with a reserved character
+    const char leadingChar = name.at(0);
+    const bool startsWithReserved = ( leadingChar == Constants::SAM_EQUAL ||
+                                      leadingChar == Constants::SAM_STAR );
+    if ( startsWithReserved )
+        AddError("Invalid sequence name (SN): " + name);
+
+    return !startsWithReserved;
+}
+
+// Checks a sequence length (LN): it must be present, consist only of digits,
+// and lie within [Constants::SAM_SQ_LENGTH_MIN, Constants::SAM_SQ_LENGTH_MAX].
+// Adds an error and returns false otherwise.
+bool SamHeaderValidator::CheckLengthInRange(const string& length) {
+
+    // invalid if empty
+    if ( length.empty() ) {
+        AddError("Sequence entry (@SQ) is missing LN tag");
+        return false;
+    }
+
+    // invalid if not purely numeric (stream extraction alone would accept a
+    // numeric prefix such as "12abc" and ignore the rest)
+    if ( !ContainsOnlyDigits(length) ) {
+        AddError("Invalid sequence length (LN): " + length);
+        return false;
+    }
+
+    // convert string length to numeric; start from 0 so the value is never
+    // read uninitialized if extraction fails (e.g. on overflow)
+    stringstream lengthStream(length);
+    unsigned int sequenceLength = 0;
+    lengthStream >> sequenceLength;
+
+    // invalid if conversion failed or length is outside accepted range
+    if ( lengthStream.fail() ||
+         sequenceLength < Constants::SAM_SQ_LENGTH_MIN ||
+         sequenceLength > Constants::SAM_SQ_LENGTH_MAX )
+    {
+        AddError("Sequence length (LN): " + length + " out of range");
+        return false;
+    }
+
+    // otherwise OK
+    return true;
+}
+
+// Validates the read group dictionary: IDs & platform units must be unique
+// and every entry must pass ValidateReadGroup(). All entries are checked even
+// after a failure.
+bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
+
+    // uniqueness of IDs & platform units across the whole dictionary
+    bool allValid = ContainsUniqueIDsAndPlatformUnits();
+
+    // per-entry checks
+    const SamReadGroupDictionary& dictionary = m_header.ReadGroups;
+    SamReadGroupConstIterator current = dictionary.ConstBegin();
+    const SamReadGroupConstIterator last = dictionary.ConstEnd();
+    while ( current != last ) {
+        // validate first, so a prior failure never skips the call
+        const bool entryValid = ValidateReadGroup(*current);
+        allValid = allValid && entryValid;
+        ++current;
+    }
+
+    return allValid;
+}
+
+// Makes sure read group IDs and platform units are unique across the header.
+// Records one error message per duplicate found; returns true if there were
+// no duplicates.
+bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
+
+    bool isValid = true;
+    set<string> readGroupIds;
+    set<string> platformUnits;
+
+    // iterate over read groups
+    const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
+    SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
+    SamReadGroupConstIterator rgEnd  = readGroups.ConstEnd();
+    for ( ; rgIter != rgEnd; ++rgIter ) {
+        const SamReadGroup& rg = (*rgIter);
+
+        // --------------------------------
+        // check for unique ID
+
+        // set::insert() reports through '.second' whether the value was new,
+        // so the lookup and the store happen in one step
+        const string& id = rg.ID;
+        if ( !readGroupIds.insert(id).second ) {
+            AddError("Read group ID (ID): " + id + " is not unique");
+            isValid = false;
+        }
+
+        // --------------------------------
+        // check for unique platform unit
+
+        // PU is an optional tag; an empty string is treated as "tag absent"
+        // (the same convention CheckReadGroupID uses for a missing ID), so
+        // read groups without a PU must not be reported as duplicates of
+        // one another
+        const string& pu = rg.PlatformUnit;
+        if ( pu.empty() )
+            continue;
+
+        if ( !platformUnits.insert(pu).second ) {
+            AddError("Platform unit (PU): " + pu + " is not unique");
+            isValid = false;
+        }
+    }
+
+    // return validation state
+    return isValid;
+}
+
+// Validates a single @RG entry: runs both the ID check and the sequencing
+// technology check (neither is skipped when the other fails) and returns
+// true only if both passed.
+bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
+    const bool idOk         = CheckReadGroupID(rg.ID);
+    const bool technologyOk = CheckSequencingTechnology(rg.SequencingTechnology);
+    return ( idOk && technologyOk );
+}
+
+// Checks that a read group ID is present.
+// Returns true for a non-empty ID; for an empty one, records an error
+// message and returns false.
+bool SamHeaderValidator::CheckReadGroupID(const string& id) {
+
+    // anything non-empty is accepted
+    if ( !id.empty() )
+        return true;
+
+    // an empty ID is reported as a missing tag
+    AddError("Read group entry (@RG) is missing ID tag");
+    return false;
+}
+
+// Checks that a read group's sequencing technology, if one was given, matches
+// one of the accepted keywords (compared via caseInsensitiveCompare).
+// Returns true for an empty or recognized value; otherwise records an error
+// message and returns false.
+bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
+
+    // nothing to check when no technology was provided
+    if ( !technology.empty() ) {
+
+        // compare against each accepted keyword, stopping at the first match
+        const bool recognized =
+            caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_CAPILLARY)  ||
+            caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS)    ||
+            caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA)   ||
+            caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) ||
+            caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454)      ||
+            caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO)     ||
+            caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID);
+
+        // unknown keyword
+        if ( !recognized ) {
+            AddError("Invalid read group sequencing platform (PL): " + technology);
+            return false;
+        }
+    }
+
+    // empty or recognized
+    return true;
+}
+
+// Validates the header's program (@PG) records: runs both the ID uniqueness
+// check and the previous-program (PP) reference check, and returns true only
+// if both passed.
+bool SamHeaderValidator::ValidateProgramChain(void) {
+    const bool idsUnique     = ContainsUniqueProgramIds();
+    const bool previousIdsOk = ValidatePreviousProgramIds();
+    return ( idsUnique && previousIdsOk );
+}
+
+// Makes sure all program (@PG) IDs in the header are unique.
+// Records one error message per repeated ID; returns true if no ID was
+// seen more than once.
+bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
+
+    bool allUnique = true;
+    set<string> seenIds;
+
+    // walk the program records, remembering each ID as we go
+    const SamProgramChain& programs = m_header.Programs;
+    const SamProgramConstIterator last = programs.ConstEnd();
+    for ( SamProgramConstIterator it = programs.ConstBegin(); it != last; ++it ) {
+        const string& currentId = (*it).ID;
+
+        // insert() leaves the set untouched and reports '.second == false'
+        // when the ID is already stored, i.e. this record is a duplicate
+        const bool isNewId = seenIds.insert(currentId).second;
+        if ( !isNewId ) {
+            AddError("Program ID (ID): " + currentId + " is not unique");
+            allUnique = false;
+        }
+    }
+
+    return allUnique;
+}
+
+// Makes sure every non-empty PreviousProgramID (PP) names an ID that the
+// program chain contains. Records one error message per unknown reference;
+// returns true if there were none.
+bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
+
+    bool allKnown = true;
+
+    const SamProgramChain& programs = m_header.Programs;
+    const SamProgramConstIterator last = programs.ConstEnd();
+    for ( SamProgramConstIterator it = programs.ConstBegin(); it != last; ++it ) {
+        const string& previousId = (*it).PreviousProgramID;
+
+        // records without a PP value are not checked; any other value
+        // must be found in the program chain
+        if ( !previousId.empty() && !programs.Contains(previousId) ) {
+            AddError("PreviousProgramID (PP): " + previousId + " is not a known ID");
+            allKnown = false;
+        }
+    }
+
+    return allKnown;
+}
diff --git a/src/api/internal/sam/SamHeaderValidator_p.h b/src/api/internal/sam/SamHeaderValidator_p.h
new file mode 100644 (file)
index 0000000..7d0c60a
--- /dev/null
@@ -0,0 +1,105 @@
+// ***************************************************************************
+// SamHeaderValidator_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for validating SamHeader data
+// ***************************************************************************
+
+#ifndef SAM_HEADER_VALIDATOR_P_H
+#define SAM_HEADER_VALIDATOR_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+class SamReadGroup;
+class SamSequence;
+
+namespace Internal {
+
+// Validates the data held by a SamHeader, collecting error and warning
+// messages that can be printed afterwards.
+class SamHeaderValidator {
+
+    // construction & destruction
+    public:
+        SamHeaderValidator(const SamHeader& header);
+        ~SamHeaderValidator();
+
+    // public interface
+    public:
+
+        // writes the collected error & warning messages to 'stream'
+        void PrintMessages(std::ostream& stream);
+
+        // runs validation on the SamHeader data; true if valid, false if not
+        bool Validate();
+
+    // implementation helpers
+    private:
+
+        // header metadata
+        bool ValidateMetadata();
+        bool ValidateVersion();
+        bool ContainsOnlyDigits(const std::string& s);
+        bool ValidateSortOrder();
+        bool ValidateGroupOrder();
+
+        // sequence dictionary (@SQ)
+        bool ValidateSequenceDictionary();
+        bool ContainsUniqueSequenceNames();
+        bool CheckNameFormat(const std::string& name);
+        bool ValidateSequence(const SamSequence& seq);
+        bool CheckLengthInRange(const std::string& length);
+
+        // read group dictionary (@RG)
+        bool ValidateReadGroupDictionary();
+        bool ContainsUniqueIDsAndPlatformUnits();
+        bool ValidateReadGroup(const SamReadGroup& rg);
+        bool CheckReadGroupID(const std::string& id);
+        bool CheckSequencingTechnology(const std::string& technology);
+
+        // program records (@PG)
+        bool ValidateProgramChain();
+        bool ContainsUniqueProgramIds();
+        bool ValidatePreviousProgramIds();
+
+        // message collection & output
+        void AddError(const std::string& message);
+        void AddWarning(const std::string& message);
+        void PrintErrorMessages(std::ostream& stream);
+        void PrintWarningMessages(std::ostream& stream);
+
+    // data members
+    private:
+
+        // the SamHeader under validation; held by reference, not copied
+        const SamHeader& m_header;
+
+        // fixed strings used when reporting messages
+        static const std::string ERROR_PREFIX;
+        static const std::string WARN_PREFIX;
+        static const std::string NEWLINE;
+
+        // messages collected so far
+        std::vector<std::string> m_errorMessages;
+        std::vector<std::string> m_warningMessages;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_HEADER_VALIDATOR_P_H
diff --git a/src/api/internal/sam/SamHeaderVersion_p.h b/src/api/internal/sam/SamHeaderVersion_p.h
new file mode 100644 (file)
index 0000000..4f85df0
--- /dev/null
@@ -0,0 +1,134 @@
+// ***************************************************************************
+// SamHeaderVersion_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for comparing SAM header versions
+// *************************************************************************
+
+#ifndef SAM_HEADERVERSION_P_H
+#define SAM_HEADERVERSION_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamConstants.h"
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Holds a SAM header version as a pair of unsigned integers (major, minor).
+class SamHeaderVersion {
+
+    // construction & destruction
+    public:
+
+        // default: version 0.0
+        SamHeaderVersion()
+            : m_majorVersion(0)
+            , m_minorVersion(0)
+        { }
+
+        // starts from 0.0, then applies SetVersion() to the given text
+        explicit SamHeaderVersion(const std::string& version)
+            : m_majorVersion(0)
+            , m_minorVersion(0)
+        {
+            SetVersion(version);
+        }
+
+        // takes the two version components directly
+        SamHeaderVersion(const unsigned int& major, const unsigned int& minor)
+            : m_majorVersion(major)
+            , m_minorVersion(minor)
+        { }
+
+        // resets both components on destruction
+        ~SamHeaderVersion() {
+            m_majorVersion = 0;
+            m_minorVersion = 0;
+        }
+
+    // access data
+    public:
+        unsigned int MajorVersion() const { return m_majorVersion; }
+        unsigned int MinorVersion() const { return m_minorVersion; }
+
+        // parses "<major>.<minor>" text into the stored components
+        void SetVersion(const std::string& version);
+
+        // formats the stored components back into text
+        std::string ToString() const;
+
+    // data members
+    private:
+        unsigned int m_majorVersion;
+        unsigned int m_minorVersion;
+};
+
+// Parses a version string of the form "<major><SAM_PERIOD><minor>".
+//
+// Each half is stored only if it is non-empty and consists solely of
+// characters from Constants::SAM_DIGITS; a half that does not qualify leaves
+// the corresponding member untouched. If 'version' is empty or contains no
+// period, nothing is changed.
+inline
+void SamHeaderVersion::SetVersion(const std::string& version) {
+
+    // do nothing if period not found (this also covers an empty string)
+    const size_t periodFound = version.find(Constants::SAM_PERIOD);
+    if ( periodFound == std::string::npos )
+        return;
+
+    // NOTE: each half is parsed with its own stringstream. Reusing a single
+    // stream via str() does not work: extracting the major number runs into
+    // end-of-input and sets eofbit, str() does not reset the stream state,
+    // and the following extraction of the minor number would then fail
+    // without storing anything.
+
+    // store major version if non-empty and contains only digits
+    const std::string majorVersion = version.substr(0, periodFound);
+    if ( !majorVersion.empty() &&
+         majorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
+    {
+        std::stringstream majorStream(majorVersion);
+        majorStream >> m_majorVersion;
+    }
+
+    // store minor version if non-empty and contains only digits
+    const std::string minorVersion = version.substr(periodFound + 1);
+    if ( !minorVersion.empty() &&
+         minorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
+    {
+        std::stringstream minorStream(minorVersion);
+        minorStream >> m_minorVersion;
+    }
+}
+
+// -----------------------------------------------------
+// printing
+
+// Formats the version as "<major><SAM_PERIOD><minor>".
+inline std::string SamHeaderVersion::ToString(void) const {
+    std::stringstream text;
+    text << m_majorVersion;
+    text << Constants::SAM_PERIOD;
+    text << m_minorVersion;
+    return text.str();
+}
+
+// -----------------------------------------------------
+// comparison operators
+
+// Two versions are equal when both their major and minor components match.
+inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    if ( lhs.MajorVersion() != rhs.MajorVersion() )
+        return false;
+    return lhs.MinorVersion() == rhs.MinorVersion();
+}
+
+// Orders versions by major component first; the minor component only
+// decides when the major components are equal.
+inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    if ( lhs.MajorVersion() != rhs.MajorVersion() )
+        return lhs.MajorVersion() < rhs.MajorVersion();
+    return lhs.MinorVersion() < rhs.MinorVersion();
+}
+
+// The remaining relational operators are all expressed through operator<.
+
+// lhs > rhs  <=>  rhs < lhs
+inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    return ( rhs < lhs );
+}
+
+// lhs <= rhs  <=>  !(rhs < lhs)
+inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    return !( rhs < lhs );
+}
+
+// lhs >= rhs  <=>  !(lhs < rhs)
+inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    return !( lhs < rhs );
+}
+
+} // namespace Internal 
+} // namespace BamTools
+
+#endif // SAM_HEADERVERSION_P_H
diff --git a/src/api/internal/utils/BamException_p.cpp b/src/api/internal/utils/BamException_p.cpp
new file mode 100644 (file)
index 0000000..103e34b
--- /dev/null
@@ -0,0 +1,15 @@
+// ***************************************************************************
+// BamException_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+// text placed between the 'where' and 'message' parts of an error string
+const string BamException::SEPARATOR(": ");
diff --git a/src/api/internal/utils/BamException_p.h b/src/api/internal/utils/BamException_p.h
new file mode 100644 (file)
index 0000000..5199737
--- /dev/null
@@ -0,0 +1,51 @@
+// ***************************************************************************
+// BamException_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#ifndef BAMEXCEPTION_P_H
+#define BAMEXCEPTION_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <exception>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Basic exception type for BamTools internals. The text returned by what()
+// is the 'where' argument, followed by SEPARATOR, followed by 'message'.
+class BamException : public std::exception {
+
+    public:
+        // the full error string is assembled once, at construction
+        BamException(const std::string& where, const std::string& message)
+            : std::exception()
+            , m_errorString(where + SEPARATOR + message)
+        { }
+
+        ~BamException() throw() { }
+
+        // returned pointer refers to the string stored in this object
+        const char* what() const throw() {
+            return m_errorString.c_str();
+        }
+
+    private:
+        // assembled error text
+        std::string m_errorString;
+
+        // declared here, defined outside the class
+        static const std::string SEPARATOR;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMEXCEPTION_P_H