// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg\r
// Marth Lab, Department of Biology, Boston College\r
// ---------------------------------------------------------------------------\r
-// Last modified: 10 October 2011 (DB)\r
+// Last modified: 25 October 2011 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides data structures & utility methods that are used throughout the API.\r
// ***************************************************************************\r
#define BAMAUX_H\r
\r
#include "api/api_global.h"\r
+#include <cstring>\r
#include <fstream> \r
#include <iostream>\r
#include <string>\r
\internal\r
*/\r
struct RaiiBuffer {\r
+\r
+ // data members\r
+ char* Buffer;\r
+ const size_t NumBytes;\r
+\r
+ // ctor & dtor\r
RaiiBuffer(const size_t n)\r
: Buffer( new char[n]() )\r
+ , NumBytes(n)\r
{ }\r
+\r
~RaiiBuffer(void) {\r
delete[] Buffer;\r
}\r
- char* Buffer;\r
+\r
+ // add'l methods\r
+ void Clear(void) {\r
+ memset(Buffer, 0, NumBytes);\r
+ }\r
};\r
\r
} // namespace BamTools\r
// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
//
// ***************************************************************************
#include "api/BamMultiReader.h"
-#include "api/internal/BamMultiReader_p.h"
+#include "api/internal/bam/BamMultiReader_p.h"
using namespace BamTools;
#include <string>
// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
// ***************************************************************************
// BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides read access to BAM files.
// ***************************************************************************
#include "api/BamReader.h"
-#include "api/internal/BamReader_p.h"
+#include "api/internal/bam/BamReader_p.h"
using namespace BamTools;
using namespace BamTools::Internal;
// BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett\r
// Marth Lab, Department of Biology, Boston College\r
// ---------------------------------------------------------------------------\r
-// Last modified: 10 October 2011 (DB)\r
+// Last modified: 25 October 2011 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides the basic functionality for producing BAM files\r
// ***************************************************************************\r
#include "api/BamAlignment.h"\r
#include "api/BamWriter.h"\r
#include "api/SamHeader.h"\r
-#include "api/internal/BamWriter_p.h"\r
+#include "api/internal/bam/BamWriter_p.h"\r
using namespace BamTools;\r
using namespace BamTools::Internal;\r
using namespace std;\r
add_definitions( -DBAMTOOLS_API_LIBRARY ) # (for proper exporting of library symbols)
add_definitions( -fPIC ) # (attempt to force PIC compiling on CentOS, not being set on shared libs by CMake)
-# list of all BamTools API source (.cpp) files
+# fetch all internal source files
+add_subdirectory ( internal )
+
+# make list of all API source files
set( BamToolsAPISources
BamAlignment.cpp
BamMultiReader.cpp
SamReadGroupDictionary.cpp
SamSequence.cpp
SamSequenceDictionary.cpp
- internal/BamDeviceFactory_p.cpp
- internal/BamException_p.cpp
- internal/BamFile_p.cpp
- internal/BamFtp_p.cpp
- internal/BamHeader_p.cpp
- internal/BamHttp_p.cpp
- internal/BamIndexFactory_p.cpp
- internal/BamMultiReader_p.cpp
- internal/BamPipe_p.cpp
- internal/BamRandomAccessController_p.cpp
- internal/BamReader_p.cpp
- internal/BamStandardIndex_p.cpp
- internal/BamToolsIndex_p.cpp
- internal/BamWriter_p.cpp
- internal/BgzfStream_p.cpp
- internal/ILocalIODevice_p.cpp
- internal/IRemoteIODevice_p.cpp
- internal/SamFormatParser_p.cpp
- internal/SamFormatPrinter_p.cpp
- internal/SamHeaderValidator_p.cpp
+ ${InternalSources}
)
# create main BamTools API shared library
set_target_properties( BamTools PROPERTIES
SOVERSION "2.0.5"
OUTPUT_NAME "bamtools" )
-target_link_libraries( BamTools z )
-install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin" )
# create main BamTools API static library
add_library( BamTools-static STATIC ${BamToolsAPISources} )
-set_target_properties( BamTools-static PROPERTIES
- OUTPUT_NAME "bamtools"
+set_target_properties( BamTools-static PROPERTIES
+ OUTPUT_NAME "bamtools"
PREFIX "lib" )
-target_link_libraries( BamTools-static z )
-install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools" )
+
+# choose the libraries that both API targets link against:
+# zlib on every platform, plus Winsock (ws2_32) when targeting Windows.
+# NOTE: the CMake platform variable is WIN32; _WIN32 is the compiler's
+# preprocessor macro and is not defined as a CMake variable, so testing it
+# here would always fall through to the else() branch.
+if ( WIN32 )
+ set( APILibs z ws2_32 )
+else ( WIN32 )
+ set( APILibs z )
+endif ( WIN32 )
+
+target_link_libraries( BamTools ${APILibs} )
+target_link_libraries( BamTools-static ${APILibs} )
+
+# set library install destinations
+install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin")
+install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools")
# export API headers
-include( ../ExportHeader.cmake )
-set( ApiIncludeDir "api" )
-ExportHeader( APIHeaders api_global.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamAlgorithms.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamAlignment.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamAux.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamConstants.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamIndex.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamMultiReader.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamReader.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders BamWriter.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders IBamIODevice.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamConstants.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamHeader.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamProgram.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamProgramChain.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamReadGroup.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamSequence.h ${ApiIncludeDir} )
-ExportHeader( APIHeaders SamSequenceDictionary.h ${ApiIncludeDir} )
+include(../ExportHeader.cmake)
+set(ApiIncludeDir "api")
+ExportHeader(APIHeaders api_global.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAlgorithms.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAlignment.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAux.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamConstants.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamIndex.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamMultiReader.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamReader.h ${ApiIncludeDir})
+ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir})
+ExportHeader(APIHeaders IBamIODevice.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgram.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgramChain.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamSequence.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamSequenceDictionary.h ${ApiIncludeDir})
set( AlgorithmsIncludeDir "api/algorithms" )
ExportHeader( AlgorithmsHeaders algorithms/Sort.h ${AlgorithmsIncludeDir} )
// IBamIODevice.h (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 10 November 2011 (DB)
// ---------------------------------------------------------------------------
// Base class for all BAM I/O devices (e.g. local file, pipe, HTTP, FTP, etc.)
//
#define IBAMIODEVICE_H
#include "api/api_global.h"
+#include <cstdio>
#include <string>
namespace BamTools {
class API_EXPORT IBamIODevice {
// enums
- public: enum OpenMode { NotOpen = 0
- , ReadOnly
- , WriteOnly
+ public: enum OpenMode { NotOpen = 0x0000 // no mode bit set
+ , ReadOnly = 0x0001 // bit 0
+ , WriteOnly = 0x0002 // bit 1
+ , ReadWrite = ReadOnly | WriteOnly // bitwise OR of the two flags above (0x0003)
};
// ctor & dtor
// IBamIODevice interface
public:
+ // TODO: add seek(pos, *from*)
+
// pure virtuals
virtual void Close(void) =0;
virtual bool IsRandomAccess(void) const =0;
virtual bool Open(const OpenMode mode) =0;
- virtual size_t Read(char* data, const unsigned int numBytes) =0;
- virtual bool Seek(const int64_t& position) =0;
+ virtual int64_t Read(char* data, const unsigned int numBytes) =0; // result is now signed 64-bit (was size_t); presumably so a negative value can report failure - confirm against implementations
+ virtual bool Seek(const int64_t& position, const int origin = SEEK_SET) =0; // origin defaults to SEEK_SET (<cstdio>); presumably interpreted like fseek's 'whence' - confirm against implementations
virtual int64_t Tell(void) const =0;
- virtual size_t Write(const char* data, const unsigned int numBytes) =0;
+ virtual int64_t Write(const char* data, const unsigned int numBytes) =0; // result is now signed 64-bit (was size_t), matching Read
// default implementation provided
virtual std::string GetErrorString(void);
// SamHeader.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 25 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM header data fields.
// ***************************************************************************
#include "api/SamConstants.h"
#include "api/SamHeader.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/SamFormatParser_p.h"
-#include "api/internal/SamFormatPrinter_p.h"
-#include "api/internal/SamHeaderValidator_p.h"
+#include "api/internal/utils/BamException_p.h"
+#include "api/internal/sam/SamFormatParser_p.h"
+#include "api/internal/sam/SamFormatPrinter_p.h"
+#include "api/internal/sam/SamHeaderValidator_p.h"
using namespace BamTools;
using namespace BamTools::Internal;
using namespace std;
+++ /dev/null
-// ***************************************************************************
-// BamDeviceFactory_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 September 2011 (DB)
-// ---------------------------------------------------------------------------
-// Creates built-in concrete implementations of IBamIODevices
-// ***************************************************************************
-
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamFile_p.h"
-#include "api/internal/BamFtp_p.h"
-#include "api/internal/BamHttp_p.h"
-#include "api/internal/BamPipe_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-using namespace std;
-
-IBamIODevice* BamDeviceFactory::CreateDevice(const string& source) {
-
- // check for requested pipe
- if ( source == "-" || source == "stdin" || source == "stdout" )
- return new BamPipe;
-
- // check for HTTP prefix
- if ( source.find("http://") == 0 )
- return new BamHttp(source);
-
- // check for FTP prefix
- if ( source.find("ftp://") == 0 )
- return new BamFtp(source);
-
- // otherwise assume a "normal" file
- return new BamFile(source);
-}
+++ /dev/null
-// ***************************************************************************
-// BamDeviceFactory_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Creates built-in concrete implementations of IBamIODevices
-// ***************************************************************************
-
-#ifndef BAMDEVICEFACTORY_P_H
-#define BAMDEVICEFACTORY_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamDeviceFactory {
- public:
- static IBamIODevice* CreateDevice(const std::string& source);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMDEVICEFACTORY_P_H
+++ /dev/null
-// ***************************************************************************
-// BamException_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides a basic exception class for BamTools internals
-// ***************************************************************************
-
-#include "api/internal/BamException_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-using namespace std;
-
-const string BamException::SEPARATOR = ": ";
+++ /dev/null
-// ***************************************************************************
-// BamException_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides a basic exception class for BamTools internals
-// ***************************************************************************
-
-#ifndef BAMEXCEPTION_P_H
-#define BAMEXCEPTION_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <exception>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamException : public std::exception {
-
- public:
- inline BamException(const std::string& where, const std::string& message)
- : std::exception()
- , m_errorString(where + SEPARATOR + message)
- { }
-
- inline ~BamException(void) throw() { }
-
- inline const char* what(void) const throw() {
- return m_errorString.c_str();
- }
-
- private:
- std::string m_errorString;
- static const std::string SEPARATOR;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMEXCEPTION_P_H
+++ /dev/null
-// ***************************************************************************
-// BamFile_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM file-specific IO behavior
-// ***************************************************************************
-
-#include "api/internal/BamFile_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <iostream>
-using namespace std;
-
-BamFile::BamFile(const string& filename)
- : ILocalIODevice()
- , m_filename(filename)
-{ }
-
-BamFile::~BamFile(void) { }
-
-void BamFile::Close(void) {
- if ( IsOpen() ) {
- m_filename.clear();
- ILocalIODevice::Close();
- }
-}
-
-bool BamFile::IsRandomAccess(void) const {
- return true;
-}
-
-bool BamFile::Open(const IBamIODevice::OpenMode mode) {
-
- // make sure we're starting with a fresh file stream
- Close();
-
- // attempt to open FILE* depending on requested openmode
- if ( mode == IBamIODevice::ReadOnly )
- m_stream = fopen(m_filename.c_str(), "rb");
- else if ( mode == IBamIODevice::WriteOnly )
- m_stream = fopen(m_filename.c_str(), "wb");
- else {
- SetErrorString("BamFile::Open", "unknown open mode requested");
- return false;
- }
-
- // check that we obtained a valid FILE*
- if ( m_stream == 0 ) {
- const string message_base = string("could not open file handle for ");
- const string message = message_base + ( (m_filename.empty()) ? "empty filename" : m_filename );
- SetErrorString("BamFile::Open", message);
- return false;
- }
-
- // store current IO mode & return success
- m_mode = mode;
- return true;
-}
-
-bool BamFile::Seek(const int64_t& position) {
- BT_ASSERT_X( m_stream, "BamFile::Seek() - null stream" );
- return ( fseek64(m_stream, position, SEEK_SET) == 0 );
-}
+++ /dev/null
-// ***************************************************************************
-// BamFile_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM file-specific IO behavior
-// ***************************************************************************
-
-#ifndef BAMFILE_P_H
-#define BAMFILE_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/internal/ILocalIODevice_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamFile : public ILocalIODevice {
-
- // ctor & dtor
- public:
- BamFile(const std::string& filename);
- ~BamFile(void);
-
- // ILocalIODevice implementation
- public:
- void Close(void);
- bool IsRandomAccess(void) const;
- bool Open(const IBamIODevice::OpenMode mode);
- bool Seek(const int64_t& position);
-
- // data members
- private:
- std::string m_filename;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMFILE_P_H
+++ /dev/null
-// ***************************************************************************
-// BamFtp_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on FTP server
-// ***************************************************************************
-
-#include "api/internal/BamFtp_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-using namespace std;
-
-BamFtp::BamFtp(const string& url)
- : IBamIODevice()
-{
- BT_ASSERT_X(false, "BamFtp not yet implemented");
-}
-
-BamFtp::~BamFtp(void) { }
-
-void BamFtp::Close(void) {
- return ;
-}
-
-bool BamFtp::IsRandomAccess(void) const {
- return true;
-}
-
-bool BamFtp::Open(const IBamIODevice::OpenMode mode) {
- (void) mode;
- return true;
-}
-
-size_t BamFtp::Read(char* data, const unsigned int numBytes) {
- (void)data;
- (void)numBytes;
- return 0;
-}
-
-bool BamFtp::Seek(const int64_t& position) {
- (void)position;
- return true;
-}
-
-int64_t BamFtp::Tell(void) const {
- return -1;
-}
-
-size_t BamFtp::Write(const char* data, const unsigned int numBytes) {
- (void)data;
- (void)numBytes;
- return 0;
-}
+++ /dev/null
-// ***************************************************************************
-// BamFtp_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on FTP server
-// ***************************************************************************
-
-#ifndef BAMFTP_P_H
-#define BAMFTP_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamFtp : public IBamIODevice {
-
- // ctor & dtor
- public:
- BamFtp(const std::string& url);
- ~BamFtp(void);
-
- // IBamIODevice implementation
- public:
- void Close(void);
- bool IsRandomAccess(void) const;
- bool Open(const IBamIODevice::OpenMode mode);
- size_t Read(char* data, const unsigned int numBytes);
- bool Seek(const int64_t& position);
- int64_t Tell(void) const;
- size_t Write(const char* data, const unsigned int numBytes);
-
- // internal methods
- private:
-
- // data members
- private:
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMFTP_P_H
+++ /dev/null
-// ***************************************************************************
-// BamHeader_p.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for handling BAM headers.
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/BamConstants.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdlib>
-#include <cstring>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// ------------------------
-
-static inline
-bool isValidMagicNumber(const char* buffer) {
- return ( strncmp(buffer, Constants::BAM_HEADER_MAGIC,
- Constants::BAM_HEADER_MAGIC_LENGTH) == 0 );
-}
-
-// --------------------------
-// BamHeader implementation
-// --------------------------
-
-// ctor
-BamHeader::BamHeader(void) { }
-
-// dtor
-BamHeader::~BamHeader(void) { }
-
-// reads magic number from BGZF stream, returns true if valid
-void BamHeader::CheckMagicNumber(BgzfStream* stream) {
-
- // try to read magic number
- char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
- const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
- if ( numBytesRead != (int)Constants::BAM_HEADER_MAGIC_LENGTH )
- throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
-
- // validate magic number
- if ( !isValidMagicNumber(buffer) )
- throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
-}
-
-// clear SamHeader data
-void BamHeader::Clear(void) {
- m_header.Clear();
-}
-
-// return true if SamHeader data is valid
-bool BamHeader::IsValid(void) const {
- return m_header.IsValid();
-}
-
-// load BAM header ('magic number' and SAM header text) from BGZF stream
-void BamHeader::Load(BgzfStream* stream) {
-
- // read & check magic number
- CheckMagicNumber(stream);
-
- // read header (length, then actual text)
- uint32_t length(0);
- ReadHeaderLength(stream, length);
- ReadHeaderText(stream, length);
-}
-
-// reads SAM header text length from BGZF stream, stores it in @length
-void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
-
- // read BAM header text length
- char buffer[sizeof(uint32_t)];
- const size_t numBytesRead = stream->Read(buffer, sizeof(uint32_t));
- if ( numBytesRead != sizeof(uint32_t) )
- throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
-
- // convert char buffer to length
- length = BamTools::UnpackUnsignedInt(buffer);
- if ( BamTools::SystemIsBigEndian() )
- BamTools::SwapEndian_32(length);
-}
-
-// reads SAM header text from BGZF stream, stores in SamHeader object
-void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
-
- // read header text
- char* headerText = (char*)calloc(length + 1, 1);
- const size_t bytesRead = stream->Read(headerText, length);
-
- // if error reading, clean up buffer & throw
- if ( bytesRead != length ) {
- free(headerText);
- throw BamException("BamHeader::ReadHeaderText", "could not read header text");
- }
-
- // otherwise, text was read OK
- // store & cleanup
- m_header.SetHeaderText( (string)((const char*)headerText) );
- free(headerText);
-}
-
-// returns *copy* of SamHeader data object
-SamHeader BamHeader::ToSamHeader(void) const {
- return m_header;
-}
-
-// returns SAM-formatted string of header data
-string BamHeader::ToString(void) const {
- return m_header.ToString();
-}
+++ /dev/null
-// ***************************************************************************
-// BamHeader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for handling BAM headers.
-// ***************************************************************************
-
-#ifndef BAMHEADER_P_H
-#define BAMHEADER_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamHeader.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BgzfStream;
-
-class BamHeader {
-
- // ctor & dtor
- public:
- BamHeader(void);
- ~BamHeader(void);
-
- // BamHeader interface
- public:
- // clear SamHeader data
- void Clear(void);
- // return true if SamHeader data is valid
- bool IsValid(void) const;
- // load BAM header ('magic number' and SAM header text) from BGZF stream
- // returns true if all OK
- void Load(BgzfStream* stream);
- // returns (editable) copy of SamHeader data object
- SamHeader ToSamHeader(void) const;
- // returns SAM-formatted string of header data
- std::string ToString(void) const;
-
- // internal methods
- private:
- // reads magic number from BGZF stream
- void CheckMagicNumber(BgzfStream* stream);
- // reads SAM header length from BGZF stream, stores it in @length
- void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
- // reads SAM header text from BGZF stream, stores in SamHeader object
- void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
-
- // data members
- private:
- SamHeader m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMHEADER_P_H
+++ /dev/null
-// ***************************************************************************
-// BamHttp_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on HTTP server
-// ***************************************************************************
-
-#include "api/internal/BamHttp_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-using namespace std;
-
-BamHttp::BamHttp(const string& url)
- : IBamIODevice()
-{
- BT_ASSERT_X(false, "BamHttp not yet implemented");
-}
-
-BamHttp::~BamHttp(void) { }
-
-void BamHttp::Close(void) {
- return ;
-}
-
-bool BamHttp::IsRandomAccess(void) const {
- return true;
-}
-
-bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
- (void) mode;
- return true;
-}
-
-size_t BamHttp::Read(char* data, const unsigned int numBytes) {
- (void)data;
- (void)numBytes;
- return 0;
-}
-
-bool BamHttp::Seek(const int64_t& position) {
- (void)position;
- return true;
-}
-
-int64_t BamHttp::Tell(void) const {
- return -1;
-}
-
-size_t BamHttp::Write(const char* data, const unsigned int numBytes) {
- (void)data;
- (void)numBytes;
- return 0;
-}
+++ /dev/null
-// ***************************************************************************
-// BamHttp_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides reading/writing of BAM files on HTTP server
-// ***************************************************************************
-
-#ifndef BAMHTTP_P_H
-#define BAMHTTP_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamHttp : public IBamIODevice {
-
- // ctor & dtor
- public:
- BamHttp(const std::string& url);
- ~BamHttp(void);
-
- // IBamIODevice implementation
- public:
- void Close(void);
- bool IsRandomAccess(void) const;
- bool Open(const IBamIODevice::OpenMode mode);
- size_t Read(char* data, const unsigned int numBytes);
- bool Seek(const int64_t& position);
- int64_t Tell(void) const;
- size_t Write(const char* data, const unsigned int numBytes);
-
- // internal methods
- private:
-
- // data members
- private:
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMHTTP_P_H
+++ /dev/null
-// ***************************************************************************
-// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides interface for generating BamIndex implementations
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/internal/BamIndexFactory_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-using namespace std;
-
-// generates index filename from BAM filename (depending on requested type)
-// if type is unknown, returns empty string
-const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
- const BamIndex::IndexType& type)
-{
- switch ( type ) {
- case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
- case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
- default :
- return string();
- }
-}
-
-// instantiates the BamIndex implementation whose extension matches @indexFilename
-// returns a null index (0) if the file does not exist, no extension can be
-// determined, or the extension is not a supported one
-BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) {
-
-    // nothing to create if the index file is not there
-    const bool fileFound = BamTools::FileExists(indexFilename);
-    if ( !fileFound )
-        return 0;
-
-    // extension includes the leading dot (".EXT"); empty means none was found
-    const string ext = FileExtension(indexFilename);
-    if ( ext.empty() )
-        return 0;
-
-    // pick implementation by extension
-    if ( ext == BamStandardIndex::Extension() )
-        return new BamStandardIndex(reader);
-    if ( ext == BamToolsIndex::Extension() )
-        return new BamToolsIndex(reader);
-
-    // unsupported extension
-    return 0;
-}
-
-// instantiates a new BamIndex of the requested @type, attached to @reader
-// an unrecognized type yields a null index (0)
-BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
-                                             BamReaderPrivate* reader)
-{
-    if ( type == BamIndex::STANDARD )
-        return new BamStandardIndex(reader);
-
-    if ( type == BamIndex::BAMTOOLS )
-        return new BamToolsIndex(reader);
-
-    // unknown index type
-    return 0;
-}
-
-// extracts the extension of @filename, leading '.' included
-// returns an empty string if @filename is 4 characters or shorter, or has no '.'
-const string BamIndexFactory::FileExtension(const string& filename) {
-
-    // too short to hold a path plus an extension (this also covers the empty string)
-    if ( filename.length() <= 4 )
-        return string();
-
-    // position of the final dot, if any
-    const size_t dotPos = filename.rfind('.');
-    if ( dotPos == string::npos )
-        return string();
-
-    // everything from the final dot to the end of the name
-    return filename.substr(dotPos);
-}
-
-// looks on disk for an index file belonging to @bamFilename
-// the @preferredType is tried first; if that file is not found, the remaining
-// supported types are tried (STANDARD, then BAMTOOLS)
-// returns the first index filename that exists, or an empty string if none does
-const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
-                                                const BamIndex::IndexType& preferredType)
-{
-    // no BAM filename, no index filename
-    if ( bamFilename.empty() )
-        return string();
-
-    // preferred type gets the first shot
-    const string preferredFilename = CreateIndexFilename(bamFilename, preferredType);
-    if ( !preferredFilename.empty() && BamTools::FileExists(preferredFilename) )
-        return preferredFilename;
-
-    // fall back to the other supported types, in fixed order
-    const BamIndex::IndexType fallbackTypes[] = { BamIndex::STANDARD, BamIndex::BAMTOOLS };
-    const size_t numFallbacks = sizeof(fallbackTypes) / sizeof(fallbackTypes[0]);
-    for ( size_t i = 0; i < numFallbacks; ++i ) {
-
-        // preferred type was already checked above
-        if ( fallbackTypes[i] == preferredType )
-            continue;
-
-        const string candidate = CreateIndexFilename(bamFilename, fallbackTypes[i]);
-        if ( !candidate.empty() && BamTools::FileExists(candidate) )
-            return candidate;
-    }
-
-    // no index file of any supported type was found
-    return string();
-}
+++ /dev/null
-// ***************************************************************************
-// BamIndexFactory_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides interface for generating BamIndex implementations
-// ***************************************************************************
-
-#ifndef BAMINDEX_FACTORY_P_H
-#define BAMINDEX_FACTORY_P_H
-
-#include "api/BamIndex.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-// Factory for BamIndex implementations.
-// Every member declared here is static, so the class is used without an instance.
-class BamIndexFactory {
-
- // static interface methods
- public:
- // creates a new BamIndex object, depending on extension of @indexFilename
- // (result may be null - callers should check)
- static BamIndex* CreateIndexFromFilename(const std::string& indexFilename,
- BamReaderPrivate* reader);
- // creates a new BamIndex object, of requested @type
- // (result may be null - callers should check)
- static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type,
- BamReaderPrivate* reader);
- // returns name of existing index file that corresponds to @bamFilename
- // will defer to @preferredType if possible
- // if @preferredType not found, will attempt to load any supported index type
- // returns empty string if no index file (of any type) is found
- static const std::string FindIndexFilename(const std::string& bamFilename,
- const BamIndex::IndexType& preferredType);
-
- // internal methods
- // N.B. - labelled internal, but declared with public access
- public:
- // generates index filename from BAM filename (depending on requested type)
- // if type is unknown, returns empty string
- static const std::string CreateIndexFilename(const std::string& bamFilename,
- const BamIndex::IndexType& type);
- // retrieves file extension (including '.')
- // returns empty string if no extension can be determined
- static const std::string FileExtension(const std::string& filename);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMINDEX_FACTORY_P_H
+++ /dev/null
-// ***************************************************************************
-// BamMultiMerger_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides merging functionality for BamMultiReader. At this point, supports
-// sorting results by (refId, position) or by read name.
-// ***************************************************************************
-
-#ifndef BAMMULTIMERGER_P_H
-#define BAMMULTIMERGER_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAlignment.h"
-#include "api/BamReader.h"
-#include "api/algorithms/Sort.h"
-#include <deque>
-#include <functional>
-#include <set>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-// Pairs a reader with an alignment object.
-// Holds plain pointers only: copying a MergeItem copies the pointers, and the
-// (empty) destructor deletes nothing.
-struct MergeItem {
-
-    // data members
-    BamReader*    Reader;
-    BamAlignment* Alignment;
-
-    // both pointers default to null
-    MergeItem(BamReader* reader = 0, BamAlignment* alignment = 0)
-        : Reader(reader)
-        , Alignment(alignment)
-    { }
-
-    // member-wise pointer copy
-    MergeItem(const MergeItem& rhs)
-        : Reader(rhs.Reader)
-        , Alignment(rhs.Alignment)
-    { }
-
-    // nothing to release here
-    ~MergeItem(void) { }
-};
-
-// Adapts an alignment comparison functor (Compare) so that it can order
-// MergeItems: two items are compared via the alignments they point to.
-template<typename Compare>
-struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool> {
-
-    public:
-        MergeItemSorter(const Compare& comp = Compare())
-            : m_comp(comp)
-        { }
-
-        // N.B. - dereferences both Alignment pointers without a null check
-        bool operator()(const MergeItem& lhs, const MergeItem& rhs) {
-            return m_comp(*lhs.Alignment, *rhs.Alignment);
-        }
-
-    private:
-        Compare m_comp;
-};
-
-// pure ABC so we can just work polymorphically with any specific merger implementation
-// (the implementations in this header are the MultiMerger<Compare> template and its
-// Sort::Unsorted specialization)
-class IMultiMerger {
-
- public:
- IMultiMerger(void) { }
- virtual ~IMultiMerger(void) { }
- public:
- // stores @item in the merger
- virtual void Add(MergeItem item) =0;
- // discards all stored items
- virtual void Clear(void) =0;
- // returns a reference to the item at the front of the merger, without removing it
- virtual const MergeItem& First(void) const =0;
- // returns true if no items are stored
- virtual bool IsEmpty(void) const =0;
- // removes a stored item belonging to @reader
- // (the implementations in this header match on the reader's filename)
- virtual void Remove(BamReader* reader) =0;
- // returns number of stored items
- virtual int Size(void) const =0;
- // removes the front item from the merger and returns it (by copy)
- virtual MergeItem TakeFirst(void) =0;
-};
-
-// general merger
-// keeps its MergeItems in a std::multiset that is ordered by @Compare
-// (applied to the items' alignments via MergeItemSorter)
-template<typename Compare>
-class MultiMerger : public IMultiMerger {
-
- public:
- typedef Compare CompareType;
- typedef MergeItemSorter<CompareType> MergeType;
-
- public:
- // @comp is wrapped in a MergeItemSorter and handed to the container as its ordering
- explicit MultiMerger(const Compare& comp = Compare())
- : IMultiMerger()
- , m_data( MergeType(comp) )
- { }
- ~MultiMerger(void) { }
-
- // IMultiMerger implementation
- public:
- void Add(MergeItem item);
- void Clear(void);
- const MergeItem& First(void) const;
- bool IsEmpty(void) const;
- void Remove(BamReader* reader);
- int Size(void) const;
- MergeItem TakeFirst(void);
-
- private:
- typedef MergeItem ValueType;
- typedef std::multiset<ValueType, MergeType> ContainerType;
- typedef typename ContainerType::iterator DataIterator;
- typedef typename ContainerType::const_iterator DataConstIterator;
- // stored items, kept in MergeType order
- ContainerType m_data;
-};
-
-template <typename Compare>
-inline void MultiMerger<Compare>::Add(MergeItem item) {
-
-    // N.B. - every Compare type (including any future custom one) must define
-    //        UsesCharData() - see algorithms/Sort.h
-    //
-    // if the comparison reads character data, build it before the item is
-    // inserted into the ordered container
-    const bool charDataRequired = CompareType::UsesCharData();
-    if ( charDataRequired )
-        item.Alignment->BuildCharData();
-
-    m_data.insert(item);
-}
-
-// empties the container
-// N.B. - only the MergeItem entries go away; the objects they point to are not deleted here
-template <typename Compare>
-inline void MultiMerger<Compare>::Clear(void) {
- m_data.clear();
-}
-
-// returns a reference to the first item in container order, without removing it
-// N.B. - no emptiness check is made here; callers should check IsEmpty() first
-template <typename Compare>
-inline const MergeItem& MultiMerger<Compare>::First(void) const {
-    return *m_data.begin();
-}
-
-// returns true if the container holds no items
-template <typename Compare>
-inline bool MultiMerger<Compare>::IsEmpty(void) const {
- return m_data.empty();
-}
-// removes the first stored item whose reader reports the same filename as @reader
-// does nothing if @reader is null or no item matches
-template <typename Compare>
-inline void MultiMerger<Compare>::Remove(BamReader* reader) {
-
-    // nothing to look for
-    if ( reader == 0 ) return;
-
-    const std::string& targetFilename = reader->GetFilename();
-
-    // walk the stored items in container order
-    for ( DataIterator it = m_data.begin(); it != m_data.end(); ++it ) {
-
-        // items without a reader can never match
-        const BamReader* candidate = it->Reader;
-        if ( candidate == 0 ) continue;
-
-        // erase on match and leave right away (iterator is invalid after erase)
-        if ( candidate->GetFilename() == targetFilename ) {
-            m_data.erase(it);
-            return;
-        }
-    }
-}
-// returns number of stored items (container size converted to int)
-template <typename Compare>
-inline int MultiMerger<Compare>::Size(void) const {
-    return static_cast<int>( m_data.size() );
-}
-
-// removes the first item (in container order) and returns a copy of it
-// N.B. - no emptiness check is made here; callers should check IsEmpty() first
-template <typename Compare>
-inline MergeItem MultiMerger<Compare>::TakeFirst(void) {
-
-    // copy the item out before its node is erased
-    DataIterator head = m_data.begin();
-    MergeItem taken(*head);
-    m_data.erase(head);
-    return taken;
-}
-
-// unsorted "merger"
-// specialization that does no sorting: items are kept in a std::deque,
-// appended at the back and taken from the front
-template<>
-class MultiMerger<Algorithms::Sort::Unsorted> : public IMultiMerger {
-
- public:
- // N.B. - @comp is not used; it is accepted so the ctor signature matches the general template
- explicit MultiMerger(const Algorithms::Sort::Unsorted& comp = Algorithms::Sort::Unsorted())
- : IMultiMerger()
- { }
- ~MultiMerger(void) { }
-
- // IMultiMerger implementation
- public:
- void Add(MergeItem item);
- void Clear(void);
- const MergeItem& First(void) const;
- bool IsEmpty(void) const;
- void Remove(BamReader* reader);
- int Size(void) const;
- MergeItem TakeFirst(void);
-
- private:
- typedef MergeItem ValueType;
- typedef std::deque<ValueType> ContainerType;
- typedef ContainerType::iterator DataIterator;
- typedef ContainerType::const_iterator DataConstIterator;
- // stored items, in insertion order
- ContainerType m_data;
-};
-
-// appends @item at the back of the queue (no sorting, no char data built)
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Add(MergeItem item) {
- m_data.push_back(item);
-}
-
-// empties the queue
-// N.B. - only the MergeItem entries go away; the objects they point to are not deleted here
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Clear(void) {
- m_data.clear();
-}
-
-// returns a reference to the item at the front of the queue, without removing it
-// N.B. - no emptiness check is made here; callers should check IsEmpty() first
-inline
-const MergeItem& MultiMerger<Algorithms::Sort::Unsorted>::First(void) const {
- return m_data.front();
-}
-
-// returns true if the queue holds no items
-inline
-bool MultiMerger<Algorithms::Sort::Unsorted>::IsEmpty(void) const {
- return m_data.empty();
-}
-
-// removes the first queued item whose reader reports the same filename as @reader
-// does nothing if @reader is null or no item matches
-inline
-void MultiMerger<Algorithms::Sort::Unsorted>::Remove(BamReader* reader) {
-
-    // nothing to look for
-    if ( reader == 0 ) return;
-
-    const std::string targetFilename = reader->GetFilename();
-
-    // walk the queue front to back
-    for ( DataIterator it = m_data.begin(); it != m_data.end(); ++it ) {
-
-        // items without a reader can never match
-        const BamReader* candidate = it->Reader;
-        if ( candidate == 0 ) continue;
-
-        // erase on match and leave right away (iterator is invalid after erase)
-        if ( candidate->GetFilename() == targetFilename ) {
-            m_data.erase(it);
-            return;
-        }
-    }
-}
-
-// returns number of queued items (container size converted to int)
-inline
-int MultiMerger<Algorithms::Sort::Unsorted>::Size(void) const {
-    return static_cast<int>( m_data.size() );
-}
-
-// removes the item at the front of the queue and returns a copy of it
-// N.B. - no emptiness check is made here; callers should check IsEmpty() first
-inline
-MergeItem MultiMerger<Algorithms::Sort::Unsorted>::TakeFirst(void) {
-
-    // copy the item out before it is dropped from the queue
-    MergeItem taken( m_data.front() );
-    m_data.erase( m_data.begin() );
-    return taken;
-}
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMMULTIMERGER_P_H
+++ /dev/null
-// ***************************************************************************
-// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Functionality for simultaneously reading multiple BAM files
-// *************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/BamMultiReader.h"
-#include "api/SamConstants.h"
-#include "api/algorithms/Sort.h"
-#include "api/internal/BamMultiReader_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <sstream>
-using namespace std;
-
-// ctor
-// the alignment cache pointer starts out null; UpdateAlignmentCache() creates
-// the cache when it is first needed
-BamMultiReaderPrivate::BamMultiReaderPrivate(void)
- : m_alignmentCache(0)
-{ }
-
-// dtor
-// closes all files via Close(); its success/fail result is ignored here
-BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {
- Close();
-}
-
-// closes every BAM file currently held by the multireader
-// returns true if all closed cleanly; otherwise stores an error string and returns false
-bool BamMultiReaderPrivate::Close(void) {
-
-    m_errorString.clear();
-
-    // hand all current filenames to CloseFiles()
-    const bool allClosed = CloseFiles( Filenames() );
-    if ( !allClosed ) {
-
-        // CloseFiles() left the per-reader messages in m_errorString; wrap them
-        const string details = m_errorString;
-        const string message = string("error encountered while closing all files: \n\t") + details;
-        SetErrorString("BamMultiReader::Close", message);
-    }
-
-    return allClosed;
-}
-
-// closes the single BAM file named @filename
-// returns true if it closed cleanly; otherwise stores an error string and returns false
-bool BamMultiReaderPrivate::CloseFile(const string& filename) {
-
-    m_errorString.clear();
-
-    // reuse the multi-file routine with a one-element list
-    const vector<string> singleFile(1, filename);
-    const bool closedOk = CloseFiles(singleFile);
-    if ( !closedOk ) {
-
-        // CloseFiles() left the reader's message in m_errorString; wrap it
-        const string details = m_errorString;
-        const string message = string("error while closing file: ") + filename + "\n" + details;
-        SetErrorString("BamMultiReader::CloseFile", message);
-    }
-
-    return closedOk;
-}
-
-// close requested BAM files
-//
-// For each non-empty name in @filenames, the first reader reporting that
-// filename is removed from the alignment cache (if a cache exists), closed and
-// deleted together with its alignment, and its entry is erased from the reader
-// list. Once no readers remain, the alignment cache itself is destroyed.
-//
-// Close failures are appended to m_errorString (one tab-indented line per reader).
-// Returns true only if every matched reader closed without error.
-bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over filenames
-    vector<string>::const_iterator filesIter = filenames.begin();
-    vector<string>::const_iterator filesEnd  = filenames.end();
-    for ( ; filesIter != filesEnd; ++filesIter ) {
-        const string& filename = (*filesIter);
-        if ( filename.empty() ) continue;
-
-        // iterate over readers
-        vector<MergeItem>::iterator readerIter = m_readers.begin();
-        vector<MergeItem>::iterator readerEnd  = m_readers.end();
-        for ( ; readerIter != readerEnd; ++readerIter ) {
-            MergeItem& item = (*readerIter);
-            BamReader* reader = item.Reader;
-            if ( reader == 0 ) continue;
-
-            // if reader matches requested filename
-            if ( reader->GetFilename() == filename ) {
-
-                // remove reader's entry from alignment cache
-                // N.B. - the cache may not exist yet: Open() can store readers and
-                //        then return on error before UpdateAlignmentCache() creates it
-                if ( m_alignmentCache )
-                    m_alignmentCache->Remove(reader);
-
-                // clean up reader & its alignment
-                if ( !reader->Close() ) {
-                    m_errorString.append(1, '\t');
-                    m_errorString.append(reader->GetErrorString());
-                    m_errorString.append(1, '\n');
-                    errorsEncountered = true;
-                }
-                delete reader;
-
-                // delete reader's alignment entry
-                delete item.Alignment;
-
-                // remove reader from reader list
-                m_readers.erase(readerIter);
-
-                // on match, just go on to next filename
-                // (no need to keep looking and item iterator is invalid now anyway)
-                break;
-            }
-        }
-    }
-
-    // make sure alignment cache is cleaned up if all readers closed
-    if ( m_readers.empty() && m_alignmentCache ) {
-        m_alignmentCache->Clear();
-        delete m_alignmentCache;
-        m_alignmentCache = 0;
-    }
-
-    // return whether all readers closed OK
-    return !errorsEncountered;
-}
-
-// creates an index file of the given @type for every reader that has no index yet
-// returns true if all creations succeeded; otherwise stores an error string and returns false
-bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
-
-    m_errorString.clear();
-    bool allCreated = true;
-
-    // visit every reader
-    const size_t numReaders = m_readers.size();
-    for ( size_t i = 0; i < numReaders; ++i ) {
-        BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        // readers that already have an index are left alone
-        if ( reader->HasIndex() ) continue;
-
-        // record the reader's message if index creation fails
-        if ( !reader->CreateIndex(type) ) {
-            m_errorString.append(1, '\t');
-            m_errorString.append(reader->GetErrorString());
-            m_errorString.append(1, '\n');
-            allCreated = false;
-        }
-    }
-
-    // all good
-    if ( allCreated )
-        return true;
-
-    // wrap the collected per-reader messages
-    const string details = m_errorString;
-    const string message = string("error while creating index files: ") + "\n" + details;
-    SetErrorString("BamMultiReader::CreateIndexes", message);
-    return false;
-}
-
-// allocates the merger implementation that matches the sort order reported by
-// the (merged) header; the caller owns the returned object
-IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
-
-    // sort order comes from the unified SamHeader
-    const SamHeader header = GetHeader();
-
-    // BAM files sorted by read name
-    const bool sortedByName = ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME );
-
-    // BAM files sorted by position (takes precedence, as it is checked first)
-    if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
-        return new MultiMerger<Algorithms::Sort::ByPosition>();
-    else if ( sortedByName )
-        return new MultiMerger<Algorithms::Sort::ByName>();
-    else
-        // "unknown" or "unsorted" - hand alignments out in the order they are read
-        return new MultiMerger<Algorithms::Sort::Unsorted>();
-}
-
-// collects the (non-empty) filenames of all current readers, in reader order
-const vector<string> BamMultiReaderPrivate::Filenames(void) const {
-
-    const size_t numReaders = m_readers.size();
-
-    // at most one filename per reader
-    vector<string> names;
-    names.reserve(numReaders);
-
-    for ( size_t i = 0; i < numReaders; ++i ) {
-
-        // skip entries without a reader
-        const BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        // skip readers that report an empty filename
-        const string& name = reader->GetFilename();
-        if ( name.empty() ) continue;
-
-        names.push_back(name);
-    }
-
-    return names;
-}
-
-// returns a copy of the currently stored error string
-string BamMultiReaderPrivate::GetErrorString(void) const {
- return m_errorString;
-}
-
-// returns the unified header as a SamHeader object, built from GetHeaderText()
-SamHeader BamMultiReaderPrivate::GetHeader(void) const {
-    return SamHeader( GetHeaderText() );
-}
-
-// builds the text of a virtual, unified header for all BAM files in the multireader
-// returns an empty string if no readers are open (or the first reader entry is null)
-string BamMultiReaderPrivate::GetHeaderText(void) const {
-
-    // N.B. - for now the first BAM file's header is taken as-is, and only the
-    //        read groups (RG) of the other BAM files are appended to it
-    // TODO: make this more intelligent wrt other header lines/fields
-
-    // no readers, no header
-    if ( m_readers.empty() )
-        return string();
-
-    // start from the first reader's header
-    const BamReader* firstReader = m_readers.front().Reader;
-    if ( firstReader == 0 )
-        return string();
-    SamHeader mergedHeader = firstReader->GetHeader();
-
-    // fold in the remaining readers (first one is skipped)
-    vector<MergeItem>::const_iterator itemIter = m_readers.begin() + 1;
-    vector<MergeItem>::const_iterator itemEnd  = m_readers.end();
-    for ( ; itemIter != itemEnd; ++itemIter ) {
-        const BamReader* otherReader = itemIter->Reader;
-        if ( otherReader == 0 ) continue;
-
-        // add this reader's RG entries to the merged header
-        // N.B. - SamReadGroupDictionary handles duplicate-checking
-        const SamHeader otherHeader = otherReader->GetHeader();
-        mergedHeader.ReadGroups.Add(otherHeader.ReadGroups);
-
-        // TODO: merge anything else??
-    }
-
-    // hand back the stringified result
-    return mergedHeader.ToString();
-}
-
-// fetches the next alignment among all files into @al, character data included
-// returns false if no alignment is available
-bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
-    const bool needCharData = true;
-    return PopNextCachedAlignment(al, needCharData);
-}
-
-// fetches the next alignment among all files into @al, without asking for the
-// alignment's character data to be parsed
-// returns false if no alignment is available
-bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
-    const bool needCharData = false;
-    return PopNextCachedAlignment(al, needCharData);
-}
-
-// ---------------------------------------------------------------------------------------
-//
-// NB: The following GetReferenceX() functions assume that we have identical
-// references for all BAM files. We enforce this by invoking the
-// ValidateReaders() method to verify that our reference data is the same
-// across all files on Open - so we will not encounter a situation in which
-// there is a mismatch and we are still live.
-//
-// ---------------------------------------------------------------------------------------
-
-// returns the number of reference sequences, as reported by the first reader
-// an empty multireader (or a null first reader) reports 0
-int BamMultiReaderPrivate::GetReferenceCount(void) const {
-
-    // empty multireader
-    if ( m_readers.empty() )
-        return 0;
-
-    // first reader speaks for all
-    const BamReader* firstReader = m_readers.front().Reader;
-    if ( firstReader == 0 )
-        return 0;
-
-    return firstReader->GetReferenceCount();
-}
-
-// returns the reference entries, as reported by the first reader
-// an empty multireader (or a null first reader) reports an empty RefVector
-const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
-
-    // empty multireader
-    if ( m_readers.empty() )
-        return RefVector();
-
-    // first reader speaks for all
-    const BamReader* firstReader = m_readers.front().Reader;
-    if ( firstReader == 0 )
-        return RefVector();
-
-    return firstReader->GetReferenceData();
-}
-
-// looks up the reference ID for @refName, using the first reader
-// an empty multireader (or a null first reader) reports -1
-int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
-
-    // empty multireader
-    if ( m_readers.empty() )
-        return -1;
-
-    // first reader speaks for all
-    const BamReader* firstReader = m_readers.front().Reader;
-    if ( firstReader == 0 )
-        return -1;
-
-    return firstReader->GetReferenceID(refName);
-}
-// ---------------------------------------------------------------------------------------
-
-// returns true if every reader has index data available
-// (an empty multireader returns false)
-// useful as an indication of whether Jump() or SetRegion() are possible
-bool BamMultiReaderPrivate::HasIndexes(void) const {
-
-    // empty multireader
-    if ( m_readers.empty() )
-        return false;
-
-    bool allIndexed = true;
-
-    // every reader is queried, even after one has been found without an index
-    const size_t numReaders = m_readers.size();
-    for ( size_t i = 0; i < numReaders; ++i ) {
-        const BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        if ( !reader->HasIndex() )
-            allIndexed = false;
-    }
-
-    return allIndexed;
-}
-
-// returns true as soon as one reader reports that it is open, false if none does
-bool BamMultiReaderPrivate::HasOpenReaders(void) {
-
-    const size_t numReaders = m_readers.size();
-    for ( size_t i = 0; i < numReaders; ++i ) {
-
-        // skip entries without a reader
-        const BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        // first open reader settles it
-        if ( reader->IsOpen() )
-            return true;
-    }
-
-    // nothing open
-    return false;
-}
-
-// performs a random-access jump on every reader, using (refID, position) as a left-bound
-// returns the status of the alignment cache update that follows
-bool BamMultiReaderPrivate::Jump(int refID, int position) {
-
-    // NB: The result of each reader's Jump() is deliberately ignored. In
-    //     practice a failed Jump means "no alignments here", so the failure
-    //     is accepted and the alignment cache is refreshed regardless.
-
-    const size_t numReaders = m_readers.size();
-    for ( size_t i = 0; i < numReaders; ++i ) {
-
-        // skip entries without a reader
-        BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        // move this reader to the position of interest
-        reader->Jump(refID, position);
-    }
-
-    // refill cache from the new positions
-    return UpdateAlignmentCache();
-}
-
-// locate (& load) index files for BAM readers that don't already have one loaded
-// @preferredType is passed on to each reader's LocateIndex()
-// returns true if every index-less reader located an index; otherwise stores
-// an error string (one tab-indented line per failing reader) and returns false
-bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
-
-    bool errorsEncountered = false;
-    m_errorString.clear();
-
-    // iterate over readers
-    vector<MergeItem>::iterator readerIter = m_readers.begin();
-    vector<MergeItem>::iterator readerEnd  = m_readers.end();
-    for ( ; readerIter != readerEnd; ++readerIter ) {
-        MergeItem& item = (*readerIter);
-        BamReader* reader = item.Reader;
-        if ( reader == 0 ) continue;
-
-        // if reader has no index, try to locate one
-        if ( !reader->HasIndex() ) {
-            if ( !reader->LocateIndex(preferredType) ) {
-                m_errorString.append(1, '\t');
-                m_errorString.append(reader->GetErrorString());
-                m_errorString.append(1, '\n');
-                errorsEncountered = true;
-            }
-        }
-    }
-
-    // check for errors encountered before returning success/fail
-    if ( errorsEncountered ) {
-        const string currentError = m_errorString;
-        const string message = string("error while locating index files: ") + "\n" + currentError;
-        // label names this method, like the labels used by the other methods here
-        SetErrorString("BamMultiReader::LocateIndexes", message);
-        return false;
-    } else
-        return true;
-}
-
-// opens the BAM files named in @filenames and adds them to the multireader
-// (empty filenames are skipped)
-// returns false, with an error string stored, if existing readers cannot be rewound,
-// if any file fails to open, or if the readers fail validation;
-// otherwise returns the status of the alignment cache update
-bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
-
-    m_errorString.clear();
-
-    // readers that are already open go back to the beginning (refreshes alignment cache)
-    if ( !Rewind() ) {
-        const string details = m_errorString;
-        const string message = string("unable to rewind existing readers: \n\t") + details;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // try each requested file in turn
-    bool allOpened = true;
-    const size_t numFilenames = filenames.size();
-    for ( size_t i = 0; i < numFilenames; ++i ) {
-        const string& filename = filenames[i];
-        if ( filename.empty() ) continue;
-
-        BamReader* reader = new BamReader;
-
-        // on failure: note the error, discard the unusable reader, move on
-        if ( !reader->Open(filename) ) {
-            m_errorString.append(1, '\t');
-            m_errorString.append("unable to open file: ");
-            m_errorString.append(filename);
-            m_errorString.append(1, '\n');
-            allOpened = false;
-
-            delete reader;
-            continue;
-        }
-
-        // on success: keep the reader, paired with a fresh alignment object
-        m_readers.push_back( MergeItem(reader, new BamAlignment) );
-    }
-
-    // report any files that failed to open
-    if ( !allOpened ) {
-        const string details = m_errorString;
-        const string message = string("unable to open all files: \t\n") + details;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // readers must be consistent with one another
-    if ( !ValidateReaders() ) {
-        const string details = m_errorString;
-        const string message = string("unable to open inconsistent files: \t\n") + details;
-        SetErrorString("BamMultiReader::Open", message);
-        return false;
-    }
-
-    // fill the alignment cache from all readers
-    return UpdateAlignmentCache();
-}
-
-// opens the single BAM file named @filename (via Open() with a one-element list)
-// returns true on success; otherwise stores an error string and returns false
-bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
-
-    const vector<string> singleFile(1, filename);
-    const bool openedOk = Open(singleFile);
-    if ( !openedOk ) {
-
-        // Open() left its message in m_errorString; wrap it
-        const string details = m_errorString;
-        const string message = string("could not open file: ") + filename + "\n\t" + details;
-        SetErrorString("BamMultiReader::OpenFile", message);
-    }
-
-    return openedOk;
-}
-
-// opens the index files in @indexFilenames on the current readers, pairing
-// index filename i with reader i
-// returns false (with an error string stored) if the list size differs from the
-// reader count, or if any reader fails to open its index file
-bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
-
-    // TODO: This needs to be cleaner - should not assume same order.
-    //       And either way, shouldn't start at first reader. Should start at
-    //       first reader without an index?
-
-    // there must be exactly one index filename per reader
-    const size_t numReaders = m_readers.size();
-    if ( numReaders != indexFilenames.size() ) {
-        const string message("size of index file list does not match current BAM file count");
-        SetErrorString("BamMultiReader::OpenIndexes", message);
-        return false;
-    }
-
-    m_errorString.clear();
-    bool allOpened = true;
-
-    // sizes match (checked above), so one index covers both containers
-    for ( size_t i = 0; i < numReaders; ++i ) {
-
-        // an entry without a reader still uses up its index filename
-        BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        // record the reader's message if the index file cannot be opened
-        if ( !reader->OpenIndex(indexFilenames[i]) ) {
-            m_errorString.append(1, '\t');
-            m_errorString += reader->GetErrorString();
-            m_errorString.append(1, '\n');
-            allOpened = false;
-        }
-    }
-
-    // all good
-    if ( allOpened )
-        return true;
-
-    // wrap the collected per-reader messages
-    const string details = m_errorString;
-    const string message = string("could not open all index files: \n\t") + details;
-    SetErrorString("BamMultiReader::OpenIndexes", message);
-    return false;
-}
-
-// takes the first alignment out of the cache, copies it into @al, and refills
-// the cache with the next alignment from the same reader
-// if @needCharData is set, the alignment's char data is built and its Filename
-// is set before the copy
-// returns false if no (usable) cached alignment is available
-bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
-
-    // no cache yet
-    if ( m_alignmentCache == 0 )
-        return false;
-
-    // cache exists, but has nothing in it
-    if ( m_alignmentCache->IsEmpty() )
-        return false;
-
-    // take the first merge item out of the cache
-    const MergeItem next = m_alignmentCache->TakeFirst();
-    if ( next.Reader == 0 || next.Alignment == 0 )
-        return false;
-
-    // populate char data & source filename only when asked to
-    if ( needCharData ) {
-        next.Alignment->BuildCharData();
-        next.Alignment->Filename = next.Reader->GetFilename();
-    }
-
-    // copy the cached alignment out to the caller
-    al = *next.Alignment;
-
-    // read the reader's following alignment (reusing the same alignment object)
-    // and put it back in the cache
-    SaveNextAlignment(next.Reader, next.Alignment);
-    return true;
-}
-
-// rewinds all readers and then resets the alignment cache
-// with no readers open there is nothing to do, and true is returned
-// returns false (with an error string stored) if the readers cannot be rewound;
-// otherwise returns the status of the alignment cache update
-bool BamMultiReaderPrivate::Rewind(void) {
-
-    // nothing to rewind
-    if ( m_readers.empty() )
-        return true;
-
-    // rewind succeeded - refill cache from the start of each file
-    if ( RewindReaders() )
-        return UpdateAlignmentCache();
-
-    // rewind failed - wrap the collected per-reader messages
-    const string details = m_errorString;
-    const string message = string("could not rewind readers: \n\t") + details;
-    SetErrorString("BamMultiReader::Rewind", message);
-    return false;
-}
-
-// calls Rewind() on every reader
-// failures are appended to m_errorString (one tab-indented line per reader)
-// returns true only if every reader rewound without error
-bool BamMultiReaderPrivate::RewindReaders(void) {
-
-    m_errorString.clear();
-    bool allRewound = true;
-
-    const size_t numReaders = m_readers.size();
-    for ( size_t i = 0; i < numReaders; ++i ) {
-
-        // skip entries without a reader
-        BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        // reader rewound fine, on to the next
-        if ( reader->Rewind() ) continue;
-
-        // record the reader's message
-        m_errorString.append(1, '\t');
-        m_errorString.append( reader->GetErrorString() );
-        m_errorString.append(1, '\n');
-        allRewound = false;
-    }
-
-    return allRewound;
-}
-
-// reads the next alignment from @reader into @alignment and, if that succeeds,
-// adds the (reader, alignment) pair to the alignment cache
-// nothing is cached if the reader has no further alignment to give
-void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
-
-    // N.B. - the alignment's char data is built lazily - it is populated only:
-    //        automatically by the alignment cache, to maintain its sorting, OR
-    //        on demand, by a later client call to GetNextAlignment()
-
-    const bool gotAlignment = reader->GetNextAlignmentCore(*alignment);
-    if ( !gotAlignment )
-        return;
-
-    m_alignmentCache->Add( MergeItem(reader, alignment) );
-}
-
-// replaces the stored error string with "<where>: <what>"
-void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
-
-    // assemble the full message first, then store it in one step
-    string message(where);
-    message += ": ";
-    message += what;
-    m_errorString = message;
-}
-
-// sets the region of interest on every reader
-// returns the status of the alignment cache update that follows
-bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
-
-    // NB: The result of each reader's SetRegion() is deliberately ignored. In
-    //     practice a failed SetRegion means "no alignments here", so the
-    //     failure is accepted and the alignment cache is refreshed regardless.
-
-    const size_t numReaders = m_readers.size();
-    for ( size_t i = 0; i < numReaders; ++i ) {
-
-        // skip entries without a reader
-        BamReader* reader = m_readers[i].Reader;
-        if ( reader == 0 ) continue;
-
-        // restrict this reader to the region of interest
-        reader->SetRegion(region);
-    }
-
-    // refill cache from the new positions
-    return UpdateAlignmentCache();
-}
-
-// rebuilds the alignment cache: creates it if needed, empties it, then loads
-// the next alignment from every reader into it
-// returns false (with an error string stored) only if the cache cannot be created
-bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
-
-    // first use - the cache has to be created
-    if ( m_alignmentCache == 0 )
-        m_alignmentCache = CreateAlignmentCache();
-
-    // creation failed
-    if ( m_alignmentCache == 0 ) {
-        SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
-        return false;
-    }
-
-    // drop whatever was cached before
-    m_alignmentCache->Clear();
-
-    // one alignment per reader goes into the cache
-    const size_t numReaders = m_readers.size();
-    for ( size_t i = 0; i < numReaders; ++i ) {
-        MergeItem& item = m_readers[i];
-
-        // both the reader and its alignment object are needed
-        if ( item.Reader == 0 || item.Alignment == 0 ) continue;
-
-        SaveNextAlignment(item.Reader, item.Alignment);
-    }
-
-    // cache is ready
-    return true;
-}
-
-// ValidateReaders checks that all the readers point to BAM files representing
-// alignments against the same set of reference sequences, and that the
-// sequences are identically ordered. If these checks fail the operation of
-// the multireader is undefined, so we force program exit.
-bool BamMultiReaderPrivate::ValidateReaders(void) const {
-
- m_errorString.clear();
-
- // skip if 0 or 1 readers opened
- if ( m_readers.empty() || (m_readers.size() == 1) )
- return true;
-
- // retrieve first reader
- const MergeItem& firstItem = m_readers.front();
- const BamReader* firstReader = firstItem.Reader;
- if ( firstReader == 0 ) return false;
-
- // retrieve first reader's header data
- const SamHeader& firstReaderHeader = firstReader->GetHeader();
- const string& firstReaderSortOrder = firstReaderHeader.SortOrder;
-
- // retrieve first reader's reference data
- const RefVector& firstReaderRefData = firstReader->GetReferenceData();
- const int firstReaderRefCount = firstReader->GetReferenceCount();
- const int firstReaderRefSize = firstReaderRefData.size();
-
- // iterate over all readers
- vector<MergeItem>::const_iterator readerIter = m_readers.begin();
- vector<MergeItem>::const_iterator readerEnd = m_readers.end();
- for ( ; readerIter != readerEnd; ++readerIter ) {
- const MergeItem& item = (*readerIter);
- BamReader* reader = item.Reader;
- if ( reader == 0 ) continue;
-
- // get current reader's header data
- const SamHeader& currentReaderHeader = reader->GetHeader();
- const string& currentReaderSortOrder = currentReaderHeader.SortOrder;
-
- // check compatible sort order
- if ( currentReaderSortOrder != firstReaderSortOrder ) {
- const string message = string("mismatched sort order in ") + reader->GetFilename() +
- ", expected " + firstReaderSortOrder +
- ", but found " + currentReaderSortOrder;
- SetErrorString("BamMultiReader::ValidateReaders", message);
- return false;
- }
-
- // get current reader's reference data
- const RefVector currentReaderRefData = reader->GetReferenceData();
- const int currentReaderRefCount = reader->GetReferenceCount();
- const int currentReaderRefSize = currentReaderRefData.size();
-
- // init reference data iterators
- RefVector::const_iterator firstRefIter = firstReaderRefData.begin();
- RefVector::const_iterator firstRefEnd = firstReaderRefData.end();
- RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
-
- // compare reference counts from BamReader ( & container size, in case of BR error)
- if ( (currentReaderRefCount != firstReaderRefCount) ||
- (firstReaderRefSize != currentReaderRefSize) )
- {
- stringstream s("");
- s << "mismatched reference count in " << reader->GetFilename()
- << ", expected " << firstReaderRefCount
- << ", but found " << currentReaderRefCount;
- SetErrorString("BamMultiReader::ValidateReaders", s.str());
- return false;
- }
-
- // this will be ok; we just checked above that we have identically-sized sets of references
- // here we simply check if they are all, in fact, equal in content
- while ( firstRefIter != firstRefEnd ) {
- const RefData& firstRef = (*firstRefIter);
- const RefData& currentRef = (*currentRefIter);
-
- // compare reference name & length
- if ( (firstRef.RefName != currentRef.RefName) ||
- (firstRef.RefLength != currentRef.RefLength) )
- {
- stringstream s("");
- s << "mismatched references found in" << reader->GetFilename()
- << "expected: " << endl;
-
- // print first reader's reference data
- RefVector::const_iterator refIter = firstReaderRefData.begin();
- RefVector::const_iterator refEnd = firstReaderRefData.end();
- for ( ; refIter != refEnd; ++refIter ) {
- const RefData& entry = (*refIter);
- stringstream s("");
- s << entry.RefName << " " << endl;
- }
-
- s << "but found: " << endl;
-
- // print current reader's reference data
- refIter = currentReaderRefData.begin();
- refEnd = currentReaderRefData.end();
- for ( ; refIter != refEnd; ++refIter ) {
- const RefData& entry = (*refIter);
- s << entry.RefName << " " << entry.RefLength << endl;
- }
-
- SetErrorString("BamMultiReader::ValidateReaders", s.str());
- return false;
- }
-
- // update iterators
- ++firstRefIter;
- ++currentRefIter;
- }
- }
-
- // if we get here, everything checks out
- return true;
-}
+++ /dev/null
-// ***************************************************************************
-// BamMultiReader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Functionality for simultaneously reading multiple BAM files
-// *************************************************************************
-
-#ifndef BAMMULTIREADER_P_H
-#define BAMMULTIREADER_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamHeader.h"
-#include "api/BamMultiReader.h"
-#include "api/internal/BamMultiMerger_p.h"
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-class BamMultiReaderPrivate {
-
- // typedefs
- public:
- typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
-
- // constructor / destructor
- public:
- BamMultiReaderPrivate(void);
- ~BamMultiReaderPrivate(void);
-
- // public interface
- public:
-
- // file operations
- bool Close(void);
- bool CloseFile(const std::string& filename);
- const std::vector<std::string> Filenames(void) const;
- bool Jump(int refID, int position = 0);
- bool Open(const std::vector<std::string>& filenames);
- bool OpenFile(const std::string& filename);
- bool Rewind(void);
- bool SetRegion(const BamRegion& region);
-
- // access alignment data
- bool GetNextAlignment(BamAlignment& al);
- bool GetNextAlignmentCore(BamAlignment& al);
- bool HasOpenReaders(void);
-
- // access auxiliary data
- SamHeader GetHeader(void) const;
- std::string GetHeaderText(void) const;
- int GetReferenceCount(void) const;
- const BamTools::RefVector GetReferenceData(void) const;
- int GetReferenceID(const std::string& refName) const;
-
- // BAM index operations
- bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);
- bool HasIndexes(void) const;
- bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
- bool OpenIndexes(const std::vector<std::string>& indexFilenames);
-
- // error handling
- std::string GetErrorString(void) const;
-
- // 'internal' methods
- public:
-
- bool CloseFiles(const std::vector<std::string>& filenames);
- IMultiMerger* CreateAlignmentCache(void) const;
- bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
- bool RewindReaders(void);
- void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
- void SetErrorString(const std::string& where, const std::string& what) const; //
- bool UpdateAlignmentCache(void);
- bool ValidateReaders(void) const;
-
- // data members
- public:
- std::vector<MergeItem> m_readers;
- IMultiMerger* m_alignmentCache;
- mutable std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMMULTIREADER_P_H
+++ /dev/null
-// ***************************************************************************
-// BamPipe_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM pipe-specific IO behavior
-// ***************************************************************************
-
-#include "api/internal/BamPipe_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <iostream>
-using namespace std;
-
-BamPipe::BamPipe(void) : ILocalIODevice() { }
-
-BamPipe::~BamPipe(void) { }
-
-bool BamPipe::IsRandomAccess(void) const {
- return false;
-}
-
-bool BamPipe::Open(const IBamIODevice::OpenMode mode) {
-
- // make sure we're starting with a fresh pipe
- Close();
-
- // open stdin/stdout depending on requested openmode
- if ( mode == IBamIODevice::ReadOnly )
- m_stream = freopen(0, "rb", stdin);
- else if ( mode == IBamIODevice::WriteOnly )
- m_stream = freopen(0, "wb", stdout);
- else {
- SetErrorString("BamPipe::Open", "unknown open mode requested");
- return false;
- }
-
- // check that we obtained a valid FILE*
- if ( m_stream == 0 ) {
- const string message_base = string("could not open handle on ");
- const string message = message_base + ( (mode == IBamIODevice::ReadOnly) ? "stdin" : "stdout" );
- SetErrorString("BamPipe::Open", message);
- return false;
- }
-
- // store current IO mode & return success
- m_mode = mode;
- return true;
-}
-
-bool BamPipe::Seek(const int64_t& ) {
- SetErrorString("BamPipe::Seek", "random access not allowed in FIFO pipe");
- return false;
-}
+++ /dev/null
-// ***************************************************************************
-// BamPipe_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides BAM pipe-specific IO behavior
-// ***************************************************************************
-
-#ifndef BAMPIPE_P_H
-#define BAMPIPE_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/internal/ILocalIODevice_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamPipe : public ILocalIODevice {
-
- // ctor & dtor
- public:
- BamPipe(void);
- ~BamPipe(void);
-
- // IBamIODevice implementation
- public:
- bool IsRandomAccess(void) const;
- bool Open(const IBamIODevice::OpenMode mode);
- bool Seek(const int64_t& position);
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMPIPE_P_H
+++ /dev/null
-// ***************************************************************************
-// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Manages random access operations in a BAM file
-// **************************************************************************
-
-#include "api/BamIndex.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamIndexFactory_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cassert>
-#include <sstream>
-using namespace std;
-
-BamRandomAccessController::BamRandomAccessController(void)
- : m_index(0)
- , m_hasAlignmentsInRegion(true)
-{ }
-
-BamRandomAccessController::~BamRandomAccessController(void) {
- Close();
-}
-
-void BamRandomAccessController::AdjustRegion(const int& referenceCount) {
-
- // skip if no index available
- if ( m_index == 0 )
- return;
-
- // see if any references in region have alignments
- m_hasAlignmentsInRegion = false;
- int currentId = m_region.LeftRefID;
- const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? m_region.RightRefID : referenceCount - 1 );
- while ( currentId <= rightBoundRefId ) {
- m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);
- if ( m_hasAlignmentsInRegion ) break;
- ++currentId;
- }
-
- // if no data found on any reference in region
- if ( !m_hasAlignmentsInRegion )
- return;
-
- // if left bound of desired region had no data, use first reference that had data
- // otherwise, leave requested region as-is
- if ( currentId != m_region.LeftRefID ) {
- m_region.LeftRefID = currentId;
- m_region.LeftPosition = 0;
- }
-}
-
-// returns alignments' "RegionState": { Before|Overlaps|After } current region
-BamRandomAccessController::RegionState
-BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {
-
- // if region has no left bound at all
- if ( !m_region.isLeftBoundSpecified() )
- return OverlapsRegion;
-
- // handle unmapped reads - return AFTER region to halt processing
- if ( alignment.RefID == -1 )
- return AfterRegion;
-
- // if alignment is on any reference before left bound reference
- if ( alignment.RefID < m_region.LeftRefID )
- return BeforeRegion;
-
- // if alignment is on left bound reference
- else if ( alignment.RefID == m_region.LeftRefID ) {
-
- // if alignment starts at or after left bound position
- if ( alignment.Position >= m_region.LeftPosition) {
-
- if ( m_region.isRightBoundSpecified() && // right bound is specified AND
- m_region.LeftRefID == m_region.RightRefID && // left & right bounds on same reference AND
- alignment.Position >= m_region.RightPosition ) // alignment starts on or after right bound position
- return AfterRegion;
-
- // otherwise, alignment overlaps region
- else return OverlapsRegion;
- }
-
- // alignment starts before left bound position
- else {
-
- // if alignment overlaps left bound position
- if ( alignment.GetEndPosition() > m_region.LeftPosition )
- return OverlapsRegion;
- else
- return BeforeRegion;
- }
- }
-
- // otherwise alignment is on a reference after left bound reference
- else {
-
- // if region has a right bound
- if ( m_region.isRightBoundSpecified() ) {
-
- // alignment is on any reference between boundaries
- if ( alignment.RefID < m_region.RightRefID )
- return OverlapsRegion;
-
- // alignment is on any reference after right boundary
- else if ( alignment.RefID > m_region.RightRefID )
- return AfterRegion;
-
- // alignment is on right bound reference
- else {
-
- // if alignment starts before right bound position
- if ( alignment.Position < m_region.RightPosition )
- return OverlapsRegion;
- else
- return AfterRegion;
- }
- }
-
- // otherwise, alignment starts after left bound and there is no right bound given
- else return OverlapsRegion;
- }
-}
-
-void BamRandomAccessController::Close(void) {
- ClearIndex();
- ClearRegion();
-}
-
-void BamRandomAccessController::ClearIndex(void) {
- if ( m_index ) {
- delete m_index;
- m_index = 0;
- }
-}
-
-void BamRandomAccessController::ClearRegion(void) {
- m_region.clear();
- m_hasAlignmentsInRegion = true;
-}
-
-bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
- const BamIndex::IndexType& type)
-{
- // skip if reader is invalid
- assert(reader);
- if ( !reader->IsOpen() ) {
- SetErrorString("BamRandomAccessController::CreateIndex",
- "cannot create index for unopened reader");
- return false;
- }
-
- // create new index of requested type
- BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
- if ( newIndex == 0 ) {
- stringstream s("");
- s << "could not create index of type: " << type;
- SetErrorString("BamRandomAccessController::CreateIndex", s.str());
- return false;
- }
-
- // attempt to build index from current BamReader file
- if ( !newIndex->Create() ) {
- const string indexError = newIndex->GetErrorString();
- const string message = "could not create index: \n\t" + indexError;
- SetErrorString("BamRandomAccessController::CreateIndex", message);
- return false;
- }
-
- // save new index & return success
- SetIndex(newIndex);
- return true;
-}
-
-string BamRandomAccessController::GetErrorString(void) const {
- return m_errorString;
-}
-
-bool BamRandomAccessController::HasIndex(void) const {
- return ( m_index != 0 );
-}
-
-bool BamRandomAccessController::HasRegion(void) const {
- return ( !m_region.isNull() );
-}
-
-bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {
- return m_index->HasAlignments(refId);
-}
-
-bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,
- const BamIndex::IndexType& preferredType)
-{
- // look up index filename, deferring to preferredType if possible
- assert(reader);
- const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
-
- // if no index file found (of any type)
- if ( indexFilename.empty() ) {
- const string message = string("could not find index file for:") + reader->Filename();
- SetErrorString("BamRandomAccessController::LocateIndex", message);
- return false;
- }
-
- // otherwise open & use index file that was found
- return OpenIndex(indexFilename, reader);
-}
-
-bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {
-
- // attempt create new index of type based on filename
- BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
- if ( index == 0 ) {
- const string message = string("could not open index file: ") + indexFilename;
- SetErrorString("BamRandomAccessController::OpenIndex", message);
- return false;
- }
-
- // attempt to load data from index file
- if ( !index->Load(indexFilename) ) {
- const string indexError = index->GetErrorString();
- const string message = string("could not load index data from file: ") + indexFilename +
- "\n\t" + indexError;
- SetErrorString("BamRandomAccessController::OpenIndex", message);
- return false;
- }
-
- // save new index & return success
- SetIndex(index);
- return true;
-}
-
-bool BamRandomAccessController::RegionHasAlignments(void) const {
- return m_hasAlignmentsInRegion;
-}
-
-void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
- m_errorString = where + ": " + what;
-}
-
-void BamRandomAccessController::SetIndex(BamIndex* index) {
- if ( m_index )
- ClearIndex();
- m_index = index;
-}
-
-bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
-
- // store region
- m_region = region;
-
- // cannot jump when no index is available
- if ( !HasIndex() ) {
- SetErrorString("BamRandomAccessController", "cannot jump if no index data available");
- return false;
- }
-
- // adjust region as necessary to reflect where data actually begins
- AdjustRegion(referenceCount);
-
- // if no data present, return true
- // * Not an error, but future attempts to access alignments in this region will not return data
- // Returning true is useful in a BamMultiReader setting where some BAM files may
- // lack alignments in regions where other files still have data available.
- if ( !m_hasAlignmentsInRegion )
- return true;
-
- // return success/failure of jump to specified region,
- //
- // * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag
- // This covers 'corner case' where a region is requested that lies beyond the last
- // alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
- // will not return data. BamMultiReader will still be able to successfully pull alignments
- // from a region from other files even if this one has no data.
- if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
- const string indexError = m_index->GetErrorString();
- const string message = string("could not set region\n\t") + indexError;
- SetErrorString("BamRandomAccessController::OpenIndex", message);
- return false;
- }
- else
- return true;
-}
+++ /dev/null
-// ***************************************************************************
-// BamRandomAccessController_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Manages random access operations in a BAM file
-// ***************************************************************************
-
-#ifndef BAMRACONTROLLER_P_H
-#define BAMRACONTROLLER_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-
-namespace BamTools {
-
-class BamAlignment;
-
-namespace Internal {
-
-class BamReaderPrivate;
-
-class BamRandomAccessController {
-
- // enums
- public: enum RegionState { BeforeRegion = 0
- , OverlapsRegion
- , AfterRegion
- };
-
- // ctor & dtor
- public:
- BamRandomAccessController(void);
- ~BamRandomAccessController(void);
-
- // BamRandomAccessController interface
- public:
-
- // index methods
- void ClearIndex(void);
- bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);
- bool HasIndex(void) const;
- bool IndexHasAlignmentsForReference(const int& refId);
- bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType);
- bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader);
- void SetIndex(BamIndex* index);
-
- // region methods
- void ClearRegion(void);
- bool HasRegion(void) const;
- RegionState AlignmentState(const BamAlignment& alignment) const;
- bool RegionHasAlignments(void) const;
- bool SetRegion(const BamRegion& region, const int& referenceCount);
-
- // general methods
- void Close(void);
- std::string GetErrorString(void) const;
-
- // internal methods
- private:
- // adjusts requested region if necessary (depending on where data actually begins)
- void AdjustRegion(const int& referenceCount);
- // error-string handling
- void SetErrorString(const std::string& where, const std::string& what);
-
- // data members
- private:
-
- // index data
- BamIndex* m_index; // owns the index, not a copy - responsible for deleting
-
- // region data
- BamRegion m_region;
- bool m_hasAlignmentsInRegion;
-
- // general data
- std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMRACONTROLLER_P_H
+++ /dev/null
-// ***************************************************************************
-// BamReader_p.cpp (c) 2009 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 November 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-#include "api/BamConstants.h"
-#include "api/BamReader.h"
-#include "api/IBamIODevice.h"
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-#include <iterator>
-#include <vector>
-using namespace std;
-
-// constructor
-BamReaderPrivate::BamReaderPrivate(BamReader* parent)
- : m_alignmentsBeginOffset(0)
- , m_parent(parent)
-{
- m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// destructor
-BamReaderPrivate::~BamReaderPrivate(void) {
- Close();
-}
-
-// closes the BAM file
-bool BamReaderPrivate::Close(void) {
-
- // clear BAM metadata
- m_references.clear();
- m_header.Clear();
-
- // clear filename
- m_filename.clear();
-
- // close random access controller
- m_randomAccessController.Close();
-
- // if stream is open, attempt close
- if ( IsOpen() ) {
- try {
- m_stream.Close();
- } catch ( BamException& e ) {
- const string streamError = e.what();
- const string message = string("encountered error closing BAM file: \n\t") + streamError;
- SetErrorString("BamReader::Close", message);
- return false;
- }
- }
-
- // return success
- return true;
-}
-
-// creates an index file of requested type on current BAM file
-bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
-
- // skip if BAM file not open
- if ( !IsOpen() ) {
- SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
- return false;
- }
-
- // attempt to create index
- if ( m_randomAccessController.CreateIndex(this, type) )
- return true;
- else {
- const string bracError = m_randomAccessController.GetErrorString();
- const string message = string("could not create index: \n\t") + bracError;
- SetErrorString("BamReader::CreateIndex", message);
- return false;
- }
-}
-
-// return path & filename of current BAM file
-const string BamReaderPrivate::Filename(void) const {
- return m_filename;
-}
-
-string BamReaderPrivate::GetErrorString(void) const {
- return m_errorString;
-}
-
-// return header data as std::string
-string BamReaderPrivate::GetHeaderText(void) const {
- return m_header.ToString();
-}
-
-// return header data as SamHeader object
-SamHeader BamReaderPrivate::GetSamHeader(void) const {
- return m_header.ToSamHeader();
-}
-
-// get next alignment (with character data fully parsed)
-bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
-
- // if valid alignment found
- if ( GetNextAlignmentCore(alignment) ) {
-
- // store alignment's "source" filename
- alignment.Filename = m_filename;
-
- // return success/failure of parsing char data
- if ( alignment.BuildCharData() )
- return true;
- else {
- const string alError = alignment.GetErrorString();
- const string message = string("could not populate alignment data: \n\t") + alError;
- SetErrorString("BamReader::GetNextAlignment", message);
- return false;
- }
- }
-
- // no valid alignment found
- return false;
-}
-
-// retrieves next available alignment core data (returns success/fail)
-// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)
-// these can be accessed, if necessary, from the supportData
-// useful for operations requiring ONLY positional or other alignment-related information
-bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
-
- // skip if stream not opened
- if ( !m_stream.IsOpen() )
- return false;
-
- try {
-
- // skip if region is set but has no alignments
- if ( m_randomAccessController.HasRegion() &&
- !m_randomAccessController.RegionHasAlignments() )
- {
- return false;
- }
-
- // if can't read next alignment
- if ( !LoadNextAlignment(alignment) )
- return false;
-
- // check alignment's region-overlap state
- BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
-
- // if alignment starts after region, no need to keep reading
- if ( state == BamRandomAccessController::AfterRegion )
- return false;
-
- // read until overlap is found
- while ( state != BamRandomAccessController::OverlapsRegion ) {
-
- // if can't read next alignment
- if ( !LoadNextAlignment(alignment) )
- return false;
-
- // check alignment's region-overlap state
- state = m_randomAccessController.AlignmentState(alignment);
-
- // if alignment starts after region, no need to keep reading
- if ( state == BamRandomAccessController::AfterRegion )
- return false;
- }
-
- // if we get here, we found the next 'valid' alignment
- // (e.g. overlaps current region if one was set, simply the next alignment if not)
- alignment.SupportData.HasCoreOnly = true;
- return true;
-
- } catch ( BamException& e ) {
- const string streamError = e.what();
- const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
- SetErrorString("BamReader::GetNextAlignmentCore", message);
- return false;
- }
-}
-
-int BamReaderPrivate::GetReferenceCount(void) const {
- return m_references.size();
-}
-
-const RefVector& BamReaderPrivate::GetReferenceData(void) const {
- return m_references;
-}
-
-// returns RefID for given RefName (returns References.size() if not found)
-int BamReaderPrivate::GetReferenceID(const string& refName) const {
-
- // retrieve names from reference data
- vector<string> refNames;
- RefVector::const_iterator refIter = m_references.begin();
- RefVector::const_iterator refEnd = m_references.end();
- for ( ; refIter != refEnd; ++refIter)
- refNames.push_back( (*refIter).RefName );
-
- // return 'index-of' refName (or -1 if not found)
- int index = distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));
- if ( index == (int)m_references.size() ) return -1;
- else return index;
-}
-
-bool BamReaderPrivate::HasIndex(void) const {
- return m_randomAccessController.HasIndex();
-}
-
-bool BamReaderPrivate::IsOpen(void) const {
- return m_stream.IsOpen();
-}
-
-// load BAM header data
-void BamReaderPrivate::LoadHeaderData(void) {
- m_header.Load(&m_stream);
-}
-
-// populates BamAlignment with alignment data under file pointer, returns success/fail
-bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
-
- // read in the 'block length' value, make sure it's not zero
- char buffer[sizeof(uint32_t)];
- m_stream.Read(buffer, sizeof(uint32_t));
- alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
- if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
- if ( alignment.SupportData.BlockLength == 0 )
- return false;
-
- // read in core alignment data, make sure the right size of data was read
- char x[Constants::BAM_CORE_SIZE];
- if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
- return false;
-
- // swap core endian-ness if necessary
- if ( m_isBigEndian ) {
- for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
- BamTools::SwapEndian_32p(&x[i]);
- }
-
- // set BamAlignment 'core' and 'support' data
- alignment.RefID = BamTools::UnpackSignedInt(&x[0]);
- alignment.Position = BamTools::UnpackSignedInt(&x[4]);
-
- unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
- alignment.Bin = tempValue >> 16;
- alignment.MapQuality = tempValue >> 8 & 0xff;
- alignment.SupportData.QueryNameLength = tempValue & 0xff;
-
- tempValue = BamTools::UnpackUnsignedInt(&x[12]);
- alignment.AlignmentFlag = tempValue >> 16;
- alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
-
- alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
- alignment.MateRefID = BamTools::UnpackSignedInt(&x[20]);
- alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
- alignment.InsertSize = BamTools::UnpackSignedInt(&x[28]);
-
- // set BamAlignment length
- alignment.Length = alignment.SupportData.QuerySequenceLength;
-
- // read in character data - make sure proper data size was read
- bool readCharDataOK = false;
- const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
- RaiiBuffer allCharData(dataLength);
-
- if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) {
-
- // store 'allCharData' in supportData structure
- alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
-
- // set success flag
- readCharDataOK = true;
-
- // save CIGAR ops
- // need to calculate this here so that BamAlignment::GetEndPosition() performs correctly,
- // even when GetNextAlignmentCore() is called
- const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
- uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
- CigarOp op;
- alignment.CigarData.clear();
- alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
- for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
-
- // swap endian-ness if necessary
- if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);
-
- // build CigarOp structure
- op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
- op.Type = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];
-
- // save CigarOp
- alignment.CigarData.push_back(op);
- }
- }
-
- // return success/failure
- return readCharDataOK;
-}
-
-// loads reference data from BAM file
-bool BamReaderPrivate::LoadReferenceData(void) {
-
- // get number of reference sequences
- char buffer[sizeof(uint32_t)];
- m_stream.Read(buffer, sizeof(uint32_t));
- uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);
- if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
- m_references.reserve((int)numberRefSeqs);
-
- // iterate over all references in header
- for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {
-
- // get length of reference name
- m_stream.Read(buffer, sizeof(uint32_t));
- uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
- if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
- RaiiBuffer refName(refNameLength);
-
- // get reference name and reference sequence length
- m_stream.Read(refName.Buffer, refNameLength);
- m_stream.Read(buffer, sizeof(int32_t));
- int32_t refLength = BamTools::UnpackSignedInt(buffer);
- if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
-
- // store data for reference
- RefData aReference;
- aReference.RefName = (string)((const char*)refName.Buffer);
- aReference.RefLength = refLength;
- m_references.push_back(aReference);
- }
-
- // return success
- return true;
-}
-
-bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
-
- if ( m_randomAccessController.LocateIndex(this, preferredType) )
- return true;
- else {
- const string bracError = m_randomAccessController.GetErrorString();
- const string message = string("could not locate index: \n\t") + bracError;
- SetErrorString("BamReader::LocateIndex", message);
- return false;
- }
-}
-
-// opens BAM file (and index)
-bool BamReaderPrivate::Open(const string& filename) {
-
- try {
-
- // make sure we're starting with fresh state
- Close();
-
- // open BgzfStream
- m_stream.Open(filename, IBamIODevice::ReadOnly);
-
- // load BAM metadata
- LoadHeaderData();
- LoadReferenceData();
-
- // store filename & offset of first alignment
- m_filename = filename;
- m_alignmentsBeginOffset = m_stream.Tell();
-
- // return success
- return true;
-
- } catch ( BamException& e ) {
- const string error = e.what();
- const string message = string("could not open file: ") + filename +
- "\n\t" + error;
- SetErrorString("BamReader::Open", message);
- return false;
- }
-}
-
-bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
-
- if ( m_randomAccessController.OpenIndex(indexFilename, this) )
- return true;
- else {
- const string bracError = m_randomAccessController.GetErrorString();
- const string message = string("could not open index: \n\t") + bracError;
- SetErrorString("BamReader::OpenIndex", message);
- return false;
- }
-}
-
-// returns BAM file pointer to beginning of alignment data
-bool BamReaderPrivate::Rewind(void) {
-
- // reset region
- m_randomAccessController.ClearRegion();
-
- // return status of seeking back to first alignment
- if ( Seek(m_alignmentsBeginOffset) )
- return true;
- else {
- const string currentError = m_errorString;
- const string message = string("could not rewind: \n\t") + currentError;
- SetErrorString("BamReader::Rewind", message);
- return false;
- }
-}
-
-bool BamReaderPrivate::Seek(const int64_t& position) {
-
- // skip if BAM file not open
- if ( !IsOpen() ) {
- SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
- return false;
- }
-
- try {
- m_stream.Seek(position);
- return true;
- }
- catch ( BamException& e ) {
- const string streamError = e.what();
- const string message = string("could not seek in BAM file: \n\t") + streamError;
- SetErrorString("BamReader::Seek", message);
- return false;
- }
-}
-
-void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
- static const string SEPARATOR = ": ";
- m_errorString = where + SEPARATOR + what;
-}
-
-void BamReaderPrivate::SetIndex(BamIndex* index) {
- m_randomAccessController.SetIndex(index);
-}
-
-// sets current region & attempts to jump to it
-// returns success/failure
-bool BamReaderPrivate::SetRegion(const BamRegion& region) {
-
- if ( m_randomAccessController.SetRegion(region, m_references.size()) )
- return true;
- else {
- const string bracError = m_randomAccessController.GetErrorString();
- const string message = string("could not set region: \n\t") + bracError;
- SetErrorString("BamReader::SetRegion", message);
- return false;
- }
-}
-
-int64_t BamReaderPrivate::Tell(void) const {
- return m_stream.Tell();
-}
+++ /dev/null
-// ***************************************************************************
-// BamReader_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-#ifndef BAMREADER_P_H
-#define BAMREADER_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAlignment.h"
-#include "api/BamIndex.h"
-#include "api/BamReader.h"
-#include "api/SamHeader.h"
-#include "api/internal/BamHeader_p.h"
-#include "api/internal/BamRandomAccessController_p.h"
-#include "api/internal/BgzfStream_p.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BamReaderPrivate {
-
- // ctor & dtor
- public:
- BamReaderPrivate(BamReader* parent);
- ~BamReaderPrivate(void);
-
- // BamReader interface
- public:
-
- // file operations
- bool Close(void);
- const std::string Filename(void) const;
- bool IsOpen(void) const;
- bool Open(const std::string& filename);
- bool Rewind(void);
- bool SetRegion(const BamRegion& region);
-
- // access alignment data
- bool GetNextAlignment(BamAlignment& alignment);
- bool GetNextAlignmentCore(BamAlignment& alignment);
-
- // access auxiliary data
- std::string GetHeaderText(void) const;
- SamHeader GetSamHeader(void) const;
- int GetReferenceCount(void) const;
- const RefVector& GetReferenceData(void) const;
- int GetReferenceID(const std::string& refName) const;
-
- // index operations
- bool CreateIndex(const BamIndex::IndexType& type);
- bool HasIndex(void) const;
- bool LocateIndex(const BamIndex::IndexType& preferredType);
- bool OpenIndex(const std::string& indexFilename);
- void SetIndex(BamIndex* index);
-
- // error handling
- std::string GetErrorString(void) const;
- void SetErrorString(const std::string& where, const std::string& what);
-
- // internal methods, but available as a BamReaderPrivate 'interface'
- //
- // these methods should only be used by BamTools::Internal classes
- // (currently only used by the BamIndex subclasses)
- public:
- // retrieves header text from BAM file
- void LoadHeaderData(void);
- // retrieves BAM alignment under file pointer
- // (does no overlap checking or character data parsing)
- bool LoadNextAlignment(BamAlignment& alignment);
- // builds reference data structure from BAM file
- bool LoadReferenceData(void);
- // seek reader to file position
- bool Seek(const int64_t& position);
- // return reader's file position
- int64_t Tell(void) const;
-
- // data members
- public:
-
- // general BAM file data
- int64_t m_alignmentsBeginOffset;
- std::string m_filename;
- RefVector m_references;
-
- // system data
- bool m_isBigEndian;
-
- // parent BamReader
- BamReader* m_parent;
-
- // BamReaderPrivate components
- BamHeader m_header;
- BamRandomAccessController m_randomAccessController;
- BgzfStream m_stream;
-
- // error handling
- std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMREADER_P_H
+++ /dev/null
-// ***************************************************************************
-// BamStandardIndex.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the standardized BAM index format (".bai")
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamStandardIndex_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-#include <sstream>
-using namespace std;
-
-// -----------------------------------
-// static BamStandardIndex constants
-// -----------------------------------
-
-const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1
-const int BamStandardIndex::BAM_LIDX_SHIFT = 14;
-const string BamStandardIndex::BAI_EXTENSION = ".bai";
-const char* const BamStandardIndex::BAI_MAGIC = "BAI\1";
-const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
-const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);
-const int BamStandardIndex::SIZEOF_LINEAROFFSET = sizeof(uint64_t);
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
- : IndexStream(0)
- , Buffer(0)
-{ }
-
-BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
-
- if ( IndexStream ) {
- fclose(IndexStream);
- IndexStream = 0;
- }
-
- if ( Buffer ) {
- delete[] Buffer;
- Buffer = 0;
- }
-}
-
-// ---------------------------------
-// BamStandardIndex implementation
-// ---------------------------------
-
-// ctor
-BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
- : BamIndex(reader)
- , m_bufferLength(0)
-{
- m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// dtor
-BamStandardIndex::~BamStandardIndex(void) {
- CloseFile();
-}
-
-void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
-
- // retrieve references from reader
- const RefVector& references = m_reader->GetReferenceData();
-
- // LeftPosition cannot be greater than or equal to reference length
- if ( region.LeftPosition >= references.at(region.LeftRefID).RefLength )
- throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
-
- // set region 'begin'
- begin = (unsigned int)region.LeftPosition;
-
- // if right bound specified AND left&right bounds are on same reference
- // OK to use right bound position as region 'end'
- if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )
- end = (unsigned int)region.RightPosition;
-
- // otherwise, set region 'end' to last reference base
- else end = (unsigned int)references.at(region.LeftRefID).RefLength;
-}
-
-// [begin, end)
-void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
- const uint32_t& end,
- set<uint16_t>& candidateBins)
-{
- // initialize list, bin '0' is always a valid bin
- candidateBins.insert(0);
-
- // get rest of bins that contain this region
- unsigned int k;
- for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { candidateBins.insert(k); }
- for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { candidateBins.insert(k); }
- for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { candidateBins.insert(k); }
- for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { candidateBins.insert(k); }
- for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
-}
-
-void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
- const uint64_t& minOffset,
- set<uint16_t>& candidateBins,
- vector<int64_t>& offsets)
-{
- // seek to first bin
- Seek(refSummary.FirstBinFilePosition, SEEK_SET);
-
- // iterate over reference bins
- uint32_t binId;
- int32_t numAlignmentChunks;
- set<uint16_t>::iterator candidateBinIter;
- for ( int i = 0; i < refSummary.NumBins; ++i ) {
-
- // read bin contents (if successful, alignment chunks are now in m_buffer)
- ReadBinIntoBuffer(binId, numAlignmentChunks);
-
- // see if bin is a 'candidate bin'
- candidateBinIter = candidateBins.find(binId);
-
- // if not, move on to next bin
- if ( candidateBinIter == candidateBins.end() )
- continue;
-
- // otherwise, check bin's contents against for overlap
- else {
-
- size_t offset = 0;
- uint64_t chunkStart;
- uint64_t chunkStop;
-
- // iterate over alignment chunks
- for ( int j = 0; j < numAlignmentChunks; ++j ) {
-
- // read chunk start & stop from buffer
- memcpy((char*)&chunkStart, Resources.Buffer+offset, sizeof(uint64_t));
- offset += sizeof(uint64_t);
- memcpy((char*)&chunkStop, Resources.Buffer+offset, sizeof(uint64_t));
- offset += sizeof(uint64_t);
-
- // swap endian-ness if necessary
- if ( m_isBigEndian ) {
- SwapEndian_64(chunkStart);
- SwapEndian_64(chunkStop);
- }
-
- // store alignment chunk's start offset
- // if its stop offset is larger than our 'minOffset'
- if ( chunkStop >= minOffset )
- offsets.push_back(chunkStart);
- }
-
- // 'pop' bin ID from candidate bins set
- candidateBins.erase(candidateBinIter);
-
- // quit if no more candidates
- if ( candidateBins.empty() )
- break;
- }
- }
-}
-
-uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
- const uint32_t& begin)
-{
- // if no linear offsets exist, return 0
- if ( refSummary.NumLinearOffsets == 0 )
- return 0;
-
- // if 'begin' starts beyond last linear offset, use the last linear offset as minimum
- // else use the offset corresponding to the requested start position
- const int shiftedBegin = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
- if ( shiftedBegin >= refSummary.NumLinearOffsets )
- return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
- else
- return LookupLinearOffset( refSummary, shiftedBegin );
-}
-
-void BamStandardIndex::CheckBufferSize(char*& buffer,
- unsigned int& bufferLength,
- const unsigned int& requestedBytes)
-{
- try {
- if ( requestedBytes > bufferLength ) {
- bufferLength = requestedBytes + 10;
- delete[] buffer;
- buffer = new char[bufferLength];
- }
- } catch ( std::bad_alloc& ) {
- stringstream s("");
- s << "out of memory when allocating " << requestedBytes << " bytes";
- throw BamException("BamStandardIndex::CheckBufferSize", s.str());
- }
-}
-
-void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
- unsigned int& bufferLength,
- const unsigned int& requestedBytes)
-{
- try {
- if ( requestedBytes > bufferLength ) {
- bufferLength = requestedBytes + 10;
- delete[] buffer;
- buffer = new unsigned char[bufferLength];
- }
- } catch ( std::bad_alloc& ) {
- stringstream s("");
- s << "out of memory when allocating " << requestedBytes << " bytes";
- throw BamException("BamStandardIndex::CheckBufferSize", s.str());
- }
-}
-
-void BamStandardIndex::CheckMagicNumber(void) {
-
- // check 'magic number' to see if file is BAI index
- char magic[4];
- const size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
- if ( elementsRead != 4 )
- throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
-
- // compare to expected value
- if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 )
- throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
-}
-
-void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
- refEntry.ID = -1;
- refEntry.Bins.clear();
- refEntry.LinearOffsets.clear();
-}
-
-void BamStandardIndex::CloseFile(void) {
-
- // close file stream
- if ( IsFileOpen() ) {
- fclose(Resources.IndexStream);
- Resources.IndexStream = 0;
- }
-
- // clear index file summary data
- m_indexFileSummary.clear();
-
- // clean up I/O buffer
- delete[] Resources.Buffer;
- Resources.Buffer = 0;
- m_bufferLength = 0;
-}
-
-// builds index from associated BAM file & writes out to index file
-bool BamStandardIndex::Create(void) {
-
- // skip if BamReader is invalid or not open
- if ( m_reader == 0 || !m_reader->IsOpen() ) {
- SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
- return false;
- }
-
- // rewind BamReader
- if ( !m_reader->Rewind() ) {
- const string readerError = m_reader->GetErrorString();
- const string message = "could not create index: \n\t" + readerError;
- SetErrorString("BamStandardIndex::Create", message);
- return false;
- }
-
- try {
-
- // open new index file (read & write)
- string indexFilename = m_reader->Filename() + Extension();
- OpenFile(indexFilename, "w+b");
-
- // initialize BaiFileSummary with number of references
- const int& numReferences = m_reader->GetReferenceCount();
- ReserveForSummary(numReferences);
-
- // initialize output file
- WriteHeader();
-
- // set up bin, ID, offset, & coordinate markers
- const uint32_t defaultValue = 0xffffffffu;
- uint32_t currentBin = defaultValue;
- uint32_t lastBin = defaultValue;
- int32_t currentRefID = defaultValue;
- int32_t lastRefID = defaultValue;
- uint64_t currentOffset = (uint64_t)m_reader->Tell();
- uint64_t lastOffset = currentOffset;
- int32_t lastPosition = defaultValue;
-
- // iterate through alignments in BAM file
- BamAlignment al;
- BaiReferenceEntry refEntry;
- while ( m_reader->LoadNextAlignment(al) ) {
-
- // changed to new reference
- if ( lastRefID != al.RefID ) {
-
- // if not first reference, save previous reference data
- if ( lastRefID != (int32_t)defaultValue ) {
-
- SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
- WriteReferenceEntry(refEntry);
- ClearReferenceEntry(refEntry);
-
- // write any empty references between (but *NOT* including) lastRefID & al.RefID
- for ( int i = lastRefID+1; i < al.RefID; ++i ) {
- BaiReferenceEntry emptyEntry(i);
- WriteReferenceEntry(emptyEntry);
- }
-
- // update bin markers
- currentOffset = lastOffset;
- currentBin = al.Bin;
- lastBin = al.Bin;
- currentRefID = al.RefID;
- }
-
- // otherwise, this is first pass
- // be sure to write any empty references up to (but *NOT* including) current RefID
- else {
- for ( int i = 0; i < al.RefID; ++i ) {
- BaiReferenceEntry emptyEntry(i);
- WriteReferenceEntry(emptyEntry);
- }
- }
-
- // update reference markers
- refEntry.ID = al.RefID;
- lastRefID = al.RefID;
- lastBin = defaultValue;
- }
-
- // if lastPosition greater than current alignment position - file not sorted properly
- else if ( lastPosition > al.Position ) {
- stringstream s("");
- s << "BAM file is not properly sorted by coordinate" << endl
- << "Current alignment position: " << al.Position
- << " < previous alignment position: " << lastPosition
- << " on reference ID: " << al.RefID << endl;
- SetErrorString("BamStandardIndex::Create", s.str());
- return false;
- }
-
- // if alignment's ref ID is valid & its bin is not a 'leaf'
- if ( (al.RefID >= 0) && (al.Bin < 4681) )
- SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
-
- // changed to new BAI bin
- if ( al.Bin != lastBin ) {
-
- // if not first bin on reference, save previous bin data
- if ( currentBin != defaultValue )
- SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
-
- // update markers
- currentOffset = lastOffset;
- currentBin = al.Bin;
- lastBin = al.Bin;
- currentRefID = al.RefID;
-
- // if invalid RefID, break out
- if ( currentRefID < 0 )
- break;
- }
-
- // make sure that current file pointer is beyond lastOffset
- if ( m_reader->Tell() <= (int64_t)lastOffset ) {
- SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
- return false;
- }
-
- // update lastOffset & lastPosition
- lastOffset = m_reader->Tell();
- lastPosition = al.Position;
- }
-
- // after finishing alignments, if any data was read, check:
- if ( currentRefID >= 0 ) {
-
- // store last alignment chunk to its bin, then write last reference entry with data
- SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
- WriteReferenceEntry(refEntry);
-
- // then write any empty references remaining at end of file
- for ( int i = currentRefID+1; i < numReferences; ++i ) {
- BaiReferenceEntry emptyEntry(i);
- WriteReferenceEntry(emptyEntry);
- }
- }
-
- } catch ( BamException& e) {
- m_errorString = e.what();
- return false;
- }
-
- // rewind BamReader
- if ( !m_reader->Rewind() ) {
- const string readerError = m_reader->GetErrorString();
- const string message = "could not create index: \n\t" + readerError;
- SetErrorString("BamStandardIndex::Create", message);
- return false;
- }
-
- // return success
- return true;
-}
-
-// returns format's file extension
-const string BamStandardIndex::Extension(void) {
- return BamStandardIndex::BAI_EXTENSION;
-}
-
-void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
-
- // cannot calculate offsets if unknown/invalid reference ID requested
- if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
- throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
-
- // retrieve index summary for left bound reference
- const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
-
- // set up region boundaries based on actual BamReader data
- uint32_t begin;
- uint32_t end;
- AdjustRegion(region, begin, end);
-
- // retrieve all candidate bin IDs for region
- set<uint16_t> candidateBins;
- CalculateCandidateBins(begin, end, candidateBins);
-
- // use reference's linear offsets to calculate the minimum offset
- // that must be considered to find overlap
- const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
-
- // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
- // no data should not be error, just bail
- vector<int64_t> offsets;
- CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
- if ( offsets.empty() )
- return;
-
- // ensure that offsets are sorted before processing
- sort( offsets.begin(), offsets.end() );
-
- // binary search for an overlapping block (may not be first one though)
- BamAlignment al;
- typedef vector<int64_t>::const_iterator OffsetConstIterator;
- OffsetConstIterator offsetFirst = offsets.begin();
- OffsetConstIterator offsetIter = offsetFirst;
- OffsetConstIterator offsetLast = offsets.end();
- iterator_traits<OffsetConstIterator>::difference_type count = distance(offsetFirst, offsetLast);
- iterator_traits<OffsetConstIterator>::difference_type step;
- while ( count > 0 ) {
- offsetIter = offsetFirst;
- step = count/2;
- advance(offsetIter, step);
-
- // attempt seek to candidate offset
- const int64_t& candidateOffset = (*offsetIter);
- if ( !m_reader->Seek(candidateOffset) ) {
- const string readerError = m_reader->GetErrorString();
- const string message = "could not seek in BAM file: \n\t" + readerError;
- throw BamException("BamToolsIndex::GetOffset", message);
- }
-
- // load first available alignment, setting flag to true if data exists
- *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
-
- // check alignment against region
- if ( al.GetEndPosition() <= region.LeftPosition ) {
- offsetFirst = ++offsetIter;
- count -= step+1;
- } else count = step;
- }
-
- // step back to the offset before the 'current offset' (to make sure we cover overlaps)
- if ( offsetIter != offsets.begin() )
- --offsetIter;
- offset = (*offsetIter);
-}
-
-// returns whether reference has alignments or no
-bool BamStandardIndex::HasAlignments(const int& referenceID) const {
- if ( referenceID < 0 || referenceID >= (int)m_indexFileSummary.size() )
- return false;
- const BaiReferenceSummary& refSummary = m_indexFileSummary.at(referenceID);
- return ( refSummary.NumBins > 0 );
-}
-
-bool BamStandardIndex::IsFileOpen(void) const {
- return ( Resources.IndexStream != 0 );
-}
-
-// attempts to use index data to jump to @region, returns success/fail
-// a "successful" jump indicates no error, but not whether this region has data
-// * thus, the method sets a flag to indicate whether there are alignments
-// available after the jump position
-bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
-
- // clear out flag
- *hasAlignmentsInRegion = false;
-
- // skip if invalid reader or not open
- if ( m_reader == 0 || !m_reader->IsOpen() ) {
- SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
- return false;
- }
-
- // calculate nearest offset to jump to
- int64_t offset;
- try {
- GetOffset(region, offset, hasAlignmentsInRegion);
- } catch ( BamException& e ) {
- m_errorString = e.what();
- return false;
- }
-
- // if region has alignments, return success/fail of seeking there
- if ( *hasAlignmentsInRegion )
- return m_reader->Seek(offset);
-
- // otherwise, simply return true (but hasAlignmentsInRegion flag has been set to false)
- // (this is OK, BamReader will check this flag before trying to load data)
- return true;
-}
-
-// loads existing data from file into memory
-bool BamStandardIndex::Load(const std::string& filename) {
-
- try {
-
- // attempt to open file (read-only)
- OpenFile(filename, "rb");
-
- // validate format
- CheckMagicNumber();
-
- // load in-memory summary of index data
- SummarizeIndexFile();
-
- // return success
- return true;
-
- } catch ( BamException& e ) {
- m_errorString = e.what();
- return false;
- }
-}
-
-uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
-
- // attempt seek to proper index file position
- const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
- index*BamStandardIndex::SIZEOF_LINEAROFFSET;
- Seek(linearOffsetFilePosition, SEEK_SET);
-
- // read linear offset from BAI file
- uint64_t linearOffset;
- ReadLinearOffset(linearOffset);
- return linearOffset;
-}
-
-void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
-
- // skip if chunks are empty, nothing to merge
- if ( chunks.empty() )
- return;
-
- // set up merged alignment chunk container
- BaiAlignmentChunkVector mergedChunks;
- mergedChunks.push_back( chunks[0] );
-
- // iterate over chunks
- int i = 0;
- BaiAlignmentChunkVector::iterator chunkIter = chunks.begin();
- BaiAlignmentChunkVector::iterator chunkEnd = chunks.end();
- for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
-
- // get 'currentMergeChunk' based on numeric index
- BaiAlignmentChunk& currentMergeChunk = mergedChunks[i];
-
- // get sourceChunk based on source vector iterator
- BaiAlignmentChunk& sourceChunk = (*chunkIter);
-
- // if currentMergeChunk ends where sourceChunk starts, then merge the two
- if ( currentMergeChunk.Stop>>16 == sourceChunk.Start>>16 )
- currentMergeChunk.Stop = sourceChunk.Stop;
-
- // otherwise
- else {
- // append sourceChunk after currentMergeChunk
- mergedChunks.push_back(sourceChunk);
-
- // update i, so the next iteration will consider the
- // recently-appended sourceChunk as new mergeChunk candidate
- ++i;
- }
- }
-
- // saved newly-merged chunks into (parameter) chunks
- chunks = mergedChunks;
-}
-
-void BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
-
- // make sure any previous index file is closed
- CloseFile();
-
- // attempt to open file
- Resources.IndexStream = fopen(filename.c_str(), mode);
- if ( !IsFileOpen() ) {
- const string message = string("could not open file: ") + filename;
- throw BamException("BamStandardIndex::OpenFile", message);
- }
-}
-
-void BamStandardIndex::ReadBinID(uint32_t& binId) {
- const size_t elementsRead = fread(&binId, sizeof(binId), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(binId);
- if ( elementsRead != 1 )
- throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
-}
-
-void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
-
- // read bin header
- ReadBinID(binId);
- ReadNumAlignmentChunks(numAlignmentChunks);
-
- // read bin contents
- const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
- ReadIntoBuffer(bytesRequested);
-}
-
-void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
-
- // ensure that our buffer is big enough for request
- BamStandardIndex::CheckBufferSize(Resources.Buffer, m_bufferLength, bytesRequested);
-
- // read from BAI file stream
- const size_t bytesRead = fread( Resources.Buffer, sizeof(char), bytesRequested, Resources.IndexStream );
- if ( bytesRead != (size_t)bytesRequested ) {
- stringstream s("");
- s << "expected to read: " << bytesRequested << " bytes, "
- << "but instead read: " << bytesRead;
- throw BamException("BamStandardIndex::ReadIntoBuffer", s.str());
- }
-}
-
-void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
- const size_t elementsRead = fread(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_64(linearOffset);
- if ( elementsRead != 1 )
- throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
-}
-
-void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
- const size_t elementsRead = fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
- if ( elementsRead != 1 )
- throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
-}
-
-void BamStandardIndex::ReadNumBins(int& numBins) {
- const size_t elementsRead = fread(&numBins, sizeof(numBins), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(numBins);
- if ( elementsRead != 1 )
- throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
-}
-
-void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
- const size_t elementsRead = fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
- if ( elementsRead != 1 )
- throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI linear offset count");
-}
-
-void BamStandardIndex::ReadNumReferences(int& numReferences) {
- const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(numReferences);
- if ( elementsRead != 1 )
- throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
-}
-
-void BamStandardIndex::ReserveForSummary(const int& numReferences) {
- m_indexFileSummary.clear();
- m_indexFileSummary.assign( numReferences, BaiReferenceSummary() );
-}
-
-void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
- const uint32_t& currentBin,
- const uint64_t& currentOffset,
- const uint64_t& lastOffset)
-{
- // create new alignment chunk
- BaiAlignmentChunk newChunk(currentOffset, lastOffset);
-
- // if no entry exists yet for this bin, create one and store alignment chunk
- BaiBinMap::iterator binIter = binMap.find(currentBin);
- if ( binIter == binMap.end() ) {
- BaiAlignmentChunkVector newChunks;
- newChunks.push_back(newChunk);
- binMap.insert( pair<uint32_t, BaiAlignmentChunkVector>(currentBin, newChunks));
- }
-
- // otherwise, just append alignment chunk
- else {
- BaiAlignmentChunkVector& binChunks = (*binIter).second;
- binChunks.push_back( newChunk );
- }
-}
-
-void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
- BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
- refSummary.NumBins = numBins;
- refSummary.FirstBinFilePosition = Tell();
-}
-
-void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
- const int& alignmentStartPosition,
- const int& alignmentStopPosition,
- const uint64_t& lastOffset)
-{
- // get converted offsets
- const int beginOffset = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
- const int endOffset = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
-
- // resize vector if necessary
- int oldSize = offsets.size();
- int newSize = endOffset + 1;
- if ( oldSize < newSize )
- offsets.resize(newSize, 0);
-
- // store offset
- for( int i = beginOffset + 1; i <= endOffset; ++i ) {
- if ( offsets[i] == 0 )
- offsets[i] = lastOffset;
- }
-}
-
-void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
- BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
- refSummary.NumLinearOffsets = numLinearOffsets;
- refSummary.FirstLinearOffsetFilePosition = Tell();
-}
-
-// seek to position in index file stream
-void BamStandardIndex::Seek(const int64_t& position, const int& origin) {
- if ( fseek64(Resources.IndexStream, position, origin) != 0 )
- throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
-}
-
-void BamStandardIndex::SkipBins(const int& numBins) {
- uint32_t binId;
- int32_t numAlignmentChunks;
- for (int i = 0; i < numBins; ++i)
- ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
-}
-
-void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
- const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
- ReadIntoBuffer(bytesRequested);
-}
-
-void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
- sort( linearOffsets.begin(), linearOffsets.end() );
-}
-
-void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
-
- // load number of bins
- int numBins;
- ReadNumBins(numBins);
-
- // store bins summary for this reference
- refSummary.NumBins = numBins;
- refSummary.FirstBinFilePosition = Tell();
-
- // skip this reference's bins
- SkipBins(numBins);
-}
-
-void BamStandardIndex::SummarizeIndexFile(void) {
-
- // load number of reference sequences
- int numReferences;
- ReadNumReferences(numReferences);
-
- // initialize file summary data
- ReserveForSummary(numReferences);
-
- // iterate over reference entries
- BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
- BaiFileSummary::iterator summaryEnd = m_indexFileSummary.end();
- for ( int i = 0; summaryIter != summaryEnd; ++summaryIter, ++i )
- SummarizeReference(*summaryIter);
-}
-
-void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
-
- // load number of linear offsets
- int numLinearOffsets;
- ReadNumLinearOffsets(numLinearOffsets);
-
- // store bin summary data for this reference
- refSummary.NumLinearOffsets = numLinearOffsets;
- refSummary.FirstLinearOffsetFilePosition = Tell();
-
- // skip linear offsets in index file
- SkipLinearOffsets(numLinearOffsets);
-}
-
-void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
- SummarizeBins(refSummary);
- SummarizeLinearOffsets(refSummary);
-}
-
-// return position of file pointer in index file stream
-int64_t BamStandardIndex::Tell(void) const {
- return ftell64(Resources.IndexStream);
-}
-
-void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
-
- // localize alignment chunk offsets
- uint64_t start = chunk.Start;
- uint64_t stop = chunk.Stop;
-
- // swap endian-ness if necessary
- if ( m_isBigEndian ) {
- SwapEndian_64(start);
- SwapEndian_64(stop);
- }
-
- // write to index file
- size_t elementsWritten = 0;
- elementsWritten += fwrite(&start, sizeof(start), 1, Resources.IndexStream);
- elementsWritten += fwrite(&stop, sizeof(stop), 1, Resources.IndexStream);
- if ( elementsWritten != 2 )
- throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
-}
-
-void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
-
- // make sure chunks are merged (simplified) before writing & saving summary
- MergeAlignmentChunks(chunks);
-
- // write chunks
- int32_t chunkCount = chunks.size();
- if ( m_isBigEndian ) SwapEndian_32(chunkCount);
- const size_t elementsWritten = fwrite(&chunkCount, sizeof(chunkCount), 1, Resources.IndexStream);
- if ( elementsWritten != 1 )
- throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
-
- // iterate over chunks
- BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();
- BaiAlignmentChunkVector::const_iterator chunkEnd = chunks.end();
- for ( ; chunkIter != chunkEnd; ++chunkIter )
- WriteAlignmentChunk( (*chunkIter) );
-}
-
-void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
-
- // write BAM bin ID
- uint32_t binKey = binId;
- if ( m_isBigEndian ) SwapEndian_32(binKey);
- const size_t elementsWritten = fwrite(&binKey, sizeof(binKey), 1, Resources.IndexStream);
- if ( elementsWritten != 1 )
- throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
-
- // write bin's alignment chunks
- WriteAlignmentChunks(chunks);
-}
-
-void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
-
- // write number of bins
- int32_t binCount = bins.size();
- if ( m_isBigEndian ) SwapEndian_32(binCount);
- const size_t elementsWritten = fwrite(&binCount, sizeof(binCount), 1, Resources.IndexStream);
- if ( elementsWritten != 1 )
- throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
-
- // save summary for reference's bins
- SaveBinsSummary(refId, bins.size());
-
- // iterate over bins
- BaiBinMap::iterator binIter = bins.begin();
- BaiBinMap::iterator binEnd = bins.end();
- for ( ; binIter != binEnd; ++binIter )
- WriteBin( (*binIter).first, (*binIter).second );
-}
-
-void BamStandardIndex::WriteHeader(void) {
-
- size_t elementsWritten = 0;
-
- // write magic number
- elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, Resources.IndexStream);
-
- // write number of reference sequences
- int32_t numReferences = m_indexFileSummary.size();
- if ( m_isBigEndian ) SwapEndian_32(numReferences);
- elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-
- if ( elementsWritten != 5 )
- throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
-}
-
-void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
-
- // make sure linear offsets are sorted before writing & saving summary
- SortLinearOffsets(linearOffsets);
-
- size_t elementsWritten = 0;
-
- // write number of linear offsets
- int32_t offsetCount = linearOffsets.size();
- if ( m_isBigEndian ) SwapEndian_32(offsetCount);
- elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, Resources.IndexStream);
-
- // save summary for reference's linear offsets
- SaveLinearOffsetsSummary(refId, linearOffsets.size());
-
- // iterate over linear offsets
- BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();
- BaiLinearOffsetVector::const_iterator offsetEnd = linearOffsets.end();
- for ( ; offsetIter != offsetEnd; ++offsetIter ) {
-
- // write linear offset
- uint64_t linearOffset = (*offsetIter);
- if ( m_isBigEndian ) SwapEndian_64(linearOffset);
- elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
- }
-
- if ( elementsWritten != (linearOffsets.size() + 1) )
- throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
-}
-
-void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
- WriteBins(refEntry.ID, refEntry.Bins);
- WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
-}
+++ /dev/null
-// ***************************************************************************
-// BamStandardIndex.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the standardized BAM index format (".bai")
-// ***************************************************************************
-
-#ifndef BAM_STANDARD_INDEX_FORMAT_H
-#define BAM_STANDARD_INDEX_FORMAT_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-// -----------------------------------------------------------------------------
-// BamStandardIndex data structures
-
-// defines start and end of a contiguous run of alignments
-struct BaiAlignmentChunk {
-
- // data members
- uint64_t Start;
- uint64_t Stop;
-
- // constructor
- BaiAlignmentChunk(const uint64_t& start = 0,
- const uint64_t& stop = 0)
- : Start(start)
- , Stop(stop)
- { }
-};
-
-// comparison operator (for sorting)
-inline
-bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
- return lhs.Start < rhs.Start;
-}
-
-// convenience typedef for a list of all alignment 'chunks' in a BAI bin
-typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
-
-// convenience typedef for a map of all BAI bins in a reference (ID => chunks)
-typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
-
-// convenience typedef for a list of all 'linear offsets' in a reference
-typedef std::vector<uint64_t> BaiLinearOffsetVector;
-
-// contains all fields necessary for building, loading, & writing
-// full BAI index data for a single reference
-struct BaiReferenceEntry {
-
- // data members
- int32_t ID;
- BaiBinMap Bins;
- BaiLinearOffsetVector LinearOffsets;
-
- // ctor
- BaiReferenceEntry(const int32_t& id = -1)
- : ID(id)
- { }
-};
-
-// provides (persistent) summary of BaiReferenceEntry's index data
-struct BaiReferenceSummary {
-
- // data members
- int NumBins;
- int NumLinearOffsets;
- uint64_t FirstBinFilePosition;
- uint64_t FirstLinearOffsetFilePosition;
-
- // ctor
- BaiReferenceSummary(void)
- : NumBins(0)
- , NumLinearOffsets(0)
- , FirstBinFilePosition(0)
- , FirstLinearOffsetFilePosition(0)
- { }
-};
-
-// convenience typedef for describing a full BAI index file summary
-typedef std::vector<BaiReferenceSummary> BaiFileSummary;
-
-// end BamStandardIndex data structures
-// -----------------------------------------------------------------------------
-
-class BamStandardIndex : public BamIndex {
-
- // ctor & dtor
- public:
- BamStandardIndex(Internal::BamReaderPrivate* reader);
- ~BamStandardIndex(void);
-
- // BamIndex implementation
- public:
- // builds index from associated BAM file & writes out to index file
- bool Create(void);
- // returns whether reference has alignments or no
- bool HasAlignments(const int& referenceID) const;
- // attempts to use index data to jump to @region, returns success/fail
- // a "successful" jump indicates no error, but not whether this region has data
- // * thus, the method sets a flag to indicate whether there are alignments
- // available after the jump position
- bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
- // loads existing data from file into memory
- bool Load(const std::string& filename);
- BamIndex::IndexType Type(void) const { return BamIndex::STANDARD; }
- public:
- // returns format's file extension
- static const std::string Extension(void);
-
- // internal methods
- private:
-
- // index file ops
- void CheckMagicNumber(void);
- void CloseFile(void);
- bool IsFileOpen(void) const;
- void OpenFile(const std::string& filename, const char* mode);
- void Seek(const int64_t& position, const int& origin);
- int64_t Tell(void) const;
-
- // BAI index building methods
- void ClearReferenceEntry(BaiReferenceEntry& refEntry);
- void SaveAlignmentChunkToBin(BaiBinMap& binMap,
- const uint32_t& currentBin,
- const uint64_t& currentOffset,
- const uint64_t& lastOffset);
- void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
- const int& alignmentStartPosition,
- const int& alignmentStopPosition,
- const uint64_t& lastOffset);
-
- // random-access methods
- void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
- void CalculateCandidateBins(const uint32_t& begin,
- const uint32_t& end,
- std::set<uint16_t>& candidateBins);
- void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
- const uint64_t& minOffset,
- std::set<uint16_t>& candidateBins,
- std::vector<int64_t>& offsets);
- uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
- void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
- uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
-
- // BAI summary (create/load) methods
- void ReserveForSummary(const int& numReferences);
- void SaveBinsSummary(const int& refId, const int& numBins);
- void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
- void SkipBins(const int& numBins);
- void SkipLinearOffsets(const int& numLinearOffsets);
- void SummarizeBins(BaiReferenceSummary& refSummary);
- void SummarizeIndexFile(void);
- void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
- void SummarizeReference(BaiReferenceSummary& refSummary);
-
- // BAI full index input methods
- void ReadBinID(uint32_t& binId);
- void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
- void ReadIntoBuffer(const unsigned int& bytesRequested);
- void ReadLinearOffset(uint64_t& linearOffset);
- void ReadNumAlignmentChunks(int& numAlignmentChunks);
- void ReadNumBins(int& numBins);
- void ReadNumLinearOffsets(int& numLinearOffsets);
- void ReadNumReferences(int& numReferences);
-
- // BAI full index output methods
- void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
- void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
- void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
- void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
- void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
- void WriteBins(const int& refId, BaiBinMap& bins);
- void WriteHeader(void);
- void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
- void WriteReferenceEntry(BaiReferenceEntry& refEntry);
-
- // data members
- private:
- bool m_isBigEndian;
- BaiFileSummary m_indexFileSummary;
-
- // our input buffer
- unsigned int m_bufferLength;
-
- struct RaiiWrapper {
- FILE* IndexStream;
- char* Buffer;
- RaiiWrapper(void);
- ~RaiiWrapper(void);
- };
- RaiiWrapper Resources;
-
- // static methods
- private:
- // checks if the buffer is large enough to accomodate the requested size
- static void CheckBufferSize(char*& buffer,
- unsigned int& bufferLength,
- const unsigned int& requestedBytes);
- // checks if the buffer is large enough to accomodate the requested size
- static void CheckBufferSize(unsigned char*& buffer,
- unsigned int& bufferLength,
- const unsigned int& requestedBytes);
- // static constants
- private:
- static const int MAX_BIN;
- static const int BAM_LIDX_SHIFT;
- static const std::string BAI_EXTENSION;
- static const char* const BAI_MAGIC;
- static const int SIZEOF_ALIGNMENTCHUNK;
- static const int SIZEOF_BINCORE;
- static const int SIZEOF_LINEAROFFSET;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAM_STANDARD_INDEX_FORMAT_H
+++ /dev/null
-// ***************************************************************************
-// BamToolsIndex.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the BamTools index format (".bti")
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamReader_p.h"
-#include "api/internal/BamToolsIndex_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-#include <iostream>
-#include <iterator>
-#include <map>
-using namespace std;
-
-// --------------------------------
-// static BamToolsIndex constants
-// --------------------------------
-
-const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
-const string BamToolsIndex::BTI_EXTENSION = ".bti";
-const char* const BamToolsIndex::BTI_MAGIC = "BTI\1";
-const int BamToolsIndex::SIZEOF_BLOCK = sizeof(int32_t)*2 + sizeof(int64_t);
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BamToolsIndex::RaiiWrapper::RaiiWrapper(void)
- : IndexStream(0)
-{ }
-
-BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
- if ( IndexStream )
- fclose(IndexStream);
-}
-
-// ------------------------------
-// BamToolsIndex implementation
-// ------------------------------
-
-// ctor
-BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
- : BamIndex(reader)
- , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
- , m_inputVersion(0)
- , m_outputVersion(BTI_2_0) // latest version - used for writing new index files
-{
- m_isBigEndian = BamTools::SystemIsBigEndian();
-}
-
-// dtor
-BamToolsIndex::~BamToolsIndex(void) {
- CloseFile();
-}
-
-void BamToolsIndex::CheckMagicNumber(void) {
-
- // read magic number
- char magic[4];
- size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
- if ( elementsRead != 4 )
- throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
-
- // validate expected magic number
- if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 )
- throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
-}
-
-// check index file version, return true if OK
-void BamToolsIndex::CheckVersion(void) {
-
- // read version from file
- size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, Resources.IndexStream);
- if ( elementsRead != 1 )
- throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
- if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
-
- // if version is negative, or zero
- if ( m_inputVersion <= 0 )
- throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
-
- // if version is newer than can be supported by this version of bamtools
- else if ( m_inputVersion > m_outputVersion ) {
- const string message = "unsupported format: this index was created by a newer version of BamTools. "
- "Update your local version of BamTools to use the index file.";
- throw BamException("BamToolsIndex::CheckVersion", message);
- }
-
- // ------------------------------------------------------------------
- // check for deprecated, unsupported versions
- // (the format had to be modified to accomodate a particular bug fix)
-
- // Version 2.0: introduced support for half-open intervals, instead of the old closed intervals
- // respondBy: throwing exception - we're not going to try to handle the old BTI files.
- else if ( (Version)m_inputVersion < BamToolsIndex::BTI_2_0 ) {
- const string message = "unsupported format: this version of the index may not properly handle "
- "coordinate intervals. Please run 'bamtools index -bti -in yourData.bam' "
- "to generate an up-to-date, fixed BTI file.";
- throw BamException("BamToolsIndex::CheckVersion", message);
- }
-}
-
-void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
- refEntry.ID = -1;
- refEntry.Blocks.clear();
-}
-
-void BamToolsIndex::CloseFile(void) {
- if ( IsFileOpen() ) {
- fclose(Resources.IndexStream);
- Resources.IndexStream = 0;
- }
- m_indexFileSummary.clear();
-}
-
-// builds index from associated BAM file & writes out to index file
-bool BamToolsIndex::Create(void) {
-
- // skip if BamReader is invalid or not open
- if ( m_reader == 0 || !m_reader->IsOpen() ) {
- SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
- return false;
- }
-
- // rewind BamReader
- if ( !m_reader->Rewind() ) {
- const string readerError = m_reader->GetErrorString();
- const string message = "could not create index: \n\t" + readerError;
- SetErrorString("BamToolsIndex::Create", message);
- return false;
- }
-
- try {
- // open new index file (read & write)
- const string indexFilename = m_reader->Filename() + Extension();
- OpenFile(indexFilename, "w+b");
-
- // initialize BtiFileSummary with number of references
- const int& numReferences = m_reader->GetReferenceCount();
- InitializeFileSummary(numReferences);
-
- // intialize output file header
- WriteHeader();
-
- // index building markers
- uint32_t currentBlockCount = 0;
- int64_t currentAlignmentOffset = m_reader->Tell();
- int32_t blockRefId = -1;
- int32_t blockMaxEndPosition = -1;
- int64_t blockStartOffset = currentAlignmentOffset;
- int32_t blockStartPosition = -1;
-
- // plow through alignments, storing index entries
- BamAlignment al;
- BtiReferenceEntry refEntry;
- while ( m_reader->LoadNextAlignment(al) ) {
-
- // if moved to new reference
- if ( al.RefID != blockRefId ) {
-
- // if first pass, check:
- if ( currentBlockCount == 0 ) {
-
- // write any empty references up to (but not including) al.RefID
- for ( int i = 0; i < al.RefID; ++i )
- WriteReferenceEntry( BtiReferenceEntry(i) );
- }
-
- // not first pass:
- else {
-
- // store previous BTI block data in reference entry
- const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- refEntry.Blocks.push_back(block);
-
- // write reference entry, then clear
- WriteReferenceEntry(refEntry);
- ClearReferenceEntry(refEntry);
-
- // write any empty references between (but not including)
- // the last blockRefID and current al.RefID
- for ( int i = blockRefId+1; i < al.RefID; ++i )
- WriteReferenceEntry( BtiReferenceEntry(i) );
-
- // reset block count
- currentBlockCount = 0;
- }
-
- // set ID for new reference entry
- refEntry.ID = al.RefID;
- }
-
- // if beginning of block, update counters
- if ( currentBlockCount == 0 ) {
- blockRefId = al.RefID;
- blockStartOffset = currentAlignmentOffset;
- blockStartPosition = al.Position;
- blockMaxEndPosition = al.GetEndPosition();
- }
-
- // increment block counter
- ++currentBlockCount;
-
- // check end position
- const int32_t alignmentEndPosition = al.GetEndPosition();
- if ( alignmentEndPosition > blockMaxEndPosition )
- blockMaxEndPosition = alignmentEndPosition;
-
- // if block is full, get offset for next block, reset currentBlockCount
- if ( currentBlockCount == m_blockSize ) {
-
- // store previous block data in reference entry
- const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- refEntry.Blocks.push_back(block);
-
- // update markers
- blockStartOffset = m_reader->Tell();
- currentBlockCount = 0;
- }
-
- // not the best name, but for the next iteration, this value will be the offset of the
- // *current* alignment. this is necessary because we won't know if this next alignment
- // is on a new reference until we actually read it
- currentAlignmentOffset = m_reader->Tell();
- }
-
- // after finishing alignments, if any data was read, check:
- if ( blockRefId >= 0 ) {
-
- // store last BTI block data in reference entry
- const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- refEntry.Blocks.push_back(block);
-
- // write last reference entry, then clear
- WriteReferenceEntry(refEntry);
- ClearReferenceEntry(refEntry);
-
- // then write any empty references remaining at end of file
- for ( int i = blockRefId+1; i < numReferences; ++i )
- WriteReferenceEntry( BtiReferenceEntry(i) );
- }
-
- } catch ( BamException& e ) {
- m_errorString = e.what();
- return false;
- }
-
- // rewind BamReader
- if ( !m_reader->Rewind() ) {
- const string readerError = m_reader->GetErrorString();
- const string message = "could not create index: \n\t" + readerError;
- SetErrorString("BamToolsIndex::Create", message);
- return false;
- }
-
- // return success
- return true;
-}
-
-// returns format's file extension
-const std::string BamToolsIndex::Extension(void) {
- return BamToolsIndex::BTI_EXTENSION;
-}
-
-void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
-
- // return false ref ID is not a valid index in file summary data
- if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
- throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
-
- // retrieve reference index data for left bound reference
- BtiReferenceEntry refEntry(region.LeftRefID);
- ReadReferenceEntry(refEntry);
-
- // binary search for an overlapping block (may not be first one though)
- bool found = false;
- typedef BtiBlockVector::const_iterator BtiBlockConstIterator;
- BtiBlockConstIterator blockFirst = refEntry.Blocks.begin();
- BtiBlockConstIterator blockIter = blockFirst;
- BtiBlockConstIterator blockLast = refEntry.Blocks.end();
- iterator_traits<BtiBlockConstIterator>::difference_type count = distance(blockFirst, blockLast);
- iterator_traits<BtiBlockConstIterator>::difference_type step;
- while ( count > 0 ) {
- blockIter = blockFirst;
- step = count/2;
- advance(blockIter, step);
-
- const BtiBlock& block = (*blockIter);
- if ( block.StartPosition <= region.RightPosition ) {
- if ( block.MaxEndPosition > region.LeftPosition ) {
- offset = block.StartOffset;
- break;
- }
- blockFirst = ++blockIter;
- count -= step+1;
- }
- else count = step;
- }
-
- // if we didn't search "off the end" of the blocks
- if ( blockIter != blockLast ) {
-
- // "walk back" until we've gone too far
- while ( blockIter != blockFirst ) {
- const BtiBlock& currentBlock = (*blockIter);
-
- --blockIter;
- const BtiBlock& previousBlock = (*blockIter);
- if ( previousBlock.MaxEndPosition <= region.LeftPosition ) {
- offset = currentBlock.StartOffset;
- found = true;
- break;
- }
- }
-
- // if we walked all the way to first block, just return that and let the reader's
- // region overlap parsing do the rest
- if ( blockIter == blockFirst ) {
- const BtiBlock& block = (*blockIter);
- offset = block.StartOffset;
- found = true;
- }
- }
-
-
- // sets to false if blocks container is empty, or if no matching block could be found
- *hasAlignmentsInRegion = found;
-}
-
-// returns whether reference has alignments or no
-bool BamToolsIndex::HasAlignments(const int& referenceID) const {
- if ( referenceID < 0 || referenceID >= (int)m_indexFileSummary.size() )
- return false;
- const BtiReferenceSummary& refSummary = m_indexFileSummary.at(referenceID);
- return ( refSummary.NumBlocks > 0 );
-}
-
-// pre-allocates space for each reference's summary data
-void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
- m_indexFileSummary.clear();
- for ( int i = 0; i < numReferences; ++i )
- m_indexFileSummary.push_back( BtiReferenceSummary() );
-}
-
-// returns true if the index stream is open
-bool BamToolsIndex::IsFileOpen(void) const {
- return ( Resources.IndexStream != 0 );
-}
-
-// attempts to use index data to jump to @region, returns success/fail
-// a "successful" jump indicates no error, but not whether this region has data
-// * thus, the method sets a flag to indicate whether there are alignments
-// available after the jump position
-bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) {
-
- // clear flag
- *hasAlignmentsInRegion = false;
-
- // skip if invalid reader or not open
- if ( m_reader == 0 || !m_reader->IsOpen() ) {
- SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
- return false;
- }
-
- // make sure left-bound position is valid
- const RefVector& references = m_reader->GetReferenceData();
- if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) {
- SetErrorString("BamToolsIndex::Jump", "could not create index: invalid region requested");
- return false;
- }
-
- // calculate nearest offset to jump to
- int64_t offset;
- try {
- GetOffset(region, offset, hasAlignmentsInRegion);
- } catch ( BamException& e ) {
- m_errorString = e.what();
- return false;
- }
-
- // return success/failure of seek
- return m_reader->Seek(offset);
-}
-
-// loads existing data from file into memory
-bool BamToolsIndex::Load(const std::string& filename) {
-
- try {
-
- // attempt to open file (read-only)
- OpenFile(filename, "rb");
-
- // load metadata & generate in-memory summary
- LoadHeader();
- LoadFileSummary();
-
- // return success
- return true;
-
- } catch ( BamException& e ) {
- m_errorString = e.what();
- return false;
- }
-}
-
-void BamToolsIndex::LoadFileSummary(void) {
-
- // load number of reference sequences
- int numReferences;
- LoadNumReferences(numReferences);
-
- // initialize file summary data
- InitializeFileSummary(numReferences);
-
- // load summary for each reference
- BtiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
- BtiFileSummary::iterator summaryEnd = m_indexFileSummary.end();
- for ( ; summaryIter != summaryEnd; ++summaryIter )
- LoadReferenceSummary(*summaryIter);
-}
-
-void BamToolsIndex::LoadHeader(void) {
-
- // check BTI file metadata
- CheckMagicNumber();
- CheckVersion();
-
- // use file's BTI block size to set member variable
- const size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
- if ( elementsRead != 1 )
- throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
-}
-
-void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
- const size_t elementsRead = fread(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(numBlocks);
- if ( elementsRead != 1 )
- throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
-}
-
-void BamToolsIndex::LoadNumReferences(int& numReferences) {
- const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
- if ( m_isBigEndian ) SwapEndian_32(numReferences);
- if ( elementsRead != 1 )
- throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
-}
-
-void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
-
- // load number of blocks
- int numBlocks;
- LoadNumBlocks(numBlocks);
-
- // store block summary data for this reference
- refSummary.NumBlocks = numBlocks;
- refSummary.FirstBlockFilePosition = Tell();
-
- // skip reference's blocks
- SkipBlocks(numBlocks);
-}
-
-void BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
-
- // make sure any previous index file is closed
- CloseFile();
-
- // attempt to open file
- Resources.IndexStream = fopen(filename.c_str(), mode);
- if ( !IsFileOpen() ) {
- const string message = string("could not open file: ") + filename;
- throw BamException("BamToolsIndex::OpenFile", message);
- }
-}
-
-void BamToolsIndex::ReadBlock(BtiBlock& block) {
-
- // read in block data members
- size_t elementsRead = 0;
- elementsRead += fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, Resources.IndexStream);
- elementsRead += fread(&block.StartOffset, sizeof(block.StartOffset), 1, Resources.IndexStream);
- elementsRead += fread(&block.StartPosition, sizeof(block.StartPosition), 1, Resources.IndexStream);
-
- // swap endian-ness if necessary
- if ( m_isBigEndian ) {
- SwapEndian_32(block.MaxEndPosition);
- SwapEndian_64(block.StartOffset);
- SwapEndian_32(block.StartPosition);
- }
-
- if ( elementsRead != 3 )
- throw BamException("BamToolsIndex::ReadBlock", "could not read block");
-}
-
-void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
-
- // prep blocks container
- blocks.clear();
- blocks.reserve(refSummary.NumBlocks);
-
- // skip to first block entry
- Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
-
- // read & store block entries
- BtiBlock block;
- for ( int i = 0; i < refSummary.NumBlocks; ++i ) {
- ReadBlock(block);
- blocks.push_back(block);
- }
-}
-
-void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
-
- // return false if refId not valid index in file summary structure
- if ( refEntry.ID < 0 || refEntry.ID >= (int)m_indexFileSummary.size() )
- throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
-
- // use index summary to assist reading the reference's BTI blocks
- const BtiReferenceSummary& refSummary = m_indexFileSummary.at(refEntry.ID);
- ReadBlocks(refSummary, refEntry.Blocks);
-}
-
-void BamToolsIndex::Seek(const int64_t& position, const int& origin) {
- if ( fseek64(Resources.IndexStream, position, origin) != 0 )
- throw BamException("BamToolsIndex::Seek", "could not seek in BAI file");
-}
-
-void BamToolsIndex::SkipBlocks(const int& numBlocks) {
- Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );
-}
-
-int64_t BamToolsIndex::Tell(void) const {
- return ftell64(Resources.IndexStream);
-}
-
-void BamToolsIndex::WriteBlock(const BtiBlock& block) {
-
- // copy entry data
- int32_t maxEndPosition = block.MaxEndPosition;
- int64_t startOffset = block.StartOffset;
- int32_t startPosition = block.StartPosition;
-
- // swap endian-ness if necessary
- if ( m_isBigEndian ) {
- SwapEndian_32(maxEndPosition);
- SwapEndian_64(startOffset);
- SwapEndian_32(startPosition);
- }
-
- // write the reference index entry
- size_t elementsWritten = 0;
- elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, Resources.IndexStream);
- elementsWritten += fwrite(&startOffset, sizeof(startOffset), 1, Resources.IndexStream);
- elementsWritten += fwrite(&startPosition, sizeof(startPosition), 1, Resources.IndexStream);
- if ( elementsWritten != 3 )
- throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
-}
-
-void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
- BtiBlockVector::const_iterator blockIter = blocks.begin();
- BtiBlockVector::const_iterator blockEnd = blocks.end();
- for ( ; blockIter != blockEnd; ++blockIter )
- WriteBlock(*blockIter);
-}
-
-void BamToolsIndex::WriteHeader(void) {
-
- size_t elementsWritten = 0;
-
- // write BTI index format 'magic number'
- elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, Resources.IndexStream);
-
- // write BTI index format version
- int32_t currentVersion = (int32_t)m_outputVersion;
- if ( m_isBigEndian ) SwapEndian_32(currentVersion);
- elementsWritten += fwrite(&currentVersion, sizeof(currentVersion), 1, Resources.IndexStream);
-
- // write block size
- uint32_t blockSize = m_blockSize;
- if ( m_isBigEndian ) SwapEndian_32(blockSize);
- elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, Resources.IndexStream);
-
- // write number of references
- int32_t numReferences = m_indexFileSummary.size();
- if ( m_isBigEndian ) SwapEndian_32(numReferences);
- elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
-
- if ( elementsWritten != 7 )
- throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
-}
-
-void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
-
- // write number of blocks this reference
- uint32_t numBlocks = refEntry.Blocks.size();
- if ( m_isBigEndian ) SwapEndian_32(numBlocks);
- const size_t elementsWritten = fwrite(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
- if ( elementsWritten != 1 )
- throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
-
- // write actual block entries
- WriteBlocks(refEntry.Blocks);
-}
+++ /dev/null
-// ***************************************************************************
-// BamToolsIndex.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides index operations for the BamTools index format (".bti")
-// ***************************************************************************
-
-#ifndef BAMTOOLS_INDEX_FORMAT_H
-#define BAMTOOLS_INDEX_FORMAT_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/BamIndex.h"
-#include <map>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-namespace Internal {
-
-// contains data for each 'block' in a BTI index
-struct BtiBlock {
-
- // data members
- int32_t MaxEndPosition;
- int64_t StartOffset;
- int32_t StartPosition;
-
- // ctor
- BtiBlock(const int32_t& maxEndPosition = 0,
- const int64_t& startOffset = 0,
- const int32_t& startPosition = 0)
- : MaxEndPosition(maxEndPosition)
- , StartOffset(startOffset)
- , StartPosition(startPosition)
- { }
-};
-
-// convenience typedef for describing a a list of BTI blocks on a reference
-typedef std::vector<BtiBlock> BtiBlockVector;
-
-// contains all fields necessary for building, loading, & writing
-// full BTI index data for a single reference
-struct BtiReferenceEntry {
-
- // data members
- int32_t ID;
- BtiBlockVector Blocks;
-
- // ctor
- BtiReferenceEntry(const int& id = -1)
- : ID(id)
- { }
-};
-
-// provides (persistent) summary of BtiReferenceEntry's index data
-struct BtiReferenceSummary {
-
- // data members
- int NumBlocks;
- uint64_t FirstBlockFilePosition;
-
- // ctor
- BtiReferenceSummary(void)
- : NumBlocks(0)
- , FirstBlockFilePosition(0)
- { }
-};
-
-// convenience typedef for describing a full BTI index file summary
-typedef std::vector<BtiReferenceSummary> BtiFileSummary;
-
-class BamToolsIndex : public BamIndex {
-
- // keep a list of any supported versions here
- // (might be useful later to handle any 'legacy' versions if the format changes)
- // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
- //
- // so a change introduced in BTI_1_2 may be handled from then on by:
- //
- // if ( indexVersion >= BTI_1_2 )
- // do something new
- // else
- // do the old thing
- enum Version { BTI_1_0 = 1
- , BTI_1_1
- , BTI_1_2
- , BTI_2_0
- };
-
- // ctor & dtor
- public:
- BamToolsIndex(Internal::BamReaderPrivate* reader);
- ~BamToolsIndex(void);
-
- // BamIndex implementation
- public:
- // builds index from associated BAM file & writes out to index file
- bool Create(void);
- // returns whether reference has alignments or no
- bool HasAlignments(const int& referenceID) const;
- // attempts to use index data to jump to @region, returns success/fail
- // a "successful" jump indicates no error, but not whether this region has data
- // * thus, the method sets a flag to indicate whether there are alignments
- // available after the jump position
- bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
- // loads existing data from file into memory
- bool Load(const std::string& filename);
- BamIndex::IndexType Type(void) const { return BamIndex::BAMTOOLS; }
- public:
- // returns format's file extension
- static const std::string Extension(void);
-
- // internal methods
- private:
-
- // index file ops
- void CheckMagicNumber(void);
- void CheckVersion(void);
- void CloseFile(void);
- bool IsFileOpen(void) const;
- void OpenFile(const std::string& filename, const char* mode);
- void Seek(const int64_t& position, const int& origin);
- int64_t Tell(void) const;
-
- // index-creation methods
- void ClearReferenceEntry(BtiReferenceEntry& refEntry);
- void WriteBlock(const BtiBlock& block);
- void WriteBlocks(const BtiBlockVector& blocks);
- void WriteHeader(void);
- void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
-
- // random-access methods
- void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
- void ReadBlock(BtiBlock& block);
- void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
- void ReadReferenceEntry(BtiReferenceEntry& refEntry);
-
- // BTI summary data methods
- void InitializeFileSummary(const int& numReferences);
- void LoadFileSummary(void);
- void LoadHeader(void);
- void LoadNumBlocks(int& numBlocks);
- void LoadNumReferences(int& numReferences);
- void LoadReferenceSummary(BtiReferenceSummary& refSummary);
- void SkipBlocks(const int& numBlocks);
-
- // data members
- private:
- bool m_isBigEndian;
- BtiFileSummary m_indexFileSummary;
- uint32_t m_blockSize;
- int32_t m_inputVersion; // Version is serialized as int
- Version m_outputVersion;
-
- struct RaiiWrapper {
- FILE* IndexStream;
- RaiiWrapper(void);
- ~RaiiWrapper(void);
- };
- RaiiWrapper Resources;
-
- // static constants
- private:
- static const uint32_t DEFAULT_BLOCK_LENGTH;
- static const std::string BTI_EXTENSION;
- static const char* const BTI_MAGIC;
- static const int SIZEOF_BLOCK;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMTOOLS_INDEX_FORMAT_H
+++ /dev/null
-// ***************************************************************************
-// BamWriter_p.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-#include "api/BamAlignment.h"
-#include "api/BamConstants.h"
-#include "api/IBamIODevice.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BamWriter_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdlib>
-#include <cstring>
-using namespace std;
-
-// ctor
-BamWriterPrivate::BamWriterPrivate(void)
- : m_isBigEndian( BamTools::SystemIsBigEndian() )
-{ }
-
-// dtor
-BamWriterPrivate::~BamWriterPrivate(void) {
- Close();
-}
-
-// calculates minimum bin for a BAM alignment interval [begin, end)
-uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
- --end;
- if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);
- if ( (begin >> 17) == (end >> 17) ) return 585 + (begin >> 17);
- if ( (begin >> 20) == (end >> 20) ) return 73 + (begin >> 20);
- if ( (begin >> 23) == (end >> 23) ) return 9 + (begin >> 23);
- if ( (begin >> 26) == (end >> 26) ) return 1 + (begin >> 26);
- return 0;
-}
-
-// closes the alignment archive
-void BamWriterPrivate::Close(void) {
-
- // skip if file not open
- if ( !IsOpen() ) return;
-
- // close output stream
- try {
- m_stream.Close();
- } catch ( BamException& e ) {
- m_errorString = e.what();
- }
-}
-
-// creates a cigar string from the supplied alignment
-void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
-
- // initialize
- const size_t numCigarOperations = cigarOperations.size();
- packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
-
- // pack the cigar data into the string
- unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();
-
- // iterate over cigar operations
- vector<CigarOp>::const_iterator coIter = cigarOperations.begin();
- vector<CigarOp>::const_iterator coEnd = cigarOperations.end();
- for ( ; coIter != coEnd; ++coIter ) {
-
- // store op in packedCigar
- uint8_t cigarOp;
- switch ( coIter->Type ) {
- case (Constants::BAM_CIGAR_MATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MATCH; break;
- case (Constants::BAM_CIGAR_INS_CHAR) : cigarOp = Constants::BAM_CIGAR_INS; break;
- case (Constants::BAM_CIGAR_DEL_CHAR) : cigarOp = Constants::BAM_CIGAR_DEL; break;
- case (Constants::BAM_CIGAR_REFSKIP_CHAR) : cigarOp = Constants::BAM_CIGAR_REFSKIP; break;
- case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;
- case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;
- case (Constants::BAM_CIGAR_PAD_CHAR) : cigarOp = Constants::BAM_CIGAR_PAD; break;
- case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
- case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
- default:
- const string message = string("invalid CIGAR operation type") + coIter->Type;
- throw BamException("BamWriter::CreatePackedCigar", message);
- }
-
- *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
- pPackedCigar++;
- }
-}
-
-// encodes the supplied query sequence into 4-bit notation
-void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
-
- // prepare the encoded query string
- const size_t queryLength = query.size();
- const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
- encodedQuery.resize(encodedQueryLength);
- char* pEncodedQuery = (char*)encodedQuery.data();
- const char* pQuery = (const char*)query.data();
-
- // walk through original query sequence, encoding its bases
- unsigned char nucleotideCode;
- bool useHighWord = true;
- while ( *pQuery ) {
- switch ( *pQuery ) {
- case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
- case (Constants::BAM_DNA_A) : nucleotideCode = Constants::BAM_BASECODE_A; break;
- case (Constants::BAM_DNA_C) : nucleotideCode = Constants::BAM_BASECODE_C; break;
- case (Constants::BAM_DNA_M) : nucleotideCode = Constants::BAM_BASECODE_M; break;
- case (Constants::BAM_DNA_G) : nucleotideCode = Constants::BAM_BASECODE_G; break;
- case (Constants::BAM_DNA_R) : nucleotideCode = Constants::BAM_BASECODE_R; break;
- case (Constants::BAM_DNA_S) : nucleotideCode = Constants::BAM_BASECODE_S; break;
- case (Constants::BAM_DNA_V) : nucleotideCode = Constants::BAM_BASECODE_V; break;
- case (Constants::BAM_DNA_T) : nucleotideCode = Constants::BAM_BASECODE_T; break;
- case (Constants::BAM_DNA_W) : nucleotideCode = Constants::BAM_BASECODE_W; break;
- case (Constants::BAM_DNA_Y) : nucleotideCode = Constants::BAM_BASECODE_Y; break;
- case (Constants::BAM_DNA_H) : nucleotideCode = Constants::BAM_BASECODE_H; break;
- case (Constants::BAM_DNA_K) : nucleotideCode = Constants::BAM_BASECODE_K; break;
- case (Constants::BAM_DNA_D) : nucleotideCode = Constants::BAM_BASECODE_D; break;
- case (Constants::BAM_DNA_B) : nucleotideCode = Constants::BAM_BASECODE_B; break;
- case (Constants::BAM_DNA_N) : nucleotideCode = Constants::BAM_BASECODE_N; break;
- default:
- const string message = string("invalid base: ") + *pQuery;
- throw BamException("BamWriter::EncodeQuerySequence", message);
- }
-
- // pack the nucleotide code
- if ( useHighWord ) {
- *pEncodedQuery = nucleotideCode << 4;
- useHighWord = false;
- } else {
- *pEncodedQuery |= nucleotideCode;
- ++pEncodedQuery;
- useHighWord = true;
- }
-
- // increment the query position
- ++pQuery;
- }
-}
-
-// returns a description of the last error that occurred
-std::string BamWriterPrivate::GetErrorString(void) const {
- return m_errorString;
-}
-
-// returns whether BAM file is open for writing or not
-bool BamWriterPrivate::IsOpen(void) const {
- return m_stream.IsOpen();
-}
-
-// opens the alignment archive
-bool BamWriterPrivate::Open(const string& filename,
- const string& samHeaderText,
- const RefVector& referenceSequences)
-{
- try {
-
- // open the BGZF file for writing
- m_stream.Open(filename, IBamIODevice::WriteOnly);
-
- // write BAM file 'metadata' components
- WriteMagicNumber();
- WriteSamHeaderText(samHeaderText);
- WriteReferences(referenceSequences);
-
- // return success
- return true;
-
- } catch ( BamException& e ) {
- m_errorString = e.what();
- return false;
- }
-}
-
-// saves the alignment to the alignment archive
-bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
-
- try {
-
- // if BamAlignment contains only the core data and a raw char data buffer
- // (as a result of BamReader::GetNextAlignmentCore())
- if ( al.SupportData.HasCoreOnly )
- WriteCoreAlignment(al);
-
- // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
- // (resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code)
- else WriteAlignment(al);
-
- // if we get here, everything OK
- return true;
-
- } catch ( BamException& e ) {
- m_errorString = e.what();
- return false;
- }
-}
-
-void BamWriterPrivate::SetWriteCompressed(bool ok) {
- // modifying compression is not allowed if BAM file is open
- if ( !IsOpen() )
- m_stream.SetWriteCompressed(ok);
-}
-
-void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
-
- // calculate char lengths
- const unsigned int nameLength = al.Name.size() + 1;
- const unsigned int numCigarOperations = al.CigarData.size();
- const unsigned int queryLength = al.QueryBases.size();
- const unsigned int tagDataLength = al.TagData.size();
-
- // no way to tell if alignment's bin is already defined (there is no default, invalid value)
- // so we'll go ahead calculate its bin ID before storing
- const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
- // create our packed cigar string
- string packedCigar;
- CreatePackedCigar(al.CigarData, packedCigar);
- const unsigned int packedCigarLength = packedCigar.size();
-
- // encode the query
- string encodedQuery;
- EncodeQuerySequence(al.QueryBases, encodedQuery);
- const unsigned int encodedQueryLength = encodedQuery.size();
-
- // write the block size
- const unsigned int dataBlockSize = nameLength +
- packedCigarLength +
- encodedQueryLength +
- queryLength +
- tagDataLength;
- unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
- if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
- m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
- // assign the BAM core data
- uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
- buffer[0] = al.RefID;
- buffer[1] = al.Position;
- buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
- buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
- buffer[4] = queryLength;
- buffer[5] = al.MateRefID;
- buffer[6] = al.MatePosition;
- buffer[7] = al.InsertSize;
-
- // swap BAM core endian-ness, if necessary
- if ( m_isBigEndian ) {
- for ( int i = 0; i < 8; ++i )
- BamTools::SwapEndian_32(buffer[i]);
- }
-
- // write the BAM core
- m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
- // write the query name
- m_stream.Write(al.Name.c_str(), nameLength);
-
- // write the packed cigar
- if ( m_isBigEndian ) {
- char* cigarData = new char[packedCigarLength]();
- memcpy(cigarData, packedCigar.data(), packedCigarLength);
- if ( m_isBigEndian ) {
- for ( size_t i = 0; i < packedCigarLength; ++i )
- BamTools::SwapEndian_32p(&cigarData[i]);
- }
- m_stream.Write(cigarData, packedCigarLength);
- delete[] cigarData; // TODO: cleanup on Write exception thrown?
- }
- else
- m_stream.Write(packedCigar.data(), packedCigarLength);
-
- // write the encoded query sequence
- m_stream.Write(encodedQuery.data(), encodedQueryLength);
-
- // write the base qualities
- char* pBaseQualities = (char*)al.Qualities.data();
- for ( size_t i = 0; i < queryLength; ++i )
- pBaseQualities[i] -= 33; // FASTQ conversion
- m_stream.Write(pBaseQualities, queryLength);
-
- // write the read group tag
- if ( m_isBigEndian ) {
-
- char* tagData = new char[tagDataLength]();
- memcpy(tagData, al.TagData.data(), tagDataLength);
-
- size_t i = 0;
- while ( i < tagDataLength ) {
-
- i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
- const char type = tagData[i]; // get tag type at position i
- ++i;
-
- switch ( type ) {
-
- case(Constants::BAM_TAG_TYPE_ASCII) :
- case(Constants::BAM_TAG_TYPE_INT8) :
- case(Constants::BAM_TAG_TYPE_UINT8) :
- ++i;
- break;
-
- case(Constants::BAM_TAG_TYPE_INT16) :
- case(Constants::BAM_TAG_TYPE_UINT16) :
- BamTools::SwapEndian_16p(&tagData[i]);
- i += sizeof(uint16_t);
- break;
-
- case(Constants::BAM_TAG_TYPE_FLOAT) :
- case(Constants::BAM_TAG_TYPE_INT32) :
- case(Constants::BAM_TAG_TYPE_UINT32) :
- BamTools::SwapEndian_32p(&tagData[i]);
- i += sizeof(uint32_t);
- break;
-
- case(Constants::BAM_TAG_TYPE_HEX) :
- case(Constants::BAM_TAG_TYPE_STRING) :
- // no endian swapping necessary for hex-string/string data
- while ( tagData[i] )
- ++i;
- // increment one more for null terminator
- ++i;
- break;
-
- case(Constants::BAM_TAG_TYPE_ARRAY) :
-
- {
- // read array type
- const char arrayType = tagData[i];
- ++i;
-
- // swap endian-ness of number of elements in place, then retrieve for loop
- BamTools::SwapEndian_32p(&tagData[i]);
- int32_t numElements;
- memcpy(&numElements, &tagData[i], sizeof(uint32_t));
- i += sizeof(uint32_t);
-
- // swap endian-ness of array elements
- for ( int j = 0; j < numElements; ++j ) {
- switch (arrayType) {
- case (Constants::BAM_TAG_TYPE_INT8) :
- case (Constants::BAM_TAG_TYPE_UINT8) :
- // no endian-swapping necessary
- ++i;
- break;
- case (Constants::BAM_TAG_TYPE_INT16) :
- case (Constants::BAM_TAG_TYPE_UINT16) :
- BamTools::SwapEndian_16p(&tagData[i]);
- i += sizeof(uint16_t);
- break;
- case (Constants::BAM_TAG_TYPE_FLOAT) :
- case (Constants::BAM_TAG_TYPE_INT32) :
- case (Constants::BAM_TAG_TYPE_UINT32) :
- BamTools::SwapEndian_32p(&tagData[i]);
- i += sizeof(uint32_t);
- break;
- default:
- delete[] tagData;
- const string message = string("invalid binary array type: ") + arrayType;
- throw BamException("BamWriter::SaveAlignment", message);
- }
- }
-
- break;
- }
-
- default :
- delete[] tagData;
- const string message = string("invalid tag type: ") + type;
- throw BamException("BamWriter::SaveAlignment", message);
- }
- }
-
- m_stream.Write(tagData, tagDataLength);
- delete[] tagData; // TODO: cleanup on Write exception thrown?
- }
- else
- m_stream.Write(al.TagData.data(), tagDataLength);
-}
-
-void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
-
- // write the block size
- unsigned int blockSize = al.SupportData.BlockLength;
- if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
- m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
- // re-calculate bin (in case BamAlignment's position has been previously modified)
- const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
- // assign the BAM core data
- uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
- buffer[0] = al.RefID;
- buffer[1] = al.Position;
- buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
- buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
- buffer[4] = al.SupportData.QuerySequenceLength;
- buffer[5] = al.MateRefID;
- buffer[6] = al.MatePosition;
- buffer[7] = al.InsertSize;
-
- // swap BAM core endian-ness, if necessary
- if ( m_isBigEndian ) {
- for ( int i = 0; i < 8; ++i )
- BamTools::SwapEndian_32(buffer[i]);
- }
-
- // write the BAM core
- m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
- // write the raw char data
- m_stream.Write((char*)al.SupportData.AllCharData.data(),
- al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
-}
-
-void BamWriterPrivate::WriteMagicNumber(void) {
- // write BAM file 'magic number'
- m_stream.Write(Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH);
-}
-
-void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {
-
- // write the number of reference sequences
- uint32_t numReferenceSequences = referenceSequences.size();
- if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);
- m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);
-
- // foreach reference sequence
- RefVector::const_iterator rsIter = referenceSequences.begin();
- RefVector::const_iterator rsEnd = referenceSequences.end();
- for ( ; rsIter != rsEnd; ++rsIter ) {
-
- // write the reference sequence name length
- uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
- if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceSequenceNameLen);
- m_stream.Write((char*)&referenceSequenceNameLen, Constants::BAM_SIZEOF_INT);
-
- // write the reference sequence name
- m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
-
- // write the reference sequence length
- int32_t referenceLength = rsIter->RefLength;
- if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);
- m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);
- }
-}
-
-void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {
-
- // write the SAM header text length
- uint32_t samHeaderLen = samHeaderText.size();
- if ( m_isBigEndian ) BamTools::SwapEndian_32(samHeaderLen);
- m_stream.Write((char*)&samHeaderLen, Constants::BAM_SIZEOF_INT);
-
- // write the SAM header text
- if ( samHeaderLen > 0 )
- m_stream.Write(samHeaderText.data(), samHeaderLen);
-}
+++ /dev/null
-// ***************************************************************************
-// BamWriter_p.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-#ifndef BAMWRITER_P_H
-#define BAMWRITER_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/BamAux.h"
-#include "api/internal/BgzfStream_p.h"
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class BamAlignment;
-
-namespace Internal {
-
-class BamWriterPrivate {
-
- // ctor & dtor
- public:
- BamWriterPrivate(void);
- ~BamWriterPrivate(void);
-
- // interface methods
- public:
- void Close(void);
- std::string GetErrorString(void) const;
- bool IsOpen(void) const;
- bool Open(const std::string& filename,
- const std::string& samHeaderText,
- const BamTools::RefVector& referenceSequences);
- bool SaveAlignment(const BamAlignment& al);
- void SetWriteCompressed(bool ok);
-
- // 'internal' methods
- public:
- uint32_t CalculateMinimumBin(const int begin, int end) const;
- void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
- void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
- void WriteAlignment(const BamAlignment& al);
- void WriteCoreAlignment(const BamAlignment& al);
- void WriteMagicNumber(void);
- void WriteReferences(const BamTools::RefVector& referenceSequences);
- void WriteSamHeaderText(const std::string& samHeaderText);
-
- // data members
- private:
- BgzfStream m_stream;
- bool m_isBigEndian;
- std::string m_errorString;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BAMWRITER_P_H
+++ /dev/null
-// ***************************************************************************
-// BgzfStream_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 11 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Based on BGZF routines developed at the Broad Institute.
-// Provides the basic functionality for reading & writing BGZF files
-// Replaces the old BGZF.* files to avoid clashing with other toolkits
-// ***************************************************************************
-
-#include "api/BamAux.h"
-#include "api/BamConstants.h"
-#include "api/internal/BamDeviceFactory_p.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/BgzfStream_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include "zlib.h"
-
-#include <cstring>
-#include <algorithm>
-#include <iostream>
-#include <sstream>
-using namespace std;
-
-// ----------------------------
-// RaiiWrapper implementation
-// ----------------------------
-
-BgzfStream::RaiiWrapper::RaiiWrapper(void) {
- CompressedBlock = new char[Constants::BGZF_MAX_BLOCK_SIZE];
- UncompressedBlock = new char[Constants::BGZF_DEFAULT_BLOCK_SIZE];
-}
-
-BgzfStream::RaiiWrapper::~RaiiWrapper(void) {
-
- // clean up buffers
- delete[] CompressedBlock;
- delete[] UncompressedBlock;
- CompressedBlock = 0;
- UncompressedBlock = 0;
-}
-
-// ---------------------------
-// BgzfStream implementation
-// ---------------------------
-
-// constructor
-BgzfStream::BgzfStream(void)
- : m_blockLength(0)
- , m_blockOffset(0)
- , m_blockAddress(0)
- , m_isWriteCompressed(true)
- , m_device(0)
-{ }
-
-// destructor
-BgzfStream::~BgzfStream(void) {
- Close();
-}
-
-// checks BGZF block header
-bool BgzfStream::CheckBlockHeader(char* header) {
- return (header[0] == Constants::GZIP_ID1 &&
- header[1] == Constants::GZIP_ID2 &&
- header[2] == Z_DEFLATED &&
- (header[3] & Constants::FLG_FEXTRA) != 0 &&
- BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
- header[12] == Constants::BGZF_ID1 &&
- header[13] == Constants::BGZF_ID2 &&
- BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
-}
-
-// closes BGZF file
-void BgzfStream::Close(void) {
-
- // skip if no device open
- if ( m_device == 0 ) return;
-
- // if writing to file, flush the current BGZF block,
- // then write an empty block (as EOF marker)
- if ( m_device->IsOpen() && (m_device->Mode() == IBamIODevice::WriteOnly) ) {
- FlushBlock();
- const size_t blockLength = DeflateBlock();
- m_device->Write(Resources.CompressedBlock, blockLength);
- }
-
- // close device
- m_device->Close();
- delete m_device;
- m_device = 0;
-
- // reset state
- m_blockLength = 0;
- m_blockOffset = 0;
- m_blockAddress = 0;
- m_isWriteCompressed = true;
-}
-
-// compresses the current block
-size_t BgzfStream::DeflateBlock(void) {
-
- // initialize the gzip header
- char* buffer = Resources.CompressedBlock;
- memset(buffer, 0, 18);
- buffer[0] = Constants::GZIP_ID1;
- buffer[1] = Constants::GZIP_ID2;
- buffer[2] = Constants::CM_DEFLATE;
- buffer[3] = Constants::FLG_FEXTRA;
- buffer[9] = Constants::OS_UNKNOWN;
- buffer[10] = Constants::BGZF_XLEN;
- buffer[12] = Constants::BGZF_ID1;
- buffer[13] = Constants::BGZF_ID2;
- buffer[14] = Constants::BGZF_LEN;
-
- // set compression level
- const int compressionLevel = ( m_isWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );
-
- // loop to retry for blocks that do not compress enough
- int inputLength = m_blockOffset;
- size_t compressedLength = 0;
- const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
-
- while ( true ) {
-
- // initialize zstream values
- z_stream zs;
- zs.zalloc = NULL;
- zs.zfree = NULL;
- zs.next_in = (Bytef*)Resources.UncompressedBlock;
- zs.avail_in = inputLength;
- zs.next_out = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
- zs.avail_out = bufferSize -
- Constants::BGZF_BLOCK_HEADER_LENGTH -
- Constants::BGZF_BLOCK_FOOTER_LENGTH;
-
- // initialize the zlib compression algorithm
- int status = deflateInit2(&zs,
- compressionLevel,
- Z_DEFLATED,
- Constants::GZIP_WINDOW_BITS,
- Constants::Z_DEFAULT_MEM_LEVEL,
- Z_DEFAULT_STRATEGY);
- if ( status != Z_OK )
- throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
-
- // compress the data
- status = deflate(&zs, Z_FINISH);
-
- // if not at stream end
- if ( status != Z_STREAM_END ) {
-
- deflateEnd(&zs);
-
- // there was not enough space available in buffer
- // try to reduce the input length & re-start loop
- if ( status == Z_OK ) {
- inputLength -= 1024;
- if ( inputLength < 0 )
- throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
- continue;
- }
-
- throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
- }
-
- // finalize the compression routine
- status = deflateEnd(&zs);
- if ( status != Z_OK )
- throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
-
- // update compressedLength
- compressedLength = zs.total_out +
- Constants::BGZF_BLOCK_HEADER_LENGTH +
- Constants::BGZF_BLOCK_FOOTER_LENGTH;
- if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
- throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
-
- // quit while loop
- break;
- }
-
- // store the compressed length
- BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
-
- // store the CRC32 checksum
- uint32_t crc = crc32(0, NULL, 0);
- crc = crc32(crc, (Bytef*)Resources.UncompressedBlock, inputLength);
- BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
- BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
-
- // ensure that we have less than a block of data left
- int remaining = m_blockOffset - inputLength;
- if ( remaining > 0 ) {
- if ( remaining > inputLength )
- throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
- memcpy(Resources.UncompressedBlock, Resources.UncompressedBlock + inputLength, remaining);
- }
-
- // update block data
- m_blockOffset = remaining;
-
- // return result
- return compressedLength;
-}
-
-// flushes the data in the BGZF block
-void BgzfStream::FlushBlock(void) {
-
- BT_ASSERT_X( m_device, "BgzfStream::FlushBlock() - attempting to flush to null device" );
-
- // flush all of the remaining blocks
- while ( m_blockOffset > 0 ) {
-
- // compress the data block
- const size_t blockLength = DeflateBlock();
-
- // flush the data to our output device
- const size_t numBytesWritten = m_device->Write(Resources.CompressedBlock, blockLength);
- if ( numBytesWritten != blockLength ) {
- stringstream s("");
- s << "expected to write " << blockLength
- << " bytes during flushing, but wrote " << numBytesWritten;
- throw BamException("BgzfStream::FlushBlock", s.str());
- }
-
- // update block data
- m_blockAddress += blockLength;
- }
-}
-
-// decompresses the current block
-size_t BgzfStream::InflateBlock(const size_t& blockLength) {
-
- // setup zlib stream object
- z_stream zs;
- zs.zalloc = NULL;
- zs.zfree = NULL;
- zs.next_in = (Bytef*)Resources.CompressedBlock + 18;
- zs.avail_in = blockLength - 16;
- zs.next_out = (Bytef*)Resources.UncompressedBlock;
- zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
-
- // initialize
- int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
- if ( status != Z_OK )
- throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
-
- // decompress
- status = inflate(&zs, Z_FINISH);
- if ( status != Z_STREAM_END ) {
- inflateEnd(&zs);
- throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
- }
-
- // finalize
- status = inflateEnd(&zs);
- if ( status != Z_OK ) {
- inflateEnd(&zs);
- throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
- }
-
- // return result
- return zs.total_out;
-}
-
-bool BgzfStream::IsOpen(void) const {
- if ( m_device == 0 )
- return false;
- return m_device->IsOpen();
-}
-
-void BgzfStream::Open(const string& filename, const IBamIODevice::OpenMode mode) {
-
- // close current device if necessary
- Close();
- BT_ASSERT_X( (m_device == 0), "BgzfStream::Open() - unable to properly close previous IO device" );
-
- // retrieve new IO device depending on filename
- m_device = BamDeviceFactory::CreateDevice(filename);
- BT_ASSERT_X( m_device, "BgzfStream::Open() - unable to create IO device from filename" );
-
- // if device fails to open
- if ( !m_device->Open(mode) ) {
- const string deviceError = m_device->GetErrorString();
- const string message = string("could not open BGZF stream: \n\t") + deviceError;
- throw BamException("BgzfStream::Open", message);
- }
-}
-
-// reads BGZF data into a byte buffer
-size_t BgzfStream::Read(char* data, const size_t dataLength) {
-
- if ( dataLength == 0 )
- return 0;
-
- // if stream not open for reading
- BT_ASSERT_X( m_device, "BgzfStream::Read() - trying to read from null device");
- if ( !m_device->IsOpen() || (m_device->Mode() != IBamIODevice::ReadOnly) )
- return 0;
-
- // read blocks as needed until desired data length is retrieved
- char* output = data;
- size_t numBytesRead = 0;
- while ( numBytesRead < dataLength ) {
-
- // determine bytes available in current block
- int bytesAvailable = m_blockLength - m_blockOffset;
-
- // read (and decompress) next block if needed
- if ( bytesAvailable <= 0 ) {
- ReadBlock();
- bytesAvailable = m_blockLength - m_blockOffset;
- if ( bytesAvailable <= 0 )
- break;
- }
-
- // copy data from uncompressed source buffer into data destination buffer
- const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable );
- memcpy(output, Resources.UncompressedBlock + m_blockOffset, copyLength);
-
- // update counters
- m_blockOffset += copyLength;
- output += copyLength;
- numBytesRead += copyLength;
- }
-
- // update block data
- if ( m_blockOffset == m_blockLength ) {
- m_blockAddress = m_device->Tell();
- m_blockOffset = 0;
- m_blockLength = 0;
-
- }
-
- // return actual number of bytes read
- return numBytesRead;
-}
-
-// reads a BGZF block
-void BgzfStream::ReadBlock(void) {
-
- BT_ASSERT_X( m_device, "BgzfStream::ReadBlock() - trying to read from null IO device");
-
- // store block's starting address
- int64_t blockAddress = m_device->Tell();
-
- // read block header from file
- char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
- size_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH);
-
- // if block header empty
- if ( numBytesRead == 0 ) {
- m_blockLength = 0;
- return;
- }
-
- // if block header invalid size
- if ( numBytesRead != Constants::BGZF_BLOCK_HEADER_LENGTH )
- throw BamException("BgzfStream::ReadBlock", "invalid block header size");
-
- // validate block header contents
- if ( !BgzfStream::CheckBlockHeader(header) )
- throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
-
- // copy header contents to compressed buffer
- const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
- memcpy(Resources.CompressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
-
- // read remainder of block
- const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
- numBytesRead = m_device->Read(&Resources.CompressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining);
- if ( numBytesRead != remaining )
- throw BamException("BgzfStream::ReadBlock", "could not read data from block");
-
- // decompress block data
- numBytesRead = InflateBlock(blockLength);
-
- // update block data
- if ( m_blockLength != 0 )
- m_blockOffset = 0;
- m_blockAddress = blockAddress;
- m_blockLength = numBytesRead;
-}
-
-// seek to position in BGZF file
-void BgzfStream::Seek(const int64_t& position) {
-
- BT_ASSERT_X( m_device, "BgzfStream::Seek() - trying to seek on null IO device");
-
- // skip if device is not open
- if ( !IsOpen() ) return;
-
- // determine adjusted offset & address
- int blockOffset = (position & 0xFFFF);
- int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
-
- // attempt seek in file
- if ( m_device->IsRandomAccess() && m_device->Seek(blockAddress) ) {
-
- // update block data & return success
- m_blockLength = 0;
- m_blockAddress = blockAddress;
- m_blockOffset = blockOffset;
- }
- else {
- stringstream s("");
- s << "unable to seek to position: " << position;
- throw BamException("BgzfStream::Seek", s.str());
- }
-}
-
-void BgzfStream::SetWriteCompressed(bool ok) {
- m_isWriteCompressed = ok;
-}
-
-// get file position in BGZF file
-int64_t BgzfStream::Tell(void) const {
- if ( !IsOpen() )
- return 0;
- return ( (m_blockAddress << 16) | (m_blockOffset & 0xFFFF) );
-}
-
-// writes the supplied data into the BGZF buffer
-size_t BgzfStream::Write(const char* data, const size_t dataLength) {
-
- BT_ASSERT_X( m_device, "BgzfStream::Write() - trying to write to null IO device");
- BT_ASSERT_X( (m_device->Mode() == IBamIODevice::WriteOnly),
- "BgzfStream::Write() - trying to write to non-writable IO device");
-
- // skip if file not open for writing
- if ( !IsOpen() )
- return 0;
-
- // write blocks as needed til all data is written
- size_t numBytesWritten = 0;
- const char* input = data;
- const size_t blockLength = Constants::BGZF_DEFAULT_BLOCK_SIZE;
- while ( numBytesWritten < dataLength ) {
-
- // copy data contents to uncompressed output buffer
- unsigned int copyLength = min(blockLength - m_blockOffset, dataLength - numBytesWritten);
- char* buffer = Resources.UncompressedBlock;
- memcpy(buffer + m_blockOffset, input, copyLength);
-
- // update counter
- m_blockOffset += copyLength;
- input += copyLength;
- numBytesWritten += copyLength;
-
- // flush (& compress) output buffer when full
- if ( m_blockOffset == blockLength )
- FlushBlock();
- }
-
- // return actual number of bytes written
- return numBytesWritten;
-}
+++ /dev/null
-// ***************************************************************************
-// BgzfStream_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011(DB)
-// ---------------------------------------------------------------------------
-// Based on BGZF routines developed at the Broad Institute.
-// Provides the basic functionality for reading & writing BGZF files
-// Replaces the old BGZF.* files to avoid clashing with other toolkits
-// ***************************************************************************
-
-#ifndef BGZFSTREAM_P_H
-#define BGZFSTREAM_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/api_global.h"
-#include "api/IBamIODevice.h"
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class BgzfStream {
-
- // constructor & destructor
- public:
- BgzfStream(void);
- ~BgzfStream(void);
-
- // main interface methods
- public:
- // closes BGZF file
- void Close(void);
- // returns true if BgzfStream open for IO
- bool IsOpen(void) const;
- // opens the BGZF file
- void Open(const std::string& filename, const IBamIODevice::OpenMode mode);
- // reads BGZF data into a byte buffer
- size_t Read(char* data, const size_t dataLength);
- // seek to position in BGZF file
- void Seek(const int64_t& position);
- // sets IO device (closes previous, if any, but does not attempt to open)
- void SetIODevice(IBamIODevice* device);
- // enable/disable compressed output
- void SetWriteCompressed(bool ok);
- // get file position in BGZF file
- int64_t Tell(void) const;
- // writes the supplied data into the BGZF buffer
- size_t Write(const char* data, const size_t dataLength);
-
- // internal methods
- private:
- // compresses the current block
- size_t DeflateBlock(void);
- // flushes the data in the BGZF block
- void FlushBlock(void);
- // de-compresses the current block
- size_t InflateBlock(const size_t& blockLength);
- // reads a BGZF block
- void ReadBlock(void);
-
- // static 'utility' methods
- public:
- // checks BGZF block header
- static bool CheckBlockHeader(char* header);
-
- // data members
- public:
- unsigned int m_blockLength;
- unsigned int m_blockOffset;
- uint64_t m_blockAddress;
-
- bool m_isWriteCompressed;
- IBamIODevice* m_device;
-
- struct RaiiWrapper {
- RaiiWrapper(void);
- ~RaiiWrapper(void);
- char* UncompressedBlock;
- char* CompressedBlock;
- };
- RaiiWrapper Resources;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // BGZFSTREAM_P_H
--- /dev/null
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal
+# ==========================
+
+set ( InternalDir "internal" )
+
+add_subdirectory ( bam )
+add_subdirectory ( index )
+add_subdirectory ( io )
+add_subdirectory ( sam )
+add_subdirectory ( utils )
+
+set ( InternalSources
+ ${InternalBamSources}
+ ${InternalIndexSources}
+ ${InternalIOSources}
+ ${InternalSamSources}
+ ${InternalUtilsSources}
+
+ PARENT_SCOPE # <-- leave this last
+ )
+
+++ /dev/null
-// ***************************************************************************
-// ILocalIODevice_p.cpp (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides shared behavior for files & pipes
-// ***************************************************************************
-
-#include "api/internal/ILocalIODevice_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cstdio>
-using namespace std;
-
-ILocalIODevice::ILocalIODevice(void)
- : IBamIODevice()
- , m_stream(0)
-{ }
-
-ILocalIODevice::~ILocalIODevice(void) {
- Close();
-}
-
-void ILocalIODevice::Close(void) {
-
- // skip if not open
- if ( !IsOpen() )
- return;
-
- // flush & close FILE*
- fflush(m_stream);
- fclose(m_stream);
- m_stream = 0;
-
- // reset other device state
- m_mode = IBamIODevice::NotOpen;
-}
-
-size_t ILocalIODevice::Read(char* data, const unsigned int numBytes) {
- BT_ASSERT_X( m_stream, "ILocalIODevice::Read: trying to read from null stream" );
- BT_ASSERT_X( (m_mode == IBamIODevice::ReadOnly), "ILocalIODevice::Read: device not in read-only mode");
- return fread(data, sizeof(char), numBytes, m_stream);
-}
-
-int64_t ILocalIODevice::Tell(void) const {
- BT_ASSERT_X( m_stream, "ILocalIODevice::Tell: trying to get file position fromnull stream" );
- return ftell64(m_stream);
-}
-
-size_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) {
- BT_ASSERT_X( m_stream, "ILocalIODevice::Write: tryint to write to null stream" );
- BT_ASSERT_X( (m_mode == IBamIODevice::WriteOnly), "ILocalIODevice::Write: device not in write-only mode" );
- return fwrite(data, sizeof(char), numBytes, m_stream);
-}
+++ /dev/null
-// ***************************************************************************
-// ILocalIODevice_p.h (c) 2011 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides shared behavior for files & pipes
-// ***************************************************************************
-
-#ifndef ILOCALIODEVICE_P_H
-#define ILOCALIODEVICE_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/IBamIODevice.h"
-
-namespace BamTools {
-namespace Internal {
-
-class ILocalIODevice : public IBamIODevice {
-
- // ctor & dtor
- public:
- ILocalIODevice(void);
- virtual ~ILocalIODevice(void);
-
- // IBamIODevice implementation
- public:
- virtual void Close(void);
- virtual size_t Read(char* data, const unsigned int numBytes);
- virtual int64_t Tell(void) const;
- virtual size_t Write(const char* data, const unsigned int numBytes);
-
- // data members
- protected:
- FILE* m_stream;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // ILOCALIODEVICE_P_H
+++ /dev/null
-// ***************************************************************************
-// SamFormatParser.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for parsing SAM header text into SamHeader object
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/BamException_p.h"
-#include "api/internal/SamFormatParser_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-SamFormatParser::SamFormatParser(SamHeader& header)
- : m_header(header)
-{ }
-
-SamFormatParser::~SamFormatParser(void) { }
-
-void SamFormatParser::Parse(const string& headerText) {
-
- // clear header's prior contents
- m_header.Clear();
-
- // empty header is OK, but skip processing
- if ( headerText.empty() )
- return;
-
- // other wise parse SAM lines
- istringstream headerStream(headerText);
- string headerLine("");
- while ( getline(headerStream, headerLine) )
- ParseSamLine(headerLine);
-}
-
-void SamFormatParser::ParseSamLine(const string& line) {
-
- // skip if line is not long enough to contain true values
- if ( line.length() < 5 ) return;
-
- // determine token at beginning of line
- const string firstToken = line.substr(0,3);
- string restOfLine = line.substr(4);
- if ( firstToken == Constants::SAM_HD_BEGIN_TOKEN) ParseHDLine(restOfLine);
- else if ( firstToken == Constants::SAM_SQ_BEGIN_TOKEN) ParseSQLine(restOfLine);
- else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);
- else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);
- else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
- else {
- const string message = string("unknown token: ") + firstToken;
- throw BamException("SamFormatParser::ParseSamLine", message);
- }
-}
-
-void SamFormatParser::ParseHDLine(const string& line) {
-
- // split HD lines into tokens
- vector<string> tokens = Split(line, Constants::SAM_TAB);
-
- // iterate over tokens
- vector<string>::const_iterator tokenIter = tokens.begin();
- vector<string>::const_iterator tokenEnd = tokens.end();
- for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
- // get tag/value
- const string tokenTag = (*tokenIter).substr(0,2);
- const string tokenValue = (*tokenIter).substr(3);
-
- // set header contents
- if ( tokenTag == Constants::SAM_HD_VERSION_TAG ) m_header.Version = tokenValue;
- else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG ) m_header.SortOrder = tokenValue;
- else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
- else {
- const string message = string("unknown HD tag: ") + tokenTag;
- throw BamException("SamFormatParser::ParseHDLine", message);
- }
- }
-
- // check for required tags
- if ( !m_header.HasVersion() )
- throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
-}
-
-void SamFormatParser::ParseSQLine(const string& line) {
-
- SamSequence seq;
-
- // split SQ line into tokens
- vector<string> tokens = Split(line, Constants::SAM_TAB);
-
- // iterate over tokens
- vector<string>::const_iterator tokenIter = tokens.begin();
- vector<string>::const_iterator tokenEnd = tokens.end();
- for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
- // get tag/value
- const string tokenTag = (*tokenIter).substr(0,2);
- const string tokenValue = (*tokenIter).substr(3);
-
- // set sequence contents
- if ( tokenTag == Constants::SAM_SQ_NAME_TAG ) seq.Name = tokenValue;
- else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG ) seq.Length = tokenValue;
- else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
- else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG ) seq.Checksum = tokenValue;
- else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG ) seq.Species = tokenValue;
- else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue;
- else {
- const string message = string("unknown SQ tag: ") + tokenTag;
- throw BamException("SamFormatParser::ParseSQLine", message);
- }
- }
-
- // check for required tags
- if ( !seq.HasName() )
- throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
- if ( !seq.HasLength() )
- throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
-
- // store SAM sequence entry
- m_header.Sequences.Add(seq);
-}
-
-void SamFormatParser::ParseRGLine(const string& line) {
-
- SamReadGroup rg;
-
- // split string into tokens
- vector<string> tokens = Split(line, Constants::SAM_TAB);
-
- // iterate over tokens
- vector<string>::const_iterator tokenIter = tokens.begin();
- vector<string>::const_iterator tokenEnd = tokens.end();
- for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
- // get token tag/value
- const string tokenTag = (*tokenIter).substr(0,2);
- const string tokenValue = (*tokenIter).substr(3);
-
- // set read group contents
- if ( tokenTag == Constants::SAM_RG_ID_TAG ) rg.ID = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG ) rg.Description = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG ) rg.FlowOrder = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG ) rg.KeySequence = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG ) rg.PlatformUnit = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG ) rg.ProductionDate = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG ) rg.Program = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG ) rg.SequencingTechnology = tokenValue;
- else {
- const string message = string("unknown RG tag: ") + tokenTag;
- throw BamException("SamFormatParser::ParseRGLine", message);
- }
- }
-
- // check for required tags
- if ( !rg.HasID() )
- throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
-
- // store SAM read group entry
- m_header.ReadGroups.Add(rg);
-}
-
-void SamFormatParser::ParsePGLine(const string& line) {
-
- SamProgram pg;
-
- // split string into tokens
- vector<string> tokens = Split(line, Constants::SAM_TAB);
-
- // iterate over tokens
- vector<string>::const_iterator tokenIter = tokens.begin();
- vector<string>::const_iterator tokenEnd = tokens.end();
- for ( ; tokenIter != tokenEnd; ++tokenIter ) {
-
- // get token tag/value
- const string tokenTag = (*tokenIter).substr(0,2);
- const string tokenValue = (*tokenIter).substr(3);
-
- // set program record contents
- if ( tokenTag == Constants::SAM_PG_ID_TAG ) pg.ID = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_NAME_TAG ) pg.Name = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) pg.CommandLine = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) pg.Version = tokenValue;
- else {
- const string message = string("unknown PG tag: ") + tokenTag;
- throw BamException("SamFormatParser::ParsePGLine", message);
- }
- }
-
- // check for required tags
- if ( !pg.HasID() )
- throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
-
- // store SAM program entry
- m_header.Programs.Add(pg);
-}
-
-void SamFormatParser::ParseCOLine(const string& line) {
- // simply add line to comments list
- m_header.Comments.push_back(line);
-}
-
-const vector<string> SamFormatParser::Split(const string& line, const char delim) {
- vector<string> tokens;
- stringstream lineStream(line);
- string token;
- while ( getline(lineStream, token, delim) )
- tokens.push_back(token);
- return tokens;
-}
+++ /dev/null
-// ***************************************************************************
-// SamFormatParser.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for parsing SAM header text into SamHeader object
-// ***************************************************************************
-
-#ifndef SAM_FORMAT_PARSER_H
-#define SAM_FORMAT_PARSER_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class SamHeader;
-
-namespace Internal {
-
-class SamFormatParser {
-
- // ctor & dtor
- public:
- SamFormatParser(BamTools::SamHeader& header);
- ~SamFormatParser(void);
-
- // parse text & populate header data
- public:
- void Parse(const std::string& headerText);
-
- // internal methods
- private:
- void ParseSamLine(const std::string& line);
- void ParseHDLine(const std::string& line);
- void ParseSQLine(const std::string& line);
- void ParseRGLine(const std::string& line);
- void ParsePGLine(const std::string& line);
- void ParseCOLine(const std::string& line);
- const std::vector<std::string> Split(const std::string& line, const char delim);
-
- // data members
- private:
- SamHeader& m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_FORMAT_PARSER_H
+++ /dev/null
-// ***************************************************************************
-// SamFormatPrinter.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for printing formatted SAM header to string
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/SamFormatPrinter_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <iostream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// ------------------------
-
-static inline
-const string FormatTag(const string& tag, const string& value) {
- return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value);
-}
-
-// ---------------------------------
-// SamFormatPrinter implementation
-// ---------------------------------
-
-SamFormatPrinter::SamFormatPrinter(const SamHeader& header)
- : m_header(header)
-{ }
-
-SamFormatPrinter::~SamFormatPrinter(void) { }
-
-const string SamFormatPrinter::ToString(void) const {
-
- // clear out stream
- stringstream out("");
-
- // generate formatted header text
- PrintHD(out);
- PrintSQ(out);
- PrintRG(out);
- PrintPG(out);
- PrintCO(out);
-
- // return result
- return out.str();
-}
-
-void SamFormatPrinter::PrintHD(std::stringstream& out) const {
-
- // if header has @HD data
- if ( m_header.HasVersion() ) {
-
- // @HD VN:<Version>
- out << Constants::SAM_HD_BEGIN_TOKEN
- << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version);
-
- // SO:<SortOrder>
- if ( m_header.HasSortOrder() )
- out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder);
-
- // GO:<GroupOrder>
- if ( m_header.HasGroupOrder() )
- out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder);
-
- // newline
- out << endl;
- }
-}
-
-void SamFormatPrinter::PrintSQ(std::stringstream& out) const {
-
- // iterate over sequence entries
- SamSequenceConstIterator seqIter = m_header.Sequences.ConstBegin();
- SamSequenceConstIterator seqEnd = m_header.Sequences.ConstEnd();
- for ( ; seqIter != seqEnd; ++seqIter ) {
- const SamSequence& seq = (*seqIter);
-
- // @SQ SN:<Name> LN:<Length>
- out << Constants::SAM_SQ_BEGIN_TOKEN
- << FormatTag(Constants::SAM_SQ_NAME_TAG, seq.Name)
- << FormatTag(Constants::SAM_SQ_LENGTH_TAG, seq.Length);
-
- // AS:<AssemblyID>
- if ( seq.HasAssemblyID() )
- out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, seq.AssemblyID);
-
- // M5:<Checksum>
- if ( seq.HasChecksum() )
- out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, seq.Checksum);
-
- // SP:<Species>
- if ( seq.HasSpecies() )
- out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, seq.Species);
-
- // UR:<URI>
- if ( seq.HasURI() )
- out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI);
-
- // newline
- out << endl;
- }
-}
-
-void SamFormatPrinter::PrintRG(std::stringstream& out) const {
-
- // iterate over read group entries
- SamReadGroupConstIterator rgIter = m_header.ReadGroups.ConstBegin();
- SamReadGroupConstIterator rgEnd = m_header.ReadGroups.ConstEnd();
- for ( ; rgIter != rgEnd; ++rgIter ) {
- const SamReadGroup& rg = (*rgIter);
-
- // @RG ID:<ID>
- out << Constants::SAM_RG_BEGIN_TOKEN
- << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID);
-
- // CN:<SequencingCenter>
- if ( rg.HasSequencingCenter() )
- out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter);
-
- // DS:<Description>
- if ( rg.HasDescription() )
- out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, rg.Description);
-
- // DT:<ProductionDate>
- if ( rg.HasProductionDate() )
- out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate);
-
- // FO:<FlowOrder>
- if ( rg.HasFlowOrder() )
- out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, rg.FlowOrder);
-
- // KS:<KeySequence>
- if ( rg.HasKeySequence() )
- out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, rg.KeySequence);
-
- // LB:<Library>
- if ( rg.HasLibrary() )
- out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library);
-
- // PG:<Program>
- if ( rg.HasProgram() )
- out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, rg.Program);
-
- // PI:<PredictedInsertSize>
- if ( rg.HasPredictedInsertSize() )
- out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, rg.PredictedInsertSize);
-
- // PL:<SequencingTechnology>
- if ( rg.HasSequencingTechnology() )
- out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, rg.SequencingTechnology);
-
- // PU:<PlatformUnit>
- if ( rg.HasPlatformUnit() )
- out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit);
-
- // SM:<Sample>
- if ( rg.HasSample() )
- out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample);
-
- // newline
- out << endl;
- }
-}
-
-void SamFormatPrinter::PrintPG(std::stringstream& out) const {
-
- // iterate over program record entries
- SamProgramConstIterator pgIter = m_header.Programs.ConstBegin();
- SamProgramConstIterator pgEnd = m_header.Programs.ConstEnd();
- for ( ; pgIter != pgEnd; ++pgIter ) {
- const SamProgram& pg = (*pgIter);
-
- // @PG ID:<ID>
- out << Constants::SAM_PG_BEGIN_TOKEN
- << FormatTag(Constants::SAM_PG_ID_TAG, pg.ID);
-
- // PN:<Name>
- if ( pg.HasName() )
- out << FormatTag(Constants::SAM_PG_NAME_TAG, pg.Name);
-
- // CL:<CommandLine>
- if ( pg.HasCommandLine() )
- out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, pg.CommandLine);
-
- // PP:<PreviousProgramID>
- if ( pg.HasPreviousProgramID() )
- out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, pg.PreviousProgramID);
-
- // VN:<Version>
- if ( pg.HasVersion() )
- out << FormatTag(Constants::SAM_PG_VERSION_TAG, pg.Version);
-
- // newline
- out << endl;
- }
-}
-
-void SamFormatPrinter::PrintCO(std::stringstream& out) const {
-
- // iterate over comments
- vector<string>::const_iterator commentIter = m_header.Comments.begin();
- vector<string>::const_iterator commentEnd = m_header.Comments.end();
- for ( ; commentIter != commentEnd; ++commentIter ) {
-
- // @CO <Comment>
- out << Constants::SAM_CO_BEGIN_TOKEN
- << Constants::SAM_TAB
- << (*commentIter)
- << endl;
- }
-}
+++ /dev/null
-// ***************************************************************************
-// SamFormatPrinter.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for printing formatted SAM header to string
-// ***************************************************************************
-
-#ifndef SAM_FORMAT_PRINTER_H
-#define SAM_FORMAT_PRINTER_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <sstream>
-#include <string>
-
-namespace BamTools {
-
-class SamHeader;
-
-namespace Internal {
-
-class SamFormatPrinter {
-
- // ctor & dtor
- public:
- SamFormatPrinter(const BamTools::SamHeader& header);
- ~SamFormatPrinter(void);
-
- // generates SAM-formatted string from header data
- public:
- const std::string ToString(void) const;
-
- // internal methods
- private:
- void PrintHD(std::stringstream& out) const;
- void PrintSQ(std::stringstream& out) const;
- void PrintRG(std::stringstream& out) const;
- void PrintPG(std::stringstream& out) const;
- void PrintCO(std::stringstream& out) const;
-
- // data members
- private:
- const SamHeader& m_header;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_FORMAT_PRINTER_H
+++ /dev/null
-// ***************************************************************************
-// SamHeaderValidator.cpp (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for validating SamHeader data
-// ***************************************************************************
-
-#include "api/SamConstants.h"
-#include "api/SamHeader.h"
-#include "api/internal/SamHeaderValidator_p.h"
-#include "api/internal/SamHeaderVersion_p.h"
-using namespace BamTools;
-using namespace BamTools::Internal;
-
-#include <cctype>
-#include <set>
-#include <sstream>
-using namespace std;
-
-// ------------------------
-// static utility methods
-// -------------------------
-
-static
-bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
-
- // can omit checking chars if lengths not equal
- const int lhsLength = lhs.length();
- const int rhsLength = rhs.length();
- if ( lhsLength != rhsLength )
- return false;
-
- // do *basic* toupper checks on each string char's
- for ( int i = 0; i < lhsLength; ++i ) {
- if ( toupper( (int)lhs.at(i)) != toupper( (int)rhs.at(i)) )
- return false;
- }
-
- // otherwise OK
- return true;
-}
-
-// ------------------------------------------------------------------------
-// Allow validation rules to vary, as needed, between SAM header versions
-//
-// use SAM_VERSION_X_Y to tag important changes
-//
-// Together, they will allow for comparisons like:
-// if ( m_version < SAM_VERSION_2_0 ) {
-// // use some older rule
-// else
-// // use rule introduced with version 2.0
-
-static const SamHeaderVersion SAM_VERSION_1_0 = SamHeaderVersion(1,0);
-static const SamHeaderVersion SAM_VERSION_1_1 = SamHeaderVersion(1,1);
-static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);
-static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);
-static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);
-
-// TODO: This functionality is currently unused.
-// Make validation "version-aware."
-//
-// ------------------------------------------------------------------------
-
-const string SamHeaderValidator::ERROR_PREFIX = "ERROR: ";
-const string SamHeaderValidator::WARN_PREFIX = "WARNING: ";
-const string SamHeaderValidator::NEWLINE = "\n";
-
-SamHeaderValidator::SamHeaderValidator(const SamHeader& header)
- : m_header(header)
-{ }
-
-SamHeaderValidator::~SamHeaderValidator(void) { }
-
-void SamHeaderValidator::AddError(const string& message) {
- m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::AddWarning(const string& message) {
- m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
-
- // skip if no error messages
- if ( m_errorMessages.empty() )
- return;
-
- // print error header line
- stream << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
-
- // print each error message
- vector<string>::const_iterator errorIter = m_errorMessages.begin();
- vector<string>::const_iterator errorEnd = m_errorMessages.end();
- for ( ; errorIter != errorEnd; ++errorIter )
- stream << (*errorIter);
-}
-
-void SamHeaderValidator::PrintMessages(ostream& stream) {
- PrintErrorMessages(stream);
- PrintWarningMessages(stream);
-}
-
-void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
-
- // skip if no warning messages
- if ( m_warningMessages.empty() )
- return;
-
- // print warning header line
- stream << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;
-
- // print each warning message
- vector<string>::const_iterator warnIter = m_warningMessages.begin();
- vector<string>::const_iterator warnEnd = m_warningMessages.end();
- for ( ; warnIter != warnEnd; ++warnIter )
- stream << (*warnIter);
-}
-
-// entry point for validation
-bool SamHeaderValidator::Validate(void) {
- bool isValid = true;
- isValid &= ValidateMetadata();
- isValid &= ValidateSequenceDictionary();
- isValid &= ValidateReadGroupDictionary();
- isValid &= ValidateProgramChain();
- return isValid;
-}
-
-// check all SAM header 'metadata'
-bool SamHeaderValidator::ValidateMetadata(void) {
- bool isValid = true;
- isValid &= ValidateVersion();
- isValid &= ValidateSortOrder();
- isValid &= ValidateGroupOrder();
- return isValid;
-}
-
-// check SAM header version tag
-bool SamHeaderValidator::ValidateVersion(void) {
-
- const string& version = m_header.Version;
-
- // warn if version not present
- if ( version.empty() ) {
- AddWarning("Version (VN) missing. Not required, but strongly recommended");
- return true;
- }
-
- // invalid if version does not contain a period
- const size_t periodFound = version.find(Constants::SAM_PERIOD);
- if ( periodFound == string::npos ) {
- AddError("Invalid version (VN) format: " + version);
- return false;
- }
-
- // invalid if major version is empty or contains non-digits
- const string majorVersion = version.substr(0, periodFound);
- if ( majorVersion.empty() || !ContainsOnlyDigits(majorVersion) ) {
- AddError("Invalid version (VN) format: " + version);
- return false;
- }
-
- // invalid if major version is empty or contains non-digits
- const string minorVersion = version.substr(periodFound + 1);
- if ( minorVersion.empty() || !ContainsOnlyDigits(minorVersion) ) {
- AddError("Invalid version (VN) format: " + version);
- return false;
- }
-
- // TODO: check if version is not just syntactically OK,
- // but is also a valid SAM version ( 1.0 .. CURRENT )
-
- // all checked out this far, then version is OK
- return true;
-}
-
-// assumes non-empty input string
-bool SamHeaderValidator::ContainsOnlyDigits(const string& s) {
- const size_t nonDigitPosition = s.find_first_not_of(Constants::SAM_DIGITS);
- return ( nonDigitPosition == string::npos ) ;
-}
-
-// validate SAM header sort order tag
-bool SamHeaderValidator::ValidateSortOrder(void) {
-
- const string& sortOrder = m_header.SortOrder;
-
- // warn if sort order not present
- if ( sortOrder.empty() ) {
- AddWarning("Sort order (SO) missing. Not required, but strongly recommended");
- return true;
- }
-
- // if sort order is valid keyword
- if ( sortOrder == Constants::SAM_HD_SORTORDER_COORDINATE ||
- sortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME ||
- sortOrder == Constants::SAM_HD_SORTORDER_UNSORTED
- )
- {
- return true;
- }
-
- // otherwise
- AddError("Invalid sort order (SO): " + sortOrder);
- return false;
-}
-
-// validate SAM header group order tag
-bool SamHeaderValidator::ValidateGroupOrder(void) {
-
- const string& groupOrder = m_header.GroupOrder;
-
- // if no group order, no problem, just return OK
- if ( groupOrder.empty() )
- return true;
-
- // if group order is valid keyword
- if ( groupOrder == Constants::SAM_HD_GROUPORDER_NONE ||
- groupOrder == Constants::SAM_HD_GROUPORDER_QUERY ||
- groupOrder == Constants::SAM_HD_GROUPORDER_REFERENCE
- )
- {
- return true;
- }
-
- // otherwise
- AddError("Invalid group order (GO): " + groupOrder);
- return false;
-}
-
-// validate SAM header sequence dictionary
-bool SamHeaderValidator::ValidateSequenceDictionary(void) {
-
- bool isValid = true;
-
- // check for unique sequence names
- isValid &= ContainsUniqueSequenceNames();
-
- // iterate over sequences
- const SamSequenceDictionary& sequences = m_header.Sequences;
- SamSequenceConstIterator seqIter = sequences.ConstBegin();
- SamSequenceConstIterator seqEnd = sequences.ConstEnd();
- for ( ; seqIter != seqEnd; ++seqIter ) {
- const SamSequence& seq = (*seqIter);
- isValid &= ValidateSequence(seq);
- }
-
- // return validation state
- return isValid;
-}
-
-// make sure all SQ names are unique
-bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
-
- bool isValid = true;
- set<string> sequenceNames;
- set<string>::iterator nameIter;
-
- // iterate over sequences
- const SamSequenceDictionary& sequences = m_header.Sequences;
- SamSequenceConstIterator seqIter = sequences.ConstBegin();
- SamSequenceConstIterator seqEnd = sequences.ConstEnd();
- for ( ; seqIter != seqEnd; ++seqIter ) {
- const SamSequence& seq = (*seqIter);
-
- // lookup sequence name
- const string& name = seq.Name;
- nameIter = sequenceNames.find(name);
-
- // error if found (duplicate entry)
- if ( nameIter != sequenceNames.end() ) {
- AddError("Sequence name (SN): " + name + " is not unique");
- isValid = false;
- }
-
- // otherwise ok, store name
- sequenceNames.insert(name);
- }
-
- // return validation state
- return isValid;
-}
-
-// validate SAM header sequence entry
-bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
- bool isValid = true;
- isValid &= CheckNameFormat(seq.Name);
- isValid &= CheckLengthInRange(seq.Length);
- return isValid;
-}
-
-// check sequence name is valid format
-bool SamHeaderValidator::CheckNameFormat(const string& name) {
-
- // invalid if name is empty
- if ( name.empty() ) {
- AddError("Sequence entry (@SQ) is missing SN tag");
- return false;
- }
-
- // invalid if first character is a reserved char
- const char firstChar = name.at(0);
- if ( firstChar == Constants::SAM_EQUAL || firstChar == Constants::SAM_STAR ) {
- AddError("Invalid sequence name (SN): " + name);
- return false;
- }
- // otherwise OK
- return true;
-}
-
-// check that sequence length is within accepted range
-bool SamHeaderValidator::CheckLengthInRange(const string& length) {
-
- // invalid if empty
- if ( length.empty() ) {
- AddError("Sequence entry (@SQ) is missing LN tag");
- return false;
- }
-
- // convert string length to numeric
- stringstream lengthStream(length);
- unsigned int sequenceLength;
- lengthStream >> sequenceLength;
-
- // invalid if length outside accepted range
- if ( sequenceLength < Constants::SAM_SQ_LENGTH_MIN || sequenceLength > Constants::SAM_SQ_LENGTH_MAX ) {
- AddError("Sequence length (LN): " + length + " out of range");
- return false;
- }
-
- // otherwise OK
- return true;
-}
-
-// validate SAM header read group dictionary
-bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
-
- bool isValid = true;
-
- // check for unique read group IDs & platform units
- isValid &= ContainsUniqueIDsAndPlatformUnits();
-
- // iterate over read groups
- const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
- SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
- SamReadGroupConstIterator rgEnd = readGroups.ConstEnd();
- for ( ; rgIter != rgEnd; ++rgIter ) {
- const SamReadGroup& rg = (*rgIter);
- isValid &= ValidateReadGroup(rg);
- }
-
- // return validation state
- return isValid;
-}
-
-// make sure RG IDs and platform units are unique
-bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
-
- bool isValid = true;
- set<string> readGroupIds;
- set<string> platformUnits;
- set<string>::iterator idIter;
- set<string>::iterator puIter;
-
- // iterate over sequences
- const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
- SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
- SamReadGroupConstIterator rgEnd = readGroups.ConstEnd();
- for ( ; rgIter != rgEnd; ++rgIter ) {
- const SamReadGroup& rg = (*rgIter);
-
- // --------------------------------
- // check for unique ID
-
- // lookup read group ID
- const string& id = rg.ID;
- idIter = readGroupIds.find(id);
-
- // error if found (duplicate entry)
- if ( idIter != readGroupIds.end() ) {
- AddError("Read group ID (ID): " + id + " is not unique");
- isValid = false;
- }
-
- // otherwise ok, store id
- readGroupIds.insert(id);
-
- // --------------------------------
- // check for unique platform unit
-
- // lookup platform unit
- const string& pu = rg.PlatformUnit;
- puIter = platformUnits.find(pu);
-
- // error if found (duplicate entry)
- if ( puIter != platformUnits.end() ) {
- AddError("Platform unit (PU): " + pu + " is not unique");
- isValid = false;
- }
-
- // otherwise ok, store platform unit
- platformUnits.insert(pu);
- }
-
- // return validation state
- return isValid;
-}
-
-// validate SAM header read group entry
-bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
- bool isValid = true;
- isValid &= CheckReadGroupID(rg.ID);
- isValid &= CheckSequencingTechnology(rg.SequencingTechnology);
- return isValid;
-}
-
-// make sure RG ID exists
-bool SamHeaderValidator::CheckReadGroupID(const string& id) {
-
- // invalid if empty
- if ( id.empty() ) {
- AddError("Read group entry (@RG) is missing ID tag");
- return false;
- }
-
- // otherwise OK
- return true;
-}
-
-// make sure RG sequencing tech is one of the accepted keywords
-bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
-
- // if no technology provided, no problem, just return OK
- if ( technology.empty() )
- return true;
-
- // if technology is valid keyword
- if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_CAPILLARY) ||
- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS) ||
- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA) ||
- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) ||
- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454) ||
- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO) ||
- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID)
- )
- {
- return true;
- }
-
- // otherwise
- AddError("Invalid read group sequencing platform (PL): " + technology);
- return false;
-}
-
-// validate the SAM header "program chain"
-bool SamHeaderValidator::ValidateProgramChain(void) {
- bool isValid = true;
- isValid &= ContainsUniqueProgramIds();
- isValid &= ValidatePreviousProgramIds();
- return isValid;
-}
-
-// make sure all PG IDs are unique
-bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
-
- bool isValid = true;
- set<string> programIds;
- set<string>::iterator pgIdIter;
-
- // iterate over program records
- const SamProgramChain& programs = m_header.Programs;
- SamProgramConstIterator pgIter = programs.ConstBegin();
- SamProgramConstIterator pgEnd = programs.ConstEnd();
- for ( ; pgIter != pgEnd; ++pgIter ) {
- const SamProgram& pg = (*pgIter);
-
- // lookup program ID
- const string& pgId = pg.ID;
- pgIdIter = programIds.find(pgId);
-
- // error if found (duplicate entry)
- if ( pgIdIter != programIds.end() ) {
- AddError("Program ID (ID): " + pgId + " is not unique");
- isValid = false;
- }
-
- // otherwise ok, store ID
- programIds.insert(pgId);
- }
-
- // return validation state
- return isValid;
-}
-
-// make sure that any PP tags present point to existing @PG IDs
-bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
-
- bool isValid = true;
-
- // iterate over program records
- const SamProgramChain& programs = m_header.Programs;
- SamProgramConstIterator pgIter = programs.ConstBegin();
- SamProgramConstIterator pgEnd = programs.ConstEnd();
- for ( ; pgIter != pgEnd; ++pgIter ) {
- const SamProgram& pg = (*pgIter);
-
- // ignore record for validation if PreviousProgramID is empty
- const string& ppId = pg.PreviousProgramID;
- if ( ppId.empty() )
- continue;
-
- // see if program "chain" contains an entry for ppId
- if ( !programs.Contains(ppId) ) {
- AddError("PreviousProgramID (PP): " + ppId + " is not a known ID");
- isValid = false;
- }
- }
-
- // return validation state
- return isValid;
-}
+++ /dev/null
-// ***************************************************************************
-// SamHeaderValidator.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 6 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for validating SamHeader data
-// ***************************************************************************
-
-#ifndef SAM_HEADER_VALIDATOR_P_H
-#define SAM_HEADER_VALIDATOR_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class SamHeader;
-class SamReadGroup;
-class SamSequence;
-
-namespace Internal {
-
-class SamHeaderValidator {
-
- // ctor & dtor
- public:
- SamHeaderValidator(const SamHeader& header);
- ~SamHeaderValidator(void);
-
- // SamHeaderValidator interface
- public:
-
- // prints error & warning messages
- void PrintMessages(std::ostream& stream);
-
- // validates SamHeader data, returns true/false accordingly
- bool Validate(void);
-
- // internal methods
- private:
-
- // validate header metadata
- bool ValidateMetadata(void);
- bool ValidateVersion(void);
- bool ContainsOnlyDigits(const std::string& s);
- bool ValidateSortOrder(void);
- bool ValidateGroupOrder(void);
-
- // validate sequence dictionary
- bool ValidateSequenceDictionary(void);
- bool ContainsUniqueSequenceNames(void);
- bool CheckNameFormat(const std::string& name);
- bool ValidateSequence(const SamSequence& seq);
- bool CheckLengthInRange(const std::string& length);
-
- // validate read group dictionary
- bool ValidateReadGroupDictionary(void);
- bool ContainsUniqueIDsAndPlatformUnits(void);
- bool ValidateReadGroup(const SamReadGroup& rg);
- bool CheckReadGroupID(const std::string& id);
- bool CheckSequencingTechnology(const std::string& technology);
-
- // validate program data
- bool ValidateProgramChain(void);
- bool ContainsUniqueProgramIds(void);
- bool ValidatePreviousProgramIds(void);
-
- // error reporting
- void AddError(const std::string& message);
- void AddWarning(const std::string& message);
- void PrintErrorMessages(std::ostream& stream);
- void PrintWarningMessages(std::ostream& stream);
-
- // data members
- private:
-
- // SamHeader being validated
- const SamHeader& m_header;
-
- // error reporting helpers
- static const std::string ERROR_PREFIX;
- static const std::string WARN_PREFIX;
- static const std::string NEWLINE;
-
- // error reporting messages
- std::vector<std::string> m_errorMessages;
- std::vector<std::string> m_warningMessages;
-};
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_HEADER_VALIDATOR_P_H
+++ /dev/null
-// ***************************************************************************
-// SamHeaderVersion.h (c) 2010 Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
-// ---------------------------------------------------------------------------
-// Provides functionality for comparing SAM header versions
-// *************************************************************************
-
-#ifndef SAM_HEADERVERSION_P_H
-#define SAM_HEADERVERSION_P_H
-
-// -------------
-// W A R N I N G
-// -------------
-//
-// This file is not part of the BamTools API. It exists purely as an
-// implementation detail. This header file may change from version to version
-// without notice, or even be removed.
-//
-// We mean it.
-
-#include "api/SamConstants.h"
-#include <sstream>
-#include <string>
-
-namespace BamTools {
-namespace Internal {
-
-class SamHeaderVersion {
-
- // ctors & dtor
- public:
- SamHeaderVersion(void)
- : m_majorVersion(0)
- , m_minorVersion(0)
- { }
-
- explicit SamHeaderVersion(const std::string& version)
- : m_majorVersion(0)
- , m_minorVersion(0)
- {
- SetVersion(version);
- }
-
- SamHeaderVersion(const unsigned int& major, const unsigned int& minor)
- : m_majorVersion(major)
- , m_minorVersion(minor)
- { }
-
- ~SamHeaderVersion(void) {
- m_majorVersion = 0;
- m_minorVersion = 0;
- }
-
- // acess data
- public:
- unsigned int MajorVersion(void) const { return m_majorVersion; }
- unsigned int MinorVersion(void) const { return m_minorVersion; }
-
- void SetVersion(const std::string& version);
- std::string ToString(void) const;
-
- // data members
- private:
- unsigned int m_majorVersion;
- unsigned int m_minorVersion;
-};
-
-inline
-void SamHeaderVersion::SetVersion(const std::string& version) {
-
- // do nothing if version is empty
- if ( !version.empty() ) {
-
- std::stringstream versionStream("");
-
- // do nothing if period not found
- const size_t periodFound = version.find(Constants::SAM_PERIOD);
- if ( periodFound != std::string::npos ) {
-
- // store major version if non-empty and contains only digits
- const std::string& majorVersion = version.substr(0, periodFound);
- versionStream.str(majorVersion);
- if ( !majorVersion.empty() ) {
- const size_t nonDigitFound = majorVersion.find_first_not_of(Constants::SAM_DIGITS);
- if ( nonDigitFound == std::string::npos )
- versionStream >> m_majorVersion;
- }
-
- // store minor version if non-empty and contains only digits
- const std::string& minorVersion = version.substr(periodFound + 1);
- versionStream.str(minorVersion);
- if ( !minorVersion.empty() ) {
- const size_t nonDigitFound = minorVersion.find_first_not_of(Constants::SAM_DIGITS);
- if ( nonDigitFound == std::string::npos )
- versionStream >> m_minorVersion;
- }
- }
- }
-}
-
-// -----------------------------------------------------
-// printing
-
-inline std::string SamHeaderVersion::ToString(void) const {
- std::stringstream version;
- version << m_majorVersion << Constants::SAM_PERIOD << m_minorVersion;
- return version.str();
-}
-
-// -----------------------------------------------------
-// comparison operators
-
-inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
- return (lhs.MajorVersion() == rhs.MajorVersion()) &&
- (lhs.MinorVersion() == rhs.MinorVersion());
-}
-
-inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
- if ( lhs.MajorVersion() == rhs.MajorVersion() )
- return lhs.MinorVersion() < rhs.MinorVersion();
- else
- return lhs.MajorVersion() < rhs.MajorVersion();
-}
-
-inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return rhs < lhs; }
-inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs>rhs); }
-inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs<rhs); }
-
-} // namespace Internal
-} // namespace BamTools
-
-#endif // SAM_HEADERVERSION_P_H
--- /dev/null
+// ***************************************************************************
+// BamHeader_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/BamConstants.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+// Compares the first Constants::BAM_HEADER_MAGIC_LENGTH characters of @buffer
+// against Constants::BAM_HEADER_MAGIC; true only when strncmp reports a match.
+static inline
+bool isValidMagicNumber(const char* buffer) {
+    const int comparison = strncmp(buffer,
+                                   Constants::BAM_HEADER_MAGIC,
+                                   Constants::BAM_HEADER_MAGIC_LENGTH);
+    return ( comparison == 0 );
+}
+
+// --------------------------
+// BamHeader implementation
+// --------------------------
+
+// ctor: empty body, performs no explicit initialization
+BamHeader::BamHeader(void) { }
+
+// dtor: empty body, performs no explicit cleanup
+BamHeader::~BamHeader(void) { }
+
+// Reads the BAM magic number from the BGZF stream and verifies it.
+// Returns nothing: throws BamException if the magic number could not be read
+// in full, or if the bytes read do not match Constants::BAM_HEADER_MAGIC.
+void BamHeader::CheckMagicNumber(BgzfStream* stream) {
+
+    // try to read magic number
+    char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
+    const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
+
+    // compare byte counts as size_t (the same way ReadHeaderLength does),
+    // instead of going through an int cast that mixes signed & unsigned operands
+    const size_t numBytesExpected = Constants::BAM_HEADER_MAGIC_LENGTH;
+    if ( numBytesRead != numBytesExpected )
+        throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
+
+    // validate magic number
+    if ( !isValidMagicNumber(buffer) )
+        throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
+}
+
+// clear SamHeader data (delegates to the Clear() method of the m_header member)
+void BamHeader::Clear(void) {
+ m_header.Clear();
+}
+
+// return true if SamHeader data is valid
+// (simply forwards the result of m_header.IsValid(); no checks are done here)
+bool BamHeader::IsValid(void) const {
+ return m_header.IsValid();
+}
+
+// Loads the BAM header from a BGZF stream: verifies the 'magic number' first,
+// then reads the SAM header text length followed by the text itself.
+// Nothing is caught here, so exceptions from the helper steps reach the caller.
+void BamHeader::Load(BgzfStream* stream) {
+
+    // the magic number is read & checked before anything else
+    CheckMagicNumber(stream);
+
+    // header text is read as a length, then that many bytes of text
+    uint32_t textLength = 0;
+    ReadHeaderLength(stream, textLength);
+    ReadHeaderText(stream, textLength);
+}
+
+// Reads the SAM header text length from the BGZF stream into @length.
+// Throws BamException if fewer than sizeof(uint32_t) bytes could be read.
+void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
+
+    // pull the raw length field off the stream
+    const size_t lengthFieldSize = sizeof(uint32_t);
+    char rawLength[sizeof(uint32_t)];
+    const size_t bytesRead = stream->Read(rawLength, lengthFieldSize);
+    if ( bytesRead != lengthFieldSize )
+        throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
+
+    // unpack the raw bytes, then swap byte order when the host is big-endian
+    length = BamTools::UnpackUnsignedInt(rawLength);
+    if ( BamTools::SystemIsBigEndian() )
+        BamTools::SwapEndian_32(length);
+}
+
+// Reads @length bytes of SAM header text from the BGZF stream and stores the
+// text in the SamHeader data member.
+// Throws BamException if fewer than @length bytes could be read.
+void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
+
+    // zero-filled buffer with one extra byte, so the text read into it is
+    // always null-terminated. The size is widened to size_t before adding 1,
+    // so the sum is not evaluated in 32-bit arithmetic.
+    //
+    // RaiiBuffer (api/BamAux.h) delete[]s the memory in its destructor, so the
+    // buffer is released on every exit path: the throw below, an exception
+    // while building/storing the string, or normal return.
+    RaiiBuffer headerText(static_cast<size_t>(length) + 1);
+
+    // read header text
+    const size_t bytesRead = stream->Read(headerText.Buffer, length);
+    if ( bytesRead != length )
+        throw BamException("BamHeader::ReadHeaderText", "could not read header text");
+
+    // text was read OK, store it (string is built up to the first null byte)
+    m_header.SetHeaderText( string(headerText.Buffer) );
+}
+
+// returns *copy* of SamHeader data object
+// (m_header is returned by value, so changes to the result do not affect this object)
+SamHeader BamHeader::ToSamHeader(void) const {
+ return m_header;
+}
+
+// returns SAM-formatted string of header data
+// (forwards the result of m_header.ToString())
+string BamHeader::ToString(void) const {
+ return m_header.ToString();
+}
--- /dev/null
+// ***************************************************************************
+// BamHeader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#ifndef BAMHEADER_P_H
+#define BAMHEADER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamHeader.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BgzfStream;
+
+// Wraps a SamHeader data member and the steps needed to fill it from a
+// BGZF stream (magic number check, header length, header text).
+class BamHeader {
+
+ // ctor & dtor
+ public:
+ BamHeader(void);
+ ~BamHeader(void);
+
+ // BamHeader interface
+ public:
+ // clear SamHeader data
+ void Clear(void);
+ // return true if SamHeader data is valid
+ bool IsValid(void) const;
+ // load BAM header ('magic number' and SAM header text) from BGZF stream
+ // no return value: the read/check steps it runs throw BamException on failure
+ void Load(BgzfStream* stream);
+ // returns (editable) copy of SamHeader data object
+ SamHeader ToSamHeader(void) const;
+ // returns SAM-formatted string of header data
+ std::string ToString(void) const;
+
+ // internal methods
+ private:
+ // reads magic number from BGZF stream & checks it
+ void CheckMagicNumber(BgzfStream* stream);
+ // reads SAM header length from BGZF stream, stores it in @length
+ void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
+ // reads SAM header text from BGZF stream, stores in SamHeader object
+ void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
+
+ // data members
+ private:
+ // SamHeader data that the public methods above operate on
+ SamHeader m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMHEADER_P_H
--- /dev/null
+// ***************************************************************************
+// BamMultiMerger_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides merging functionality for BamMultiReader. At this point, supports
+// sorting results by (refId, position) or by read name.
+// ***************************************************************************
+
+#ifndef BAMMULTIMERGER_P_H
+#define BAMMULTIMERGER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAlignment.h"
+#include "api/BamReader.h"
+#include "api/algorithms/Sort.h"
+#include <deque>
+#include <functional>
+#include <set>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Pairs a reader with the alignment most recently associated with it.
+// MergeItem itself never deletes either pointer (its dtor is empty).
+struct MergeItem {
+
+    // data members
+    BamReader*    Reader;
+    BamAlignment* Alignment;
+
+    // value ctor - both pointers default to null
+    MergeItem(BamReader* reader = 0, BamAlignment* alignment = 0)
+        : Reader(reader), Alignment(alignment)
+    { }
+
+    // copy ctor - shallow copy of both pointers
+    MergeItem(const MergeItem& other)
+        : Reader(other.Reader), Alignment(other.Alignment)
+    { }
+
+    // dtor - nothing to release here
+    ~MergeItem(void) { }
+};
+
+// Adapts a BamAlignment comparator (Compare) so that it can order MergeItems:
+// two items are compared by comparing the alignments they point to.
+template<typename Compare>
+struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool> {
+
+    public:
+        MergeItemSorter(const Compare& comp = Compare())
+            : m_comp(comp)
+        { }
+
+        // returns result of Compare applied to the items' alignments
+        //
+        // N.B. - this MUST be const-callable: ordered containers such as
+        //        std::multiset may invoke their comparator through a const
+        //        object, and stricter standard libraries reject a non-const
+        //        call operator at compile time
+        // N.B. - both items are assumed to carry non-null Alignment pointers
+        bool operator()(const MergeItem& lhs, const MergeItem& rhs) const {
+            const BamAlignment& l = *lhs.Alignment;
+            const BamAlignment& r = *rhs.Alignment;
+            return m_comp(l,r);
+        }
+
+    private:
+        // mutable so that Compare types whose own call operator is not
+        // const-qualified can still be invoked from the const operator() above
+        mutable Compare m_comp;
+};
+
+// pure ABC so we can just work polymorphically with any specific merger implementation
+// (implementations hold a collection of MergeItems and hand them back one at a time)
+class IMultiMerger {
+
+ public:
+ IMultiMerger(void) { }
+ virtual ~IMultiMerger(void) { }
+ public:
+ // stores @item in the merger
+ virtual void Add(MergeItem item) =0;
+ // removes all stored items
+ virtual void Clear(void) =0;
+ // returns reference to the first stored item, without removing it
+ virtual const MergeItem& First(void) const =0;
+ // returns true if no items are stored
+ virtual bool IsEmpty(void) const =0;
+ // removes a stored item associated with @reader
+ virtual void Remove(BamReader* reader) =0;
+ // returns number of stored items
+ virtual int Size(void) const =0;
+ // removes the first stored item & returns it (by copy)
+ virtual MergeItem TakeFirst(void) =0;
+};
+
+// general merger
+// keeps its MergeItems in a std::multiset ordered by Compare (applied to the
+// items' alignments via MergeItemSorter)
+template<typename Compare>
+class MultiMerger : public IMultiMerger {
+
+    // comparator types
+    public:
+        typedef Compare                      CompareType;
+        typedef MergeItemSorter<CompareType> MergeType;
+
+    // ctor & dtor
+    public:
+        explicit MultiMerger(const Compare& comp = Compare())
+            : IMultiMerger()
+            , m_data( MergeType(comp) )
+        { }
+
+        ~MultiMerger(void) { }
+
+    // IMultiMerger implementation
+    public:
+        void             Add(MergeItem item);
+        void             Clear(void);
+        const MergeItem& First(void) const;
+        bool             IsEmpty(void) const;
+        void             Remove(BamReader* reader);
+        int              Size(void) const;
+        MergeItem        TakeFirst(void);
+
+    // container types & data members
+    private:
+        typedef MergeItem                               ValueType;
+        typedef std::multiset<ValueType, MergeType>     ContainerType;
+        typedef typename ContainerType::iterator        DataIterator;
+        typedef typename ContainerType::const_iterator  DataConstIterator;
+
+        ContainerType m_data;
+};
+
+// stores @item, first building the alignment's character data if the
+// comparator needs it for ordering
+template <typename Compare>
+inline void MultiMerger<Compare>::Add(MergeItem item) {
+
+    // N.B. - any future custom Compare types must define UsesCharData()
+    //        see algorithms/Sort.h
+    const bool sortNeedsCharData = CompareType::UsesCharData();
+    if ( sortNeedsCharData ) {
+        item.Alignment->BuildCharData();
+    }
+
+    m_data.insert(item);
+}
+
+// drops every stored item (the pointed-to readers/alignments are not deleted)
+template <typename Compare>
+inline void MultiMerger<Compare>::Clear(void)
+{
+    m_data.clear();
+}
+
+// returns the item at the front of the sorted container, leaving it in place
+// N.B. - container must not be empty
+template <typename Compare>
+inline const MergeItem& MultiMerger<Compare>::First(void) const {
+    DataConstIterator head = m_data.begin();
+    return (*head);
+}
+
+// returns true if the merger holds no items
+template <typename Compare>
+inline bool MultiMerger<Compare>::IsEmpty(void) const {
+    const bool noEntries = m_data.empty();
+    return noEntries;
+}
+// removes the stored item that belongs to @reader
+// does nothing if @reader is null or has no item in the merger
+template <typename Compare>
+inline void MultiMerger<Compare>::Remove(BamReader* reader) {
+
+    if ( reader == 0 ) return;
+
+    // N.B. - match on reader identity rather than on filename: if the same
+    //        file is open in more than one reader, a filename match could
+    //        erase a different reader's item and leave this reader's item
+    //        (soon to be a dangling pointer) behind
+    DataIterator dataIter = m_data.begin();
+    DataIterator dataEnd  = m_data.end();
+    for ( ; dataIter != dataEnd; ++dataIter ) {
+        const MergeItem& item = (*dataIter);
+        if ( item.Reader == reader ) {
+            // erase() invalidates dataIter, so stop iterating right away
+            m_data.erase(dataIter);
+            return;
+        }
+    }
+}
+// returns the number of stored items (container size converted to int)
+template <typename Compare>
+inline int MultiMerger<Compare>::Size(void) const {
+    return static_cast<int>( m_data.size() );
+}
+
+// removes the front item from the sorted container & returns a copy of it
+// N.B. - container must not be empty
+template <typename Compare>
+inline MergeItem MultiMerger<Compare>::TakeFirst(void) {
+
+    // copy the item out before its node is erased
+    DataIterator head = m_data.begin();
+    MergeItem taken(*head);
+    m_data.erase(head);
+    return taken;
+}
+
+// unsorted "merger"
+// specialization that does no sorting at all: items are kept in a std::deque,
+// appended at the back and taken from the front
+template<>
+class MultiMerger<Algorithms::Sort::Unsorted> : public IMultiMerger {
+
+    // ctor & dtor
+    public:
+        // N.B. - @comp is accepted only to mirror the general template's ctor
+        explicit MultiMerger(const Algorithms::Sort::Unsorted& comp = Algorithms::Sort::Unsorted())
+            : IMultiMerger()
+        { }
+
+        ~MultiMerger(void) { }
+
+    // IMultiMerger implementation
+    public:
+        void             Add(MergeItem item);
+        void             Clear(void);
+        const MergeItem& First(void) const;
+        bool             IsEmpty(void) const;
+        void             Remove(BamReader* reader);
+        int              Size(void) const;
+        MergeItem        TakeFirst(void);
+
+    // container types & data members
+    private:
+        typedef MergeItem                      ValueType;
+        typedef std::deque<ValueType>          ContainerType;
+        typedef ContainerType::iterator        DataIterator;
+        typedef ContainerType::const_iterator  DataConstIterator;
+
+        ContainerType m_data;
+};
+
+// appends @item at the back (no char data is built, nothing is sorted)
+inline void MultiMerger<Algorithms::Sort::Unsorted>::Add(MergeItem item)
+{
+    m_data.push_back(item);
+}
+
+// drops every stored item (the pointed-to readers/alignments are not deleted)
+inline void MultiMerger<Algorithms::Sort::Unsorted>::Clear(void)
+{
+    m_data.clear();
+}
+
+// returns the item at the front of the deque, leaving it in place
+// N.B. - container must not be empty
+inline const MergeItem& MultiMerger<Algorithms::Sort::Unsorted>::First(void) const
+{
+    const ValueType& head = m_data.front();
+    return head;
+}
+
+// returns true if the merger holds no items
+inline bool MultiMerger<Algorithms::Sort::Unsorted>::IsEmpty(void) const
+{
+    const bool noEntries = m_data.empty();
+    return noEntries;
+}
+
+// removes the stored item that belongs to @reader
+// does nothing if @reader is null or has no item in the merger
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Remove(BamReader* reader) {
+
+    if ( reader == 0 ) return;
+
+    // N.B. - match on reader identity rather than on filename: if the same
+    //        file is open in more than one reader, a filename match could
+    //        erase a different reader's item and leave this reader's item
+    //        (soon to be a dangling pointer) behind
+    DataIterator dataIter = m_data.begin();
+    DataIterator dataEnd  = m_data.end();
+    for ( ; dataIter != dataEnd; ++dataIter ) {
+        const MergeItem& item = (*dataIter);
+        if ( item.Reader == reader ) {
+            // erase() invalidates dataIter, so stop iterating right away
+            m_data.erase(dataIter);
+            return;
+        }
+    }
+}
+
+// returns the number of stored items (container size converted to int)
+inline int MultiMerger<Algorithms::Sort::Unsorted>::Size(void) const
+{
+    return static_cast<int>( m_data.size() );
+}
+
+// removes the front item from the deque & returns a copy of it
+// N.B. - container must not be empty
+inline MergeItem MultiMerger<Algorithms::Sort::Unsorted>::TakeFirst(void)
+{
+    // copy the item out before it is popped
+    MergeItem taken( m_data.front() );
+    m_data.pop_front();
+    return taken;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIMERGER_P_H
--- /dev/null
+// ***************************************************************************
+// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/BamMultiReader.h"
+#include "api/SamConstants.h"
+#include "api/algorithms/Sort.h"
+#include "api/internal/bam/BamMultiReader_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+using namespace std;
+
+// ctor - starts out with no alignment cache
+// (the cache is created on demand in UpdateAlignmentCache())
+BamMultiReaderPrivate::BamMultiReaderPrivate(void)
+    : m_alignmentCache(0)
+{
+}
+
+// dtor - closes all BAM files (the status returned by Close() is ignored)
+BamMultiReaderPrivate::~BamMultiReaderPrivate(void)
+{
+    Close();
+}
+
+// close all BAM files
+// returns true if every file closed OK; otherwise stores an error message & returns false
+bool BamMultiReaderPrivate::Close(void) {
+
+    m_errorString.clear();
+
+    // "close everything" == close every filename we currently know about
+    const bool allClosed = CloseFiles( Filenames() );
+    if ( !allClosed ) {
+        // wrap the per-reader errors left behind by CloseFiles()
+        const string readerErrors = m_errorString;
+        const string message = string("error encountered while closing all files: \n\t") + readerErrors;
+        SetErrorString("BamMultiReader::Close", message);
+        return false;
+    }
+
+    return true;
+}
+
+// close requested BAM file
+// returns true if it closed OK; otherwise stores an error message & returns false
+bool BamMultiReaderPrivate::CloseFile(const string& filename) {
+
+    m_errorString.clear();
+
+    // delegate to the multi-file version, using a one-element list
+    const vector<string> singleFile(1, filename);
+    if ( !CloseFiles(singleFile) ) {
+        // wrap the per-reader errors left behind by CloseFiles()
+        const string readerErrors = m_errorString;
+        const string message = string("error while closing file: ") + filename + "\n" + readerErrors;
+        SetErrorString("BamMultiReader::CloseFile", message);
+        return false;
+    }
+
+    return true;
+}
+
+// close requested BAM files
+//
+// For each (non-empty) filename, the first reader opened on that file is
+// removed from the alignment cache, closed, and deleted along with its
+// alignment. Returns true if every such reader closed without error;
+// otherwise returns false, with one tab-indented line per failed reader
+// left in m_errorString.
+bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over filenames
+    vector<string>::const_iterator filesIter = filenames.begin();
+    vector<string>::const_iterator filesEnd  = filenames.end();
+    for ( ; filesIter != filesEnd; ++filesIter ) {
+        const string& filename = (*filesIter);
+        if ( filename.empty() ) continue;
+
+        // iterate over readers
+        // N.B. - iterators are fetched anew for each filename, since the
+        //        erase() below invalidates them
+        vector<MergeItem>::iterator readerIter = m_readers.begin();
+        vector<MergeItem>::iterator readerEnd  = m_readers.end();
+        for ( ; readerIter != readerEnd; ++readerIter ) {
+            MergeItem& item = (*readerIter);
+            BamReader* reader = item.Reader;
+            if ( reader == 0 ) continue;
+
+            // skip readers that are open on some other file
+            if ( reader->GetFilename() != filename ) continue;
+
+            // remove reader's entry from alignment cache
+            // N.B. - the cache may not exist yet: if Open() fails before
+            //        UpdateAlignmentCache() has ever run, readers are left in
+            //        m_readers while m_alignmentCache is still null
+            if ( m_alignmentCache )
+                m_alignmentCache->Remove(reader);
+
+            // close reader, recording (but not aborting on) any error
+            if ( !reader->Close() ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+
+            // clean up reader & its alignment
+            delete reader;
+            delete item.Alignment;
+
+            // remove reader from reader list
+            m_readers.erase(readerIter);
+
+            // on match, just go on to next filename
+            // (no need to keep looking and item iterator is invalid now anyway)
+            break;
+        }
+    }
+
+    // make sure alignment cache is cleaned up if all readers closed
+    if ( m_readers.empty() && m_alignmentCache ) {
+        m_alignmentCache->Clear();
+        delete m_alignmentCache;
+        m_alignmentCache = 0;
+    }
+
+    // return whether all readers closed OK
+    return !errorsEncountered;
+}
+
+// creates index files for BAM files that don't have them
+// returns true if every missing index was created; otherwise stores an error message & returns false
+bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
+
+    m_errorString.clear();
+    bool allCreated = true;
+
+    // visit each reader, building an index only where one is missing
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        BamReader* reader = m_readers[i].Reader;
+        if ( reader == 0 || reader->HasIndex() ) continue;
+
+        // record failure, but keep going with remaining readers
+        if ( !reader->CreateIndex(type) ) {
+            m_errorString.append(1, '\t');
+            m_errorString.append(reader->GetErrorString());
+            m_errorString.append(1, '\n');
+            allCreated = false;
+        }
+    }
+
+    if ( allCreated )
+        return true;
+
+    // wrap the collected per-reader errors
+    const string readerErrors = m_errorString;
+    const string message = string("error while creating index files: ") + "\n" + readerErrors;
+    SetErrorString("BamMultiReader::CreateIndexes", message);
+    return false;
+}
+
+// allocates a merger whose ordering matches the sort order declared in the
+// (merged) header; caller takes ownership of the returned object
+IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
+
+    // look up the declared sort order
+    const SamHeader header = GetHeader();
+    const bool sortedByPosition = ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE );
+    const bool sortedByName     = ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME );
+
+    IMultiMerger* merger = 0;
+    if ( sortedByPosition )
+        merger = new MultiMerger<Algorithms::Sort::ByPosition>();
+    else if ( sortedByName )
+        merger = new MultiMerger<Algorithms::Sort::ByName>();
+    else
+        // "unknown" or "unsorted" - use unsorted merger and just read in
+        merger = new MultiMerger<Algorithms::Sort::Unsorted>();
+
+    return merger;
+}
+
+// returns the (non-empty) filenames of all current readers, in reader order
+const vector<string> BamMultiReaderPrivate::Filenames(void) const {
+
+    const size_t numReaders = m_readers.size();
+
+    vector<string> result;
+    result.reserve(numReaders);
+
+    for ( size_t i = 0; i < numReaders; ++i ) {
+
+        // skip null readers
+        const BamReader* reader = m_readers[i].Reader;
+        if ( reader == 0 ) continue;
+
+        // skip readers that report no filename
+        const string& name = reader->GetFilename();
+        if ( name.empty() ) continue;
+
+        result.push_back(name);
+    }
+
+    return result;
+}
+
+string BamMultiReaderPrivate::GetErrorString(void) const {
+ return m_errorString;
+}
+
+// returns a SamHeader object built from the merged header text
+SamHeader BamMultiReaderPrivate::GetHeader(void) const
+{
+    return SamHeader( GetHeaderText() );
+}
+
+// makes a virtual, unified header for all the bam files in the multireader
+//
+// N.B. - right now, this is the first reader's header plus the read groups
+//        from every other reader
+// TODO: make this more intelligent wrt other header lines/fields
+string BamMultiReaderPrivate::GetHeaderText(void) const {
+
+    // nothing to merge if no readers are open
+    if ( m_readers.empty() ) return string();
+
+    vector<MergeItem>::const_iterator itemIter = m_readers.begin();
+    vector<MergeItem>::const_iterator itemEnd  = m_readers.end();
+
+    // start from a copy of the first reader's header
+    const BamReader* firstReader = (*itemIter).Reader;
+    if ( firstReader == 0 ) return string();
+    SamHeader mergedHeader = firstReader->GetHeader();
+
+    // fold in read groups from each remaining reader
+    for ( ++itemIter; itemIter != itemEnd; ++itemIter ) {
+        const BamReader* currentReader = (*itemIter).Reader;
+        if ( currentReader == 0 ) continue;
+
+        // N.B. - SamReadGroupDictionary handles duplicate-checking
+        const SamHeader currentHeader = currentReader->GetHeader();
+        mergedHeader.ReadGroups.Add(currentHeader.ReadGroups);
+
+        // TODO: merge anything else??
+    }
+
+    // return stringified header
+    return mergedHeader.ToString();
+}
+
+// get next alignment among all files (character data requested)
+bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al)
+{
+    const bool needCharData = true;
+    return PopNextCachedAlignment(al, needCharData);
+}
+
+// get next alignment among all files without parsing character data from alignments
+bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al)
+{
+    const bool needCharData = false;
+    return PopNextCachedAlignment(al, needCharData);
+}
+
+// ---------------------------------------------------------------------------------------
+//
+// NB: The following GetReferenceX() functions assume that we have identical
+// references for all BAM files. We enforce this by invoking the
+// ValidateReaders() method to verify that our reference data is the same
+// across all files on Open - so we will not encounter a situation in which
+// there is a mismatch and we are still live.
+//
+// ---------------------------------------------------------------------------------------
+
+// returns the number of reference sequences, as reported by the first reader
+// (0 if there are no readers, or the first reader is null)
+int BamMultiReaderPrivate::GetReferenceCount(void) const {
+
+    const BamReader* firstReader = ( m_readers.empty() ? 0 : m_readers.front().Reader );
+    if ( firstReader == 0 )
+        return 0;
+
+    return firstReader->GetReferenceCount();
+}
+
+// returns vector of reference objects, as reported by the first reader
+// (empty vector if there are no readers, or the first reader is null)
+const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
+
+    const BamReader* firstReader = ( m_readers.empty() ? 0 : m_readers.front().Reader );
+    if ( firstReader == 0 )
+        return RefVector();
+
+    return firstReader->GetReferenceData();
+}
+
+// returns refID from reference name, as reported by the first reader
+// (-1 if there are no readers, or the first reader is null)
+int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
+
+    const BamReader* firstReader = ( m_readers.empty() ? 0 : m_readers.front().Reader );
+    if ( firstReader == 0 )
+        return -1;
+
+    return firstReader->GetReferenceID(refName);
+}
+// ---------------------------------------------------------------------------------------
+
+// returns true if all readers have index data available
+// this is useful to indicate whether Jump() or SetRegion() are possible
+// (an empty multireader reports false)
+bool BamMultiReaderPrivate::HasIndexes(void) const {
+
+    const size_t numReaders = m_readers.size();
+    if ( numReaders == 0 )
+        return false;
+
+    // every (non-null) reader is queried; a single missing index flips the answer
+    bool allIndexed = true;
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        const BamReader* reader = m_readers[i].Reader;
+        if ( reader == 0 ) continue;
+
+        if ( !reader->HasIndex() )
+            allIndexed = false;
+    }
+
+    return allIndexed;
+}
+
+// returns true if multireader has open readers
+// (i.e. as soon as any non-null reader reports IsOpen())
+bool BamMultiReaderPrivate::HasOpenReaders(void) {
+
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        const BamReader* reader = m_readers[i].Reader;
+        if ( reader != 0 && reader->IsOpen() )
+            return true;
+    }
+
+    // no readers open
+    return false;
+}
+
+// performs random-access jump using (refID, position) as a left-bound
+bool BamMultiReaderPrivate::Jump(int refID, int position) {
+
+    // NB: Individual Jump() results are deliberately ignored. While it may
+    //     make sense to track readers in which we can successfully Jump, in
+    //     practice a failure of Jump means "no alignments here." So we simply
+    //     accept the failure, refresh the alignment cache, and continue.
+
+    // jump in each BamReader to position of interest
+    vector<MergeItem>::iterator itemIter = m_readers.begin();
+    while ( itemIter != m_readers.end() ) {
+        BamReader* reader = (*itemIter).Reader;
+        if ( reader != 0 )
+            reader->Jump(refID, position);
+        ++itemIter;
+    }
+
+    // returns status of cache update
+    return UpdateAlignmentCache();
+}
+
+// locate (& load) index files for BAM readers that don't already have one loaded
+// returns true if an index was located for every reader lacking one;
+// otherwise stores an error message & returns false
+bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
+
+    bool errorsEncountered = false;
+    m_errorString.clear();
+
+    // iterate over readers
+    vector<MergeItem>::iterator readerIter = m_readers.begin();
+    vector<MergeItem>::iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // if reader has no index, try to locate one
+        // (failures are recorded, but remaining readers are still attempted)
+        if ( !reader->HasIndex() ) {
+            if ( !reader->LocateIndex(preferredType) ) {
+                m_errorString.append(1, '\t');
+                m_errorString.append(reader->GetErrorString());
+                m_errorString.append(1, '\n');
+                errorsEncountered = true;
+            }
+        }
+    }
+
+    // check for errors encountered before returning success/fail
+    if ( errorsEncountered ) {
+        const string currentError = m_errorString;
+        const string message = string("error while locating index files: ") + "\n" + currentError;
+        // N.B. - tag must name this method, so the error can be traced to its source
+        SetErrorString("BamMultiReader::LocateIndexes", message);
+        return false;
+    } else
+        return true;
+}
+
+// opens BAM files
+// any readers that are already open get rewound first; returns false (with an
+// error message stored) if rewinding, opening, or validation fails
+bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
+
+    m_errorString.clear();
+
+    // put all current readers back at beginning (refreshes alignment cache)
+    if ( !Rewind() ) {
+        const string rewindError = m_errorString;
+        const string message = string("unable to rewind existing readers: \n\t") + rewindError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // try to open a reader for each (non-empty) filename
+    bool allOpened = true;
+    const size_t numFilenames = filenames.size();
+    for ( size_t i = 0; i < numFilenames; ++i ) {
+        const string& filename = filenames[i];
+        if ( filename.empty() ) continue;
+
+        BamReader* reader = new BamReader;
+
+        // if opened OK, store it (paired with a fresh alignment object)
+        if ( reader->Open(filename) ) {
+            m_readers.push_back( MergeItem(reader, new BamAlignment) );
+            continue;
+        }
+
+        // otherwise store error & clean up invalid reader
+        m_errorString.append(1, '\t');
+        m_errorString += string("unable to open file: ") + filename;
+        m_errorString.append(1, '\n');
+        allOpened = false;
+        delete reader;
+    }
+
+    // check for errors while opening
+    if ( !allOpened ) {
+        const string openErrors = m_errorString;
+        const string message = string("unable to open all files: \t\n") + openErrors;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // check for BAM file consistency
+    if ( !ValidateReaders() ) {
+        const string validationError = m_errorString;
+        const string message = string("unable to open inconsistent files: \t\n") + validationError;
+        SetErrorString("BamMultiReader::Open", message);
+        return false;
+    }
+
+    // update alignment cache
+    return UpdateAlignmentCache();
+}
+
+// opens a single BAM file (delegates to Open() with a one-element list)
+bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
+
+    const vector<string> singleFile(1, filename);
+    if ( !Open(singleFile) ) {
+        // wrap the error left behind by Open()
+        const string openError = m_errorString;
+        const string message = string("could not open file: ") + filename + "\n\t" + openError;
+        SetErrorString("BamMultiReader::OpenFile", message);
+        return false;
+    }
+
+    return true;
+}
+
+// opens the given index files, pairing the i-th index filename with the i-th reader
+// returns true if all indexes opened OK; otherwise stores an error message & returns false
+bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
+
+    // TODO: This needs to be cleaner - should not assume same order.
+    //       And either way, shouldn't start at first reader. Should start at
+    //       first reader without an index?
+
+    // make sure same number of index filenames as readers
+    if ( m_readers.size() != indexFilenames.size() ) {
+        const string message("size of index file list does not match current BAM file count");
+        SetErrorString("BamMultiReader::OpenIndexes", message);
+        return false;
+    }
+
+    m_errorString.clear();
+    bool allOpened = true;
+
+    // walk readers & index filenames in lockstep
+    // (both sequences have the same length - checked above)
+    vector<MergeItem>::iterator    readerIter = m_readers.begin();
+    vector<MergeItem>::iterator    readerEnd  = m_readers.end();
+    vector<string>::const_iterator nameIter   = indexFilenames.begin();
+    vector<string>::const_iterator nameEnd    = indexFilenames.end();
+    for ( ; readerIter != readerEnd && nameIter != nameEnd; ++readerIter, ++nameIter ) {
+
+        // null readers still consume their slot in the filename list
+        BamReader* reader = (*readerIter).Reader;
+        if ( reader == 0 ) continue;
+
+        // open index filename on reader
+        if ( !reader->OpenIndex(*nameIter) ) {
+            m_errorString.append(1, '\t');
+            m_errorString += reader->GetErrorString();
+            m_errorString.append(1, '\n');
+            allOpened = false;
+        }
+    }
+
+    if ( allOpened )
+        return true;
+
+    // wrap the collected per-reader errors
+    const string readerErrors = m_errorString;
+    const string message = string("could not open all index files: \n\t") + readerErrors;
+    SetErrorString("BamMultiReader::OpenIndexes", message);
+    return false;
+}
+
+// takes the first alignment out of the cache, copies it into @al, and then
+// refills the cache from the reader that alignment came from
+// returns false if no alignment is available
+bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
+
+    // skip if no alignments available
+    if ( m_alignmentCache == 0 ) return false;
+    if ( m_alignmentCache->IsEmpty() ) return false;
+
+    // pop next merge item entry from cache
+    const MergeItem nextItem = m_alignmentCache->TakeFirst();
+    BamReader*    sourceReader    = nextItem.Reader;
+    BamAlignment* cachedAlignment = nextItem.Alignment;
+    if ( sourceReader == 0 || cachedAlignment == 0 )
+        return false;
+
+    // set char data (& originating filename) if requested
+    if ( needCharData ) {
+        cachedAlignment->BuildCharData();
+        cachedAlignment->Filename = sourceReader->GetFilename();
+    }
+
+    // store cached alignment into destination parameter (by copy)
+    // N.B. - must happen before the refill below overwrites *cachedAlignment
+    al = *cachedAlignment;
+
+    // load next alignment from reader & store in cache
+    SaveNextAlignment(sourceReader, cachedAlignment);
+    return true;
+}
+
+// returns BAM file pointers to beginning of alignment data & resets alignment cache
+// (trivially succeeds when no readers are open)
+bool BamMultiReaderPrivate::Rewind(void) {
+
+    // skip if no readers open
+    if ( m_readers.empty() )
+        return true;
+
+    // rewind succeeded - result is now the status of the cache update
+    if ( RewindReaders() )
+        return UpdateAlignmentCache();
+
+    // rewind failed - wrap the per-reader errors
+    const string readerErrors = m_errorString;
+    const string message = string("could not rewind readers: \n\t") + readerErrors;
+    SetErrorString("BamMultiReader::Rewind", message);
+    return false;
+}
+
+// returns BAM file pointers to beginning of alignment data
+// every reader is attempted; returns false if any of them failed, with one
+// tab-indented line per failure left in m_errorString
+bool BamMultiReaderPrivate::RewindReaders(void) {
+
+    m_errorString.clear();
+    bool allRewound = true;
+
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        BamReader* reader = m_readers[i].Reader;
+        if ( reader == 0 ) continue;
+
+        // attempt rewind on BamReader
+        if ( reader->Rewind() ) continue;
+
+        // record failure, but keep going with remaining readers
+        m_errorString.append(1, '\t');
+        m_errorString.append( reader->GetErrorString() );
+        m_errorString.append(1, '\n');
+        allRewound = false;
+    }
+
+    return allRewound;
+}
+
+// reads the next alignment from @reader into @alignment and, if that
+// succeeds, adds the (reader, alignment) pair to the alignment cache
+void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
+
+    // N.B. - lazy building of alignment's char data - populated only:
+    //        automatically by alignment cache to maintain its sorting OR
+    //        on demand from client call to future call to GetNextAlignment()
+
+    const bool gotAlignment = reader->GetNextAlignmentCore(*alignment);
+    if ( !gotAlignment )
+        return;
+
+    m_alignmentCache->Add( MergeItem(reader, alignment) );
+}
+
+void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
+ static const string SEPARATOR = ": ";
+ m_errorString = where + SEPARATOR + what;
+}
+
+// sets the region of interest on every reader, then refreshes the alignment cache
+bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
+
+    // NB: Individual SetRegion() results are deliberately ignored. While it
+    //     may make sense to track readers in which we can successfully
+    //     SetRegion, in practice a failure of SetRegion means "no alignments
+    //     here." So we simply accept the failure, refresh the alignment
+    //     cache, and continue.
+
+    // set region of interest on each reader
+    vector<MergeItem>::iterator itemIter = m_readers.begin();
+    while ( itemIter != m_readers.end() ) {
+        BamReader* reader = (*itemIter).Reader;
+        if ( reader != 0 )
+            reader->SetRegion(region);
+        ++itemIter;
+    }
+
+    // return status of cache update
+    return UpdateAlignmentCache();
+}
+
+// updates our alignment cache
+// (creates the cache on first use, empties it, then loads one alignment per reader)
+bool BamMultiReaderPrivate::UpdateAlignmentCache(void) {
+
+    // create alignment cache if not created yet
+    if ( m_alignmentCache == 0 )
+        m_alignmentCache = CreateAlignmentCache();
+
+    // still no cache? report & bail
+    if ( m_alignmentCache == 0 ) {
+        SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
+        return false;
+    }
+
+    // clear any prior cache data
+    m_alignmentCache->Clear();
+
+    // save next alignment from each reader in cache
+    const size_t numReaders = m_readers.size();
+    for ( size_t i = 0; i < numReaders; ++i ) {
+        MergeItem& item = m_readers[i];
+        if ( item.Reader == 0 || item.Alignment == 0 ) continue;
+        SaveNextAlignment(item.Reader, item.Alignment);
+    }
+
+    // if we get here, ok
+    return true;
+}
+
+// ValidateReaders checks that all the readers point to BAM files that share
+// the same sort order and that represent alignments against the same set of
+// reference sequences, identically ordered (same names & lengths).
+//
+// Returns true if all readers are consistent (trivially so for 0 or 1
+// readers). Otherwise a description of the first mismatch found is stored
+// via SetErrorString() and false is returned.
+bool BamMultiReaderPrivate::ValidateReaders(void) const {
+
+    m_errorString.clear();
+
+    // skip if 0 or 1 readers opened
+    if ( m_readers.empty() || (m_readers.size() == 1) )
+        return true;
+
+    // retrieve first reader
+    const MergeItem& firstItem = m_readers.front();
+    const BamReader* firstReader = firstItem.Reader;
+    if ( firstReader == 0 ) return false;
+
+    // retrieve first reader's header data
+    const SamHeader& firstReaderHeader = firstReader->GetHeader();
+    const string& firstReaderSortOrder = firstReaderHeader.SortOrder;
+
+    // retrieve first reader's reference data
+    const RefVector& firstReaderRefData = firstReader->GetReferenceData();
+    const int firstReaderRefCount = firstReader->GetReferenceCount();
+    const int firstReaderRefSize = firstReaderRefData.size();
+
+    // iterate over all readers
+    vector<MergeItem>::const_iterator readerIter = m_readers.begin();
+    vector<MergeItem>::const_iterator readerEnd  = m_readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        const MergeItem& item = (*readerIter);
+        BamReader* reader = item.Reader;
+        if ( reader == 0 ) continue;
+
+        // get current reader's header data
+        const SamHeader& currentReaderHeader = reader->GetHeader();
+        const string& currentReaderSortOrder = currentReaderHeader.SortOrder;
+
+        // check compatible sort order
+        if ( currentReaderSortOrder != firstReaderSortOrder ) {
+            const string message = string("mismatched sort order in ") + reader->GetFilename() +
+                                   ", expected "  + firstReaderSortOrder +
+                                   ", but found " + currentReaderSortOrder;
+            SetErrorString("BamMultiReader::ValidateReaders", message);
+            return false;
+        }
+
+        // get current reader's reference data
+        const RefVector currentReaderRefData = reader->GetReferenceData();
+        const int currentReaderRefCount = reader->GetReferenceCount();
+        const int currentReaderRefSize = currentReaderRefData.size();
+
+        // init reference data iterators
+        RefVector::const_iterator firstRefIter   = firstReaderRefData.begin();
+        RefVector::const_iterator firstRefEnd    = firstReaderRefData.end();
+        RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
+
+        // compare reference counts from BamReader ( & container size, in case of BR error)
+        if ( (currentReaderRefCount != firstReaderRefCount) ||
+             (firstReaderRefSize    != currentReaderRefSize) )
+        {
+            stringstream s("");
+            s << "mismatched reference count in " << reader->GetFilename()
+              << ", expected " << firstReaderRefCount
+              << ", but found " << currentReaderRefCount;
+            SetErrorString("BamMultiReader::ValidateReaders", s.str());
+            return false;
+        }
+
+        // this will be ok; we just checked above that we have identically-sized sets of references
+        // here we simply check if they are all, in fact, equal in content
+        while ( firstRefIter != firstRefEnd ) {
+            const RefData& firstRef   = (*firstRefIter);
+            const RefData& currentRef = (*currentRefIter);
+
+            // compare reference name & length
+            if ( (firstRef.RefName   != currentRef.RefName) ||
+                 (firstRef.RefLength != currentRef.RefLength) )
+            {
+                // N.B. - a single stream collects the whole message; both
+                //        reference lists below must be written to it
+                stringstream s("");
+                s << "mismatched references found in " << reader->GetFilename()
+                  << " expected: " << endl;
+
+                // print first reader's reference data
+                RefVector::const_iterator refIter = firstReaderRefData.begin();
+                RefVector::const_iterator refEnd  = firstReaderRefData.end();
+                for ( ; refIter != refEnd; ++refIter ) {
+                    const RefData& entry = (*refIter);
+                    s << entry.RefName << " " << entry.RefLength << endl;
+                }
+
+                s << "but found: " << endl;
+
+                // print current reader's reference data
+                refIter = currentReaderRefData.begin();
+                refEnd  = currentReaderRefData.end();
+                for ( ; refIter != refEnd; ++refIter ) {
+                    const RefData& entry = (*refIter);
+                    s << entry.RefName << " " << entry.RefLength << endl;
+                }
+
+                SetErrorString("BamMultiReader::ValidateReaders", s.str());
+                return false;
+            }
+
+            // update iterators
+            ++firstRefIter;
+            ++currentRefIter;
+        }
+    }
+
+    // if we get here, everything checks out
+    return true;
+}
--- /dev/null
+// ***************************************************************************
+// BamMultiReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#ifndef BAMMULTIREADER_P_H
+#define BAMMULTIREADER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamHeader.h"
+#include "api/BamMultiReader.h"
+#include "api/internal/bam/BamMultiMerger_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+class BamMultiReaderPrivate { // implementation class behind BamMultiReader; not part of the public API (see warning above)
+
+ // typedefs
+ public:
+ typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment; // a reader paired with an alignment
+
+ // constructor / destructor
+ public:
+ BamMultiReaderPrivate(void);
+ ~BamMultiReaderPrivate(void);
+
+ // public interface
+ public:
+
+ // file operations
+ bool Close(void);
+ bool CloseFile(const std::string& filename);
+ const std::vector<std::string> Filenames(void) const;
+ bool Jump(int refID, int position = 0); // position defaults to 0
+ bool Open(const std::vector<std::string>& filenames);
+ bool OpenFile(const std::string& filename);
+ bool Rewind(void);
+ bool SetRegion(const BamRegion& region);
+
+ // access alignment data
+ bool GetNextAlignment(BamAlignment& al);
+ bool GetNextAlignmentCore(BamAlignment& al);
+ bool HasOpenReaders(void); // NOTE(review): not declared const, unlike the other query methods - confirm intent
+
+ // access auxiliary data
+ SamHeader GetHeader(void) const;
+ std::string GetHeaderText(void) const;
+ int GetReferenceCount(void) const;
+ const BamTools::RefVector GetReferenceData(void) const; // returned by value
+ int GetReferenceID(const std::string& refName) const;
+
+ // BAM index operations
+ bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);
+ bool HasIndexes(void) const;
+ bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
+ bool OpenIndexes(const std::vector<std::string>& indexFilenames);
+
+ // error handling
+ std::string GetErrorString(void) const;
+
+ // 'internal' methods
+ public:
+
+ bool CloseFiles(const std::vector<std::string>& filenames);
+ IMultiMerger* CreateAlignmentCache(void) const;
+ bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
+ bool RewindReaders(void);
+ void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
+ void SetErrorString(const std::string& where, const std::string& what) const; // const: callable from const methods because m_errorString is mutable
+ bool UpdateAlignmentCache(void);
+ bool ValidateReaders(void) const; // compares each reader's reference count, names & lengths with the first reader's
+
+ // data members
+ public:
+ std::vector<MergeItem> m_readers;
+ IMultiMerger* m_alignmentCache; // NOTE(review): raw pointer; ownership is not visible from this header - confirm in the .cpp
+ mutable std::string m_errorString; // mutable so that const methods can record errors
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIREADER_P_H
--- /dev/null
+// ***************************************************************************
+// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Manages random access operations in a BAM file
+// **************************************************************************
+
+#include "api/BamIndex.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamIndexFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cassert>
+#include <sstream>
+using namespace std;
+
+BamRandomAccessController::BamRandomAccessController(void) // starts with no index loaded
+ : m_index(0) // null until SetIndex() stores an index
+ , m_hasAlignmentsInRegion(true) // same default value that ClearRegion() restores
+{ }
+
+BamRandomAccessController::~BamRandomAccessController(void) {
+ Close(); // deletes the owned index (if any) and clears the region
+}
+
+// Moves the left edge of the stored region forward to the first reference for which the
+// index reports alignments. References are scanned from the requested left bound up to the
+// right bound reference, or up to the last reference (referenceCount - 1) when the region
+// has no right bound.
+//
+// Does nothing when no index is loaded. When no reference in range has alignments,
+// m_hasAlignmentsInRegion is left false and the region itself is untouched.
+void BamRandomAccessController::AdjustRegion(const int& referenceCount) {
+
+    // nothing to consult without index data
+    if ( !m_index )
+        return;
+
+    // last reference ID that still belongs to the region
+    const int lastRefId = m_region.isRightBoundSpecified() ? m_region.RightRefID
+                                                           : (referenceCount - 1);
+
+    // look for the first reference in range that actually holds alignments
+    m_hasAlignmentsInRegion = false;
+    int refId = m_region.LeftRefID;
+    for ( ; refId <= lastRefId; ++refId ) {
+        if ( m_index->HasAlignments(refId) ) {
+            m_hasAlignmentsInRegion = true;
+            break;
+        }
+    }
+
+    // data begins on a later reference than requested: start from the top of that reference
+    // (if the requested left reference has data, the region is left as-is)
+    if ( m_hasAlignmentsInRegion && refId != m_region.LeftRefID ) {
+        m_region.LeftRefID    = refId;
+        m_region.LeftPosition = 0;
+    }
+}
+
+// Classifies an alignment relative to the current region: BeforeRegion, OverlapsRegion
+// or AfterRegion. Only the alignment's RefID, Position and (when it starts left of the
+// region's left bound) its end position are consulted.
+BamRandomAccessController::RegionState
+BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {
+
+    // without a left bound there is nothing to filter on
+    if ( !m_region.isLeftBoundSpecified() )
+        return OverlapsRegion;
+
+    // unmapped reads are reported as AFTER the region, which halts processing
+    if ( alignment.RefID == -1 )
+        return AfterRegion;
+
+    // alignment lies on a reference preceding the left bound reference
+    if ( alignment.RefID < m_region.LeftRefID )
+        return BeforeRegion;
+
+    // alignment lies on the left bound reference
+    if ( alignment.RefID == m_region.LeftRefID ) {
+
+        // starts left of the region: it counts only if its end reaches past the left bound
+        if ( alignment.Position < m_region.LeftPosition ) {
+            return ( alignment.GetEndPosition() > m_region.LeftPosition ) ? OverlapsRegion
+                                                                          : BeforeRegion;
+        }
+
+        // starts at or after the left bound: it is AFTER the region only when the region
+        // ends on this same reference and the alignment starts on/after the right bound
+        const bool startsPastRightBound = m_region.isRightBoundSpecified() &&
+                                          m_region.LeftRefID == m_region.RightRefID &&
+                                          alignment.Position >= m_region.RightPosition;
+        return ( startsPastRightBound ? AfterRegion : OverlapsRegion );
+    }
+
+    // from here on the alignment is on a reference beyond the left bound reference
+
+    // open-ended region: everything to the right overlaps
+    if ( !m_region.isRightBoundSpecified() )
+        return OverlapsRegion;
+
+    // strictly between the two bound references
+    if ( alignment.RefID < m_region.RightRefID )
+        return OverlapsRegion;
+
+    // on a reference past the right bound reference
+    if ( alignment.RefID > m_region.RightRefID )
+        return AfterRegion;
+
+    // on the right bound reference itself: overlaps only if it starts before the right bound
+    return ( alignment.Position < m_region.RightPosition ) ? OverlapsRegion : AfterRegion;
+}
+
+void BamRandomAccessController::Close(void) { // resets the controller: drops the index, then the region
+ ClearIndex(); // deletes the owned index, if any
+ ClearRegion(); // clears m_region and resets m_hasAlignmentsInRegion to true
+}
+
+// Destroys the index owned by this controller (if any) and leaves m_index null.
+void BamRandomAccessController::ClearIndex(void) {
+
+    // nothing held, nothing to release
+    if ( m_index == 0 )
+        return;
+
+    delete m_index;
+    m_index = 0;
+}
+
+void BamRandomAccessController::ClearRegion(void) { // forgets the current region restriction
+ m_region.clear();
+ m_hasAlignmentsInRegion = true; // back to the constructor's default
+}
+
+// Builds a new index of the requested type from the reader's BAM file and, on success,
+// installs it as this controller's index (replacing any previous one).
+//
+// reader : reader to index; must be non-null (asserted) and open
+// type   : kind of index to build
+//
+// Returns true on success. On failure returns false, stores a message retrievable through
+// GetErrorString(), and leaves the currently installed index untouched.
+bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
+                                            const BamIndex::IndexType& type)
+{
+    // skip if reader is invalid
+    assert(reader);
+    if ( !reader->IsOpen() ) {
+        SetErrorString("BamRandomAccessController::CreateIndex",
+                       "cannot create index for unopened reader");
+        return false;
+    }
+
+    // create new index of requested type
+    BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
+    if ( newIndex == 0 ) {
+        stringstream s("");
+        s << "could not create index of type: " << type;
+        SetErrorString("BamRandomAccessController::CreateIndex", s.str());
+        return false;
+    }
+
+    // attempt to build index from current BamReader file
+    if ( !newIndex->Create() ) {
+        const string indexError = newIndex->GetErrorString();
+        const string message = "could not create index: \n\t" + indexError;
+        SetErrorString("BamRandomAccessController::CreateIndex", message);
+
+        // the index was never handed to SetIndex(), so it is still ours to release
+        delete newIndex;
+        return false;
+    }
+
+    // save new index (controller takes ownership) & return success
+    SetIndex(newIndex);
+    return true;
+}
+
+string BamRandomAccessController::GetErrorString(void) const { // returns a copy of the message last stored by SetErrorString()
+ return m_errorString;
+}
+
+bool BamRandomAccessController::HasIndex(void) const { // true when an index object is currently held
+ return ( m_index != 0 );
+}
+
+bool BamRandomAccessController::HasRegion(void) const { // true when the stored region is not null
+ return ( !m_region.isNull() );
+}
+
+// Reports whether the loaded index records any alignments for reference 'refId'.
+// Returns false when no index is loaded (there is nothing to consult), instead of
+// dereferencing a null index pointer.
+bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {
+
+    // same guard AdjustRegion() applies before touching the index
+    if ( m_index == 0 )
+        return false;
+
+    return m_index->HasAlignments(refId);
+}
+
+// Looks for an index file belonging to the reader's BAM file (preferring 'preferredType'
+// when possible) and, if one is found, opens it via OpenIndex().
+//
+// reader : reader whose filename is used for the lookup; must be non-null (asserted)
+//
+// Returns true when an index was found and loaded. Otherwise returns false with a message
+// available through GetErrorString().
+bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,
+                                            const BamIndex::IndexType& preferredType)
+{
+    // look up index filename, deferring to preferredType if possible
+    assert(reader);
+    const string indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
+
+    // if no index file found (of any type)
+    if ( indexFilename.empty() ) {
+        // separator after the colon keeps the filename readable in the message
+        const string message = string("could not find index file for: ") + reader->Filename();
+        SetErrorString("BamRandomAccessController::LocateIndex", message);
+        return false;
+    }
+
+    // otherwise open & use index file that was found
+    return OpenIndex(indexFilename, reader);
+}
+
+// Creates an index object whose type is derived from 'indexFilename', loads the file's
+// data into it and, on success, installs it as this controller's index.
+//
+// Returns true on success. On failure returns false, stores a message retrievable through
+// GetErrorString(), and leaves the currently installed index untouched.
+bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {
+
+    // attempt create new index of type based on filename
+    BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
+    if ( index == 0 ) {
+        const string message = string("could not open index file: ") + indexFilename;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+        return false;
+    }
+
+    // attempt to load data from index file
+    if ( !index->Load(indexFilename) ) {
+        const string indexError = index->GetErrorString();
+        const string message = string("could not load index data from file: ") + indexFilename +
+                               "\n\t" + indexError;
+        SetErrorString("BamRandomAccessController::OpenIndex", message);
+
+        // the index was never handed to SetIndex(), so it is still ours to release
+        delete index;
+        return false;
+    }
+
+    // save new index (controller takes ownership) & return success
+    SetIndex(index);
+    return true;
+}
+
+bool BamRandomAccessController::RegionHasAlignments(void) const { // flag maintained by AdjustRegion()/SetRegion(); true by default
+ return m_hasAlignmentsInRegion;
+}
+
+void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
+ m_errorString = where + ": " + what;
+}
+
+// Installs 'index' as the controller's index and takes ownership of it. Any index held
+// previously is deleted. Passing a null pointer simply drops the current index.
+void BamRandomAccessController::SetIndex(BamIndex* index) {
+
+    // re-installing the index we already hold must not delete it:
+    // that would leave m_index pointing at freed memory
+    if ( index == m_index )
+        return;
+
+    ClearIndex();
+    m_index = index;
+}
+
+// Stores 'region' as the current region and positions the reader at its start by means
+// of the loaded index.
+//
+// region         : region of interest (stored even when the jump cannot be performed)
+// referenceCount : number of references in the BAM file; used to bound the search for
+//                  data when the region has no right bound
+//
+// Returns false (with a message available via GetErrorString()) when no index is loaded
+// or when the index fails to jump. Returns true otherwise - including the case where the
+// region turns out to contain no alignments (see RegionHasAlignments()).
+bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
+
+    // store region
+    m_region = region;
+
+    // cannot jump when no index is available
+    if ( !HasIndex() ) {
+        SetErrorString("BamRandomAccessController::SetRegion", "cannot jump if no index data available");
+        return false;
+    }
+
+    // adjust region as necessary to reflect where data actually begins
+    AdjustRegion(referenceCount);
+
+    // if no data present, return true
+    // * Not an error, but future attempts to access alignments in this region will not return data
+    //   Returning true is useful in a BamMultiReader setting where some BAM files may
+    //   lack alignments in regions where other files still have data available.
+    if ( !m_hasAlignmentsInRegion )
+        return true;
+
+    // return success/failure of jump to specified region,
+    //
+    // * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag
+    //   This covers 'corner case' where a region is requested that lies beyond the last
+    //   alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
+    //   will not return data. BamMultiReader will still be able to successfully pull alignments
+    //   from a region from other files even if this one has no data.
+    if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
+        const string indexError = m_index->GetErrorString();
+        const string message = string("could not set region\n\t") + indexError;
+        // attribute the failure to this method (it was previously reported as OpenIndex)
+        SetErrorString("BamRandomAccessController::SetRegion", message);
+        return false;
+    }
+
+    return true;
+}
--- /dev/null
+// ***************************************************************************
+// BamRandomAccessController_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011(DB)
+// ---------------------------------------------------------------------------
+// Manages random access operations in a BAM file
+// ***************************************************************************
+
+#ifndef BAMRACONTROLLER_P_H
+#define BAMRACONTROLLER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+class BamReaderPrivate;
+
+class BamRandomAccessController { // holds one BAM index plus the current region of interest for a reader
+
+ // enums
+ public: enum RegionState { BeforeRegion = 0
+ , OverlapsRegion
+ , AfterRegion
+ }; // position of an alignment relative to the current region, as returned by AlignmentState()
+
+ // ctor & dtor
+ public:
+ BamRandomAccessController(void);
+ ~BamRandomAccessController(void);
+
+ // BamRandomAccessController interface
+ public:
+
+ // index methods
+ void ClearIndex(void); // deletes the held index, if any
+ bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type); // builds a new index from the reader's file
+ bool HasIndex(void) const;
+ bool IndexHasAlignmentsForReference(const int& refId); // forwards the query to the held index
+ bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType); // finds an index file by the reader's filename, then opens it
+ bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader); // loads index data from the given file
+ void SetIndex(BamIndex* index); // takes ownership of 'index'; a previously held index is deleted
+
+ // region methods
+ void ClearRegion(void);
+ bool HasRegion(void) const;
+ RegionState AlignmentState(const BamAlignment& alignment) const; // classifies an alignment against the current region
+ bool RegionHasAlignments(void) const;
+ bool SetRegion(const BamRegion& region, const int& referenceCount); // stores the region and jumps to it via the index
+
+ // general methods
+ void Close(void); // ClearIndex() followed by ClearRegion()
+ std::string GetErrorString(void) const;
+
+ // internal methods
+ private:
+ // adjusts requested region if necessary (depending on where data actually begins)
+ void AdjustRegion(const int& referenceCount);
+ // error-string handling
+ void SetErrorString(const std::string& where, const std::string& what); // stores "<where>: <what>"
+
+ // data members
+ private:
+
+ // index data
+ BamIndex* m_index; // owns the index, not a copy - responsible for deleting
+
+ // region data
+ BamRegion m_region;
+ bool m_hasAlignmentsInRegion; // true by default; cleared when the index reports no data for the region
+
+ // general data
+ std::string m_errorString; // last message recorded by SetErrorString()
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMRACONTROLLER_P_H
--- /dev/null
+// ***************************************************************************
+// BamReader_p.cpp (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 14 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#include "api/BamConstants.h"
+#include "api/BamReader.h"
+#include "api/IBamIODevice.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <iterator>
+#include <vector>
+using namespace std;
+
+// Constructs a closed reader: no file is attached yet, and the alignment-data offset is zero.
+// 'parent' is the public BamReader this object implements; it is only stored here.
+BamReaderPrivate::BamReaderPrivate(BamReader* parent)
+    : m_alignmentsBeginOffset(0)
+    , m_isBigEndian( BamTools::SystemIsBigEndian() )   // host byte order, determined once
+    , m_parent(parent)
+{ }
+
+// destructor - releases everything through Close()
+BamReaderPrivate::~BamReaderPrivate(void) {
+ Close(); // return value is ignored; Close() handles BamException from the stream itself
+}
+
+// Closes the BAM file and resets the reader to its 'unopened' state.
+// Cached metadata (filename, references, header) and the random-access controller are
+// always reset; the underlying stream is closed only if it is currently open.
+// Returns false (with the error string set) if closing the stream raised a BamException.
+bool BamReaderPrivate::Close(void) {
+
+    // forget everything we know about the current file
+    m_filename.clear();
+    m_references.clear();
+    m_header.Clear();
+
+    // drop index & region data
+    m_randomAccessController.Close();
+
+    // nothing more to do if there is no open stream
+    if ( !IsOpen() )
+        return true;
+
+    // otherwise attempt to close the stream
+    try {
+        m_stream.Close();
+    }
+    catch ( BamException& e ) {
+        string message("encountered error closing BAM file: \n\t");
+        message += e.what();
+        SetErrorString("BamReader::Close", message);
+        return false;
+    }
+
+    return true;
+}
+
+// Creates an index of the requested type for the currently open BAM file.
+// The work is delegated to the random-access controller; on failure its error text is
+// wrapped into this reader's error string and false is returned.
+bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
+
+    // an index can only be built from an open file
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
+        return false;
+    }
+
+    // let the controller do the work; report its error if it fails
+    if ( !m_randomAccessController.CreateIndex(this, type) ) {
+        string message("could not create index: \n\t");
+        message += m_randomAccessController.GetErrorString();
+        SetErrorString("BamReader::CreateIndex", message);
+        return false;
+    }
+
+    return true;
+}
+
+// returns the filename recorded by Open() (empty after Close())
+const string BamReaderPrivate::Filename(void) const {
+ return m_filename; // returned by value
+}
+
+string BamReaderPrivate::GetErrorString(void) const { // returns a copy of the message last stored by SetErrorString()
+ return m_errorString;
+}
+
+// return header data as std::string (delegates to the BamHeader component)
+string BamReaderPrivate::GetHeaderText(void) const {
+ return m_header.ToString();
+}
+
+// return header data as SamHeader object (delegates to the BamHeader component)
+SamHeader BamReaderPrivate::GetSamHeader(void) const {
+ return m_header.ToSamHeader();
+}
+
+// Retrieves the next alignment with its character data fully parsed.
+// Returns false when no further alignment is available, or when the character data could
+// not be built (in which case the error string is set).
+bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
+
+    // fetch the core data first; give up if there is no valid alignment
+    if ( !GetNextAlignmentCore(alignment) )
+        return false;
+
+    // remember which file this alignment came from
+    alignment.Filename = m_filename;
+
+    // parse character data, reporting the alignment's own error on failure
+    if ( !alignment.BuildCharData() ) {
+        string message("could not populate alignment data: \n\t");
+        message += alignment.GetErrorString();
+        SetErrorString("BamReader::GetNextAlignment", message);
+        return false;
+    }
+
+    return true;
+}
+
+// Retrieves the core data of the next alignment that overlaps the current region
+// (or simply the next alignment, when no region is set). Returns success/fail.
+// ** Character data fields (read name, bases, qualities, tag data, filename) are NOT
+//    populated; the raw bytes stay in the alignment's SupportData.
+//    Useful for operations that need ONLY positional or other alignment-related information.
+bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
+
+    // nothing to read from a closed stream
+    if ( !m_stream.IsOpen() )
+        return false;
+
+    try {
+
+        // a region was requested, but it is known to hold no alignments
+        const bool regionIsEmpty = m_randomAccessController.HasRegion() &&
+                                   !m_randomAccessController.RegionHasAlignments();
+        if ( regionIsEmpty )
+            return false;
+
+        // keep reading until an alignment overlaps the region
+        for ( ; ; ) {
+
+            // stop if no further alignment can be read
+            if ( !LoadNextAlignment(alignment) )
+                return false;
+
+            const BamRandomAccessController::RegionState state =
+                m_randomAccessController.AlignmentState(alignment);
+
+            // alignment starts after the region, no need to keep reading
+            if ( state == BamRandomAccessController::AfterRegion )
+                return false;
+
+            // found the next 'valid' alignment
+            if ( state == BamRandomAccessController::OverlapsRegion )
+                break;
+
+            // otherwise alignment lies before the region - try the next one
+        }
+
+        // mark that only core data has been parsed so far
+        alignment.SupportData.HasCoreOnly = true;
+        return true;
+
+    } catch ( BamException& e ) {
+        string message("encountered error reading BAM alignment: \n\t");
+        message += e.what();
+        SetErrorString("BamReader::GetNextAlignmentCore", message);
+        return false;
+    }
+}
+
+int BamReaderPrivate::GetReferenceCount(void) const { // number of entries in the loaded reference list
+ return m_references.size();
+}
+
+const RefVector& BamReaderPrivate::GetReferenceData(void) const { // reference to the internal list; Close() clears it
+ return m_references;
+}
+
+// Returns the RefID (zero-based position in the reference list) of the first reference
+// named 'refName', or -1 if no reference has that name.
+int BamReaderPrivate::GetReferenceID(const string& refName) const {
+
+    // scan the reference list directly (no need to copy the names out first)
+    RefVector::const_iterator refIter = m_references.begin();
+    RefVector::const_iterator refEnd  = m_references.end();
+    for ( int index = 0; refIter != refEnd; ++refIter, ++index ) {
+        if ( (*refIter).RefName == refName )
+            return index;
+    }
+
+    // not found
+    return -1;
+}
+
+bool BamReaderPrivate::HasIndex(void) const { // true when the random-access controller holds an index
+ return m_randomAccessController.HasIndex();
+}
+
+bool BamReaderPrivate::IsOpen(void) const { // the reader is open exactly when its stream is open
+ return m_stream.IsOpen();
+}
+
+// load BAM header data from the reader's stream into m_header
+void BamReaderPrivate::LoadHeaderData(void) {
+ m_header.Load(&m_stream); // NOTE(review): Open() wraps this call in a BamException handler - presumably Load() throws on failure; confirm
+}
+
+// Populates 'alignment' with the record found at the current stream position.
+// Only core fields, the raw character data block (SupportData.AllCharData) and the CIGAR
+// operations are filled in; no overlap checking or character data parsing is done.
+//
+// Returns true on success. Returns false when no complete, well-formed record could be
+// read: end of data, truncated record, a block length smaller than the fixed-size core,
+// or CIGAR data that would extend past the end of the record.
+bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
+
+    // read in the 'block length' value - a short read means there is no further record
+    // (and the buffer contents would be indeterminate)
+    char buffer[sizeof(uint32_t)];
+    const unsigned int blockLengthSize = sizeof(uint32_t);
+    if ( m_stream.Read(buffer, blockLengthSize) != blockLengthSize )
+        return false;
+    alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
+
+    // a record must at least hold the core data (this also rejects a zero block length);
+    // anything smaller would make the data-length subtraction below wrap around
+    if ( alignment.SupportData.BlockLength < Constants::BAM_CORE_SIZE )
+        return false;
+
+    // read in core alignment data, make sure the right size of data was read
+    char x[Constants::BAM_CORE_SIZE];
+    if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
+        return false;
+
+    // swap core endian-ness if necessary
+    if ( m_isBigEndian ) {
+        for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
+            BamTools::SwapEndian_32p(&x[i]);
+    }
+
+    // set BamAlignment 'core' and 'support' data
+    alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
+    alignment.Position = BamTools::UnpackSignedInt(&x[4]);
+
+    // bin (upper 16 bits), mapping quality (next 8 bits), query name length (low 8 bits)
+    unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
+    alignment.Bin        = tempValue >> 16;
+    alignment.MapQuality = tempValue >> 8 & 0xff;
+    alignment.SupportData.QueryNameLength = tempValue & 0xff;
+
+    // alignment flag (upper 16 bits), number of CIGAR operations (low 16 bits)
+    tempValue = BamTools::UnpackUnsignedInt(&x[12]);
+    alignment.AlignmentFlag = tempValue >> 16;
+    alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
+
+    alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
+    alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
+    alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
+    alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);
+
+    // set BamAlignment length
+    alignment.Length = alignment.SupportData.QuerySequenceLength;
+
+    // read in character data - make sure proper data size was read
+    const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
+    RaiiBuffer allCharData(dataLength);
+    if ( m_stream.Read(allCharData.Buffer, dataLength) != dataLength )
+        return false;
+
+    // CIGAR data starts right after the query name; make sure all of it lies inside the
+    // block just read (both factors are bounded by the masks above, so this cannot overflow)
+    const unsigned int cigarOpSize     = sizeof(uint32_t);
+    const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
+    const unsigned int cigarDataLength = alignment.SupportData.NumCigarOperations * cigarOpSize;
+    if ( cigarDataOffset + cigarDataLength > dataLength )
+        return false;
+
+    // store 'allCharData' in supportData structure (raw bytes, exactly as read)
+    alignment.SupportData.AllCharData.assign(allCharData.Buffer, dataLength);
+
+    // save CIGAR ops
+    // need to calculate this here so that BamAlignment::GetEndPosition() performs correctly,
+    // even when GetNextAlignmentCore() is called
+    char* cigarData = allCharData.Buffer + cigarDataOffset;
+    CigarOp op;
+    alignment.CigarData.clear();
+    alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
+    for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
+
+        // fetch the packed op through the same unpack helper used for the core fields,
+        // rather than dereferencing a cast pointer at an arbitrary byte offset
+        uint32_t cigarValue = BamTools::UnpackUnsignedInt(cigarData + i*cigarOpSize);
+
+        // swap endian-ness if necessary
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarValue);
+
+        // build CigarOp structure
+        op.Length = (cigarValue >> Constants::BAM_CIGAR_SHIFT);
+        op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarValue & Constants::BAM_CIGAR_MASK) ];
+
+        // save CigarOp
+        alignment.CigarData.push_back(op);
+    }
+
+    // record was read & decoded successfully
+    return true;
+}
+
+// Loads the reference sequence list (name & length of each reference) from the BAM file
+// into m_references. Expects the stream to be positioned at the reference count.
+//
+// Always returns true; read results are not checked here.
+// NOTE(review): Open() calls this inside a BamException handler - presumably stream
+// failures surface as exceptions; confirm against BgzfStream::Read().
+bool BamReaderPrivate::LoadReferenceData(void) {
+
+    // get number of reference sequences
+    char buffer[sizeof(uint32_t)];
+    m_stream.Read(buffer, sizeof(uint32_t));
+    uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
+    m_references.reserve((int)numberRefSeqs);
+
+    // iterate over all references in header
+    for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {
+
+        // get length of reference name
+        m_stream.Read(buffer, sizeof(uint32_t));
+        uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
+        RaiiBuffer refName(refNameLength);
+
+        // get reference name and reference sequence length
+        m_stream.Read(refName.Buffer, refNameLength);
+        m_stream.Read(buffer, sizeof(int32_t));
+        int32_t refLength = BamTools::UnpackSignedInt(buffer);
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
+
+        // the name ends at its first NUL byte - but never look beyond the bytes that
+        // were actually allocated, in case the file omits the terminator (or the
+        // stored name length is zero)
+        const char* nameBegin = refName.Buffer;
+        const char* nameEnd   = std::find(nameBegin, nameBegin + refNameLength, '\0');
+
+        // store data for reference
+        RefData aReference;
+        aReference.RefName   = string(nameBegin, nameEnd);
+        aReference.RefLength = refLength;
+        m_references.push_back(aReference);
+    }
+
+    // return success
+    return true;
+}
+
+// Asks the random-access controller to find and load an index file for this reader,
+// preferring 'preferredType'. On failure the controller's error text is wrapped into
+// this reader's error string and false is returned.
+bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
+
+    if ( !m_randomAccessController.LocateIndex(this, preferredType) ) {
+        string message("could not locate index: \n\t");
+        message += m_randomAccessController.GetErrorString();
+        SetErrorString("BamReader::LocateIndex", message);
+        return false;
+    }
+
+    return true;
+}
+
+// Opens the BAM file 'filename' and loads its header and reference data.
+// (Index files are not touched here - see LocateIndex() / OpenIndex().)
+//
+// Any previously opened file is closed first. Returns true on success. On failure
+// returns false with the error string set, and the reader is left closed rather than
+// attached to a stream whose metadata could not be loaded.
+bool BamReaderPrivate::Open(const string& filename) {
+
+    try {
+
+        // make sure we're starting with fresh state
+        Close();
+
+        // open BgzfStream
+        m_stream.Open(filename, IBamIODevice::ReadOnly);
+
+        // load BAM metadata
+        LoadHeaderData();
+        if ( !LoadReferenceData() ) {
+            // don't keep a stream open without usable reference data
+            Close();
+            const string message = string("could not load reference data from file: ") + filename;
+            SetErrorString("BamReader::Open", message);
+            return false;
+        }
+
+        // store filename & offset of first alignment
+        m_filename = filename;
+        m_alignmentsBeginOffset = m_stream.Tell();
+
+        // return success
+        return true;
+
+    } catch ( BamException& e ) {
+
+        // capture the cause before cleaning up
+        const string error = e.what();
+
+        // release the stream if it was opened before the failure occurred
+        // (Close() handles a BamException from the stream itself)
+        Close();
+
+        const string message = string("could not open file: ") + filename +
+                               "\n\t" + error;
+        SetErrorString("BamReader::Open", message);
+        return false;
+    }
+}
+
+// Loads the index stored in 'indexFilename' through the random-access controller.
+// On failure the controller's error text is wrapped into this reader's error string
+// and false is returned.
+bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
+
+    if ( !m_randomAccessController.OpenIndex(indexFilename, this) ) {
+        string message("could not open index: \n\t");
+        message += m_randomAccessController.GetErrorString();
+        SetErrorString("BamReader::OpenIndex", message);
+        return false;
+    }
+
+    return true;
+}
+
+// Clears any region restriction and moves the file pointer back to the first alignment.
+// Returns false (with the seek error wrapped into the error string) if seeking fails.
+bool BamReaderPrivate::Rewind(void) {
+
+    // a rewound reader is no longer restricted to a region
+    m_randomAccessController.ClearRegion();
+
+    // seek back to where the alignment data begins
+    if ( !Seek(m_alignmentsBeginOffset) ) {
+        // Seek() has already recorded its own message; copy it before overwriting
+        const string seekError = m_errorString;
+        SetErrorString("BamReader::Rewind", string("could not rewind: \n\t") + seekError);
+        return false;
+    }
+
+    return true;
+}
+
+// Moves the stream to 'position'.
+// Returns false, with the error string set, if the file is not open or the stream
+// raised a BamException while seeking.
+bool BamReaderPrivate::Seek(const int64_t& position) {
+
+    // cannot seek without an open file
+    if ( !IsOpen() ) {
+        SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
+        return false;
+    }
+
+    try {
+        m_stream.Seek(position);
+    }
+    catch ( BamException& e ) {
+        string message("could not seek in BAM file: \n\t");
+        message += e.what();
+        SetErrorString("BamReader::Seek", message);
+        return false;
+    }
+
+    return true;
+}
+
+void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
+ static const string SEPARATOR = ": ";
+ m_errorString = where + SEPARATOR + what;
+}
+
+void BamReaderPrivate::SetIndex(BamIndex* index) { // hands 'index' to the random-access controller, which takes ownership
+ m_randomAccessController.SetIndex(index);
+}
+
+// Sets the region of interest and attempts to jump to it (via the random-access controller).
+// Returns success/failure; on failure the controller's error text is wrapped into this
+// reader's error string.
+bool BamReaderPrivate::SetRegion(const BamRegion& region) {
+
+    if ( !m_randomAccessController.SetRegion(region, m_references.size()) ) {
+        string message("could not set region: \n\t");
+        message += m_randomAccessController.GetErrorString();
+        SetErrorString("BamReader::SetRegion", message);
+        return false;
+    }
+
+    return true;
+}
+
+int64_t BamReaderPrivate::Tell(void) const { // current position as reported by the underlying stream
+ return m_stream.Tell();
+}
--- /dev/null
+// ***************************************************************************
+// BamReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#ifndef BAMREADER_P_H
+#define BAMREADER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAlignment.h"
+#include "api/BamIndex.h"
+#include "api/BamReader.h"
+#include "api/SamHeader.h"
+#include "api/internal/bam/BamHeader_p.h"
+#include "api/internal/bam/BamRandomAccessController_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BamReaderPrivate { // implementation class behind BamReader; not part of the public API (see warning above)
+
+ // ctor & dtor
+ public:
+ BamReaderPrivate(BamReader* parent);
+ ~BamReaderPrivate(void);
+
+ // BamReader interface
+ public:
+
+ // file operations
+ bool Close(void); // resets cached metadata & index data, closes the stream if open
+ const std::string Filename(void) const;
+ bool IsOpen(void) const;
+ bool Open(const std::string& filename); // opens the file, loads header & reference data
+ bool Rewind(void); // clears the region and seeks back to the first alignment
+ bool SetRegion(const BamRegion& region);
+
+ // access alignment data
+ bool GetNextAlignment(BamAlignment& alignment); // core data plus parsed character data
+ bool GetNextAlignmentCore(BamAlignment& alignment); // core data only
+
+ // access auxiliary data
+ std::string GetHeaderText(void) const;
+ SamHeader GetSamHeader(void) const;
+ int GetReferenceCount(void) const;
+ const RefVector& GetReferenceData(void) const;
+ int GetReferenceID(const std::string& refName) const; // -1 if refName is not found
+
+ // index operations
+ bool CreateIndex(const BamIndex::IndexType& type);
+ bool HasIndex(void) const;
+ bool LocateIndex(const BamIndex::IndexType& preferredType);
+ bool OpenIndex(const std::string& indexFilename);
+ void SetIndex(BamIndex* index); // ownership passes to the random-access controller
+
+ // error handling
+ std::string GetErrorString(void) const;
+ void SetErrorString(const std::string& where, const std::string& what); // stores "<where>: <what>"
+
+ // internal methods, but available as a BamReaderPrivate 'interface'
+ //
+ // these methods should only be used by BamTools::Internal classes
+ // (currently only used by the BamIndex subclasses)
+ public:
+ // retrieves header text from BAM file
+ void LoadHeaderData(void);
+ // retrieves BAM alignment under file pointer
+ // (does no overlap checking or character data parsing)
+ bool LoadNextAlignment(BamAlignment& alignment);
+ // builds reference data structure from BAM file
+ bool LoadReferenceData(void);
+ // seek reader to file position
+ bool Seek(const int64_t& position);
+ // return reader's file position
+ int64_t Tell(void) const;
+
+ // data members
+ public:
+
+ // general BAM file data
+ int64_t m_alignmentsBeginOffset; // stream position recorded by Open() after header & references are loaded
+ std::string m_filename;
+ RefVector m_references;
+
+ // system data
+ bool m_isBigEndian; // host byte order; decides whether values read from the file are byte-swapped
+
+ // parent BamReader
+ BamReader* m_parent; // NOTE(review): stored by the constructor; not otherwise used in the visible implementation
+
+ // BamReaderPrivate components
+ BamHeader m_header;
+ BamRandomAccessController m_randomAccessController; // holds the index & current region
+ BgzfStream m_stream;
+
+ // error handling
+ std::string m_errorString; // last message recorded by SetErrorString()
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMREADER_P_H
--- /dev/null
+// ***************************************************************************
+// BamWriter_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/BamConstants.h"
+#include "api/IBamIODevice.h"
+#include "api/internal/bam/BamWriter_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// constructor: records the host byte order once, for the endian swaps done when writing
+BamWriterPrivate::BamWriterPrivate(void)
+    : m_isBigEndian(BamTools::SystemIsBigEndian())
+{
+}
+
+// destructor: closes the output stream if it is still open (see Close())
+BamWriterPrivate::~BamWriterPrivate(void)
+{
+    this->Close();
+}
+
+// Returns the ID of the smallest bin that fully contains the half-open
+// alignment interval [begin, end).
+//
+// The levels are tried from the finest (positions shifted by 14 bits) to the
+// coarsest (shifted by 26 bits); the first level at which both interval ends
+// fall into the same bin wins. If no level matches, bin 0 is returned.
+uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
+
+    // per-level position shift and ID of the first bin on that level
+    static const int levelShifts[]    = {   14,  17, 20, 23, 26 };
+    static const int levelFirstBins[] = { 4681, 585, 73,  9,  1 };
+    static const int numLevels = 5;
+
+    // convert exclusive end into the last covered position
+    const int last = end - 1;
+
+    for ( int level = 0; level < numLevels; ++level ) {
+        const int binOfBegin = begin >> levelShifts[level];
+        if ( binOfBegin == (last >> levelShifts[level]) )
+            return levelFirstBins[level] + binOfBegin;
+    }
+
+    // interval spans more than one bin on every level
+    return 0;
+}
+
+// Closes the output stream. Does nothing when no file is open.
+// A BamException raised while closing is not propagated; its message is
+// stored in m_errorString instead.
+void BamWriterPrivate::Close(void) {
+
+    if ( IsOpen() ) {
+        try {
+            m_stream.Close();
+        }
+        catch ( BamException& e ) {
+            m_errorString = e.what();
+        }
+    }
+}
+
+// Packs the supplied CIGAR operations into their binary form.
+//
+// @cigarOperations  operations to pack (Type is the CIGAR character, Length its length)
+// @packedCigar      output; resized to hold one packed word per operation. Each word is
+//                   (Length << Constants::BAM_CIGAR_SHIFT) | op-code, stored in host byte order
+//                   (WriteAlignment() swaps it on big-endian hosts).
+//
+// Throws BamException if an operation has an unknown Type character.
+void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
+
+    // initialize
+    const size_t numCigarOperations = cigarOperations.size();
+    packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
+
+    // byte position of the next packed word inside packedCigar
+    size_t writeOffset = 0;
+
+    // iterate over cigar operations
+    vector<CigarOp>::const_iterator coIter = cigarOperations.begin();
+    vector<CigarOp>::const_iterator coEnd  = cigarOperations.end();
+    for ( ; coIter != coEnd; ++coIter ) {
+
+        // translate CIGAR character into its numeric op-code
+        uint8_t cigarOp;
+        switch ( coIter->Type ) {
+            case (Constants::BAM_CIGAR_MATCH_CHAR)    : cigarOp = Constants::BAM_CIGAR_MATCH;    break;
+            case (Constants::BAM_CIGAR_INS_CHAR)      : cigarOp = Constants::BAM_CIGAR_INS;      break;
+            case (Constants::BAM_CIGAR_DEL_CHAR)      : cigarOp = Constants::BAM_CIGAR_DEL;      break;
+            case (Constants::BAM_CIGAR_REFSKIP_CHAR)  : cigarOp = Constants::BAM_CIGAR_REFSKIP;  break;
+            case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;
+            case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;
+            case (Constants::BAM_CIGAR_PAD_CHAR)      : cigarOp = Constants::BAM_CIGAR_PAD;      break;
+            case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
+            case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
+            default:
+                // separator added so the offending character is readable in the message
+                const string message = string("invalid CIGAR operation type: ") + coIter->Type;
+                throw BamException("BamWriter::CreatePackedCigar", message);
+        }
+
+        // copy the packed word byte-wise: the string's storage is only a char buffer,
+        // so it must not be written through an (unsigned int*) alias of the const data() pointer
+        const unsigned int packedOp = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
+        memcpy(&packedCigar[writeOffset], &packedOp, sizeof(packedOp));
+        writeOffset += sizeof(packedOp);
+    }
+}
+
+// Encodes the supplied query sequence into 4-bit notation.
+//
+// @query         sequence characters to encode
+// @encodedQuery  output; replaced by (query.size()+1)/2 bytes. Even-indexed bases occupy the
+//                high nibble of a byte, odd-indexed bases the low nibble; for an odd-length
+//                query the final low nibble is left as 0.
+//
+// Throws BamException if @query contains a character that has no base code.
+// The walk is bounded by query.size() rather than by a terminating NUL, so every
+// character is validated (an embedded '\0' is reported as an invalid base) and
+// nothing past the string's contents is ever read.
+void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
+
+    // prepare the encoded query string (zero-filled, every used nibble is set below)
+    const size_t queryLength = query.size();
+    const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
+    encodedQuery.assign(encodedQueryLength, '\0');
+
+    // walk through original query sequence, encoding its bases
+    for ( size_t i = 0; i < queryLength; ++i ) {
+
+        unsigned char nucleotideCode;
+        switch ( query[i] ) {
+            case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
+            case (Constants::BAM_DNA_A)     : nucleotideCode = Constants::BAM_BASECODE_A;     break;
+            case (Constants::BAM_DNA_C)     : nucleotideCode = Constants::BAM_BASECODE_C;     break;
+            case (Constants::BAM_DNA_M)     : nucleotideCode = Constants::BAM_BASECODE_M;     break;
+            case (Constants::BAM_DNA_G)     : nucleotideCode = Constants::BAM_BASECODE_G;     break;
+            case (Constants::BAM_DNA_R)     : nucleotideCode = Constants::BAM_BASECODE_R;     break;
+            case (Constants::BAM_DNA_S)     : nucleotideCode = Constants::BAM_BASECODE_S;     break;
+            case (Constants::BAM_DNA_V)     : nucleotideCode = Constants::BAM_BASECODE_V;     break;
+            case (Constants::BAM_DNA_T)     : nucleotideCode = Constants::BAM_BASECODE_T;     break;
+            case (Constants::BAM_DNA_W)     : nucleotideCode = Constants::BAM_BASECODE_W;     break;
+            case (Constants::BAM_DNA_Y)     : nucleotideCode = Constants::BAM_BASECODE_Y;     break;
+            case (Constants::BAM_DNA_H)     : nucleotideCode = Constants::BAM_BASECODE_H;     break;
+            case (Constants::BAM_DNA_K)     : nucleotideCode = Constants::BAM_BASECODE_K;     break;
+            case (Constants::BAM_DNA_D)     : nucleotideCode = Constants::BAM_BASECODE_D;     break;
+            case (Constants::BAM_DNA_B)     : nucleotideCode = Constants::BAM_BASECODE_B;     break;
+            case (Constants::BAM_DNA_N)     : nucleotideCode = Constants::BAM_BASECODE_N;     break;
+            default:
+                const string message = string("invalid base: ") + query[i];
+                throw BamException("BamWriter::EncodeQuerySequence", message);
+        }
+
+        // pack the nucleotide code: even index -> high nibble, odd index -> low nibble
+        const size_t byteIndex = i / 2;
+        if ( (i % 2) == 0 )
+            encodedQuery[byteIndex] = nucleotideCode << 4;
+        else
+            encodedQuery[byteIndex] |= nucleotideCode;
+    }
+}
+
+// Returns a copy of the message stored by the most recent failed operation
+// (m_errorString is set in the catch blocks of Close(), Open() and SaveAlignment()).
+std::string BamWriterPrivate::GetErrorString(void) const
+{
+    return std::string(m_errorString);
+}
+
+// Reports whether the underlying output stream is currently open.
+bool BamWriterPrivate::IsOpen(void) const
+{
+    const bool streamIsOpen = m_stream.IsOpen();
+    return streamIsOpen;
+}
+
+// Opens @filename for writing and emits the leading parts of a BAM file, in order:
+// magic number, SAM header text, reference sequence list.
+//
+// Returns true on success. If any step throws a BamException, its message is
+// stored in m_errorString and false is returned.
+bool BamWriterPrivate::Open(const string& filename,
+                            const string& samHeaderText,
+                            const RefVector& referenceSequences)
+{
+    bool succeeded = false;
+
+    try {
+        // open the BGZF file for writing
+        m_stream.Open(filename, IBamIODevice::WriteOnly);
+
+        // BAM file 'metadata' components
+        WriteMagicNumber();
+        WriteSamHeaderText(samHeaderText);
+        WriteReferences(referenceSequences);
+
+        succeeded = true;
+    }
+    catch ( BamException& e ) {
+        m_errorString = e.what();
+    }
+
+    return succeeded;
+}
+
+// Writes one alignment to the output stream.
+//
+// Returns true on success. If writing throws a BamException, its message is
+// stored in m_errorString and false is returned.
+bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
+
+    try {
+
+        // alignment with populated character fields: Name, QueryBases, etc
+        // (resulting from BamReader::GetNextAlignment() *OR* generated directly by client code)
+        if ( !al.SupportData.HasCoreOnly )
+            WriteAlignment(al);
+
+        // alignment holding only the core data and a raw char data buffer
+        // (as a result of BamReader::GetNextAlignmentCore())
+        else
+            WriteCoreAlignment(al);
+    }
+    catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // nothing thrown, alignment was written
+    return true;
+}
+
+// Forwards the compression setting to the output stream.
+// The request is silently ignored while a BAM file is open.
+void BamWriterPrivate::SetWriteCompressed(bool ok) {
+
+    if ( IsOpen() )
+        return;
+
+    m_stream.SetWriteCompressed(ok);
+}
+
+// Serializes a fully-populated alignment (Name, CigarData, QueryBases, Qualities, TagData)
+// as one BAM record. Written in order: block size, 8-word core, query name (with its
+// terminating NUL), packed CIGAR, 4-bit encoded sequence, base qualities, tag data.
+//
+// On big-endian hosts (m_isBigEndian) all multi-byte values are byte-swapped before being
+// written. Every swap and conversion is performed on a local copy, so @al is never modified.
+//
+// Base qualities: exactly QueryBases.size() bytes are written, because that is the size
+// announced in the block header. Each available quality character is converted by
+// subtracting 33; positions for which @al supplies no quality are written as 0xFF.
+//
+// Throws BamException for an invalid CIGAR operation, an invalid base, or (big-endian
+// hosts only) an invalid tag type. SaveAlignment() is the caller that catches these.
+// NOTE(review): the big-endian tag walk trusts the layout of al.TagData (apart from
+// bounding the search for a string terminator) - malformed tag data is not detected.
+void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
+
+    // calculate char lengths
+    const unsigned int nameLength         = al.Name.size() + 1;
+    const unsigned int numCigarOperations = al.CigarData.size();
+    const unsigned int queryLength        = al.QueryBases.size();
+    const unsigned int tagDataLength      = al.TagData.size();
+
+    // no way to tell if alignment's bin is already defined (there is no default, invalid value)
+    // so we'll go ahead calculate its bin ID before storing
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // create our packed cigar string
+    string packedCigar;
+    CreatePackedCigar(al.CigarData, packedCigar);
+    const unsigned int packedCigarLength = packedCigar.size();
+
+    // encode the query
+    string encodedQuery;
+    EncodeQuerySequence(al.QueryBases, encodedQuery);
+    const unsigned int encodedQueryLength = encodedQuery.size();
+
+    // write the block size
+    const unsigned int dataBlockSize = nameLength +
+                                       packedCigarLength +
+                                       encodedQueryLength +
+                                       queryLength +
+                                       tagDataLength;
+    unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+    buffer[4] = queryLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < 8; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
+    }
+
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+
+    // write the query name
+    m_stream.Write(al.Name.c_str(), nameLength);
+
+    // write the packed cigar
+    if ( m_isBigEndian && packedCigarLength > 0 ) {
+
+        // swap a private copy; std::string releases it even if Write() throws
+        string cigarData(packedCigar);
+
+        // one swap per packed 32-bit word (NOT per byte): stepping byte-wise would
+        // swap overlapping words and run past the end of the buffer
+        for ( size_t i = 0; i + sizeof(uint32_t) <= cigarData.size(); i += sizeof(uint32_t) )
+            BamTools::SwapEndian_32p(&cigarData[i]);
+
+        m_stream.Write(cigarData.data(), packedCigarLength);
+    }
+    else
+        m_stream.Write(packedCigar.data(), packedCigarLength);
+
+    // write the encoded query sequence
+    m_stream.Write(encodedQuery.data(), encodedQueryLength);
+
+    // write the base qualities
+    // converted into a local buffer so the caller's (const) alignment keeps its qualities;
+    // positions without a supplied quality stay at 0xFF
+    string baseQualities(queryLength, static_cast<char>(0xFF));
+    const size_t numQualities = ( al.Qualities.size() < queryLength ? al.Qualities.size()
+                                                                    : queryLength );
+    for ( size_t i = 0; i < numQualities; ++i )
+        baseQualities[i] = al.Qualities[i] - 33; // FASTQ conversion
+    m_stream.Write(baseQualities.data(), queryLength);
+
+    // write the tag data
+    if ( m_isBigEndian && tagDataLength > 0 ) {
+
+        // swap a private copy; std::string releases it on every exit path
+        string tagData(al.TagData);
+
+        size_t i = 0;
+        while ( i < tagDataLength ) {
+
+            i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
+            const char type = tagData[i];     // get tag type at position i
+            ++i;
+
+            switch ( type ) {
+
+                case(Constants::BAM_TAG_TYPE_ASCII) :
+                case(Constants::BAM_TAG_TYPE_INT8)  :
+                case(Constants::BAM_TAG_TYPE_UINT8) :
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_INT16)  :
+                case(Constants::BAM_TAG_TYPE_UINT16) :
+                    BamTools::SwapEndian_16p(&tagData[i]);
+                    i += sizeof(uint16_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_FLOAT)  :
+                case(Constants::BAM_TAG_TYPE_INT32)  :
+                case(Constants::BAM_TAG_TYPE_UINT32) :
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    i += sizeof(uint32_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_HEX)    :
+                case(Constants::BAM_TAG_TYPE_STRING) :
+                    // no endian swapping necessary for hex-string/string data
+                    // (search for the terminator is bounded by the tag data length)
+                    while ( i < tagDataLength && tagData[i] )
+                        ++i;
+                    // increment one more for null terminator
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+                {
+                    // read array type
+                    const char arrayType = tagData[i];
+                    ++i;
+
+                    // swap endian-ness of number of elements in place, then retrieve for loop
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    int32_t numElements;
+                    memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+                    i += sizeof(uint32_t);
+
+                    // swap endian-ness of array elements
+                    for ( int j = 0; j < numElements; ++j ) {
+                        switch (arrayType) {
+                            case (Constants::BAM_TAG_TYPE_INT8)  :
+                            case (Constants::BAM_TAG_TYPE_UINT8) :
+                                // no endian-swapping necessary
+                                ++i;
+                                break;
+                            case (Constants::BAM_TAG_TYPE_INT16)  :
+                            case (Constants::BAM_TAG_TYPE_UINT16) :
+                                BamTools::SwapEndian_16p(&tagData[i]);
+                                i += sizeof(uint16_t);
+                                break;
+                            case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                            case (Constants::BAM_TAG_TYPE_INT32)  :
+                            case (Constants::BAM_TAG_TYPE_UINT32) :
+                                BamTools::SwapEndian_32p(&tagData[i]);
+                                i += sizeof(uint32_t);
+                                break;
+                            default:
+                                const string message = string("invalid binary array type: ") + arrayType;
+                                throw BamException("BamWriter::SaveAlignment", message);
+                        }
+                    }
+
+                    break;
+                }
+
+                default :
+                    const string message = string("invalid tag type: ") + type;
+                    throw BamException("BamWriter::SaveAlignment", message);
+            }
+        }
+
+        m_stream.Write(tagData.data(), tagDataLength);
+    }
+    else
+        m_stream.Write(al.TagData.data(), tagDataLength);
+}
+
+// Writes an alignment that carries only its core fields plus the raw character data
+// buffer (al.SupportData.AllCharData). Written in order: block length, 8-word core,
+// then the raw character data unchanged.
+//
+// The bin is recomputed from the current Position / end position; the remaining core
+// values come from @al and its SupportData. Block length and core words are
+// byte-swapped on big-endian hosts (m_isBigEndian).
+void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
+
+    // block length prefix
+    unsigned int blockLength = al.SupportData.BlockLength;
+    if ( m_isBigEndian )
+        BamTools::SwapEndian_32(blockLength);
+    m_stream.Write((char*)&blockLength, Constants::BAM_SIZEOF_INT);
+
+    // bin is re-calculated (in case BamAlignment's position has been previously modified)
+    const uint32_t bin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // fill in the BAM core words
+    uint32_t core[Constants::BAM_CORE_BUFFER_SIZE];
+    core[0] = al.RefID;
+    core[1] = al.Position;
+    core[2] = (bin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+    core[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+    core[4] = al.SupportData.QuerySequenceLength;
+    core[5] = al.MateRefID;
+    core[6] = al.MatePosition;
+    core[7] = al.InsertSize;
+
+    // big-endian hosts: swap each of the 8 core words
+    if ( m_isBigEndian ) {
+        int word = 0;
+        while ( word < 8 ) {
+            BamTools::SwapEndian_32(core[word]);
+            ++word;
+        }
+    }
+
+    // BAM core
+    m_stream.Write((char*)&core, Constants::BAM_CORE_SIZE);
+
+    // raw char data: everything in the block after the core
+    m_stream.Write((char*)al.SupportData.AllCharData.data(),
+                   al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
+}
+
+// Writes the BAM 'magic number' (Constants::BAM_HEADER_MAGIC) to the output stream.
+void BamWriterPrivate::WriteMagicNumber(void)
+{
+    m_stream.Write(Constants::BAM_HEADER_MAGIC,
+                   Constants::BAM_HEADER_MAGIC_LENGTH);
+}
+
+// Writes the reference sequence list: the number of references, then for each
+// reference its name length (including the terminating NUL), its NUL-terminated
+// name, and its sequence length.
+//
+// Integers are byte-swapped on big-endian hosts (m_isBigEndian). The swap is applied
+// to a separate copy of the name length, so the byte count used to write the name
+// itself is always the real, host-order length.
+void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {
+
+    // write the number of reference sequences
+    uint32_t numReferenceSequences = referenceSequences.size();
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);
+    m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);
+
+    // foreach reference sequence
+    RefVector::const_iterator rsIter = referenceSequences.begin();
+    RefVector::const_iterator rsEnd  = referenceSequences.end();
+    for ( ; rsIter != rsEnd; ++rsIter ) {
+
+        // name length in host byte order (+1 for the terminating NUL)
+        const uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
+
+        // write the reference sequence name length (swapped copy on big-endian hosts)
+        uint32_t storedNameLen = referenceSequenceNameLen;
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(storedNameLen);
+        m_stream.Write((char*)&storedNameLen, Constants::BAM_SIZEOF_INT);
+
+        // write the reference sequence name, using the un-swapped length as byte count
+        m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
+
+        // write the reference sequence length
+        int32_t referenceLength = rsIter->RefLength;
+        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);
+        m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);
+    }
+}
+
+// Writes the SAM header text: its length as a 32-bit integer, followed by the text
+// itself (no terminator; nothing is written after the length when the text is empty).
+//
+// On big-endian hosts (m_isBigEndian) only the stored copy of the length is
+// byte-swapped; the emptiness test and the byte count for the text use the real,
+// host-order length.
+void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {
+
+    // header text length in host byte order
+    const uint32_t samHeaderLen = samHeaderText.size();
+
+    // write the SAM header text length (swapped copy on big-endian hosts)
+    uint32_t storedHeaderLen = samHeaderLen;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(storedHeaderLen);
+    m_stream.Write((char*)&storedHeaderLen, Constants::BAM_SIZEOF_INT);
+
+    // write the SAM header text
+    if ( samHeaderLen > 0 )
+        m_stream.Write(samHeaderText.data(), samHeaderLen);
+}
--- /dev/null
+// ***************************************************************************
+// BamWriter_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_P_H
+#define BAMWRITER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+// Implementation class declared in this private (non-API) header; per the file
+// header it provides the basic functionality for producing BAM files.
+// NOTE(review): presumably the private implementation behind the public
+// BamWriter - confirm in BamWriter.cpp.
+class BamWriterPrivate {
+
+ // ctor & dtor
+ public:
+ BamWriterPrivate(void);
+ ~BamWriterPrivate(void);
+
+ // interface methods
+ public:
+ void Close(void);
+ // returns the stored error message (m_errorString)
+ std::string GetErrorString(void) const;
+ bool IsOpen(void) const;
+ // @samHeaderText and @referenceSequences are passed to
+ // WriteSamHeaderText() / WriteReferences() declared below
+ bool Open(const std::string& filename,
+ const std::string& samHeaderText,
+ const BamTools::RefVector& referenceSequences);
+ bool SaveAlignment(const BamAlignment& al);
+ void SetWriteCompressed(bool ok);
+
+ // 'internal' methods
+ public:
+ // @begin / @end: alignment interval
+ uint32_t CalculateMinimumBin(const int begin, int end) const;
+ // results are returned through the non-const string reference (last parameter)
+ void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
+ void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
+ void WriteAlignment(const BamAlignment& al);
+ void WriteCoreAlignment(const BamAlignment& al);
+ void WriteMagicNumber(void);
+ void WriteReferences(const BamTools::RefVector& referenceSequences);
+ void WriteSamHeaderText(const std::string& samHeaderText);
+
+ // data members
+ private:
+ // output stream
+ BgzfStream m_stream;
+ // NOTE(review): presumably true when the host is big-endian - confirm in the .cpp
+ bool m_isBigEndian;
+ // message returned by GetErrorString()
+ std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMWRITER_P_H
--- /dev/null
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/bam
+# ==========================
+
+# directory that holds the sources listed below
+# (InternalDir is not defined in this file - presumably set by an enclosing CMakeLists; confirm)
+set ( InternalBamDir "${InternalDir}/bam" )
+
+# source files of this directory; PARENT_SCOPE makes the variable visible in the
+# parent scope rather than in this directory's own scope
+set ( InternalBamSources
+ ${InternalBamDir}/BamHeader_p.cpp
+ ${InternalBamDir}/BamMultiReader_p.cpp
+ ${InternalBamDir}/BamRandomAccessController_p.cpp
+ ${InternalBamDir}/BamReader_p.cpp
+ ${InternalBamDir}/BamWriter_p.cpp
+
+ PARENT_SCOPE # <-- leave this last
+)
+
--- /dev/null
+// ***************************************************************************
+// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#include "api/internal/index/BamIndexFactory_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+// Builds the index filename for @bamFilename by appending the extension of the
+// requested index @type. Returns an empty string for any other (unknown) type.
+const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
+                                                  const BamIndex::IndexType& type)
+{
+    if ( type == BamIndex::STANDARD )
+        return ( bamFilename + BamStandardIndex::Extension() );
+
+    if ( type == BamIndex::BAMTOOLS )
+        return ( bamFilename + BamToolsIndex::Extension() );
+
+    // unknown index type
+    return string();
+}
+
+// Creates a new BamIndex object whose concrete type is chosen from the file
+// extension of @indexFilename. Returns 0 (null) if the extension cannot be
+// determined or matches no supported index format. The result is allocated with new.
+BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) {
+
+    BamIndex* index = 0;
+
+    // file extension of the index filename, including dot (".EXT")
+    const string extension = FileExtension(indexFilename);
+
+    if ( !extension.empty() ) {
+        if ( extension == BamStandardIndex::Extension() )
+            index = new BamStandardIndex(reader);
+        else if ( extension == BamToolsIndex::Extension() )
+            index = new BamToolsIndex(reader);
+    }
+
+    return index;
+}
+
+// Creates a new BamIndex object of the requested @type, allocated with new.
+// Returns 0 (null) for an unknown type.
+BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
+                                             BamReaderPrivate* reader)
+{
+    if ( type == BamIndex::STANDARD )
+        return new BamStandardIndex(reader);
+
+    if ( type == BamIndex::BAMTOOLS )
+        return new BamToolsIndex(reader);
+
+    // unknown index type
+    return 0;
+}
+
+// Returns the part of @filename starting at its last '.' (the dot is included).
+// Returns an empty string if the name is 4 characters or shorter, or contains no '.'.
+const string BamIndexFactory::FileExtension(const string& filename) {
+
+    // too short to contain a valid path + extension (this also covers the empty name)
+    if ( filename.length() <= 4 )
+        return string();
+
+    // position of the last dot, if any
+    const size_t dotPosition = filename.rfind('.');
+
+    return ( dotPosition == string::npos ? string()
+                                         : filename.substr(dotPosition) );
+}
+
+// Returns the index filename that corresponds to @bamFilename.
+//
+// The name for @preferredType is tried first, then the names for the other supported
+// types (STANDARD, then BAMTOOLS); the first non-empty candidate is returned.
+// Returns an empty string if @bamFilename is empty or no candidate could be generated.
+//
+// Candidates come from CreateIndexFilename(); this method itself performs no check
+// that the returned file actually exists.
+const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
+                                                const BamIndex::IndexType& preferredType)
+{
+    // nothing to derive an index filename from
+    if ( bamFilename.empty() )
+        return string();
+
+    // preferred type first
+    const string preferredFilename = CreateIndexFilename(bamFilename, preferredType);
+    if ( !preferredFilename.empty() )
+        return preferredFilename;
+
+    // then every other supported type, in fixed order
+    const BamIndex::IndexType supportedTypes[] = { BamIndex::STANDARD, BamIndex::BAMTOOLS };
+    const size_t numSupportedTypes = sizeof(supportedTypes) / sizeof(supportedTypes[0]);
+    for ( size_t i = 0; i < numSupportedTypes; ++i ) {
+
+        // preferred type was already tried above
+        if ( preferredType == supportedTypes[i] )
+            continue;
+
+        const string candidate = CreateIndexFilename(bamFilename, supportedTypes[i]);
+        if ( !candidate.empty() )
+            return candidate;
+    }
+
+    // no index filename could be generated
+    return string();
+}
--- /dev/null
+// ***************************************************************************
+// BamIndexFactory_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#ifndef BAMINDEX_FACTORY_P_H
+#define BAMINDEX_FACTORY_P_H
+
+#include "api/BamIndex.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Collection of static helpers that create BamIndex objects and derive index
+// filenames. The class declares no data members.
+class BamIndexFactory {
+
+ // static interface methods
+ public:
+ // creates a new BamIndex object, depending on extension of @indexFilename
+ // (the implementation returns a null pointer for an unrecognized extension)
+ static BamIndex* CreateIndexFromFilename(const std::string& indexFilename,
+ BamReaderPrivate* reader);
+ // creates a new BamIndex object, of requested @type
+ // (the implementation returns a null pointer for an unknown type)
+ static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type,
+ BamReaderPrivate* reader);
+ // returns name of index file that corresponds to @bamFilename
+ // will defer to @preferredType if possible
+ // if no name can be generated for @preferredType, the other supported index types are tried
+ // returns empty string if no index filename (of any type) can be generated
+ // NOTE(review): the implementation in BamIndexFactory_p.cpp does not check
+ // whether the returned file actually exists
+ static const std::string FindIndexFilename(const std::string& bamFilename,
+ const BamIndex::IndexType& preferredType);
+
+ // internal methods
+ public:
+ // generates index filename from BAM filename (depending on requested type)
+ // if type is unknown, returns empty string
+ static const std::string CreateIndexFilename(const std::string& bamFilename,
+ const BamIndex::IndexType& type);
+ // retrieves file extension (including '.')
+ static const std::string FileExtension(const std::string& filename);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMINDEX_FACTORY_P_H
--- /dev/null
+// ***************************************************************************
+// BamStandardIndex.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamStandardIndex_p.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+#include <sstream>
+using namespace std;
+
+// -----------------------------------
+// static BamStandardIndex constants
+// -----------------------------------
+
+// bin ID bound for the index
+const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1
+// right-shift that converts a position into a linear offset index (see CalculateMinOffset)
+const int BamStandardIndex::BAM_LIDX_SHIFT = 14;
+// file extension of the standard index format
+const string BamStandardIndex::BAI_EXTENSION = ".bai";
+// 4-byte magic number expected at the start of an index file (see CheckMagicNumber)
+const char* const BamStandardIndex::BAI_MAGIC = "BAI\1";
+// byte sizes of on-disk index elements:
+// an alignment chunk is two 64-bit offsets (start & stop, see CalculateCandidateOffsets)
+const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
+// presumably a bin's ID (uint32_t) plus its number of alignment chunks (int32_t) - confirm in ReadBinIntoBuffer
+const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);
+// a linear offset is one 64-bit value
+const int BamStandardIndex::SIZEOF_LINEAROFFSET = sizeof(uint64_t);
+
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+// starts out owning nothing: both Device and Buffer are null
+BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
+    : Device(0), Buffer(0)
+{
+}
+
+// releases whatever is still held: closes & deletes the device, frees the buffer
+BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+    // device is closed before it is destroyed
+    if ( Device != 0 ) {
+        Device->Close();
+        delete Device;
+    }
+    Device = 0;
+
+    // delete[] on a null pointer is a no-op, so no guard is needed
+    delete[] Buffer;
+    Buffer = 0;
+}
+
+// ---------------------------------
+// BamStandardIndex implementation
+// ---------------------------------
+
+// constructor: hands @reader to the BamIndex base, starts with an empty I/O buffer
+// (length 0) and records the host byte order for later endian swaps
+BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
+    : BamIndex(reader), m_bufferLength(0)
+{
+    m_isBigEndian = BamTools::SystemIsBigEndian();
+}
+
+// destructor: releases the index file and the I/O buffer (see CloseFile())
+BamStandardIndex::~BamStandardIndex(void)
+{
+    this->CloseFile();
+}
+
+// Converts @region into the position interval (@begin, @end) on its left reference.
+//
+// @begin receives region.LeftPosition. @end receives region.RightPosition when a right
+// bound is specified on the same reference as the left bound; otherwise it receives
+// the length of the left reference.
+//
+// Throws BamException if LeftPosition is not smaller than the reference length.
+// The reference is looked up with vector::at(), so an out-of-range LeftRefID raises
+// the standard library's range error instead.
+void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
+
+    // reference entry for the region's left bound
+    const RefVector& references = m_reader->GetReferenceData();
+    RefVector::const_reference leftReference = references.at(region.LeftRefID);
+
+    // LeftPosition cannot be greater than or equal to reference length
+    if ( region.LeftPosition >= leftReference.RefLength )
+        throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
+
+    begin = (unsigned int)region.LeftPosition;
+
+    // right bound is usable only if it is specified AND lies on the same reference
+    const bool useRightBound = region.isRightBoundSpecified() &&
+                               ( region.LeftRefID == region.RightRefID );
+    if ( !useRightBound )
+        end = (unsigned int)leftReference.RefLength;   // up to the last reference base
+    else
+        end = (unsigned int)region.RightPosition;
+}
+
+// Adds to @candidateBins the ID of every bin that may hold alignments for the
+// region given by @begin and @end.
+//
+// Bin 0 is always added. On each of the five finer levels, all bins from the one
+// containing @begin through the one containing @end are added.
+void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
+                                              const uint32_t& end,
+                                              set<uint16_t>& candidateBins)
+{
+    // per-level position shift and ID of the first bin on that level
+    static const unsigned int levelShifts[]    = { 26, 23, 20,  17,   14 };
+    static const unsigned int levelFirstBins[] = {  1,  9, 73, 585, 4681 };
+    static const int numLevels = 5;
+
+    // bin '0' is always a valid bin
+    candidateBins.insert(0);
+
+    // remaining levels
+    for ( int level = 0; level < numLevels; ++level ) {
+        const unsigned int firstBin = levelFirstBins[level] + (begin >> levelShifts[level]);
+        const unsigned int lastBin  = levelFirstBins[level] + (end   >> levelShifts[level]);
+        for ( unsigned int bin = firstBin; bin <= lastBin; ++bin )
+            candidateBins.insert(bin);
+    }
+}
+
+// Scans the bins of one reference in the index file and collects the start offsets
+// of alignment chunks that may contain alignments for the requested region.
+//
+// @refSummary     supplies the file position of the reference's first bin and its bin count
+// @minOffset      chunks whose stop offset is below this value are skipped
+// @candidateBins  bin IDs of interest; each ID is removed once its bin has been processed,
+//                 and the scan stops early when the set becomes empty
+// @offsets        receives the start offset of every accepted chunk (appended)
+void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+                                                 const uint64_t& minOffset,
+                                                 set<uint16_t>& candidateBins,
+                                                 vector<int64_t>& offsets)
+{
+    // position the index file at the reference's first bin
+    Seek(refSummary.FirstBinFilePosition, SEEK_SET);
+
+    uint32_t binId;
+    int32_t  numAlignmentChunks;
+
+    for ( int binIndex = 0; binIndex < refSummary.NumBins; ++binIndex ) {
+
+        // read bin contents (if successful, alignment chunks are now in the I/O buffer)
+        ReadBinIntoBuffer(binId, numAlignmentChunks);
+
+        // bins that are not candidates are skipped
+        const set<uint16_t>::iterator candidate = candidateBins.find(binId);
+        if ( candidate == candidateBins.end() )
+            continue;
+
+        // candidate bin: walk its alignment chunks (start offset followed by stop offset)
+        size_t bufferOffset = 0;
+        for ( int chunk = 0; chunk < numAlignmentChunks; ++chunk ) {
+
+            uint64_t chunkStart;
+            uint64_t chunkStop;
+            memcpy((char*)&chunkStart, m_resources.Buffer+bufferOffset, sizeof(uint64_t));
+            bufferOffset += sizeof(uint64_t);
+            memcpy((char*)&chunkStop, m_resources.Buffer+bufferOffset, sizeof(uint64_t));
+            bufferOffset += sizeof(uint64_t);
+
+            // swap endian-ness if necessary
+            if ( m_isBigEndian ) {
+                SwapEndian_64(chunkStart);
+                SwapEndian_64(chunkStop);
+            }
+
+            // keep the chunk's start offset unless the chunk stops before 'minOffset'
+            if ( chunkStop >= minOffset )
+                offsets.push_back(chunkStart);
+        }
+
+        // this candidate is done; quit once none are left
+        candidateBins.erase(candidate);
+        if ( candidateBins.empty() )
+            break;
+    }
+}
+
+// Returns the linear offset to use as minimum for a region starting at @begin.
+//
+// Returns 0 if the reference has no linear offsets. Otherwise @begin is shifted right
+// by BAM_LIDX_SHIFT to get an index; if that index lies beyond the last linear offset,
+// the last one is used instead.
+uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
+                                              const uint32_t& begin)
+{
+    // nothing to look up
+    if ( refSummary.NumLinearOffsets == 0 )
+        return 0;
+
+    // index corresponding to the requested start position
+    const int windowIndex = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
+
+    if ( windowIndex < refSummary.NumLinearOffsets )
+        return LookupLinearOffset( refSummary, windowIndex );
+
+    // 'begin' starts beyond the last linear offset, fall back to the last one
+    return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
+}
+
+// Makes sure @buffer can hold at least @requestedBytes bytes.
+//
+// If the current @bufferLength is sufficient, nothing happens. Otherwise the old buffer
+// is released and a new one of (@requestedBytes + 10) bytes is allocated; previous
+// contents are NOT preserved.
+//
+// Throws BamException if the allocation fails. In that case @buffer is null and
+// @bufferLength is 0, so the pair never describes freed memory and a later
+// delete[] of @buffer is harmless.
+void BamStandardIndex::CheckBufferSize(char*& buffer,
+                                       unsigned int& bufferLength,
+                                       const unsigned int& requestedBytes)
+{
+    // current buffer is large enough
+    if ( requestedBytes <= bufferLength )
+        return;
+
+    // computed up front, in case @requestedBytes refers to the same object as @bufferLength
+    const unsigned int bytesRequested = requestedBytes;
+    const unsigned int newLength = bytesRequested + 10;
+
+    // release the old buffer and leave a consistent 'empty' state behind
+    // before attempting the allocation that may throw
+    delete[] buffer;
+    buffer = 0;
+    bufferLength = 0;
+
+    try {
+        buffer = new char[newLength];
+    } catch ( std::bad_alloc& ) {
+        stringstream s("");
+        s << "out of memory when allocating " << bytesRequested << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
+    }
+
+    // length is published only after the allocation succeeded
+    bufferLength = newLength;
+}
+
+// Ensures that 'buffer' can hold at least 'requestedBytes' bytes
+// (overload for unsigned char buffers).
+//
+// If the current length is sufficient, nothing happens. Otherwise the buffer is
+// replaced by a new allocation of (requestedBytes + 10) bytes & 'bufferLength'
+// is updated. Previous buffer contents are NOT carried over.
+//
+// Throws BamException if the new size cannot be represented or allocated. In that
+// case 'buffer' & 'bufferLength' are left exactly as they were, so the caller
+// still owns a valid buffer.
+void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
+                                       unsigned int& bufferLength,
+                                       const unsigned int& requestedBytes)
+{
+    // current buffer is already large enough
+    if ( requestedBytes <= bufferLength )
+        return;
+
+    // guard against unsigned wrap-around when adding the padding
+    const unsigned int newLength = requestedBytes + 10;
+    if ( newLength < requestedBytes )
+        throw BamException("BamStandardIndex::CheckBufferSize", "requested buffer size is too large");
+
+    // allocate the replacement *before* releasing the old buffer, so a failed
+    // allocation cannot leave 'buffer' pointing at freed memory
+    unsigned char* newBuffer = 0;
+    try {
+        newBuffer = new unsigned char[newLength];
+    } catch ( std::bad_alloc& ) {
+        stringstream s("");
+        s << "out of memory when allocating " << requestedBytes << " bytes";
+        throw BamException("BamStandardIndex::CheckBufferSize", s.str());
+    }
+
+    // swap in the new buffer
+    delete[] buffer;
+    buffer       = newBuffer;
+    bufferLength = newLength;
+}
+
+// Reads the first 4 bytes from the index device & verifies that they match
+// the BAI magic number. Throws BamException if they cannot be read or differ.
+void BamStandardIndex::CheckMagicNumber(void) {
+
+    // pull the magic number from the device
+    char magic[4];
+    const int64_t numBytesRead = m_resources.Device->Read(magic, sizeof(magic));
+    if ( numBytesRead != 4 )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
+
+    // it must equal the expected BAI value
+    const bool isBaiMagic = ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) == 0 );
+    if ( !isBaiMagic )
+        throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
+}
+
+// Resets 'refEntry' to an empty state: no bins, no linear offsets, ID of -1.
+void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
+
+    // drop stored index data
+    refEntry.LinearOffsets.clear();
+    refEntry.Bins.clear();
+
+    // mark entry as not belonging to any reference
+    refEntry.ID = -1;
+}
+
+// Releases the index device, the in-memory file summary, & the I/O buffer.
+//
+// The device is released whenever one exists, not only when it is open:
+// OpenFile() stores the device pointer before attempting to open it, so a failed
+// open leaves a closed device behind. Skipping it here would let the next
+// OpenFile() call overwrite (and leak) that pointer.
+void BamStandardIndex::CloseFile(void) {
+
+    // close (if open) & release the device
+    if ( m_resources.Device != 0 ) {
+        if ( m_resources.Device->IsOpen() )
+            m_resources.Device->Close();
+        delete m_resources.Device;
+        m_resources.Device = 0;
+    }
+
+    // clear index file summary data
+    m_indexFileSummary.clear();
+
+    // clean up I/O buffer
+    delete[] m_resources.Buffer;
+    m_resources.Buffer = 0;
+    m_bufferLength = 0;
+}
+
+// builds index from associated BAM file & writes out to index file
+//
+// Returns true if the index file was written & the reader was rewound afterwards.
+// Returns false (after storing an error description) if the reader is not open,
+// cannot be rewound, the BAM file is not sorted by coordinate, the reader's file
+// position fails to advance, or any index file operation throws BamException.
+//
+// One entry is written for every reference counted by the reader, in ascending
+// reference ID order; references without alignments get an empty entry.
+bool BamStandardIndex::Create(void) {
+
+    // skip if BamReader is invalid or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
+        return false;
+    }
+
+    try {
+
+        // open new index file (read & write)
+        string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, IBamIODevice::ReadWrite);
+
+        // initialize BaiFileSummary with number of references
+        const int& numReferences = m_reader->GetReferenceCount();
+        ReserveForSummary(numReferences);
+
+        // initialize output file
+        WriteHeader();
+
+        // set up bin, ID, offset, & coordinate markers
+        const uint32_t defaultValue = 0xffffffffu;
+        uint32_t currentBin    = defaultValue;
+        uint32_t lastBin       = defaultValue;
+        int32_t  currentRefID  = defaultValue;
+        int32_t  lastRefID     = defaultValue;
+        uint64_t currentOffset = (uint64_t)m_reader->Tell();
+        uint64_t lastOffset    = currentOffset;
+        int32_t  lastPosition  = defaultValue;
+
+        // ID of the first reference whose entry has not been written yet
+        // (entries are written in ascending reference ID order)
+        int32_t nextUnwrittenRefID = 0;
+
+        // iterate through alignments in BAM file
+        BamAlignment al;
+        BaiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
+
+            // changed to new reference
+            if ( lastRefID != al.RefID ) {
+
+                // if not first reference, save previous reference data
+                if ( lastRefID != (int32_t)defaultValue ) {
+
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+                    WriteReferenceEntry(refEntry);
+                    ClearReferenceEntry(refEntry);
+                    nextUnwrittenRefID = lastRefID + 1;
+
+                    // write any empty references between (but *NOT* including) lastRefID & al.RefID
+                    for ( ; nextUnwrittenRefID < al.RefID; ++nextUnwrittenRefID ) {
+                        BaiReferenceEntry emptyEntry(nextUnwrittenRefID);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+
+                    // update bin markers
+                    currentOffset = lastOffset;
+                    currentBin    = al.Bin;
+                    lastBin       = al.Bin;
+                    currentRefID  = al.RefID;
+                }
+
+                // otherwise, this is first pass
+                // be sure to write any empty references up to (but *NOT* including) current RefID
+                else {
+                    for ( ; nextUnwrittenRefID < al.RefID; ++nextUnwrittenRefID ) {
+                        BaiReferenceEntry emptyEntry(nextUnwrittenRefID);
+                        WriteReferenceEntry(emptyEntry);
+                    }
+                }
+
+                // update reference markers
+                refEntry.ID = al.RefID;
+                lastRefID   = al.RefID;
+                lastBin     = defaultValue;
+            }
+
+            // if lastPosition greater than current alignment position - file not sorted properly
+            else if ( lastPosition > al.Position ) {
+                stringstream s("");
+                s << "BAM file is not properly sorted by coordinate" << endl
+                  << "Current alignment position: " << al.Position
+                  << " < previous alignment position: " << lastPosition
+                  << " on reference ID: " << al.RefID << endl;
+                SetErrorString("BamStandardIndex::Create", s.str());
+                return false;
+            }
+
+            // if alignment's ref ID is valid & its bin is not a 'leaf'
+            if ( (al.RefID >= 0) && (al.Bin < 4681) )
+                SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
+
+            // changed to new BAI bin
+            if ( al.Bin != lastBin ) {
+
+                // if not first bin on reference, save previous bin data
+                if ( currentBin != defaultValue )
+                    SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+
+                // update markers
+                currentOffset = lastOffset;
+                currentBin    = al.Bin;
+                lastBin       = al.Bin;
+                currentRefID  = al.RefID;
+
+                // if invalid RefID, break out
+                if ( currentRefID < 0 )
+                    break;
+            }
+
+            // make sure that current file pointer is beyond lastOffset
+            if ( m_reader->Tell() <= (int64_t)lastOffset ) {
+                SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
+                return false;
+            }
+
+            // update lastOffset & lastPosition
+            lastOffset   = m_reader->Tell();
+            lastPosition = al.Position;
+        }
+
+        // after finishing alignments, if the last alignment seen had a valid RefID:
+        if ( currentRefID >= 0 ) {
+
+            // store last alignment chunk to its bin, then write last reference entry with data
+            SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+            WriteReferenceEntry(refEntry);
+            nextUnwrittenRefID = currentRefID + 1;
+        }
+
+        // then write empty entries for every reference not written so far.
+        // This must also happen when the loop stopped on an alignment with an
+        // invalid RefID (or never ran at all); otherwise the file would hold fewer
+        // reference entries than the count already written by WriteHeader()
+        for ( ; nextUnwrittenRefID < numReferences; ++nextUnwrittenRefID ) {
+            BaiReferenceEntry emptyEntry(nextUnwrittenRefID);
+            WriteReferenceEntry(emptyEntry);
+        }
+
+    } catch ( BamException& e) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamStandardIndex::Create", message);
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// returns the file extension used for index files of this format
+const string BamStandardIndex::Extension(void) {
+    return BAI_EXTENSION;
+}
+
+// Calculates the BAM file offset to jump to for @region.
+//
+// On return, 'offset' holds the chosen candidate offset & *hasAlignmentsInRegion
+// holds the result of the last alignment load attempted during the search.
+// If the index yields no candidate offsets, the method returns early & leaves
+// both 'offset' and *hasAlignmentsInRegion untouched.
+//
+// Throws BamException if the region's left reference ID is not covered by the
+// index summary, or if seeking in the BAM file fails.
+void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+
+    // cannot calculate offsets if unknown/invalid reference ID requested
+    if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
+        throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
+
+    // retrieve index summary for left bound reference
+    const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
+
+    // set up region boundaries based on actual BamReader data
+    uint32_t begin;
+    uint32_t end;
+    AdjustRegion(region, begin, end);
+
+    // retrieve all candidate bin IDs for region
+    set<uint16_t> candidateBins;
+    CalculateCandidateBins(begin, end, candidateBins);
+
+    // use reference's linear offsets to calculate the minimum offset
+    // that must be considered to find overlap
+    const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
+
+    // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
+    // no data should not be error, just bail
+    vector<int64_t> offsets;
+    CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
+    if ( offsets.empty() )
+        return;
+
+    // ensure that offsets are sorted before processing
+    sort( offsets.begin(), offsets.end() );
+
+    // binary search for an overlapping block (may not be first one though)
+    BamAlignment al;
+    typedef vector<int64_t>::const_iterator OffsetConstIterator;
+    OffsetConstIterator offsetFirst = offsets.begin();
+    OffsetConstIterator offsetIter  = offsetFirst;
+    OffsetConstIterator offsetLast  = offsets.end();
+    iterator_traits<OffsetConstIterator>::difference_type count = distance(offsetFirst, offsetLast);
+    iterator_traits<OffsetConstIterator>::difference_type step;
+    while ( count > 0 ) {
+        offsetIter = offsetFirst;
+        step = count/2;
+        advance(offsetIter, step);
+
+        // attempt seek to candidate offset
+        const int64_t& candidateOffset = (*offsetIter);
+        if ( !m_reader->Seek(candidateOffset) ) {
+            const string readerError = m_reader->GetErrorString();
+            const string message = "could not seek in BAM file: \n\t" + readerError;
+            // report this class & method as the error's origin
+            throw BamException("BamStandardIndex::GetOffset", message);
+        }
+
+        // load first available alignment, setting flag to true if data exists
+        *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
+
+        // check alignment against region:
+        // if it ends at or before the region's left bound, search the upper half
+        // otherwise, keep searching the lower half
+        if ( al.GetEndPosition() <= region.LeftPosition ) {
+            offsetFirst = ++offsetIter;
+            count -= step+1;
+        } else count = step;
+    }
+
+    // step back to the offset before the 'current offset' (to make sure we cover overlaps)
+    // (offsets is non-empty here, so the iterator is dereferenceable afterwards)
+    if ( offsetIter != offsets.begin() )
+        --offsetIter;
+    offset = (*offsetIter);
+}
+
+// returns true if the index summary lists at least one bin for @referenceID
+// (returns false for reference IDs outside the summary's range)
+bool BamStandardIndex::HasAlignments(const int& referenceID) const {
+
+    const bool isKnownReference = ( referenceID >= 0 &&
+                                    referenceID < (int)m_indexFileSummary.size() );
+    if ( !isKnownReference )
+        return false;
+
+    return ( m_indexFileSummary.at(referenceID).NumBins > 0 );
+}
+
+// returns true if an index device exists & reports itself as open
+bool BamStandardIndex::IsDeviceOpen(void) const {
+    return ( m_resources.Device != 0 && m_resources.Device->IsOpen() );
+}
+
+// Uses the index data to position the reader for @region.
+//
+// The return value only reports whether an error occurred; whether alignments
+// are available after the jump position is reported through
+// *hasAlignmentsInRegion.
+bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // assume no data until the offset calculation says otherwise
+    *hasAlignmentsInRegion = false;
+
+    // a usable (open) reader is required
+    const bool readerIsOpen = ( m_reader != 0 && m_reader->IsOpen() );
+    if ( !readerIsOpen ) {
+        SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
+        return false;
+    }
+
+    // determine the offset nearest to the region
+    int64_t offset;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // no alignments is not an error: report success & leave the flag at false
+    // (BamReader will check this flag before trying to load data)
+    if ( !(*hasAlignmentsInRegion) )
+        return true;
+
+    // otherwise, success depends on whether the reader can seek to the offset
+    return m_reader->Seek(offset);
+}
+
+// Opens the index file @filename (read-only), validates its magic number, &
+// builds the in-memory summary of its contents.
+// Returns false (storing the error message) if any step throws BamException.
+bool BamStandardIndex::Load(const std::string& filename) {
+
+    try {
+        OpenFile(filename, IBamIODevice::ReadOnly);  // open index file
+        CheckMagicNumber();                          // validate format
+        SummarizeIndexFile();                        // summarize index data
+    }
+    catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // all steps completed
+    return true;
+}
+
+// Reads the linear offset at position 'index' (counted from the reference's
+// first linear offset) directly from the index file & returns it.
+uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
+
+    // locate the requested entry within the index file
+    const int     bytesToSkip  = index*BamStandardIndex::SIZEOF_LINEAROFFSET;
+    const int64_t filePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition + bytesToSkip;
+    Seek(filePosition, SEEK_SET);
+
+    // read the entry found there
+    uint64_t result;
+    ReadLinearOffset(result);
+    return result;
+}
+
+// Simplifies 'chunks' in place: walking the chunks in order, a chunk is folded
+// into the previously kept chunk whenever (previous.Stop >> 16) equals
+// (chunk.Start >> 16); otherwise it is kept as a separate chunk.
+void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+
+    // nothing to merge
+    if ( chunks.empty() )
+        return;
+
+    // seed the result with the first chunk
+    BaiAlignmentChunkVector merged;
+    merged.push_back( chunks.front() );
+
+    // visit remaining chunks, always comparing against the most recently kept one
+    const size_t numChunks = chunks.size();
+    for ( size_t k = 1; k < numChunks; ++k ) {
+
+        const BaiAlignmentChunk& candidate  = chunks[k];
+        BaiAlignmentChunk&       lastMerged = merged.back();
+
+        // extend the kept chunk if the candidate picks up where it stops
+        if ( lastMerged.Stop>>16 == candidate.Start>>16 )
+            lastMerged.Stop = candidate.Stop;
+
+        // otherwise the candidate becomes the new 'most recently kept' chunk
+        else
+            merged.push_back(candidate);
+    }
+
+    // hand the merged chunks back through the parameter
+    chunks.swap(merged);
+}
+
+// Closes any previously used index file, then creates a device for @filename &
+// opens it using @mode. Throws BamException if the device cannot be created
+// or does not report itself as open afterwards.
+void BamStandardIndex::OpenFile(const std::string& filename, IBamIODevice::OpenMode mode) {
+
+    // start from a clean slate
+    CloseFile();
+
+    // create device
+    m_resources.Device = BamDeviceFactory::CreateDevice(filename);
+    const bool deviceCreated = ( m_resources.Device != 0 );
+    if ( !deviceCreated ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamStandardIndex::OpenFile", message);
+    }
+
+    // open device & verify the result
+    m_resources.Device->Open(mode);
+    if ( IsDeviceOpen() )
+        return;
+
+    const string message = string("could not open file: ") + filename;
+    throw BamException("BamStandardIndex::OpenFile", message);
+}
+
+// Reads a bin ID from the index device into 'binId' (byte-swapped when
+// m_isBigEndian is set). Throws BamException on an incomplete read.
+void BamStandardIndex::ReadBinID(uint32_t& binId) {
+
+    char* destination = (char*)&binId;
+    const int64_t numBytesRead = m_resources.Device->Read(destination, sizeof(binId));
+
+    if ( m_isBigEndian )
+        SwapEndian_32(binId);
+
+    const bool readIncomplete = ( numBytesRead != sizeof(binId) );
+    if ( readIncomplete )
+        throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
+}
+
+// Reads one bin from the index device: its ID & alignment chunk count are
+// returned through the parameters, its alignment chunks are read into the
+// I/O buffer.
+//
+// Throws BamException if any read fails or if the chunk count is negative.
+void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
+
+    // read bin header
+    ReadBinID(binId);
+    ReadNumAlignmentChunks(numAlignmentChunks);
+
+    // a negative count can only come from a damaged index file; reject it here
+    // rather than letting it turn into a huge unsigned byte request below
+    if ( numAlignmentChunks < 0 )
+        throw BamException("BamStandardIndex::ReadBinIntoBuffer", "invalid (negative) BAI chunk count");
+
+    // read bin contents
+    const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
+    ReadIntoBuffer(bytesRequested);
+}
+
+// Reads 'bytesRequested' bytes from the index device into the I/O buffer,
+// resizing the buffer first if needed. Throws BamException unless exactly
+// that many bytes were read.
+void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
+
+    // make room for the request
+    BamStandardIndex::CheckBufferSize(m_resources.Buffer, m_bufferLength, bytesRequested);
+
+    // read from the index device; done if we got everything we asked for
+    const int64_t bytesRead = m_resources.Device->Read(m_resources.Buffer, bytesRequested);
+    if ( bytesRead == (int64_t)bytesRequested )
+        return;
+
+    // otherwise report the mismatch
+    stringstream s("");
+    s << "expected to read: " << bytesRequested << " bytes, "
+      << "but instead read: " << bytesRead;
+    throw BamException("BamStandardIndex::ReadIntoBuffer", s.str());
+}
+
+// Reads a linear offset from the index device into 'linearOffset' (byte-swapped
+// when m_isBigEndian is set). Throws BamException on an incomplete read.
+void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
+
+    char* destination = (char*)&linearOffset;
+    const int64_t numBytesRead = m_resources.Device->Read(destination, sizeof(linearOffset));
+
+    if ( m_isBigEndian )
+        SwapEndian_64(linearOffset);
+
+    const bool readIncomplete = ( numBytesRead != sizeof(linearOffset) );
+    if ( readIncomplete )
+        throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
+}
+
+// Reads an alignment chunk count from the index device into 'numAlignmentChunks'
+// (byte-swapped when m_isBigEndian is set). Throws BamException on an incomplete read.
+void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
+
+    char* destination = (char*)&numAlignmentChunks;
+    const int64_t numBytesRead = m_resources.Device->Read(destination, sizeof(numAlignmentChunks));
+
+    if ( m_isBigEndian )
+        SwapEndian_32(numAlignmentChunks);
+
+    const bool readIncomplete = ( numBytesRead != sizeof(numAlignmentChunks) );
+    if ( readIncomplete )
+        throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
+}
+
+// Reads a bin count from the index device into 'numBins' (byte-swapped when
+// m_isBigEndian is set). Throws BamException on an incomplete read.
+void BamStandardIndex::ReadNumBins(int& numBins) {
+
+    char* destination = (char*)&numBins;
+    const int64_t numBytesRead = m_resources.Device->Read(destination, sizeof(numBins));
+
+    if ( m_isBigEndian )
+        SwapEndian_32(numBins);
+
+    const bool readIncomplete = ( numBytesRead != sizeof(numBins) );
+    if ( readIncomplete )
+        throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
+}
+
+// Reads a linear offset count from the index device into 'numLinearOffsets'
+// (byte-swapped when m_isBigEndian is set). Throws BamException on an incomplete read.
+void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&numLinearOffsets, sizeof(numLinearOffsets));
+    if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
+    if ( numBytesRead != sizeof(numLinearOffsets) )
+        // report this method (not ReadNumAlignmentChunks) as the error's origin
+        throw BamException("BamStandardIndex::ReadNumLinearOffsets", "could not read BAI linear offset count");
+}
+
+// Reads a reference count from the index device into 'numReferences' (byte-swapped
+// when m_isBigEndian is set). Throws BamException on an incomplete read.
+void BamStandardIndex::ReadNumReferences(int& numReferences) {
+
+    char* destination = (char*)&numReferences;
+    const int64_t numBytesRead = m_resources.Device->Read(destination, sizeof(numReferences));
+
+    if ( m_isBigEndian )
+        SwapEndian_32(numReferences);
+
+    const bool readIncomplete = ( numBytesRead != sizeof(numReferences) );
+    if ( readIncomplete )
+        throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
+}
+
+// Replaces the index file summary with 'numReferences' default-constructed
+// reference summaries.
+void BamStandardIndex::ReserveForSummary(const int& numReferences) {
+    const BaiReferenceSummary blankSummary;
+    m_indexFileSummary.clear();
+    m_indexFileSummary.assign(numReferences, blankSummary);
+}
+
+// Appends the alignment chunk [currentOffset, lastOffset] to the chunk list
+// stored in 'binMap' for 'currentBin'. If the bin has no list yet, one is created.
+void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
+                                               const uint32_t& currentBin,
+                                               const uint64_t& currentOffset,
+                                               const uint64_t& lastOffset)
+{
+    // map lookup creates an empty chunk list when the bin is not present yet
+    BaiAlignmentChunkVector& binChunks = binMap[currentBin];
+
+    // store the new chunk after any chunks already saved for this bin
+    binChunks.push_back( BaiAlignmentChunk(currentOffset, lastOffset) );
+}
+
+// Records, in the summary for reference 'refId', the number of bins & the
+// index file position at the time of the call.
+void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
+    BaiReferenceSummary& summary = m_indexFileSummary.at(refId);
+    summary.NumBins              = numBins;
+    summary.FirstBinFilePosition = Tell();
+}
+
+// Stores 'lastOffset' in every still-unset (zero) linear offset slot after the
+// slot of the alignment's start position, up to & including the slot of
+// (stop position - 1). The vector is grown with zeros if it is too short.
+void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+                                             const int& alignmentStartPosition,
+                                             const int& alignmentStopPosition,
+                                             const uint64_t& lastOffset)
+{
+    // translate alignment coordinates into linear offset slots
+    const int firstSlot = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
+    const int lastSlot  = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
+
+    // make sure the last slot exists
+    const int requiredSize = lastSlot + 1;
+    const int currentSize  = offsets.size();
+    if ( currentSize < requiredSize )
+        offsets.resize(requiredSize, 0);
+
+    // fill slots that have not been assigned an offset yet
+    for ( int slot = firstSlot + 1; slot <= lastSlot; ++slot ) {
+        uint64_t& storedOffset = offsets[slot];
+        if ( storedOffset == 0 )
+            storedOffset = lastOffset;
+    }
+}
+
+// Records, in the summary for reference 'refId', the number of linear offsets &
+// the index file position at the time of the call.
+void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
+    BaiReferenceSummary& summary          = m_indexFileSummary.at(refId);
+    summary.NumLinearOffsets              = numLinearOffsets;
+    summary.FirstLinearOffsetFilePosition = Tell();
+}
+
+// moves the index device to @position (interpreted relative to @origin)
+// throws BamException if the device reports failure
+void BamStandardIndex::Seek(const int64_t& position, const int origin) {
+    const bool seekSucceeded = m_resources.Device->Seek(position, origin);
+    if ( seekSucceeded )
+        return;
+    throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
+}
+
+// Advances the index device past 'numBins' bins by reading each one;
+// the data read is discarded.
+void BamStandardIndex::SkipBins(const int& numBins) {
+
+    // placeholders for values we do not use
+    uint32_t ignoredBinId;
+    int32_t  ignoredChunkCount;
+
+    int binsRemaining = numBins;
+    while ( binsRemaining > 0 ) {
+        ReadBinIntoBuffer(ignoredBinId, ignoredChunkCount);
+        --binsRemaining;
+    }
+}
+
+// Advances the index device past 'numLinearOffsets' linear offsets by reading
+// them into the I/O buffer; the data read is not used.
+//
+// Throws BamException if the count is negative or the read fails.
+void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
+
+    // a negative count can only come from a damaged index file; reject it here
+    // rather than letting it turn into a huge unsigned byte request below
+    if ( numLinearOffsets < 0 )
+        throw BamException("BamStandardIndex::SkipLinearOffsets", "invalid (negative) BAI linear offset count");
+
+    const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
+    ReadIntoBuffer(bytesRequested);
+}
+
+// sorts the linear offsets in ascending order
+void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
+    std::sort(linearOffsets.begin(), linearOffsets.end());
+}
+
+// Reads a bin count from the index device, stores it (together with the file
+// position that follows it) in 'refSummary', then skips over that many bins.
+void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
+
+    // how many bins follow?
+    int binCount;
+    ReadNumBins(binCount);
+
+    // remember the count & where the first bin starts
+    refSummary.NumBins              = binCount;
+    refSummary.FirstBinFilePosition = Tell();
+
+    // move past the bins themselves
+    SkipBins(binCount);
+}
+
+// Reads the reference count from the index device, then builds one summary
+// per reference by reading through the rest of the index file.
+//
+// Throws BamException if a read fails or the reference count is negative.
+void BamStandardIndex::SummarizeIndexFile(void) {
+
+    // load number of reference sequences
+    int numReferences;
+    ReadNumReferences(numReferences);
+
+    // a negative count can only come from a damaged index file. Report it as a
+    // BamException (which Load() handles) instead of passing it on to the
+    // summary container as an element count
+    if ( numReferences < 0 )
+        throw BamException("BamStandardIndex::SummarizeIndexFile", "invalid (negative) BAI reference count");
+
+    // initialize file summary data
+    ReserveForSummary(numReferences);
+
+    // iterate over reference entries
+    BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
+    BaiFileSummary::iterator summaryEnd  = m_indexFileSummary.end();
+    for ( ; summaryIter != summaryEnd; ++summaryIter )
+        SummarizeReference(*summaryIter);
+}
+
+// Reads a linear offset count from the index device, stores it (together with
+// the file position that follows it) in 'refSummary', then skips over that
+// many linear offsets.
+void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
+
+    // how many linear offsets follow?
+    int offsetCount;
+    ReadNumLinearOffsets(offsetCount);
+
+    // remember the count & where the first linear offset starts
+    refSummary.NumLinearOffsets              = offsetCount;
+    refSummary.FirstLinearOffsetFilePosition = Tell();
+
+    // move past the linear offsets themselves
+    SkipLinearOffsets(offsetCount);
+}
+
+// Fills 'refSummary' for the reference at the current index file position:
+// bins are summarized first, followed by linear offsets.
+void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
+
+    // bins come first in the file...
+    SummarizeBins(refSummary);
+
+    // ...then the linear offsets
+    SummarizeLinearOffsets(refSummary);
+}
+
+// returns the position reported by the index device
+int64_t BamStandardIndex::Tell(void) const {
+    const int64_t currentPosition = m_resources.Device->Tell();
+    return currentPosition;
+}
+
+// Writes the chunk's start offset followed by its stop offset to the index
+// device (both byte-swapped when m_isBigEndian is set).
+// Throws BamException if the total number of bytes written is not as expected.
+void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
+
+    // work on copies, so the caller's chunk is never modified
+    uint64_t start = chunk.Start;
+    uint64_t stop  = chunk.Stop;
+    if ( m_isBigEndian ) {
+        SwapEndian_64(start);
+        SwapEndian_64(stop);
+    }
+
+    // write both offsets, tallying the bytes written
+    int64_t bytesWritten = 0;
+    bytesWritten += m_resources.Device->Write((const char*)&start, sizeof(start));
+    bytesWritten += m_resources.Device->Write((const char*)&stop,  sizeof(stop));
+
+    const bool writeIncomplete = ( bytesWritten != (sizeof(start)+sizeof(stop)) );
+    if ( writeIncomplete )
+        throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
+}
+
+// Merges 'chunks' (modifying the parameter), then writes the resulting chunk
+// count followed by each chunk to the index device.
+// Throws BamException if the count cannot be written completely.
+void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+
+    // simplify chunks first, so the count written reflects the merged list
+    MergeAlignmentChunks(chunks);
+
+    // write the number of chunks
+    int32_t numChunks = chunks.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(numChunks);
+    const int64_t bytesWritten = m_resources.Device->Write((const char*)&numChunks, sizeof(numChunks));
+    if ( bytesWritten != sizeof(numChunks) )
+        throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
+
+    // write the chunks themselves, in order
+    const size_t total = chunks.size();
+    for ( size_t k = 0; k < total; ++k )
+        WriteAlignmentChunk( chunks[k] );
+}
+
+// Writes one bin to the index device: its ID (byte-swapped when m_isBigEndian
+// is set), followed by its alignment chunks.
+// Throws BamException if the ID cannot be written completely.
+void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
+
+    // bin ID goes first (swap a copy, not the caller's value)
+    uint32_t idToWrite = binId;
+    if ( m_isBigEndian )
+        SwapEndian_32(idToWrite);
+    const int64_t bytesWritten = m_resources.Device->Write((const char*)&idToWrite, sizeof(idToWrite));
+    if ( bytesWritten != sizeof(idToWrite) )
+        throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
+
+    // then the bin's alignment chunks
+    WriteAlignmentChunks(chunks);
+}
+
+// Writes the bin count & then every bin in 'bins' to the index device, saving
+// the bins summary for reference 'refId' right after the count is written.
+// Throws BamException if the count cannot be written completely.
+void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
+
+    // bin count goes first
+    int32_t numBins = bins.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(numBins);
+    const int64_t bytesWritten = m_resources.Device->Write((const char*)&numBins, sizeof(numBins));
+    if ( bytesWritten != sizeof(numBins) )
+        throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
+
+    // summary is taken here, while the file position sits just past the count
+    SaveBinsSummary(refId, bins.size());
+
+    // write each bin in map order
+    BaiBinMap::iterator binIter = bins.begin();
+    while ( binIter != bins.end() ) {
+        WriteBin( binIter->first, binIter->second );
+        ++binIter;
+    }
+}
+
+// Writes the BAI magic number followed by the number of references (taken from
+// the size of the index file summary) to the index device.
+// Throws BamException if the total number of bytes written is not as expected.
+void BamStandardIndex::WriteHeader(void) {
+
+    // magic number
+    int64_t bytesWritten = 0;
+    bytesWritten += m_resources.Device->Write(BamStandardIndex::BAI_MAGIC, 4);
+
+    // reference count (byte-swapped if necessary)
+    int32_t referenceCount = m_indexFileSummary.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(referenceCount);
+    bytesWritten += m_resources.Device->Write((const char*)&referenceCount, sizeof(referenceCount));
+
+    // both parts must have been written in full
+    const bool writeIncomplete = ( bytesWritten != sizeof(referenceCount)+4 );
+    if ( writeIncomplete )
+        throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
+}
+
+// Sorts 'linearOffsets' (modifying the parameter), then writes the offset count
+// & every offset to the index device, saving the linear offsets summary for
+// reference 'refId' right after the count is written.
+// Throws BamException if the total number of bytes written is not as expected.
+void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
+
+    // offsets are written (& summarized) in sorted order
+    SortLinearOffsets(linearOffsets);
+
+    // offset count goes first; keep a running total of bytes written
+    int64_t totalBytesWritten = 0;
+    int32_t numOffsets = linearOffsets.size();
+    if ( m_isBigEndian )
+        SwapEndian_32(numOffsets);
+    totalBytesWritten += m_resources.Device->Write((const char*)&numOffsets, sizeof(numOffsets));
+
+    // summary is taken here, while the file position sits just past the count
+    SaveLinearOffsetsSummary(refId, linearOffsets.size());
+
+    // write each offset (byte-swapping a copy if necessary)
+    const size_t total = linearOffsets.size();
+    for ( size_t k = 0; k < total; ++k ) {
+        uint64_t offsetToWrite = linearOffsets[k];
+        if ( m_isBigEndian )
+            SwapEndian_64(offsetToWrite);
+        totalBytesWritten += m_resources.Device->Write((const char*)&offsetToWrite, sizeof(offsetToWrite));
+    }
+
+    // count & all offsets must have been written in full
+    if ( totalBytesWritten != (sizeof(numOffsets) + linearOffsets.size()*sizeof(uint64_t)) )
+        throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
+}
+
+// Writes a complete reference entry to the index device:
+// its bins first, followed by its linear offsets.
+void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
+    const int32_t& refId = refEntry.ID;
+    WriteBins(refId, refEntry.Bins);
+    WriteLinearOffsets(refId, refEntry.LinearOffsets);
+}
--- /dev/null
+// ***************************************************************************
+// BamStandardIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#ifndef BAM_STANDARD_INDEX_FORMAT_H
+#define BAM_STANDARD_INDEX_FORMAT_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+#include "api/IBamIODevice.h"
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// -----------------------------------------------------------------------------
+// BamStandardIndex data structures
+
+// describes one contiguous run of alignments by its start & stop offsets
+struct BaiAlignmentChunk {
+
+    // offsets marking where the chunk begins & ends
+    uint64_t Start;
+    uint64_t Stop;
+
+    // ctor (both offsets default to 0 when omitted)
+    BaiAlignmentChunk(const uint64_t& start = 0, const uint64_t& stop = 0)
+        : Start(start), Stop(stop)
+    { }
+};
+
+// orders alignment chunks by their start offset only (for sorting)
+inline
+bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
+    const bool lhsStartsFirst = ( rhs.Start > lhs.Start );
+    return lhsStartsFirst;
+}
+
+// convenience typedef for a list of all alignment 'chunks' in a BAI bin
+typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
+
+// convenience typedef for a map of all BAI bins in a reference (ID => chunks)
+typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
+
+// convenience typedef for a list of all 'linear offsets' in a reference
+typedef std::vector<uint64_t> BaiLinearOffsetVector;
+
+// holds the full BAI index data (bins & linear offsets) for a single
+// reference, as used when building, loading, & writing the index
+struct BaiReferenceEntry {
+
+    // reference ID, followed by the index data stored for it
+    int32_t ID;
+    BaiBinMap Bins;
+    BaiLinearOffsetVector LinearOffsets;
+
+    // ctor (ID defaults to -1; bins & linear offsets start out empty)
+    BaiReferenceEntry(const int32_t& id = -1) : ID(id) { }
+};
+
+// summarizes a reference's index data: how many bins & linear offsets it has,
+// and the index file positions recorded for the first of each
+struct BaiReferenceSummary {
+
+    // counts
+    int NumBins;
+    int NumLinearOffsets;
+
+    // index file positions
+    uint64_t FirstBinFilePosition;
+    uint64_t FirstLinearOffsetFilePosition;
+
+    // ctor (everything starts at zero)
+    BaiReferenceSummary(void)
+        : NumBins(0), NumLinearOffsets(0)
+        , FirstBinFilePosition(0), FirstLinearOffsetFilePosition(0)
+    { }
+};
+
+// convenience typedef for describing a full BAI index file summary
+typedef std::vector<BaiReferenceSummary> BaiFileSummary;
+
+// end BamStandardIndex data structures
+// -----------------------------------------------------------------------------
+
+class BamStandardIndex : public BamIndex {
+
+ // ctor & dtor
+ public:
+ BamStandardIndex(Internal::BamReaderPrivate* reader);
+ ~BamStandardIndex(void);
+
+ // BamIndex implementation
+ public:
+ // builds index from associated BAM file & writes out to index file
+ bool Create(void);
+ // returns whether reference has alignments or no
+ bool HasAlignments(const int& referenceID) const;
+ // attempts to use index data to jump to @region, returns success/fail
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+ // loads existing data from file into memory
+ bool Load(const std::string& filename);
+ BamIndex::IndexType Type(void) const { return BamIndex::STANDARD; }
+ public:
+ // returns format's file extension
+ static const std::string Extension(void);
+
+ // internal methods
+ private:
+
+ // index file ops
+ void CheckMagicNumber(void);
+ void CloseFile(void);
+ bool IsDeviceOpen(void) const;
+ void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+ void Seek(const int64_t& position, const int origin);
+ int64_t Tell(void) const;
+
+ // BAI index building methods
+ void ClearReferenceEntry(BaiReferenceEntry& refEntry);
+ void SaveAlignmentChunkToBin(BaiBinMap& binMap,
+ const uint32_t& currentBin,
+ const uint64_t& currentOffset,
+ const uint64_t& lastOffset);
+ void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+ const int& alignmentStartPosition,
+ const int& alignmentStopPosition,
+ const uint64_t& lastOffset);
+
+ // random-access methods
+ void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+ void CalculateCandidateBins(const uint32_t& begin,
+ const uint32_t& end,
+ std::set<uint16_t>& candidateBins);
+ void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+ const uint64_t& minOffset,
+ std::set<uint16_t>& candidateBins,
+ std::vector<int64_t>& offsets);
+ uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
+ void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+ uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
+
+ // BAI summary (create/load) methods
+ void ReserveForSummary(const int& numReferences);
+ void SaveBinsSummary(const int& refId, const int& numBins);
+ void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
+ void SkipBins(const int& numBins);
+ void SkipLinearOffsets(const int& numLinearOffsets);
+ void SummarizeBins(BaiReferenceSummary& refSummary);
+ void SummarizeIndexFile(void);
+ void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+ void SummarizeReference(BaiReferenceSummary& refSummary);
+
+ // BAI full index input methods
+ void ReadBinID(uint32_t& binId);
+ void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+ void ReadIntoBuffer(const unsigned int& bytesRequested);
+ void ReadLinearOffset(uint64_t& linearOffset);
+ void ReadNumAlignmentChunks(int& numAlignmentChunks);
+ void ReadNumBins(int& numBins);
+ void ReadNumLinearOffsets(int& numLinearOffsets);
+ void ReadNumReferences(int& numReferences);
+
+ // BAI full index output methods
+ void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
+ void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
+ void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+ void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+ void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+ void WriteBins(const int& refId, BaiBinMap& bins);
+ void WriteHeader(void);
+ void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+ void WriteReferenceEntry(BaiReferenceEntry& refEntry);
+
+ // data members
+ private:
+ bool m_isBigEndian;
+ BaiFileSummary m_indexFileSummary;
+
+ // our input buffer
+ unsigned int m_bufferLength;
+ struct RaiiWrapper {
+ IBamIODevice* Device;
+ char* Buffer;
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ };
+ RaiiWrapper m_resources;
+
+ // static methods
+ private:
+ // checks if the buffer is large enough to accommodate the requested size
+ static void CheckBufferSize(char*& buffer,
+ unsigned int& bufferLength,
+ const unsigned int& requestedBytes);
+ // checks if the buffer is large enough to accommodate the requested size
+ static void CheckBufferSize(unsigned char*& buffer,
+ unsigned int& bufferLength,
+ const unsigned int& requestedBytes);
+ // static constants
+ private:
+ static const int MAX_BIN;
+ static const int BAM_LIDX_SHIFT;
+ static const std::string BAI_EXTENSION;
+ static const char* const BAI_MAGIC;
+ static const int SIZEOF_ALIGNMENTCHUNK;
+ static const int SIZEOF_BINCORE;
+ static const int SIZEOF_LINEAROFFSET;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAM_STANDARD_INDEX_FORMAT_H
--- /dev/null
+// ***************************************************************************
+// BamToolsIndex.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#include "api/BamAlignment.h"
+#include "api/internal/bam/BamReader_p.h"
+#include "api/internal/index/BamToolsIndex_p.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <map>
+using namespace std;
+
+// --------------------------------
+// static BamToolsIndex constants
+// --------------------------------
+
+// number of alignments per BTI block used when building a new index
+const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
+
+// filename suffix appended to the BAM filename for BTI index files
+const string BamToolsIndex::BTI_EXTENSION = ".bti";
+
+// first 4 bytes of every BTI index file
+const char* const BamToolsIndex::BTI_MAGIC = "BTI\1";
+
+// on-disk size of one block, in field order:
+// max end position (int32) + start offset (int64) + start position (int32)
+const int BamToolsIndex::SIZEOF_BLOCK = sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t);
+
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+// starts out with no IO device attached
+BamToolsIndex::RaiiWrapper::RaiiWrapper(void)
+    : Device(0)
+{
+}
+
+// closes & destroys the IO device, if one is still attached
+BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+    // nothing to release
+    if ( Device == 0 )
+        return;
+
+    Device->Close();
+    delete Device;
+    Device = 0;
+}
+
+// ------------------------------
+// BamToolsIndex implementation
+// ------------------------------
+
+// ctor - binds the index to @reader; no index file is opened here
+BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
+    : BamIndex(reader)
+    , m_isBigEndian(BamTools::SystemIsBigEndian())
+    , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
+    , m_inputVersion(0)
+    , m_outputVersion(BTI_2_0) // latest version - used for writing new index files
+{ }
+
+// dtor - makes sure the index file is closed and the in-memory summary is dropped
+BamToolsIndex::~BamToolsIndex(void)
+{
+    CloseFile();
+}
+
+// reads the next 4 bytes from the index device and verifies that they match BTI_MAGIC
+// throws BamException if the bytes cannot be read or do not match
+void BamToolsIndex::CheckMagicNumber(void) {
+
+    char fileMagic[4];
+    const int64_t bytesRead = m_resources.Device->Read(fileMagic, 4);
+
+    // short read - file is truncated or unreadable
+    const bool readOk = ( bytesRead == 4 );
+    if ( !readOk )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
+
+    // compare against the expected format marker
+    const bool magicOk = ( strncmp(fileMagic, BamToolsIndex::BTI_MAGIC, 4) == 0 );
+    if ( !magicOk )
+        throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
+}
+
+// reads the index file's format version into m_inputVersion and validates it
+// throws BamException if the version cannot be read, is not positive, is newer
+// than this build can write, or predates BTI_2_0
+void BamToolsIndex::CheckVersion(void) {
+
+    // pull the serialized version number from the file
+    const int64_t bytesRead = m_resources.Device->Read((char*)&m_inputVersion, sizeof(m_inputVersion));
+    if ( bytesRead != sizeof(m_inputVersion) )
+        throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
+    if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
+
+    // zero or negative values are never valid versions
+    if ( m_inputVersion <= 0 )
+        throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
+
+    // file was written by a newer BamTools than this one
+    if ( m_inputVersion > m_outputVersion ) {
+        const string message = "unsupported format: this index was created by a newer version of BamTools. "
+                               "Update your local version of BamTools to use the index file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
+    }
+
+    // ------------------------------------------------------------------
+    // deprecated, unsupported versions
+    // (the format had to be modified to accommodate a particular bug fix)
+
+    // Version 2.0 introduced half-open intervals in place of the old closed intervals.
+    // Older BTI files are rejected outright rather than being reinterpreted.
+    if ( (Version)m_inputVersion < BamToolsIndex::BTI_2_0 ) {
+        const string message = "unsupported format: this version of the index may not properly handle "
+                               "coordinate intervals. Please run 'bamtools index -bti -in yourData.bam' "
+                               "to generate an up-to-date, fixed BTI file.";
+        throw BamException("BamToolsIndex::CheckVersion", message);
+    }
+}
+
+// resets @refEntry to its "no reference" state: no blocks, ID of -1
+void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
+    refEntry.Blocks.clear();
+    refEntry.ID = -1;
+}
+
+// closes the index file (if any), releases its IO device, and clears the in-memory summary
+void BamToolsIndex::CloseFile(void) {
+
+    // release the device whenever one exists, not only when it reports being open:
+    // a device that was created but failed to open would otherwise be orphaned
+    // the next time OpenFile() overwrites the pointer
+    // (same teardown sequence as RaiiWrapper's destructor)
+    if ( m_resources.Device != 0 ) {
+        m_resources.Device->Close();
+        delete m_resources.Device;
+        m_resources.Device = 0;
+    }
+
+    m_indexFileSummary.clear();
+}
+
+// builds index from associated BAM file & writes out to index file
+//
+// The header written by WriteHeader() announces one entry per reference, and
+// LoadFileSummary() reads exactly that many entries back, in reference order.
+// This method therefore always emits one (possibly empty) entry for every
+// reference ID in [0, numReferences), regardless of where alignments occur.
+// Alignments with a negative RefID (unmapped) get no entry of their own.
+//
+// NOTE(review): presumably requires a coordinate-sorted BAM file (RefIDs
+// non-decreasing, unmapped reads last) - confirm against callers.
+//
+// returns true on success; on failure returns false and sets the error string
+bool BamToolsIndex::Create(void) {
+
+    // skip if BamReader is invalid or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
+        return false;
+    }
+
+    try {
+        // open new index file (read & write)
+        const string indexFilename = m_reader->Filename() + Extension();
+        OpenFile(indexFilename, IBamIODevice::ReadWrite);
+
+        // initialize BtiFileSummary with number of references
+        const int numReferences = m_reader->GetReferenceCount();
+        InitializeFileSummary(numReferences);
+
+        // initialize output file header
+        WriteHeader();
+
+        // index building markers
+        uint32_t currentBlockCount = 0;
+        int64_t currentAlignmentOffset = m_reader->Tell();
+        int32_t blockRefId = -1;
+        int32_t blockMaxEndPosition = -1;
+        int64_t blockStartOffset = currentAlignmentOffset;
+        int32_t blockStartPosition = -1;
+
+        // ID of the first reference that has no entry in the index file yet
+        int32_t nextRefIdToWrite = 0;
+
+        // plow through alignments, storing index entries
+        BamAlignment al;
+        BtiReferenceEntry refEntry;
+        while ( m_reader->LoadNextAlignment(al) ) {
+
+            // if moved to new reference
+            if ( al.RefID != blockRefId ) {
+
+                // store any partially filled block for the reference we are leaving
+                // (the count is zero if the last block was just flushed, or if this
+                //  is the very first alignment - in both cases there is nothing to store)
+                if ( currentBlockCount > 0 ) {
+                    const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                    refEntry.Blocks.push_back(block);
+                    currentBlockCount = 0;
+                }
+
+                // write the finished reference entry (unmapped data, ID < 0, has no entry)
+                if ( refEntry.ID >= 0 ) {
+                    WriteReferenceEntry(refEntry);
+                    nextRefIdToWrite = refEntry.ID + 1;
+                }
+                ClearReferenceEntry(refEntry);
+
+                // write any empty references up to (but not including) al.RefID
+                for ( ; nextRefIdToWrite < al.RefID; ++nextRefIdToWrite )
+                    WriteReferenceEntry( BtiReferenceEntry(nextRefIdToWrite) );
+
+                // set ID for new reference entry
+                refEntry.ID = al.RefID;
+            }
+
+            // if beginning of block, update counters
+            if ( currentBlockCount == 0 ) {
+                blockRefId = al.RefID;
+                blockStartOffset = currentAlignmentOffset;
+                blockStartPosition = al.Position;
+                blockMaxEndPosition = al.GetEndPosition();
+            }
+
+            // increment block counter
+            ++currentBlockCount;
+
+            // check end position
+            const int32_t alignmentEndPosition = al.GetEndPosition();
+            if ( alignmentEndPosition > blockMaxEndPosition )
+                blockMaxEndPosition = alignmentEndPosition;
+
+            // if block is full, get offset for next block, reset currentBlockCount
+            if ( currentBlockCount == m_blockSize ) {
+
+                // store previous block data in reference entry
+                const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+                refEntry.Blocks.push_back(block);
+
+                // update markers
+                blockStartOffset = m_reader->Tell();
+                currentBlockCount = 0;
+            }
+
+            // not the best name, but for the next iteration, this value will be the offset of the
+            // *current* alignment. this is necessary because we won't know if this next alignment
+            // is on a new reference until we actually read it
+            currentAlignmentOffset = m_reader->Tell();
+        }
+
+        // after finishing alignments, store the last (partially filled) block, if any
+        if ( currentBlockCount > 0 ) {
+            const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+            refEntry.Blocks.push_back(block);
+        }
+
+        // write last reference entry, then clear
+        if ( refEntry.ID >= 0 ) {
+            WriteReferenceEntry(refEntry);
+            nextRefIdToWrite = refEntry.ID + 1;
+        }
+        ClearReferenceEntry(refEntry);
+
+        // then write any empty references remaining at end of file
+        // (also covers files that end in unmapped reads or contain no alignments at all)
+        for ( ; nextRefIdToWrite < numReferences; ++nextRefIdToWrite )
+            WriteReferenceEntry( BtiReferenceEntry(nextRefIdToWrite) );
+
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // rewind BamReader
+    if ( !m_reader->Rewind() ) {
+        const string readerError = m_reader->GetErrorString();
+        const string message = "could not create index: \n\t" + readerError;
+        SetErrorString("BamToolsIndex::Create", message);
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// returns the filename extension used for BTI index files (see BTI_EXTENSION)
+const std::string BamToolsIndex::Extension(void)
+{
+    return BTI_EXTENSION;
+}
+
+// Looks up the BAM file offset at which reading should start for @region.
+//
+// Loads the BTI blocks of the region's left-bound reference, searches them for a
+// block overlapping the region, then walks back toward earlier blocks.
+// On return, *hasAlignmentsInRegion is true if a starting block was chosen (and
+// @offset holds its StartOffset); it is false - and @offset is left untouched -
+// if the reference has no blocks or the search ran off the end of the blocks.
+// Throws BamException if the left-bound reference ID is out of range, or if
+// reading the reference's blocks fails.
+void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+
+ // throw if ref ID is not a valid index in file summary data
+ if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
+ throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
+
+ // retrieve reference index data for left bound reference
+ BtiReferenceEntry refEntry(region.LeftRefID);
+ ReadReferenceEntry(refEntry);
+
+ // binary search for an overlapping block (may not be first one though)
+ // (blockFirst/count describe the remaining search window; blockIter is the probe)
+ bool found = false;
+ typedef BtiBlockVector::const_iterator BtiBlockConstIterator;
+ BtiBlockConstIterator blockFirst = refEntry.Blocks.begin();
+ BtiBlockConstIterator blockIter = blockFirst;
+ BtiBlockConstIterator blockLast = refEntry.Blocks.end();
+ iterator_traits<BtiBlockConstIterator>::difference_type count = distance(blockFirst, blockLast);
+ iterator_traits<BtiBlockConstIterator>::difference_type step;
+ while ( count > 0 ) {
+ blockIter = blockFirst;
+ step = count/2;
+ advance(blockIter, step);
+
+ const BtiBlock& block = (*blockIter);
+ if ( block.StartPosition <= region.RightPosition ) {
+ // block starts at or before the region's right bound...
+ if ( block.MaxEndPosition > region.LeftPosition ) {
+ // ...and ends after its left bound: overlap found
+ offset = block.StartOffset;
+ break;
+ }
+ // block lies entirely left of the region - continue in the upper half
+ blockFirst = ++blockIter;
+ count -= step+1;
+ }
+ // block starts right of the region - continue in the lower half
+ else count = step;
+ }
+
+ // if we didn't search "off the end" of the blocks
+ if ( blockIter != blockLast ) {
+
+ // "walk back" until we've gone too far
+ // NOTE(review): blockFirst may have been advanced by the search above, so the
+ // walk stops at the search window's lower bound, not necessarily the reference's
+ // first block - confirm this is intended
+ while ( blockIter != blockFirst ) {
+ const BtiBlock& currentBlock = (*blockIter);
+
+ --blockIter;
+ const BtiBlock& previousBlock = (*blockIter);
+ if ( previousBlock.MaxEndPosition <= region.LeftPosition ) {
+ offset = currentBlock.StartOffset;
+ found = true;
+ break;
+ }
+ }
+
+ // if we walked all the way to first block, just return that and let the reader's
+ // region overlap parsing do the rest
+ // NOTE(review): this also fires when the loop above broke out with blockIter
+ // equal to blockFirst, replacing the offset chosen there with the earlier block's
+ if ( blockIter == blockFirst ) {
+ const BtiBlock& block = (*blockIter);
+ offset = block.StartOffset;
+ found = true;
+ }
+ }
+
+
+ // sets to false if blocks container is empty, or if no matching block could be found
+ *hasAlignmentsInRegion = found;
+}
+
+// returns true if the loaded summary lists at least one block for @referenceID
+// (false for IDs outside the summary's range)
+bool BamToolsIndex::HasAlignments(const int& referenceID) const {
+
+    const int numSummaries = (int)m_indexFileSummary.size();
+    const bool isKnownReference = ( referenceID >= 0 && referenceID < numSummaries );
+    if ( !isKnownReference )
+        return false;
+
+    return ( m_indexFileSummary.at(referenceID).NumBlocks > 0 );
+}
+
+// resets the file summary to @numReferences default (empty) reference summaries
+// a non-positive @numReferences leaves the summary empty
+void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
+    m_indexFileSummary.clear();
+    if ( numReferences > 0 )
+        m_indexFileSummary.resize(numReferences);
+}
+
+// returns true only if an index IO device exists and reports itself as open
+bool BamToolsIndex::IsDeviceOpen(void) const {
+    return ( m_resources.Device != 0 && m_resources.Device->IsOpen() );
+}
+
+// attempts to use index data to jump to @region, returns success/fail
+// a "successful" jump indicates no error, but not whether this region has data
+//   * thus, the method sets a flag to indicate whether there are alignments
+//     available after the jump position
+//
+// returns false (with the error string set) if the reader is not open, the
+// region's left bound is invalid, the index lookup fails, or the seek fails.
+// If the index reports no alignments for the region, no seek is attempted and
+// the method returns true with *hasAlignmentsInRegion == false.
+bool BamToolsIndex::Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // clear flag
+    *hasAlignmentsInRegion = false;
+
+    // skip if invalid reader or not open
+    if ( m_reader == 0 || !m_reader->IsOpen() ) {
+        SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
+        return false;
+    }
+
+    // make sure left-bound reference & position are valid
+    // (reference ID is range-checked first so that at() cannot throw std::out_of_range,
+    //  which the BamException handler below would not catch)
+    const RefVector& references = m_reader->GetReferenceData();
+    if ( region.LeftRefID < 0 ||
+         region.LeftRefID >= (int)references.size() ||
+         region.LeftPosition > references.at(region.LeftRefID).RefLength )
+    {
+        SetErrorString("BamToolsIndex::Jump", "could not jump: invalid region requested");
+        return false;
+    }
+
+    // calculate nearest offset to jump to
+    // (GetOffset leaves @offset untouched when it finds no block, so give it a defined value)
+    int64_t offset = 0;
+    try {
+        GetOffset(region, offset, hasAlignmentsInRegion);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+
+    // nothing to jump to - not an error, the flag already tells the caller
+    if ( !(*hasAlignmentsInRegion) )
+        return true;
+
+    // return success/failure of seek
+    return m_reader->Seek(offset);
+}
+
+// opens the index file @filename (read-only), validates its header, and builds
+// the in-memory per-reference summary
+// returns true on success; on failure stores the exception text as the error string
+bool BamToolsIndex::Load(const std::string& filename) {
+
+    bool loadedOk = false;
+    try {
+        OpenFile(filename, IBamIODevice::ReadOnly);
+
+        // metadata first, then the per-reference summary
+        LoadHeader();
+        LoadFileSummary();
+
+        loadedOk = true;
+    }
+    catch ( BamException& e ) {
+        m_errorString = e.what();
+    }
+    return loadedOk;
+}
+
+// reads the reference count from the index file, then fills in one summary
+// (block count + position of first block) per reference, in file order
+void BamToolsIndex::LoadFileSummary(void) {
+
+    // how many references does the file describe?
+    int referenceCount;
+    LoadNumReferences(referenceCount);
+
+    // start from a clean, correctly sized summary
+    InitializeFileSummary(referenceCount);
+
+    // summarize each reference in turn
+    const BtiFileSummary::size_type numSummaries = m_indexFileSummary.size();
+    for ( BtiFileSummary::size_type i = 0; i < numSummaries; ++i )
+        LoadReferenceSummary( m_indexFileSummary[i] );
+}
+
+// validates the BTI header (magic number & version), then reads the file's
+// block size into m_blockSize
+// throws BamException if any header field is missing or invalid
+void BamToolsIndex::LoadHeader(void) {
+
+    // check BTI file metadata
+    CheckMagicNumber();
+    CheckVersion();
+
+    // use file's BTI block size to set member variable
+    // (verify the read before byte-swapping, same order as CheckVersion)
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&m_blockSize, sizeof(m_blockSize));
+    if ( numBytesRead != sizeof(m_blockSize) )
+        throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
+    if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
+}
+
+// reads the number of BTI blocks for the current reference into @numBlocks
+// throws BamException if the value cannot be read or is negative
+void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
+
+    // verify the read before byte-swapping (same order as CheckVersion)
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&numBlocks, sizeof(numBlocks));
+    if ( numBytesRead != sizeof(numBlocks) )
+        throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
+    if ( m_isBigEndian ) SwapEndian_32(numBlocks);
+
+    // a negative count can only come from a corrupt file; accepting it would
+    // make SkipBlocks() seek backwards through the index
+    if ( numBlocks < 0 )
+        throw BamException("BamToolsIndex::LoadNumBlocks", "invalid number of BTI blocks");
+}
+
+// reads the number of references described by the index file into @numReferences
+// throws BamException if the value cannot be read or is negative
+void BamToolsIndex::LoadNumReferences(int& numReferences) {
+
+    // verify the read before byte-swapping (same order as CheckVersion)
+    const int64_t numBytesRead = m_resources.Device->Read((char*)&numReferences, sizeof(numReferences));
+    if ( numBytesRead != sizeof(numReferences) )
+        throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+
+    // a negative count can only come from a corrupt file
+    if ( numReferences < 0 )
+        throw BamException("BamToolsIndex::LoadNumReferences", "invalid number of references");
+}
+
+// fills @refSummary for the reference at the current file position:
+// records its block count and where its first block starts, then moves the
+// file position past all of that reference's blocks
+void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
+
+    int blockCount;
+    LoadNumBlocks(blockCount);
+    refSummary.NumBlocks = blockCount;
+
+    // the blocks begin right after the count that was just read
+    refSummary.FirstBlockFilePosition = Tell();
+
+    // block contents are read on demand, so just step over them here
+    SkipBlocks(blockCount);
+}
+
+// opens the index file @filename in the requested @mode, replacing any
+// previously opened index file
+// throws BamException if no IO device could be created or opened
+void BamToolsIndex::OpenFile(const std::string& filename, IBamIODevice::OpenMode mode) {
+
+    // make sure any previous index file is closed
+    CloseFile();
+
+    m_resources.Device = BamDeviceFactory::CreateDevice(filename);
+    if ( m_resources.Device == 0 ) {
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamToolsIndex::OpenFile", message);
+    }
+
+    // attempt to open file
+    m_resources.Device->Open(mode);
+    if ( !IsDeviceOpen() ) {
+
+        // do not keep an unusable device around
+        delete m_resources.Device;
+        m_resources.Device = 0;
+
+        const string message = string("could not open file: ") + filename;
+        throw BamException("BamToolsIndex::OpenFile", message);
+    }
+}
+
+// reads one BTI block from the current file position into @block
+// fields are stored in the order: max end position, start offset, start position
+// throws BamException if fewer bytes than expected could be read
+void BamToolsIndex::ReadBlock(BtiBlock& block) {
+
+    // pull each field from the device, keeping a running byte total
+    const int64_t endPositionBytes   = m_resources.Device->Read((char*)&block.MaxEndPosition, sizeof(block.MaxEndPosition));
+    const int64_t startOffsetBytes   = m_resources.Device->Read((char*)&block.StartOffset, sizeof(block.StartOffset));
+    const int64_t startPositionBytes = m_resources.Device->Read((char*)&block.StartPosition, sizeof(block.StartPosition));
+    const int64_t totalBytesRead = endPositionBytes + startOffsetBytes + startPositionBytes;
+
+    // convert to host byte order if necessary
+    if ( m_isBigEndian ) {
+        SwapEndian_32(block.MaxEndPosition);
+        SwapEndian_64(block.StartOffset);
+        SwapEndian_32(block.StartPosition);
+    }
+
+    // make sure the whole block was available
+    const int bytesPerBlock = sizeof(block.MaxEndPosition) +
+                              sizeof(block.StartOffset) +
+                              sizeof(block.StartPosition);
+    if ( totalBytesRead != bytesPerBlock )
+        throw BamException("BamToolsIndex::ReadBlock", "could not read block");
+}
+
+// replaces the contents of @blocks with all BTI blocks of the reference
+// described by @refSummary, read from the index file
+void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
+
+    // start with an empty container sized for this reference
+    blocks.clear();
+    blocks.reserve(refSummary.NumBlocks);
+
+    // jump to where this reference's blocks begin
+    Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
+
+    // read the blocks one after another
+    BtiBlock currentBlock;
+    int blocksRemaining = refSummary.NumBlocks;
+    while ( blocksRemaining > 0 ) {
+        ReadBlock(currentBlock);
+        blocks.push_back(currentBlock);
+        --blocksRemaining;
+    }
+}
+
+// loads the BTI blocks for the reference named by @refEntry.ID into @refEntry.Blocks
+// throws BamException if the ID is outside the loaded file summary
+void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
+
+    // the ID must index into the file summary
+    const int numSummaries = (int)m_indexFileSummary.size();
+    const bool idInRange = ( refEntry.ID >= 0 && refEntry.ID < numSummaries );
+    if ( !idInRange )
+        throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
+
+    // the summary tells us where the blocks live and how many there are
+    ReadBlocks( m_indexFileSummary.at(refEntry.ID), refEntry.Blocks );
+}
+
+// moves the index file position to @position, interpreted relative to @origin
+// (callers in this file pass SEEK_SET / SEEK_CUR)
+// throws BamException if the device reports a failed seek
+void BamToolsIndex::Seek(const int64_t& position, const int origin) {
+    if ( !m_resources.Device->Seek(position, origin) )
+        throw BamException("BamToolsIndex::Seek", "could not seek in BTI file");
+}
+
+// advances the index file position past @numBlocks serialized BTI blocks
+void BamToolsIndex::SkipBlocks(const int& numBlocks) {
+
+    // widen before multiplying, so a large block count cannot overflow 'int'
+    const int64_t numBytesToSkip = static_cast<int64_t>(numBlocks) * BamToolsIndex::SIZEOF_BLOCK;
+    Seek( numBytesToSkip, SEEK_CUR );
+}
+
+// returns the current position in the index file, as reported by the IO device
+int64_t BamToolsIndex::Tell(void) const {
+    const int64_t currentPosition = m_resources.Device->Tell();
+    return currentPosition;
+}
+
+// writes one BTI block to the index file
+// fields are written in the order: max end position, start offset, start position
+// throws BamException if fewer bytes than expected were written
+void BamToolsIndex::WriteBlock(const BtiBlock& block) {
+
+    // work on copies, so @block itself is never byte-swapped
+    int32_t outMaxEndPosition = block.MaxEndPosition;
+    int64_t outStartOffset    = block.StartOffset;
+    int32_t outStartPosition  = block.StartPosition;
+
+    // convert to file byte order if necessary
+    if ( m_isBigEndian ) {
+        SwapEndian_32(outMaxEndPosition);
+        SwapEndian_64(outStartOffset);
+        SwapEndian_32(outStartPosition);
+    }
+
+    // write the fields, keeping a running byte total
+    const int64_t endPositionBytes   = m_resources.Device->Write((const char*)&outMaxEndPosition, sizeof(outMaxEndPosition));
+    const int64_t startOffsetBytes   = m_resources.Device->Write((const char*)&outStartOffset, sizeof(outStartOffset));
+    const int64_t startPositionBytes = m_resources.Device->Write((const char*)&outStartPosition, sizeof(outStartPosition));
+    const int64_t totalBytesWritten = endPositionBytes + startOffsetBytes + startPositionBytes;
+
+    // make sure the whole block made it out
+    const int bytesPerBlock = sizeof(outMaxEndPosition) +
+                              sizeof(outStartOffset) +
+                              sizeof(outStartPosition);
+    if ( totalBytesWritten != bytesPerBlock )
+        throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
+}
+
+// writes every block in @blocks to the index file, in container order
+void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
+    const BtiBlockVector::size_type numBlocks = blocks.size();
+    for ( BtiBlockVector::size_type i = 0; i < numBlocks; ++i )
+        WriteBlock( blocks[i] );
+}
+
+// writes the BTI file header: magic number, format version, block size, and
+// number of references (taken from the current size of the file summary)
+// throws BamException if the header could not be written completely
+void BamToolsIndex::WriteHeader(void) {
+
+    int64_t numBytesWritten = 0;
+
+    // write BTI index format 'magic number'
+    numBytesWritten += m_resources.Device->Write(BamToolsIndex::BTI_MAGIC, 4);
+
+    // write BTI index format version
+    int32_t currentVersion = (int32_t)m_outputVersion;
+    if ( m_isBigEndian ) SwapEndian_32(currentVersion);
+    numBytesWritten += m_resources.Device->Write((const char*)&currentVersion, sizeof(currentVersion));
+
+    // write block size
+    uint32_t blockSize = m_blockSize;
+    if ( m_isBigEndian ) SwapEndian_32(blockSize);
+    numBytesWritten += m_resources.Device->Write((const char*)&blockSize, sizeof(blockSize));
+
+    // write number of references
+    int32_t numReferences = m_indexFileSummary.size();
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+    numBytesWritten += m_resources.Device->Write((const char*)&numReferences, sizeof(numReferences));
+
+    // check header written ok
+    const int expectedBytes = 4 +
+                              sizeof(currentVersion) +
+                              sizeof(blockSize) +
+                              sizeof(numReferences);
+    if ( numBytesWritten != expectedBytes )
+        throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
+}
+
+// writes one reference's index data: its block count, followed by the blocks themselves
+// throws BamException if the block count (or any block) could not be written
+void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
+
+    // block count goes first
+    uint32_t blockCount = refEntry.Blocks.size();
+    if ( m_isBigEndian ) SwapEndian_32(blockCount);
+    const int64_t bytesWritten = m_resources.Device->Write((const char*)&blockCount, sizeof(blockCount));
+    const bool countWritten = ( bytesWritten == sizeof(blockCount) );
+    if ( !countWritten )
+        throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
+
+    // then the block entries
+    WriteBlocks(refEntry.Blocks);
+}
--- /dev/null
+// ***************************************************************************
+// BamToolsIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#ifndef BAMTOOLS_INDEX_FORMAT_H
+#define BAMTOOLS_INDEX_FORMAT_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+#include "api/IBamIODevice.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// one 'block' of a BTI index: summarizes a run of consecutive alignments
+struct BtiBlock {
+
+    int32_t MaxEndPosition; // largest alignment end position seen in the block
+    int64_t StartOffset;    // BAM file offset of the block's first alignment
+    int32_t StartPosition;  // position of the block's first alignment
+
+    // all fields default to 0
+    BtiBlock(const int32_t& maxEndPosition = 0,
+             const int64_t& startOffset = 0,
+             const int32_t& startPosition = 0)
+        : MaxEndPosition(maxEndPosition), StartOffset(startOffset), StartPosition(startPosition)
+    {
+    }
+};
+
+// convenience typedef for describing a list of BTI blocks on a reference
+typedef std::vector<BtiBlock> BtiBlockVector;
+
+// full BTI index data for a single reference: everything needed to
+// build, load, & write that reference's blocks
+struct BtiReferenceEntry {
+
+    int32_t ID;            // reference ID; -1 means "no reference"
+    BtiBlockVector Blocks; // this reference's BTI blocks
+
+    // starts with an empty block list
+    BtiReferenceEntry(const int& id = -1)
+        : ID(id)
+    {
+    }
+};
+
+// lightweight, persistent summary of one reference's index data:
+// enough to locate and read its blocks from the index file later
+struct BtiReferenceSummary {
+
+    int NumBlocks;                   // number of BTI blocks stored for the reference
+    uint64_t FirstBlockFilePosition; // index-file position of the reference's first block
+
+    // starts out describing a reference with no blocks
+    BtiReferenceSummary(void)
+        : NumBlocks(0), FirstBlockFilePosition(0)
+    {
+    }
+};
+
+// convenience typedef for describing a full BTI index file summary
+typedef std::vector<BtiReferenceSummary> BtiFileSummary;
+
+// BamIndex implementation for the BamTools index format (".bti")
+class BamToolsIndex : public BamIndex {
+
+ // keep a list of any supported versions here
+ // (might be useful later to handle any 'legacy' versions if the format changes)
+ // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
+ //
+ // so a change introduced in BTI_1_2 may be handled from then on by:
+ //
+ // if ( indexVersion >= BTI_1_2 )
+ // do something new
+ // else
+ // do the old thing
+ enum Version { BTI_1_0 = 1
+ , BTI_1_1
+ , BTI_1_2
+ , BTI_2_0
+ };
+
+ // ctor & dtor
+ public:
+ BamToolsIndex(Internal::BamReaderPrivate* reader);
+ ~BamToolsIndex(void);
+
+ // BamIndex implementation
+ public:
+ // builds index from associated BAM file & writes out to index file
+ bool Create(void);
+ // returns whether reference has alignments or no
+ bool HasAlignments(const int& referenceID) const;
+ // attempts to use index data to jump to @region, returns success/fail
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+ // loads existing data from file into memory
+ bool Load(const std::string& filename);
+ // identifies this index as the BAMTOOLS index type
+ BamIndex::IndexType Type(void) const { return BamIndex::BAMTOOLS; }
+ public:
+ // returns format's file extension
+ static const std::string Extension(void);
+
+ // internal methods
+ // (the void-returning helpers below report failure by throwing BamException)
+ private:
+
+ // index file ops
+ void CheckMagicNumber(void);
+ void CheckVersion(void);
+ void CloseFile(void);
+ bool IsDeviceOpen(void) const;
+ void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+ void Seek(const int64_t& position, const int origin);
+ int64_t Tell(void) const;
+
+ // index-creation methods
+ void ClearReferenceEntry(BtiReferenceEntry& refEntry);
+ void WriteBlock(const BtiBlock& block);
+ void WriteBlocks(const BtiBlockVector& blocks);
+ void WriteHeader(void);
+ void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+
+ // random-access methods
+ void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+ void ReadBlock(BtiBlock& block);
+ void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
+ void ReadReferenceEntry(BtiReferenceEntry& refEntry);
+
+ // BTI summary data methods
+ void InitializeFileSummary(const int& numReferences);
+ void LoadFileSummary(void);
+ void LoadHeader(void);
+ void LoadNumBlocks(int& numBlocks);
+ void LoadNumReferences(int& numReferences);
+ void LoadReferenceSummary(BtiReferenceSummary& refSummary);
+ void SkipBlocks(const int& numBlocks);
+
+ // data members
+ private:
+ // true if the host is big-endian (values are byte-swapped on read & write)
+ bool m_isBigEndian;
+ // one summary per reference: block count & file position of its first block
+ BtiFileSummary m_indexFileSummary;
+ // alignments per block: DEFAULT_BLOCK_LENGTH until a loaded header overrides it
+ uint32_t m_blockSize;
+ int32_t m_inputVersion; // Version is serialized as int
+ // version written to new index files
+ Version m_outputVersion;
+
+ // holds the index file's IO device; its destructor closes & deletes the device
+ struct RaiiWrapper {
+ IBamIODevice* Device;
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ };
+ RaiiWrapper m_resources;
+
+ // static constants
+ private:
+ static const uint32_t DEFAULT_BLOCK_LENGTH;
+ static const std::string BTI_EXTENSION;
+ static const char* const BTI_MAGIC;
+ static const int SIZEOF_BLOCK;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMTOOLS_INDEX_FORMAT_H
--- /dev/null
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/index
+# ==========================
+
+# directory holding the internal index sources (relative to InternalDir)
+set ( InternalIndexDir "${InternalDir}/index" )
+
+# index source files; PARENT_SCOPE sets this variable in the including
+# directory's scope rather than in this one
+set ( InternalIndexSources
+ ${InternalIndexDir}/BamIndexFactory_p.cpp
+ ${InternalIndexDir}/BamStandardIndex_p.cpp
+ ${InternalIndexDir}/BamToolsIndex_p.cpp
+
+ PARENT_SCOPE # <-- leave this last
+)
+
--- /dev/null
+// ***************************************************************************
+// BamDeviceFactory_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 September 2011 (DB)
+// ---------------------------------------------------------------------------
+// Creates built-in concrete implementations of IBamIODevices
+// ***************************************************************************
+
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BamFile_p.h"
+#include "api/internal/io/BamFtp_p.h"
+#include "api/internal/io/BamHttp_p.h"
+#include "api/internal/io/BamPipe_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+using namespace std;
+
+// creates the built-in IO device matching @source:
+//   "-", "stdin", "stdout"  -> pipe
+//   "http://..."            -> HTTP
+//   "ftp://..."             -> FTP
+//   anything else           -> local file
+// the device is allocated with 'new' and returned unopened
+IBamIODevice* BamDeviceFactory::CreateDevice(const string& source) {
+
+    // requested pipe?
+    const bool isPipe = ( source == "-" || source == "stdin" || source == "stdout" );
+    if ( isPipe )
+        return new BamPipe;
+
+    // HTTP prefix?
+    if ( source.compare(0, 7, "http://") == 0 )
+        return new BamHttp(source);
+
+    // FTP prefix?
+    if ( source.compare(0, 6, "ftp://") == 0 )
+        return new BamFtp(source);
+
+    // otherwise assume a "normal" file
+    return new BamFile(source);
+}
--- /dev/null
+// ***************************************************************************
+// BamDeviceFactory_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Creates built-in concrete implementations of IBamIODevices
+// ***************************************************************************
+
+#ifndef BAMDEVICEFACTORY_P_H
+#define BAMDEVICEFACTORY_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// factory for the built-in concrete IBamIODevice implementations
+class BamDeviceFactory {
+ public:
+ // returns a device for @source
+ // NOTE(review): callers in this patch delete the returned pointer themselves, so
+ // ownership appears to pass to the caller - confirm
+ static IBamIODevice* CreateDevice(const std::string& source);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMDEVICEFACTORY_P_H
--- /dev/null
+// ***************************************************************************
+// BamFile_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM file-specific IO behavior
+// ***************************************************************************
+
+#include "api/internal/io/BamFile_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <iostream>
+using namespace std;
+
+// Remembers 'filename' for a later Open(); no file is opened here.
+BamFile::BamFile(const string& filename)
+    : ILocalIODevice(),
+      m_filename(filename)
+{
+}
+
+// Nothing to release at this level.
+BamFile::~BamFile(void)
+{
+}
+
+// Closes the device if it is open: forgets the stored filename and then
+// delegates to ILocalIODevice::Close(). A device that is not open is untouched.
+void BamFile::Close(void) {
+
+    // nothing to do unless we are open
+    if ( !IsOpen() )
+        return;
+
+    m_filename.clear();
+    ILocalIODevice::Close();
+}
+
+// Always true for a BamFile.
+bool BamFile::IsRandomAccess(void) const {
+    const bool randomAccess = true;
+    return randomAccess;
+}
+
+// Opens m_filename with the fopen() mode that corresponds to 'mode'
+// (ReadOnly -> "rb", WriteOnly -> "wb", ReadWrite -> "w+b").
+// Returns true and records 'mode' on success; otherwise sets the error
+// string and returns false.
+bool BamFile::Open(const IBamIODevice::OpenMode mode) {
+
+    // start from a closed state
+    Close();
+
+    // translate the requested open mode into an fopen() mode string
+    const char* fopenMode = 0;
+    if      ( mode == IBamIODevice::ReadOnly  ) fopenMode = "rb";
+    else if ( mode == IBamIODevice::WriteOnly ) fopenMode = "wb";
+    else if ( mode == IBamIODevice::ReadWrite ) fopenMode = "w+b";
+
+    if ( fopenMode == 0 ) {
+        SetErrorString("BamFile::Open", "unknown open mode requested");
+        return false;
+    }
+
+    // try to obtain the FILE*
+    m_stream = fopen(m_filename.c_str(), fopenMode);
+    if ( m_stream == 0 ) {
+        const string target = ( m_filename.empty() ? string("empty filename") : m_filename );
+        SetErrorString("BamFile::Open", string("could not open file handle for ") + target);
+        return false;
+    }
+
+    // remember how we were opened
+    m_mode = mode;
+    return true;
+}
+
+// Repositions the stream via fseek64(); true when that call reports 0.
+bool BamFile::Seek(const int64_t& position, const int origin) {
+    BT_ASSERT_X( m_stream, "BamFile::Seek() - null stream" );
+    if ( fseek64(m_stream, position, origin) != 0 )
+        return false;
+    return true;
+}
--- /dev/null
+// ***************************************************************************
+// BamFile_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM file-specific IO behavior
+// ***************************************************************************
+
+#ifndef BAMFILE_P_H
+#define BAMFILE_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/ILocalIODevice_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Local-file device: an ILocalIODevice that opens a named file on demand.
+class BamFile : public ILocalIODevice {
+
+ // ctor & dtor
+ public:
+ // stores 'filename'; the file itself is opened by Open()
+ BamFile(const std::string& filename);
+ ~BamFile(void);
+
+ // ILocalIODevice implementation
+ public:
+ // when open: clears the stored filename, then calls ILocalIODevice::Close()
+ void Close(void);
+ // always returns true
+ bool IsRandomAccess(void) const;
+ // opens the stored filename with fopen() in the requested mode
+ bool Open(const IBamIODevice::OpenMode mode);
+ // repositions the stream with fseek64(); true on success
+ bool Seek(const int64_t& position, const int origin = SEEK_SET);
+
+ // data members
+ private:
+ std::string m_filename; // path passed to fopen(); cleared by Close()
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMFILE_P_H
--- /dev/null
+// ***************************************************************************
+// BamFtp_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on FTP server
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/internal/io/BamFtp_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cctype>
+#include <cstdlib>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+namespace BamTools {
+namespace Internal {
+
+// -----------
+// constants
+// -----------
+
+// connection defaults: command port and URL scheme
+// (FTP_PREFIX_LENGTH is the length of FTP_PREFIX)
+static const uint16_t FTP_PORT = 21;
+static const string FTP_PREFIX = "ftp://";
+static const size_t FTP_PREFIX_LENGTH = 6;
+// line terminator appended to every command sent on the command socket
+static const string FTP_NEWLINE = "\r\n";
+
+// credentials used for the USER/PASS commands
+static const string DEFAULT_USER = "anonymous";
+static const string DEFAULT_PASS = "anonymous@";
+
+// FTP command verbs (ABOR_CMD and REIN_CMD are not referenced elsewhere in this file)
+static const string ABOR_CMD = "ABOR";
+static const string USER_CMD = "USER";
+static const string PASS_CMD = "PASS";
+static const string PASV_CMD = "PASV";
+static const string REIN_CMD = "REIN";
+static const string REST_CMD = "REST";
+static const string RETR_CMD = "RETR";
+static const string TYPE_CMD = "TYPE";
+
+// separators: between a command verb and its argument, between host and
+// path in the URL, and between the octets of the data-channel IP address
+static const char CMD_SEPARATOR = ' ';
+static const char HOST_SEPARATOR = '/';
+static const char IP_SEPARATOR = '.';
+
+// 4th character of a reply line that is followed by more lines ("xyz-...")
+static const char MULTILINE_CONTINUE = '-';
+
+// delimiters of the "(h1,h2,h3,h4,p1,p2)" section of a PASV reply
+static const char PASV_REPLY_PREFIX = '(';
+static const char PASV_REPLY_SEPARATOR = ',';
+static const char PASV_REPLY_SUFFIX = ')';
+
+// -----------------
+// utility methods
+// -----------------
+
+// Splits 'source' on 'delim'. Empty fields between delimiters are kept;
+// a single trailing delimiter does not produce a final empty field, and an
+// empty 'source' yields no fields at all.
+static inline
+vector<string> split(const string& source, const char delim) {
+
+    vector<string> tokens;
+
+    size_t tokenBegin = 0;
+    while ( tokenBegin < source.size() ) {
+
+        const size_t delimPos = source.find(delim, tokenBegin);
+
+        // last field: runs to the end of the string
+        if ( delimPos == string::npos ) {
+            tokens.push_back( source.substr(tokenBegin) );
+            break;
+        }
+
+        tokens.push_back( source.substr(tokenBegin, delimPos - tokenBegin) );
+        tokenBegin = delimPos + 1;
+    }
+    return tokens;
+}
+
+// True when 'source' begins with 'pattern' (an empty pattern always matches).
+static inline
+bool startsWith(const string& source, const string& pattern) {
+    const int cmp = source.compare(0, pattern.length(), pattern);
+    return ( cmp == 0 );
+}
+
+// Returns a lower-cased copy of 's'.
+//
+// Characters are appended with push_back(): reserve() only allocates
+// capacity and leaves the string length at 0, so assigning through
+// operator[] (as this helper used to do) wrote outside the string's
+// length and always produced an empty result.
+static inline
+string toLower(const string& s) {
+    string out;
+    const size_t sSize = s.size();
+    out.reserve(sSize);
+    for ( size_t i = 0; i < sSize; ++i ) {
+        // tolower() requires a value representable as unsigned char
+        out.push_back( static_cast<char>( tolower( static_cast<unsigned char>(s[i]) ) ) );
+    }
+    return out;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// -----------------------
+// BamFtp implementation
+// -----------------------
+
+// Creates both (still unconnected) sockets, installs the default port and
+// credentials, then parses 'url' into hostname & filename. Whether parsing
+// succeeded is recorded in m_isUrlParsed.
+BamFtp::BamFtp(const string& url)
+    : IBamIODevice(),
+      m_commandSocket(new TcpSocket),
+      m_dataSocket(new TcpSocket),
+      m_port(FTP_PORT),
+      m_dataPort(0),
+      m_username(DEFAULT_USER),
+      m_password(DEFAULT_PASS),
+      m_isUrlParsed(false),
+      m_filePosition(-1)
+{
+    ParseUrl(url);
+}
+
+// Disconnects, then destroys both sockets.
+BamFtp::~BamFtp(void) {
+
+    Close();
+
+    // 'delete' on a null pointer is a no-op, so no guards are needed
+    delete m_commandSocket;
+    delete m_dataSocket;
+}
+
+// Disconnects both sockets and restores the per-connection state (parsed
+// flag, file position, credentials, data-channel address) to its defaults.
+void BamFtp::Close(void) {
+
+    // tear down command channel first, then data channel
+    m_commandSocket->DisconnectFromHost();
+    m_dataSocket->DisconnectFromHost();
+
+    // data-channel address is only valid for one PASV exchange
+    m_dataHostname.clear();
+    m_dataPort = 0;
+
+    // back to default credentials
+    m_username = DEFAULT_USER;
+    m_password = DEFAULT_PASS;
+
+    // remaining state flags
+    m_filePosition = -1;
+    m_isUrlParsed  = false;
+}
+
+// Connects the command socket to m_hostname:m_port, consumes the server's
+// greeting and then sends USER, PASS and TYPE I, each awaiting a reply.
+// Any failure after the connection is established calls Close() and
+// returns false.
+bool BamFtp::ConnectCommandSocket(void) {
+
+    BT_ASSERT_X(m_commandSocket, "null command socket?");
+
+    // open the command channel
+    const bool connected = m_commandSocket->ConnectToHost(m_hostname, m_port, m_mode);
+    if ( !connected ) {
+        SetErrorString("BamFtp::ConnectCommandSocket", "could not connect to host");
+        return false;
+    }
+
+    // server greeting
+    if ( !ReceiveReply() ) {
+        Close();
+        return false;
+    }
+
+    // login sequence, sent in order: USER, PASS, TYPE (binary/image)
+    const string loginSequence[] = {
+        USER_CMD + CMD_SEPARATOR + m_username + FTP_NEWLINE,
+        PASS_CMD + CMD_SEPARATOR + m_password + FTP_NEWLINE,
+        TYPE_CMD + CMD_SEPARATOR + 'I' + FTP_NEWLINE
+    };
+    const size_t numCommands = sizeof(loginSequence) / sizeof(loginSequence[0]);
+
+    for ( size_t i = 0; i < numCommands; ++i ) {
+        if ( !SendCommand(loginSequence[i], true) ) {
+            Close();
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// (Re)establishes the data channel: ensures the command channel is up,
+// sends PASV and parses the reply, optionally sends REST <m_filePosition>,
+// sends RETR <m_filename>, connects the data socket to the address from the
+// PASV reply and finally requires a reply starting with "150".
+// Returns false (without setting an error string) on any failure.
+bool BamFtp::ConnectDataSocket(void) {
+
+    // the command channel must be available first
+    if ( !m_commandSocket->IsConnected() && !ConnectCommandSocket() )
+        return false;
+
+    // drop any stale data channel
+    if ( m_dataSocket->IsConnected() )
+        m_dataSocket->DisconnectFromHost();
+
+    // ask for a passive-mode endpoint ...
+    const string pasvRequest = PASV_CMD + FTP_NEWLINE;
+    if ( !SendCommand(pasvRequest, true) )
+        return false; // TODO: set error string
+
+    // ... and extract host & port from the reply
+    if ( !ParsePassiveResponse() )
+        return false; // TODO: set error string
+
+    // tell the server where to resume from
+    if ( m_filePosition >= 0 ) {
+        stringstream offset("");
+        offset << m_filePosition;
+        const string restRequest = REST_CMD + CMD_SEPARATOR + offset.str() + FTP_NEWLINE;
+        if ( !SendCommand(restRequest, true) )
+            return false; // TODO: set error string
+    }
+
+    // request the file; its reply is read only after the data socket is connected
+    const string retrRequest = RETR_CMD + CMD_SEPARATOR + m_filename + FTP_NEWLINE;
+    if ( !SendCommand(retrRequest, false) )
+        return false; // TODO: set error string
+
+    // open the data channel
+    if ( !m_dataSocket->ConnectToHost(m_dataHostname, m_dataPort) )
+        return false; // TODO: set error string
+
+    // initial reply to RETR must be present and carry code 150 (all good)
+    const bool accepted = ( ReceiveReply() && startsWith(m_response, "150") );
+    if ( !accepted ) {
+        // TODO: set error string
+        m_dataSocket->DisconnectFromHost();
+        return false;
+    }
+
+    return true;
+}
+
+// Open means: the base device reports open AND the URL was parsed OK.
+bool BamFtp::IsOpen(void) const {
+    if ( !IBamIODevice::IsOpen() )
+        return false;
+    return m_isUrlParsed;
+}
+
+// Always true for a BamFtp device.
+bool BamFtp::IsRandomAccess(void) const {
+    const bool randomAccess = true;
+    return randomAccess;
+}
+
+// Opens the device for reading (the only supported mode): records the mode,
+// resets the file position to 0 and connects the command socket followed by
+// the data socket. Returns false if either connection step fails.
+bool BamFtp::Open(const IBamIODevice::OpenMode mode) {
+
+    // read-only device
+    const bool readRequested = ( mode == IBamIODevice::ReadOnly );
+    if ( !readRequested ) {
+        SetErrorString("BamFtp::Open", "writing on this device is not supported");
+        return false;
+    }
+
+    // basic valid state
+    m_mode = mode;
+    m_filePosition = 0;
+
+    // command channel first; data channel only if that worked
+    if ( !ConnectCommandSocket() )
+        return false;
+    return ConnectDataSocket();
+}
+
+// Extracts the data-channel endpoint from the PASV reply held in m_response.
+//
+// The reply is expected to contain "(h1,h2,h3,h4,p1,p2)": h1..h4 form the
+// dotted IP address stored in m_dataHostname, and p1/p2 are the high/low
+// bytes of the port stored in m_dataPort.
+//
+// Returns false, leaving m_dataHostname & m_dataPort untouched, when the
+// reply is empty, has no "(" followed by ")", does not hold exactly 6
+// fields, or when a port field is outside 0..255.
+bool BamFtp::ParsePassiveResponse(void) {
+
+    // fail if empty
+    if ( m_response.empty() )
+        return false;
+
+    // find parentheses - the closing one is searched for AFTER the opening
+    // one, so a stray ')' earlier in the reply text cannot yield an inverted
+    // (invalid) range
+    const size_t leftParenFound = m_response.find(PASV_REPLY_PREFIX);
+    if ( leftParenFound == string::npos )
+        return false;
+    const size_t rightParenFound = m_response.find(PASV_REPLY_SUFFIX, leftParenFound + 1);
+    if ( rightParenFound == string::npos )
+        return false;
+
+    // grab everything between ( should be "h1,h2,h3,h4,p1,p2" )
+    const string hostAndPort = m_response.substr(leftParenFound + 1,
+                                                 rightParenFound - leftParenFound - 1);
+
+    // parse into string fields
+    const vector<string> fields = split(hostAndPort, PASV_REPLY_SEPARATOR);
+    if ( fields.size() != 6 )
+        return false;
+
+    // fetch passive connection port - each half must fit in one byte,
+    // otherwise we would silently connect to a truncated port number
+    const int portUpper = atoi(fields[4].c_str());
+    const int portLower = atoi(fields[5].c_str());
+    if ( portUpper < 0 || portUpper > 255 || portLower < 0 || portLower > 255 )
+        return false;
+
+    // fetch passive connection IP
+    m_dataHostname = fields[0] + IP_SEPARATOR +
+                     fields[1] + IP_SEPARATOR +
+                     fields[2] + IP_SEPARATOR +
+                     fields[3];
+    m_dataPort = static_cast<uint16_t>( (portUpper << 8) | portLower );
+
+    // return success
+    return true;
+}
+
+// Splits 'url' ("ftp://<host>/<path>") into m_hostname and m_filename and
+// sets m_isUrlParsed on success. The scheme is matched case-insensitively
+// and must be at the very start of the URL; host and path keep their
+// original case.
+//
+// m_isUrlParsed stays false when the scheme is missing, the host is empty,
+// or no path follows the host. (The last case used to reach
+// substr(string::npos), which throws std::out_of_range.)
+void BamFtp::ParseUrl(const string& url) {
+
+    // clear flag to start
+    m_isUrlParsed = false;
+
+    // make sure url starts with "ftp://", case-insensitive
+    if ( url.size() < FTP_PREFIX_LENGTH )
+        return;
+    for ( size_t i = 0; i < FTP_PREFIX_LENGTH; ++i ) {
+        if ( tolower( static_cast<unsigned char>(url[i]) ) != FTP_PREFIX[i] )
+            return;
+    }
+
+    // find end of host name portion (first '/' hit after the prefix)
+    const size_t firstSlashFound = url.find(HOST_SEPARATOR, FTP_PREFIX_LENGTH);
+
+    // fetch hostname (runs to the end of the URL if there is no '/')
+    const size_t hostLength = ( firstSlashFound == string::npos )
+                            ? string::npos
+                            : ( firstSlashFound - FTP_PREFIX_LENGTH );
+    const string hostname = url.substr(FTP_PREFIX_LENGTH, hostLength);
+    if ( hostname.empty() )
+        return;
+    m_hostname = hostname;
+    m_port = FTP_PORT;
+
+    // no slash found... no filename given along with host
+    if ( firstSlashFound == string::npos )
+        return;
+
+    // store remainder of URL (including the leading '/') as filename
+    m_filename = url.substr(firstSlashFound);
+
+    // set parsed OK flag
+    m_isUrlParsed = true;
+}
+
+// Reads up to 'numBytes' bytes from the data channel into 'data',
+// reconnecting the data socket first if it is not connected (e.g. after a
+// Seek()). m_filePosition advances by the number of bytes delivered.
+//
+// Returns the number of bytes actually read - which is less than 'numBytes'
+// if the stream ends early - or -1 if the device is not open, the data
+// channel cannot be (re)connected, or the socket reports an error.
+int64_t BamFtp::Read(char* data, const unsigned int numBytes) {
+
+    // if BamFtp not in a valid state
+    if ( !IsOpen() )
+        return -1;
+
+    // read until hit desired @numBytes
+    int64_t bytesReadSoFar = 0;
+    while ( bytesReadSoFar < numBytes ) {
+
+        // calculate number of bytes we're going to try to read this iteration
+        const size_t remainingBytes = ( numBytes - bytesReadSoFar );
+
+        // if either disconnected somehow, or (more likely) we have seeked since last read
+        if ( !m_dataSocket->IsConnected() ) {
+            if ( !ConnectDataSocket() ) {
+                // TODO: set error string
+                return -1;
+            }
+        }
+
+        // read bytes from data socket
+        const int64_t socketBytesRead = ReadDataSocket(data+bytesReadSoFar, remainingBytes);
+        if ( socketBytesRead < 0 )
+            return -1;
+
+        // nothing more to read (EOF): stop here, otherwise this loop would
+        // never make progress and never terminate
+        if ( socketBytesRead == 0 )
+            break;
+
+        bytesReadSoFar += socketBytesRead;
+        m_filePosition += socketBytesRead;
+    }
+
+    // return actual number bytes successfully read
+    return bytesReadSoFar;
+}
+
+// Reads at most 'maxNumBytes' from the command socket into 'data'.
+// Any negative socket result is normalized to -1.
+int64_t BamFtp::ReadCommandSocket(char* data, const unsigned int maxNumBytes) {
+    const int64_t socketResult = m_commandSocket->Read(data, maxNumBytes);
+    return ( socketResult < 0 ) ? -1 : socketResult;
+}
+
+// Reads at most 'maxNumBytes' from the data socket into 'data'.
+// Any negative socket result is normalized to -1.
+int64_t BamFtp::ReadDataSocket(char* data, const unsigned int maxNumBytes) {
+    const int64_t socketResult = m_dataSocket->Read(data, maxNumBytes);
+    return ( socketResult < 0 ) ? -1 : socketResult;
+}
+
+// Reads one complete server reply from the command socket into m_response.
+//
+// Lines are accumulated until one of the form "xyz<c>..." is seen, where
+// x, y, z are digits and <c> is not '-' (a '-' marks a multi-line reply
+// that continues on the following lines).
+//
+// Returns false (with the error string set) if the command socket is not
+// connected or if the reply could not be read completely; m_response is
+// empty in that case.
+bool BamFtp::ReceiveReply(void) {
+
+    // failure if not connected
+    if ( !m_commandSocket->IsConnected() ) {
+        SetErrorString("BamFtp::ReceiveReply()", "command socket not connected");
+        return false;
+    }
+
+    m_response.clear();
+
+    // read header data (& discard for now)
+    bool headerEnd = false;
+    while ( !headerEnd ) {
+
+        const string headerLine = m_commandSocket->ReadLine();
+
+        // nothing could be read: give up instead of looping forever on a
+        // socket that will not deliver the terminating line
+        // NOTE(review): assumes ReadLine() keeps the line terminator, so a
+        // blank line inside a reply is never an empty string - confirm
+        // against TcpSocket::ReadLine()
+        if ( headerLine.empty() ) {
+            m_response.clear();
+            SetErrorString("BamFtp::ReceiveReply", "error reading server reply");
+            return false;
+        }
+        m_response += headerLine;
+
+        // if line is of form 'xyz ', quit reading lines
+        // (isdigit() needs a value representable as unsigned char)
+        if ( (headerLine.length() >= 4 ) &&
+             isdigit( static_cast<unsigned char>(headerLine[0]) ) &&
+             isdigit( static_cast<unsigned char>(headerLine[1]) ) &&
+             isdigit( static_cast<unsigned char>(headerLine[2]) ) &&
+             ( headerLine[3] != MULTILINE_CONTINUE )
+           )
+        {
+            headerEnd = true;
+        }
+    }
+
+    // return success, depending on response
+    if ( m_response.empty() ) {
+        SetErrorString("BamFtp::ReceiveReply", "error reading server reply");
+        return false;
+    }
+    return true;
+}
+
+// Moves the logical file position. Both sockets are disconnected; the next
+// Read() reconnects and resumes the transfer from the new m_filePosition.
+// Only SEEK_SET and SEEK_CUR are accepted; returns false for any other
+// origin or when the device is not open.
+bool BamFtp::Seek(const int64_t& position, const int origin) {
+
+    // device must be in a valid state
+    if ( !IsOpen() )
+        return false; // TODO: set error string
+
+    // UGLY !! but works?? - drop the connection to the server
+    m_dataSocket->DisconnectFromHost();
+    m_commandSocket->DisconnectFromHost();
+
+    // update file position according to origin
+    switch ( origin ) {
+        case SEEK_CUR :
+            m_filePosition += position;
+            return true;
+        case SEEK_SET :
+            m_filePosition = position;
+            return true;
+        default :
+            // TODO: set error string
+            return false;
+    }
+}
+
+// Writes 'command' to the command socket. With 'waitForReply' set, the
+// result is that of ReceiveReply(); otherwise a successful write is success.
+bool BamFtp::SendCommand(const string& command, bool waitForReply) {
+
+    // command channel must be up
+    if ( !m_commandSocket->IsConnected() ) {
+        SetErrorString("BamFtp::SendCommand", "command socket not connected");
+        return false;
+    }
+
+    // push the command out
+    const int64_t writeResult = WriteCommandSocket(command.c_str(), command.length());
+    if ( writeResult == -1 ) {
+        // get actual error from command socket??
+        SetErrorString("BamFtp::SendCommand", "error writing to socket");
+        return false;
+    }
+
+    // either wait for the server's answer or report success right away
+    return ( waitForReply ? ReceiveReply() : true );
+}
+
+// Current file position, or -1 when the device is not open.
+int64_t BamFtp::Tell(void) const {
+    if ( !IsOpen() )
+        return -1;
+    return m_filePosition;
+}
+
+int64_t BamFtp::Write(const char* data, const unsigned int numBytes) {
+ (void)data;
+ (void)numBytes;
+ BT_ASSERT_X(false, "BamFtp::Write : write-mode not supported on this device");
+ SetErrorString("BamFtp::Write", "write-mode not supported on this device");
+ return -1;
+}
+
+// Clears the command socket's buffer and writes 'numBytes' bytes of 'data'
+// to it. Returns the socket's Write() result, or -1 if not connected.
+int64_t BamFtp::WriteCommandSocket(const char* data, const unsigned int numBytes) {
+
+    const bool connected = m_commandSocket->IsConnected();
+    if ( connected ) {
+        m_commandSocket->ClearBuffer();
+        return m_commandSocket->Write(data, numBytes);
+    }
+    return -1;
+}
+
+// Writing to the data channel is not supported on this device: asserts,
+// records an error string and returns -1. Both arguments are ignored.
+int64_t BamFtp::WriteDataSocket(const char* data, const unsigned int numBytes) {
+    (void)data;
+    (void)numBytes;
+    BT_ASSERT_X(false, "BamFtp::WriteDataSocket: write-mode not supported on this device");
+    // report this method (not BamFtp::Write) as the error location, matching the assert above
+    SetErrorString("BamFtp::WriteDataSocket", "write-mode not supported on this device");
+    return -1;
+}
--- /dev/null
+// ***************************************************************************
+// BamFtp_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on FTP server
+// ***************************************************************************
+
+#ifndef BAMFTP_P_H
+#define BAMFTP_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class TcpSocket;
+
+// Read-only IBamIODevice that fetches a file from an FTP server, using one
+// TcpSocket for commands and a second one for the file data.
+class BamFtp : public IBamIODevice {
+
+ // ctor & dtor
+ public:
+ // parses 'url' ("ftp://host/path"); no connection is made until Open()
+ BamFtp(const std::string& url);
+ ~BamFtp(void);
+
+ // IBamIODevice implementation
+ public:
+ // disconnects both sockets & resets connection state
+ void Close(void);
+ // true only if the base device is open and the URL was parsed OK
+ bool IsOpen(void) const;
+ // always returns true
+ bool IsRandomAccess(void) const;
+ // only IBamIODevice::ReadOnly is accepted
+ bool Open(const IBamIODevice::OpenMode mode);
+ // returns number of bytes read, or -1 on error
+ int64_t Read(char* data, const unsigned int numBytes);
+ // accepts SEEK_SET & SEEK_CUR; disconnects so that the next Read() resumes at the new position
+ bool Seek(const int64_t& position, const int origin = SEEK_SET);
+ // current file position, or -1 if not open
+ int64_t Tell(void) const;
+ // not supported: always returns -1
+ int64_t Write(const char* data, const unsigned int numBytes);
+
+ // internal methods
+ private:
+ // connects command socket & sends USER, PASS, TYPE
+ bool ConnectCommandSocket(void);
+ // sends PASV, optional REST, RETR & connects the data socket
+ bool ConnectDataSocket(void);
+ // extracts data host/port from the "(h1,h2,h3,h4,p1,p2)" part of m_response
+ bool ParsePassiveResponse(void);
+ // fills m_hostname/m_filename and sets m_isUrlParsed
+ void ParseUrl(const std::string& url);
+ int64_t ReadCommandSocket(char* data, const unsigned int numBytes);
+ int64_t ReadDataSocket(char* data, const unsigned int numBytes);
+ // reads one (possibly multi-line) server reply into m_response
+ bool ReceiveReply(void);
+ // writes 'command' to the command socket; optionally waits for the reply
+ bool SendCommand(const std::string& command, bool waitForReply);
+ int64_t WriteCommandSocket(const char* data, const unsigned int numBytes);
+ // not supported: always returns -1
+ int64_t WriteDataSocket(const char* data, const unsigned int numBytes);
+
+ // data members
+ private:
+
+ // our main sockets (created in the ctor, deleted in the dtor)
+ TcpSocket* m_commandSocket;
+ TcpSocket* m_dataSocket;
+
+ // our connection data
+ std::string m_hostname; // host part of the URL
+ uint16_t m_port; // command port
+ std::string m_dataHostname; // data-channel IP taken from the PASV reply
+ uint16_t m_dataPort; // data-channel port taken from the PASV reply
+ std::string m_filename; // path part of the URL, sent with RETR
+
+ // credentials sent with USER/PASS
+ std::string m_username;
+ std::string m_password;
+
+ // text of the most recent server reply
+ std::string m_response;
+
+ // internal state flags
+ bool m_isUrlParsed;
+
+ // file position
+ int64_t m_filePosition;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMFTP_P_H
--- /dev/null
+// ***************************************************************************
+// BamHttp_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on HTTP server
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/internal/io/BamHttp_p.h"
+#include "api/internal/io/HttpHeader_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cassert>
+#include <cctype>
+#include <algorithm>
+#include <sstream>
+using namespace std;
+
+namespace BamTools {
+namespace Internal {
+
+// -----------
+// constants
+// -----------
+
+// connection defaults: port (kept as a string, like BamHttp::m_port) and
+// URL scheme (HTTP_PREFIX_LENGTH is the length of HTTP_PREFIX)
+static const string HTTP_PORT = "80";
+static const string HTTP_PREFIX = "http://";
+static const size_t HTTP_PREFIX_LENGTH = 7;
+
+// marks the end of a response header (see BamHttp::ReceiveResponse)
+static const string DOUBLE_NEWLINE = "\n\n";
+
+// request method, header field names & the prefix of the Range header value
+static const string GET_METHOD = "GET";
+static const string HOST_HEADER = "Host";
+static const string RANGE_HEADER = "Range";
+static const string BYTES_PREFIX = "bytes=";
+
+// separators: between host and path in the URL, and between host and port
+static const char HOST_SEPARATOR = '/';
+static const char PROXY_SEPARATOR = ':';
+
+// -----------------
+// utility methods
+// -----------------
+
+// True when 'source' ends with 'pattern' (an empty pattern always matches).
+//
+// The tail of 'source' is compared directly. Locating the FIRST occurrence
+// with find() misses a suffix that also appears earlier in the string, and
+// "length - pattern length" underflows when the pattern is longer than the
+// source (wrongly matching npos when it is exactly one character longer).
+static inline
+bool endsWith(const string& source, const string& pattern) {
+    if ( pattern.length() > source.length() )
+        return false;
+    const size_t tailBegin = source.length() - pattern.length();
+    return ( source.compare(tailBegin, pattern.length(), pattern) == 0 );
+}
+
+// Returns a lower-cased copy of 's'.
+//
+// Characters are appended with push_back(): reserve() only allocates
+// capacity and leaves the string length at 0, so assigning through
+// operator[] (as this helper used to do) wrote outside the string's
+// length and always produced an empty result.
+static inline
+string toLower(const string& s) {
+    string out;
+    const size_t sSize = s.size();
+    out.reserve(sSize);
+    for ( size_t i = 0; i < sSize; ++i ) {
+        // tolower() requires a value representable as unsigned char
+        out.push_back( static_cast<char>( tolower( static_cast<unsigned char>(s[i]) ) ) );
+    }
+    return out;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// ------------------------
+// BamHttp implementation
+// ------------------------
+
+// Creates the (still unconnected) socket, installs the default port, starts
+// without request/response objects, then parses 'url' into hostname &
+// filename. Whether parsing succeeded is recorded in m_isUrlParsed.
+BamHttp::BamHttp(const string& url)
+    : IBamIODevice(),
+      m_socket(new TcpSocket),
+      m_port(HTTP_PORT),
+      m_request(0),
+      m_response(0),
+      m_isUrlParsed(false),
+      m_filePosition(-1),
+      m_endRangeFilePosition(-1)
+{
+    ParseUrl(url);
+}
+
+// Disconnects & releases request/response via Close(), then destroys the socket.
+BamHttp::~BamHttp(void) {
+
+    Close();
+
+    // 'delete' on a null pointer is a no-op, so no guard is needed
+    delete m_socket;
+}
+
+// Disconnects the socket, frees the last request & response objects and
+// resets the parsed flag and both file positions.
+void BamHttp::Close(void) {
+
+    // drop the connection
+    m_socket->DisconnectFromHost();
+
+    // release request & response ('delete' tolerates null pointers)
+    delete m_request;
+    m_request = 0;
+    delete m_response;
+    m_response = 0;
+
+    // reset state - necessary??
+    m_endRangeFilePosition = -1;
+    m_filePosition = -1;
+    m_isUrlParsed = false;
+}
+
+// Connects the socket to m_hostname:m_port, resets the file positions and
+// performs the initial request/response exchange. A failure during that
+// exchange closes the device. Returns true only if every step succeeded.
+bool BamHttp::ConnectSocket(void) {
+
+    BT_ASSERT_X(m_socket, "null socket?");
+
+    // any state checks, etc?
+    const bool connected = m_socket->ConnectToHost(m_hostname, m_port, m_mode);
+    if ( !connected )
+        return false; // TODO: set error string
+
+    // start at the beginning of the file, with no known range
+    m_filePosition = 0;
+    m_endRangeFilePosition = -1;
+
+    // initial request, then (only if that was sent) wait for the server's response
+    const bool exchanged = ( SendRequest() && ReceiveResponse() );
+    if ( !exchanged ) {
+        // TODO: set error string
+        Close();
+        return false;
+    }
+
+    return true;
+}
+
+// True if the socket is already connected; otherwise the result of a
+// fresh ConnectSocket() attempt.
+bool BamHttp::EnsureSocketConnection(void) {
+    return ( m_socket->IsConnected() ? true : ConnectSocket() );
+}
+
+// Open means: the base device reports open AND the URL was parsed OK.
+bool BamHttp::IsOpen(void) const {
+    if ( !IBamIODevice::IsOpen() )
+        return false;
+    return m_isUrlParsed;
+}
+
+// Always true for a BamHttp device.
+bool BamHttp::IsRandomAccess(void) const {
+    const bool randomAccess = true;
+    return randomAccess;
+}
+
+// Opens the device for reading (the only supported mode) by connecting the
+// socket. On connection failure the socket's error string is reported.
+bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
+
+    // read-only device
+    const bool readRequested = ( mode == IBamIODevice::ReadOnly );
+    if ( !readRequested ) {
+        SetErrorString("BamHttp::Open", "writing on this device is not supported");
+        return false;
+    }
+    m_mode = mode;
+
+    // attempt connection to socket
+    const bool connected = ConnectSocket();
+    if ( !connected )
+        SetErrorString("BamHttp::Open", m_socket->GetErrorString());
+    return connected;
+}
+
+// Splits 'url' ("http://<host>[:<port>]/<path>") into m_hostname, m_port and
+// m_filename and sets m_isUrlParsed on success. The scheme is matched
+// case-insensitively and must be at the very start of the URL; host and
+// path keep their original case.
+//
+// m_isUrlParsed stays false when the scheme is missing, the host is empty,
+// or no path follows the host. (The last case used to reach
+// substr(string::npos), which throws std::out_of_range.)
+void BamHttp::ParseUrl(const string& url) {
+
+    // clear flag to start
+    m_isUrlParsed = false;
+
+    // make sure url starts with "http://", case-insensitive
+    if ( url.size() < HTTP_PREFIX_LENGTH )
+        return;
+    for ( size_t i = 0; i < HTTP_PREFIX_LENGTH; ++i ) {
+        if ( tolower( static_cast<unsigned char>(url[i]) ) != HTTP_PREFIX[i] )
+            return;
+    }
+
+    // find end of host name portion (first '/' hit after the prefix)
+    const size_t firstSlashFound = url.find(HOST_SEPARATOR, HTTP_PREFIX_LENGTH);
+
+    // fetch "host[:port]" (runs to the end of the URL if there is no '/')
+    const size_t hostLength = ( firstSlashFound == string::npos )
+                            ? string::npos
+                            : ( firstSlashFound - HTTP_PREFIX_LENGTH );
+    const string hostAndPort = url.substr(HTTP_PREFIX_LENGTH, hostLength);
+
+    // separate an explicit port, if given; an absent or empty port means
+    // the default one. Previously a URL containing ':' left m_hostname
+    // empty while still being flagged as parsed.
+    const size_t colonFound = hostAndPort.find(PROXY_SEPARATOR);
+    if ( colonFound == string::npos ) {
+        m_hostname = hostAndPort;
+        m_port = HTTP_PORT;
+    } else {
+        const string port = hostAndPort.substr(colonFound + 1);
+        m_hostname = hostAndPort.substr(0, colonFound);
+        m_port = ( port.empty() ? HTTP_PORT : port );
+    }
+    if ( m_hostname.empty() )
+        return;
+
+    // no slash found... no filename given along with host
+    if ( firstSlashFound == string::npos )
+        return;
+
+    // store remainder of URL (including the leading '/') as filename
+    m_filename = url.substr(firstSlashFound);
+
+    // set parsed OK flag
+    m_isUrlParsed = true;
+}
+
+// Reads up to 'numBytes' bytes into 'data', advancing m_filePosition.
+//
+// While m_endRangeFilePosition is negative, bytes are taken straight from
+// the socket. Otherwise bytes are taken from what is left of the last
+// requested range, and a new range request is issued whenever that range is
+// used up.
+//
+// Returns the number of bytes actually read - which is less than 'numBytes'
+// if the stream ends early - or -1 if the device is not open, a socket read
+// fails, or a follow-up request/response fails (the device is closed then).
+int64_t BamHttp::Read(char* data, const unsigned int numBytes) {
+
+    // if BamHttp not in a valid state
+    if ( !IsOpen() )
+        return -1;
+
+    // read until hit desired @numBytes
+    int64_t bytesReadSoFar = 0;
+    while ( bytesReadSoFar < numBytes ) {
+
+        // calculate number of bytes we're going to try to read this iteration
+        const size_t remainingBytes = ( numBytes - bytesReadSoFar );
+
+        // if socket has access to entire file contents
+        // i.e. we received response with full data (status code == 200)
+        if ( m_endRangeFilePosition < 0 ) {
+
+            // try to read 'remainingBytes' from socket
+            const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, remainingBytes);
+            if ( socketBytesRead < 0 )
+                return -1;
+
+            // nothing more to read (EOF): stop, or this loop would never end
+            if ( socketBytesRead == 0 )
+                break;
+
+            bytesReadSoFar += socketBytesRead;
+            m_filePosition += socketBytesRead;
+        }
+
+        // socket has access to a range of data (might already be in buffer)
+        // i.e. we received response with partial data (status code == 206)
+        else {
+
+            // there is data left from last request
+            if ( m_endRangeFilePosition > m_filePosition ) {
+
+                // try to read either the total 'remainingBytes' or
+                // whatever we have remaining from last request range
+                const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition;
+                const size_t bytesToRead = std::min(remainingBytes, rangeRemainingBytes);
+                const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead);
+                if ( socketBytesRead < 0 )
+                    return -1;
+
+                // server delivered less than the requested range (EOF): stop
+                if ( socketBytesRead == 0 )
+                    break;
+
+                bytesReadSoFar += socketBytesRead;
+                m_filePosition += socketBytesRead;
+            }
+
+            // otherwise, this is a 1st-time read or
+            // we already read everything from the last GET request
+            else {
+
+                // request for next range
+                if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) {
+                    Close();
+                    return -1;
+                }
+            }
+        }
+    }
+
+    // return actual number bytes successfully read
+    return bytesReadSoFar;
+}
+
+// Reads at most 'maxNumBytes' from the socket into 'data'.
+// Any negative socket result is normalized to -1.
+int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
+    const int64_t socketResult = m_socket->Read(data, maxNumBytes);
+    return ( socketResult < 0 ) ? -1 : socketResult;
+}
+
+// Reads and evaluates the server's response header.
+//
+// Header lines are collected until the text ends with a blank line, then
+// parsed into m_response. Status 206 (requested range) is accepted as is.
+// For status 200 (entire file) the body is read and discarded up to
+// m_filePosition, so that the next socket read yields the byte at that
+// position. Any other status is an error.
+//
+// Returns true on success. Failures while reading or evaluating the
+// response close the device; a failed (re)connection just returns false.
+bool BamHttp::ReceiveResponse(void) {
+
+    // clear any prior response - and forget the pointer, so that an early
+    // return below cannot lead Close() into deleting it a second time
+    delete m_response;
+    m_response = 0;
+
+    // make sure we're connected
+    if ( !EnsureSocketConnection() )
+        return false;
+
+    // fetch header, up until double new line
+    string responseHeader;
+    do {
+        // read line & append to full header
+        const string headerLine = m_socket->ReadLine();
+
+        // nothing could be read: give up instead of looping forever
+        // (the terminating condition below relies on lines carrying their
+        // '\n', so an empty string is never a valid header line)
+        if ( headerLine.empty() ) {
+            // TODO: set error string
+            Close();
+            return false;
+        }
+        responseHeader += headerLine;
+
+    } while ( !endsWith(responseHeader, DOUBLE_NEWLINE) );
+
+    // create response from header text
+    // (a reconnect above may have stored its own response - release it first)
+    delete m_response;
+    m_response = new HttpResponseHeader(responseHeader);
+    if ( !m_response->IsValid() ) {
+        // TODO: set error string
+        Close();
+        return false;
+    }
+
+    // if we got range response as requested
+    if ( m_response->GetStatusCode() == 206 )
+        return true;
+
+    // if we got the full file contents instead of range
+    else if ( m_response->GetStatusCode() == 200 ) {
+
+        // skip up to current file position - never asking for more than is
+        // left to skip, so no bytes at/after m_filePosition are thrown away
+        const int64_t chunkSize = 0x8000;
+        RaiiBuffer tmp(0x8000);
+        int64_t numBytesRead = 0;
+        while ( numBytesRead < m_filePosition ) {
+            const int64_t leftToSkip = m_filePosition - numBytesRead;
+            const unsigned int bytesToSkip =
+                static_cast<unsigned int>( (leftToSkip > chunkSize) ? chunkSize : leftToSkip );
+            const int64_t result = ReadFromSocket(tmp.Buffer, bytesToSkip);
+
+            // error, or stream ended before the requested position was reached
+            if ( result <= 0 ) {
+                Close();
+                return false;
+            }
+            numBytesRead += result;
+        }
+
+        // return success
+        return true;
+    }
+
+    // on any other response status
+    // TODO: set error string
+    Close();
+    return false;
+}
+
+// Moves the logical file position. Only SEEK_SET and SEEK_CUR are accepted.
+//
+// On success the socket's buffered data is discarded and the current range
+// is marked as used up (m_endRangeFilePosition == m_filePosition), so the
+// next Read() issues a new request starting at the new position.
+//
+// Returns false - without touching the buffer or any position - when the
+// device is not open or 'origin' is not supported.
+bool BamHttp::Seek(const int64_t& position, const int origin) {
+
+    // if HTTP device not in a valid state
+    if ( !IsOpen() ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // validate the origin BEFORE discarding anything: a rejected seek must
+    // not drop buffered data that still belongs to the current position
+    if ( origin != SEEK_CUR && origin != SEEK_SET ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // discard socket's buffer contents, update positions, & return success
+    m_socket->ClearBuffer();
+
+    if ( origin == SEEK_CUR )
+        m_filePosition += position;
+    else
+        m_filePosition = position;
+
+    m_endRangeFilePosition = m_filePosition;
+    return true;
+}
+
+// Builds and sends a GET request for m_filename carrying a Host header and
+// a Range header of the form "bytes=<m_filePosition>-<m_filePosition+numBytes>".
+// m_endRangeFilePosition is set to that upper bound.
+//
+// Returns true only if the connection is available and the complete
+// request header was written to the socket.
+bool BamHttp::SendRequest(const size_t numBytes) {
+
+    // remove any currently active request - and forget the pointer, so that
+    // neither a nested SendRequest() (issued while reconnecting below) nor
+    // a later Close() deletes the same object again
+    delete m_request;
+    m_request = 0;
+
+    // create range string
+    // NOTE(review): HTTP byte ranges are inclusive at both ends, so "a-b"
+    // asks for (b - a + 1) bytes - confirm whether the upper bound is
+    // meant to be one less
+    m_endRangeFilePosition = m_filePosition + numBytes;
+    stringstream range("");
+    range << BYTES_PREFIX << m_filePosition << '-' << m_endRangeFilePosition;
+
+    // make sure we're connected
+    if ( !EnsureSocketConnection() )
+        return false;
+
+    // create request
+    // (a reconnect above may have stored its own request - release it first)
+    delete m_request;
+    m_request = new HttpRequestHeader(GET_METHOD, m_filename);
+    m_request->SetField(HOST_HEADER, m_hostname);
+    m_request->SetField(RANGE_HEADER, range.str());
+
+    // write request to socket; success means the whole header went out
+    const string requestHeader = m_request->ToString();
+    const size_t headerSize = requestHeader.size();
+    const int64_t bytesWritten = WriteToSocket(requestHeader.c_str(), headerSize);
+    return ( bytesWritten == static_cast<int64_t>(headerSize) );
+}
+
+// Current file position, or -1 when the device is not open.
+int64_t BamHttp::Tell(void) const {
+    if ( !IsOpen() )
+        return -1;
+    return m_filePosition;
+}
+
+int64_t BamHttp::Write(const char* data, const unsigned int numBytes) {
+ (void)data;
+ (void)numBytes;
+ BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device");
+ SetErrorString("BamHttp::Write", "write-mode not supported on this device");
+ return -1;
+}
+
+// Clears the socket's buffer and writes 'numBytes' bytes of 'data' to it.
+// Returns the socket's Write() result, or -1 if not connected.
+int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) {
+
+    const bool connected = m_socket->IsConnected();
+    if ( connected ) {
+        m_socket->ClearBuffer();
+        return m_socket->Write(data, numBytes);
+    }
+    return -1;
+}
--- /dev/null
+// ***************************************************************************
+// BamHttp_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides reading/writing of BAM files on HTTP server
+// ***************************************************************************
+
+#ifndef BAMHTTP_P_H
+#define BAMHTTP_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class HttpRequestHeader;
+class HttpResponseHeader;
+class TcpSocket;
+
+// Read-only IBamIODevice that fetches a file from an HTTP server over a
+// single TcpSocket, using GET requests with a Range header.
+class BamHttp : public IBamIODevice {
+
+ // ctor & dtor
+ public:
+ // parses 'url' ("http://host/path"); no connection is made until Open()
+ BamHttp(const std::string& url);
+ ~BamHttp(void);
+
+ // IBamIODevice implementation
+ public:
+ // disconnects, frees request/response & resets state
+ void Close(void);
+ // true only if the base device is open and the URL was parsed OK
+ bool IsOpen(void) const;
+ // always returns true
+ bool IsRandomAccess(void) const;
+ // only IBamIODevice::ReadOnly is accepted
+ bool Open(const IBamIODevice::OpenMode mode);
+ // returns number of bytes read, or -1 on error
+ int64_t Read(char* data, const unsigned int numBytes);
+ // accepts SEEK_SET & SEEK_CUR
+ bool Seek(const int64_t& position, const int origin = SEEK_SET);
+ // current file position, or -1 if not open
+ int64_t Tell(void) const;
+ // not supported: always returns -1
+ int64_t Write(const char* data, const unsigned int numBytes);
+
+ // internal methods
+ private:
+ // connects & performs the initial request/response exchange
+ bool ConnectSocket(void);
+ // connects only if the socket is not connected already
+ bool EnsureSocketConnection(void);
+ // fills m_hostname/m_port/m_filename and sets m_isUrlParsed
+ void ParseUrl(const std::string& url);
+ int64_t ReadFromSocket(char* data, const unsigned int numBytes);
+ // reads the response header; accepts status 206 and 200
+ bool ReceiveResponse(void);
+ // sends a GET with a Range starting at m_filePosition
+ bool SendRequest(const size_t numBytes = 0);
+ int64_t WriteToSocket(const char* data, const unsigned int numBytes);
+
+ // data members
+ private:
+
+ // our main socket (created in the ctor, deleted in the dtor)
+ TcpSocket* m_socket;
+
+ // our connection data
+ std::string m_hostname; // host part of the URL
+ std::string m_port; // port, kept as text
+ std::string m_filename; // path part of the URL, used as the GET target
+
+ // our last (active) request & response info (0 when there is none)
+ HttpRequestHeader* m_request;
+ HttpResponseHeader* m_response;
+
+ // internal state flags
+ bool m_isUrlParsed;
+
+ // file position
+ int64_t m_filePosition; // position of the next byte to deliver
+ int64_t m_endRangeFilePosition; // upper bound of the last requested range; -1 when unset
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMHTTP_P_H
--- /dev/null
+// ***************************************************************************
+// BamPipe_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM pipe-specific IO behavior
+// ***************************************************************************
+
+#include "api/internal/io/BamPipe_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <iostream>
+using namespace std;
+
+// constructs the pipe device; the ILocalIODevice base is default-constructed
+BamPipe::BamPipe(void)
+    : ILocalIODevice()
+{ }
+
+// destructor - performs no pipe-specific cleanup of its own
+BamPipe::~BamPipe(void) { }
+
+// reports that this device does not support random access
+bool BamPipe::IsRandomAccess(void) const {
+    const bool supportsRandomAccess = false;
+    return supportsRandomAccess;
+}
+
+// (re)opens stdin or stdout in binary mode, according to @mode
+// returns false (after recording an error string) for any mode other than
+// ReadOnly/WriteOnly, or if the standard stream could not be re-opened
+bool BamPipe::Open(const IBamIODevice::OpenMode mode) {
+
+    // discard any previously opened pipe first
+    Close();
+
+    const bool wantsRead  = ( mode == IBamIODevice::ReadOnly );
+    const bool wantsWrite = ( mode == IBamIODevice::WriteOnly );
+
+    // only plain reading or plain writing is handled
+    if ( !wantsRead && !wantsWrite ) {
+        string message;
+        if ( mode == IBamIODevice::ReadWrite )
+            message = "unsupported";
+        else
+            message = "unknown";
+        message += " open mode requested";
+        SetErrorString("BamPipe::Open", message);
+        return false;
+    }
+
+    // re-open the matching standard stream in binary mode
+    if ( wantsRead )
+        m_stream = freopen(0, "rb", stdin);
+    else
+        m_stream = freopen(0, "wb", stdout);
+
+    // a null FILE* means the re-open failed
+    if ( m_stream == 0 ) {
+        string message("could not open handle on ");
+        message += ( wantsRead ? "stdin" : "stdout" );
+        SetErrorString("BamPipe::Open", message);
+        return false;
+    }
+
+    // remember the IO mode we were opened with
+    m_mode = mode;
+    return true;
+}
+
+bool BamPipe::Seek(const int64_t&, const int) {
+ SetErrorString("BamPipe::Seek", "random access not allowed in FIFO pipe");
+ return false;
+}
--- /dev/null
+// ***************************************************************************
+// BamPipe_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides BAM pipe-specific IO behavior
+// ***************************************************************************
+
+#ifndef BAMPIPE_P_H
+#define BAMPIPE_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/ILocalIODevice_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BamPipe : public ILocalIODevice { // ILocalIODevice subclass whose Open() attaches to stdin or stdout
+
+ // constructor & destructor
+ public:
+ BamPipe(void);
+ ~BamPipe(void);
+
+ // IBamIODevice implementation
+ public:
+ bool IsRandomAccess(void) const; // always false for a pipe
+ bool Open(const IBamIODevice::OpenMode mode); // ReadOnly -> stdin, WriteOnly -> stdout, anything else fails
+ bool Seek(const int64_t& position, const int origin = SEEK_SET); // always fails & sets an error string
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMPIPE_P_H
--- /dev/null
+// ***************************************************************************
+// BgzfStream_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Based on BGZF routines developed at the Broad Institute.
+// Provides the basic functionality for reading & writing BGZF files
+// Replaces the old BGZF.* files to avoid clashing with other toolkits
+// ***************************************************************************
+
+#include "api/BamAux.h"
+#include "api/BamConstants.h"
+#include "api/internal/io/BamDeviceFactory_p.h"
+#include "api/internal/io/BgzfStream_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include "zlib.h"
+
+#include <cstring>
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+using namespace std;
+
+// ---------------------------
+// BgzfStream implementation
+// ---------------------------
+
+// constructor - starts with no device attached, empty block state, and compressed output enabled
+BgzfStream::BgzfStream(void)
+ : m_blockLength(0)
+ , m_blockOffset(0)
+ , m_blockAddress(0)
+ , m_isWriteCompressed(true) // DeflateBlock() uses Z_DEFAULT_COMPRESSION while this is true, level 0 otherwise
+ , m_device(0) // no IO device until Open() creates one
+ , m_uncompressedBlock(Constants::BGZF_DEFAULT_BLOCK_SIZE) // buffer size in bytes
+ , m_compressedBlock(Constants::BGZF_MAX_BLOCK_SIZE) // buffer size in bytes
+{ }
+
+// destructor - closes the stream, which also finishes any pending output
+// and releases the IO device
+BgzfStream::~BgzfStream(void) {
+    this->Close();
+}
+
+// validates the fixed fields of a BGZF block header
+// returns true only if every checked field matches its expected value
+bool BgzfStream::CheckBlockHeader(char* header) {
+
+    // gzip magic bytes
+    if ( header[0] != Constants::GZIP_ID1 ) return false;
+    if ( header[1] != Constants::GZIP_ID2 ) return false;
+
+    // compression method must be 'deflate'
+    if ( header[2] != Z_DEFLATED ) return false;
+
+    // the 'extra field' flag must be set
+    if ( (header[3] & Constants::FLG_FEXTRA) == 0 ) return false;
+
+    // extra-field length, BGZF subfield identifier & subfield length
+    if ( BamTools::UnpackUnsignedShort(&header[10]) != Constants::BGZF_XLEN ) return false;
+    if ( header[12] != Constants::BGZF_ID1 ) return false;
+    if ( header[13] != Constants::BGZF_ID2 ) return false;
+    if ( BamTools::UnpackUnsignedShort(&header[14]) != Constants::BGZF_LEN ) return false;
+
+    // everything checked out
+    return true;
+}
+
+// closes the BGZF stream: finishes pending output (if writing), releases the
+// IO device, and returns the stream to its freshly-constructed state
+void BgzfStream::Close(void) {
+
+    // nothing to do without a device
+    if ( m_device == 0 )
+        return;
+
+    // a writable stream must be finished off: flush whatever is buffered,
+    // then emit one empty BGZF block, which serves as the EOF marker
+    const bool isWriting = m_device->IsOpen() && ( m_device->Mode() == IBamIODevice::WriteOnly );
+    if ( isWriting ) {
+        FlushBlock();
+        const size_t eofBlockLength = DeflateBlock();
+        m_device->Write(m_compressedBlock.Buffer, eofBlockLength);
+    }
+
+    // shut down & release the device
+    m_device->Close();
+    delete m_device;
+    m_device = 0;
+
+    // restore default state
+    m_isWriteCompressed = true;
+    m_blockAddress = 0;
+    m_blockOffset  = 0;
+    m_blockLength  = 0;
+
+    // zero out both block buffers
+    m_compressedBlock.Clear();
+    m_uncompressedBlock.Clear();
+}
+
+// compresses the pending bytes of the uncompressed block buffer into one BGZF block (header + deflate data + footer) in the compressed block buffer; returns that block's total length; throws BamException on any zlib failure
+size_t BgzfStream::DeflateBlock(void) {
+
+ // initialize the gzip/BGZF header fields (everything not set below stays zero)
+ char* buffer = m_compressedBlock.Buffer;
+ memset(buffer, 0, 18); // zero the first 18 bytes before filling in the non-zero fields
+ buffer[0] = Constants::GZIP_ID1;
+ buffer[1] = Constants::GZIP_ID2;
+ buffer[2] = Constants::CM_DEFLATE;
+ buffer[3] = Constants::FLG_FEXTRA;
+ buffer[9] = Constants::OS_UNKNOWN;
+ buffer[10] = Constants::BGZF_XLEN;
+ buffer[12] = Constants::BGZF_ID1;
+ buffer[13] = Constants::BGZF_ID2;
+ buffer[14] = Constants::BGZF_LEN;
+
+ // set compression level (level 0 when compressed output has been disabled)
+ const int compressionLevel = ( m_isWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );
+
+ // loop to retry for blocks that do not compress enough
+ int inputLength = m_blockOffset; // number of bytes currently pending in the uncompressed buffer
+ size_t compressedLength = 0;
+ const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
+
+ while ( true ) {
+
+ // initialize zstream values (output goes right after the header, leaving room for the footer)
+ z_stream zs;
+ zs.zalloc = NULL;
+ zs.zfree = NULL; // NOTE(review): zs.opaque is left unset here; zlib's manual asks callers to initialize it - confirm
+ zs.next_in = (Bytef*)m_uncompressedBlock.Buffer;
+ zs.avail_in = inputLength;
+ zs.next_out = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
+ zs.avail_out = bufferSize -
+ Constants::BGZF_BLOCK_HEADER_LENGTH -
+ Constants::BGZF_BLOCK_FOOTER_LENGTH;
+
+ // initialize the zlib compression algorithm
+ int status = deflateInit2(&zs,
+ compressionLevel,
+ Z_DEFLATED,
+ Constants::GZIP_WINDOW_BITS,
+ Constants::Z_DEFAULT_MEM_LEVEL,
+ Z_DEFAULT_STRATEGY);
+ if ( status != Z_OK )
+ throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
+
+ // compress the data in a single call
+ status = deflate(&zs, Z_FINISH);
+
+ // if not at stream end
+ if ( status != Z_STREAM_END ) {
+
+ deflateEnd(&zs); // release zlib state before retrying or throwing
+
+ // there was not enough space available in buffer
+ // try to reduce the input length & re-start loop
+ if ( status == Z_OK ) {
+ inputLength -= 1024; // retry with 1024 fewer input bytes
+ if ( inputLength < 0 )
+ throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
+ continue;
+ }
+
+ throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
+ }
+
+ // finalize the compression routine
+ status = deflateEnd(&zs);
+ if ( status != Z_OK )
+ throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
+
+ // update compressedLength (deflate output plus header & footer)
+ compressedLength = zs.total_out +
+ Constants::BGZF_BLOCK_HEADER_LENGTH +
+ Constants::BGZF_BLOCK_FOOTER_LENGTH;
+ if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
+ throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
+
+ // quit while loop
+ break;
+ }
+
+ // store the compressed length, minus one, in header bytes 16-17
+ BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
+
+ // store the footer: CRC32 checksum of the consumed input, then its length
+ uint32_t crc = crc32(0, NULL, 0);
+ crc = crc32(crc, (Bytef*)m_uncompressedBlock.Buffer, inputLength);
+ BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
+ BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
+
+ // move any input that was not consumed (after a reduced-length retry) to the front of the buffer
+ int remaining = m_blockOffset - inputLength;
+ if ( remaining > 0 ) {
+ if ( remaining > inputLength )
+ throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
+ memcpy(m_uncompressedBlock.Buffer, m_uncompressedBlock.Buffer + inputLength, remaining); // source & destination cannot overlap: remaining <= inputLength
+ }
+
+ // update block data (only the unconsumed bytes are still pending)
+ m_blockOffset = remaining;
+
+ // return total length of the compressed block
+ return compressedLength;
+}
+
+// compresses & writes out everything pending in the uncompressed block buffer
+// throws BamException on a device error or a short write
+void BgzfStream::FlushBlock(void) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::FlushBlock() - attempting to flush to null device" );
+
+    // DeflateBlock() may consume only part of the pending data,
+    // so keep going until nothing is left
+    while ( m_blockOffset != 0 ) {
+
+        // compress pending data, then hand the finished block to the device
+        const size_t compressedLength = DeflateBlock();
+        const int64_t written = m_device->Write(m_compressedBlock.Buffer, compressedLength);
+
+        // negative result signals a device-level failure
+        if ( written < 0 ) {
+            string message("device error: ");
+            message += m_device->GetErrorString();
+            throw BamException("BgzfStream::FlushBlock", message);
+        }
+
+        // anything other than a complete write is an error as well
+        if ( written != static_cast<int64_t>(compressedLength) ) {
+            stringstream report("");
+            report << "expected to write " << compressedLength
+                   << " bytes during flushing, but wrote " << written;
+            throw BamException("BgzfStream::FlushBlock", report.str());
+        }
+
+        // the next block starts right after the one just written
+        m_blockAddress += compressedLength;
+    }
+}
+
+// decompresses the BGZF block held in the compressed block buffer into the
+// uncompressed block buffer
+//
+// @blockLength - total length (header + deflate data + footer) of the compressed block
+// returns the number of uncompressed bytes produced
+// throws BamException if @blockLength is implausibly small or zlib reports an error
+size_t BgzfStream::InflateBlock(const size_t& blockLength) {
+
+    // a block shorter than its own header + footer cannot hold any deflate data
+    // (and would make the unsigned length arithmetic below wrap around)
+    const size_t minBlockLength = static_cast<size_t>(Constants::BGZF_BLOCK_HEADER_LENGTH) +
+                                  static_cast<size_t>(Constants::BGZF_BLOCK_FOOTER_LENGTH);
+    if ( blockLength < minBlockLength )
+        throw BamException("BgzfStream::InflateBlock", "invalid block length");
+
+    // setup zlib stream object
+    // (zlib requires zalloc, zfree AND opaque to be initialized before inflateInit2)
+    z_stream zs;
+    zs.zalloc    = NULL;
+    zs.zfree     = NULL;
+    zs.opaque    = NULL;
+    zs.next_in   = (Bytef*)m_compressedBlock.Buffer + 18;
+    zs.avail_in  = blockLength - 16;
+    zs.next_out  = (Bytef*)m_uncompressedBlock.Buffer;
+    zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+
+    // initialize
+    int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
+    if ( status != Z_OK )
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
+
+    // decompress the whole block in a single call
+    status = inflate(&zs, Z_FINISH);
+    if ( status != Z_STREAM_END ) {
+        inflateEnd(&zs);   // release zlib state before bailing out
+        throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
+    }
+
+    // finalize (no second inflateEnd on failure - the state is already invalid)
+    status = inflateEnd(&zs);
+    if ( status != Z_OK )
+        throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
+
+    // return number of uncompressed bytes
+    return zs.total_out;
+}
+
+// returns true only when a device is attached and that device reports itself open
+bool BgzfStream::IsOpen(void) const {
+    return ( m_device != 0 ) && m_device->IsOpen();
+}
+
+// opens a BGZF stream on @filename in the requested @mode
+// any previously open stream is closed first
+// throws BamException if the newly created device fails to open
+void BgzfStream::Open(const string& filename, const IBamIODevice::OpenMode mode) {
+
+    // start from a clean slate
+    Close();
+    BT_ASSERT_X( (m_device == 0), "BgzfStream::Open() - unable to properly close previous IO device" );
+
+    // ask the factory for a device matching this filename
+    m_device = BamDeviceFactory::CreateDevice(filename);
+    BT_ASSERT_X( m_device, "BgzfStream::Open() - unable to create IO device from filename" );
+
+    // done, if the device opens in the requested mode
+    if ( m_device->Open(mode) )
+        return;
+
+    // otherwise pass along the device's own error description
+    string message("could not open BGZF stream: \n\t");
+    message += m_device->GetErrorString();
+    throw BamException("BgzfStream::Open", message);
+}
+
+// copies up to @dataLength uncompressed bytes into @data, reading &
+// decompressing further BGZF blocks as needed
+// returns the number of bytes actually copied (0 if the stream is not open for reading)
+size_t BgzfStream::Read(char* data, const size_t dataLength) {
+
+    // trivial request
+    if ( dataLength == 0 )
+        return 0;
+
+    BT_ASSERT_X( m_device, "BgzfStream::Read() - trying to read from null device");
+
+    // only a device opened read-only can be read from
+    const bool isReadable = m_device->IsOpen() && ( m_device->Mode() == IBamIODevice::ReadOnly );
+    if ( !isReadable )
+        return 0;
+
+    size_t totalCopied = 0;
+    while ( totalCopied < dataLength ) {
+
+        // how much of the current block is still unread?
+        int available = m_blockLength - m_blockOffset;
+
+        // current block exhausted: fetch the next one, stop if there is none
+        if ( available <= 0 ) {
+            ReadBlock();
+            available = m_blockLength - m_blockOffset;
+            if ( available <= 0 )
+                break;
+        }
+
+        // hand over as much as is both wanted and available
+        const size_t wanted = dataLength - totalCopied;
+        const size_t chunk  = min( wanted, (size_t)available );
+        memcpy(data + totalCopied, m_uncompressedBlock.Buffer + m_blockOffset, chunk);
+
+        m_blockOffset += chunk;
+        totalCopied   += chunk;
+    }
+
+    // block fully consumed: position ourselves at the start of the next one
+    if ( m_blockOffset == m_blockLength ) {
+        m_blockAddress = m_device->Tell();
+        m_blockLength  = 0;
+        m_blockOffset  = 0;
+    }
+
+    return totalCopied;
+}
+
+// reads the next BGZF block from the device & decompresses it into the
+// uncompressed block buffer
+//
+// at end of input (no header bytes available) m_blockLength is set to 0
+// throws BamException on device errors, truncated input, or a malformed block
+void BgzfStream::ReadBlock(void) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::ReadBlock() - trying to read from null IO device");
+
+    // store block's starting address
+    const int64_t blockAddress = m_device->Tell();
+
+    // read block header from file
+    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
+    int64_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH);
+
+    // check for device error
+    if ( numBytesRead < 0 ) {
+        const string message = string("device error: ") + m_device->GetErrorString();
+        throw BamException("BgzfStream::ReadBlock", message);
+    }
+
+    // if block header empty (nothing more to read)
+    if ( numBytesRead == 0 ) {
+        m_blockLength = 0;
+        return;
+    }
+
+    // if block header invalid size
+    if ( numBytesRead != static_cast<int64_t>(Constants::BGZF_BLOCK_HEADER_LENGTH) )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header size");
+
+    // validate block header contents
+    if ( !BgzfStream::CheckBlockHeader(header) )
+        throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
+
+    // header bytes 16-17 hold (total block length - 1)
+    const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
+
+    // the length comes straight from (untrusted) input: a block that cannot even
+    // hold its own header + footer is malformed, and would make the unsigned
+    // subtraction below wrap around into a huge read request
+    const size_t headerLength   = static_cast<size_t>(Constants::BGZF_BLOCK_HEADER_LENGTH);
+    const size_t minBlockLength = headerLength + static_cast<size_t>(Constants::BGZF_BLOCK_FOOTER_LENGTH);
+    if ( blockLength < minBlockLength )
+        throw BamException("BgzfStream::ReadBlock", "invalid block length");
+
+    // copy header contents to compressed buffer
+    memcpy(m_compressedBlock.Buffer, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
+
+    // read remainder of block
+    const size_t remaining = blockLength - headerLength;
+    numBytesRead = m_device->Read(&m_compressedBlock.Buffer[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining);
+
+    // check for device error
+    if ( numBytesRead < 0 ) {
+        const string message = string("device error: ") + m_device->GetErrorString();
+        throw BamException("BgzfStream::ReadBlock", message);
+    }
+
+    // check that we read in expected numBytes
+    if ( numBytesRead != static_cast<int64_t>(remaining) )
+        throw BamException("BgzfStream::ReadBlock", "could not read data from block");
+
+    // decompress block data
+    const size_t newBlockLength = InflateBlock(blockLength);
+
+    // update block data
+    // (the offset is only reset if a block was already loaded; when the previous
+    //  length is 0 - as Seek() leaves it - a pending in-block offset is kept)
+    if ( m_blockLength != 0 )
+        m_blockOffset = 0;
+    m_blockAddress = blockAddress;
+    m_blockLength  = newBlockLength;
+}
+
+// moves the stream to the BGZF virtual offset @position
+// (does nothing if the stream is not open)
+// throws BamException if the device is not random-access or its seek fails
+void BgzfStream::Seek(const int64_t& position) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::Seek() - trying to seek on null IO device");
+
+    // nothing to seek on
+    if ( !IsOpen() )
+        return;
+
+    // virtual offset layout: bits above the low 16 = block address in the file,
+    // low 16 bits = offset within that block
+    const int64_t targetAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
+    const int     targetOffset  = (position & 0xFFFF);
+
+    // the device must both allow and succeed at the seek
+    const bool didSeek = m_device->IsRandomAccess() && m_device->Seek(targetAddress);
+    if ( !didSeek ) {
+        stringstream report("");
+        report << "unable to seek to position: " << position;
+        throw BamException("BgzfStream::Seek", report.str());
+    }
+
+    // no block is loaded yet at the new position
+    m_blockLength  = 0;
+    m_blockAddress = targetAddress;
+    m_blockOffset  = targetOffset;
+}
+
+// enables (true) or disables (false) compression of subsequently written blocks
+void BgzfStream::SetWriteCompressed(bool ok) {
+    this->m_isWriteCompressed = ok;
+}
+
+// returns the current BGZF virtual offset: block address shifted left 16 bits,
+// combined with the low 16 bits of the in-block offset (0 if the stream is not open)
+int64_t BgzfStream::Tell(void) const {
+    if ( IsOpen() )
+        return ( (m_blockAddress << 16) | (m_blockOffset & 0xFFFF) );
+    return 0;
+}
+
+// appends @dataLength bytes from @data to the uncompressed block buffer,
+// flushing (compressing & writing) each time the buffer fills up
+// returns the number of bytes accepted (0 if the stream is not open)
+size_t BgzfStream::Write(const char* data, const size_t dataLength) {
+
+    BT_ASSERT_X( m_device, "BgzfStream::Write() - trying to write to null IO device");
+    BT_ASSERT_X( (m_device->Mode() == IBamIODevice::WriteOnly),
+                 "BgzfStream::Write() - trying to write to non-writable IO device");
+
+    // nothing can be written to a closed stream
+    if ( !IsOpen() )
+        return 0;
+
+    const size_t blockCapacity = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+    const char* cursor = data;
+    size_t pending = dataLength;
+
+    while ( pending > 0 ) {
+
+        // take as much as still fits into the current block
+        const size_t room = blockCapacity - m_blockOffset;
+        const unsigned int chunk = min(room, pending);
+        memcpy(m_uncompressedBlock.Buffer + m_blockOffset, cursor, chunk);
+
+        m_blockOffset += chunk;
+        cursor        += chunk;
+        pending       -= chunk;
+
+        // block is full: compress it & push it out
+        if ( m_blockOffset == blockCapacity )
+            FlushBlock();
+    }
+
+    // everything requested has been accepted
+    return dataLength - pending;
+}
--- /dev/null
+// ***************************************************************************
+// BgzfStream_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Based on BGZF routines developed at the Broad Institute.
+// Provides the basic functionality for reading & writing BGZF files
+// Replaces the old BGZF.* files to avoid clashing with other toolkits
+// ***************************************************************************
+
+#ifndef BGZFSTREAM_P_H
+#define BGZFSTREAM_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include "api/BamAux.h"
+#include "api/IBamIODevice.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BgzfStream { // block-wise reader/writer for BGZF data on top of an IBamIODevice
+
+ // constructor & destructor
+ public:
+ BgzfStream(void);
+ ~BgzfStream(void);
+
+ // main interface methods
+ public:
+ // closes BGZF file (when writing: flushes pending data & appends an empty block as EOF marker)
+ void Close(void);
+ // returns true if BgzfStream open for IO
+ bool IsOpen(void) const;
+ // opens the BGZF file (closes any previous stream first; throws BamException on failure)
+ void Open(const std::string& filename, const IBamIODevice::OpenMode mode);
+ // reads BGZF data into a byte buffer, returns number of bytes actually read
+ size_t Read(char* data, const size_t dataLength);
+ // seek to position (a BGZF virtual offset) in BGZF file; throws BamException on failure
+ void Seek(const int64_t& position);
+ // sets IO device (closes previous, if any, but does not attempt to open)
+ void SetIODevice(IBamIODevice* device); // NOTE(review): no definition appears in BgzfStream_p.cpp as shown - confirm it is defined elsewhere
+ // enable/disable compressed output
+ void SetWriteCompressed(bool ok);
+ // get file position (as a BGZF virtual offset) in BGZF file; 0 if not open
+ int64_t Tell(void) const;
+ // writes the supplied data into the BGZF buffer, flushing each time the buffer fills
+ size_t Write(const char* data, const size_t dataLength);
+
+ // internal methods
+ private:
+ // compresses the current block, returns the compressed block's total length
+ size_t DeflateBlock(void);
+ // flushes the data in the BGZF block to the IO device
+ void FlushBlock(void);
+ // de-compresses the current block, returns the number of uncompressed bytes
+ size_t InflateBlock(const size_t& blockLength);
+ // reads a BGZF block from the IO device & decompresses it
+ void ReadBlock(void);
+
+ // static 'utility' methods
+ public:
+ // checks BGZF block header
+ static bool CheckBlockHeader(char* header);
+
+ // data members (public)
+ public:
+ unsigned int m_blockLength; // reading: number of uncompressed bytes in the current block
+ unsigned int m_blockOffset; // reading: next unread byte in the block; writing: number of bytes buffered
+ uint64_t m_blockAddress; // device position of the current block
+
+ bool m_isWriteCompressed; // true (default): DeflateBlock() uses Z_DEFAULT_COMPRESSION; false: level 0
+ IBamIODevice* m_device; // created in Open(), deleted in Close()
+
+ RaiiBuffer m_uncompressedBlock;
+ RaiiBuffer m_compressedBlock;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BGZFSTREAM_P_H
--- /dev/null
+// ***************************************************************************
+// ByteArray_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic, variable-length byte buffer
+// ***************************************************************************
+
+#include "api/internal/io/ByteArray_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <cstring>
+using namespace std;
+
+// --------------------------
+// ByteArray implementation
+// --------------------------
+
+// creates an empty byte array
+ByteArray::ByteArray(void)
+    : m_data()
+{ }
+
+// creates a byte array holding a copy of the characters in @value
+ByteArray::ByteArray(const string& value)
+    : m_data(value.begin(), value.end())
+{ }
+
+// creates a byte array holding a copy of the bytes in @value
+ByteArray::ByteArray(const vector<char>& value)
+    : m_data(value)
+{ }
+
+// creates a byte array holding a copy of the first @n bytes at @value
+ByteArray::ByteArray(const char* value, size_t n)
+    : m_data(value, value + n)
+{ }
+
+// copy constructor
+ByteArray::ByteArray(const ByteArray& other)
+    : m_data(other.m_data)
+{ }
+
+// destructor - the underlying vector cleans up after itself
+ByteArray::~ByteArray(void) { }
+
+// copy assignment - replaces our bytes with a copy of @other's
+ByteArray& ByteArray::operator=(const ByteArray& other) {
+    if ( this != &other )
+        m_data = other.m_data;
+    return *this;
+}
+
+// removes all bytes, leaving an empty array
+void ByteArray::Clear(void) {
+    m_data.erase(m_data.begin(), m_data.end());
+}
+
+// returns a read-only pointer to the first byte,
+// or a null pointer if the array is empty
+// (indexing element 0 of an empty vector is undefined behavior)
+const char* ByteArray::ConstData(void) const {
+    return ( m_data.empty() ? 0 : &m_data[0] );
+}
+
+// returns a writable pointer to the first byte,
+// or a null pointer if the array is empty
+// (indexing element 0 of an empty vector is undefined behavior)
+char* ByteArray::Data(void) {
+    return ( m_data.empty() ? 0 : &m_data[0] );
+}
+
+// read-only access to byte @i (no bounds checking)
+const char& ByteArray::operator[](size_t i) const {
+    return *(m_data.begin() + i);
+}
+
+// writable access to byte @i (no bounds checking)
+char& ByteArray::operator[](size_t i) {
+    return *(m_data.begin() + i);
+}
+
+// returns the index of the first occurrence of @c within [@from, @to),
+// or Size() if it does not occur there
+//
+// @to == 0 means 'search to the end'; a @to beyond the end is clamped to
+// the end as well (rather than running off the data and throwing)
+size_t ByteArray::IndexOf(const char c, const size_t from, const size_t to) const {
+    const size_t dataSize = m_data.size();
+    const size_t end = ( (to == 0 || to > dataSize) ? dataSize : to );
+    for ( size_t i = from; i < end; ++i ) {
+        if ( m_data[i] == c )
+            return i;
+    }
+    return dataSize;
+}
+
+// removes @n bytes starting at index @from, shifting any following bytes down
+// a range reaching the end (or beyond) simply truncates the array at @from
+// returns a reference to this (modified) byte array
+ByteArray& ByteArray::Remove(size_t from, size_t n) {
+
+    // if 'from' outside range, just return
+    const size_t originalSize = m_data.size();
+    if ( from >= originalSize )
+        return *this;
+
+    // if asked to clip from 'from' to end (or beyond), simply resize
+    // (compared as 'n >= bytes available' so that a huge n cannot overflow 'from + n')
+    const size_t available = originalSize - from;
+    if ( n >= available )
+        Resize(from);
+
+    // otherwise, shift the tail down over the removed range & resize
+    else {
+        memmove( &m_data[from], &m_data[from+n], (available - n) );
+        Resize(originalSize - n);
+    }
+
+    // return reference to modified byte array
+    return *this;
+}
+
+// resizes the array to @n bytes; any newly added bytes are zero
+void ByteArray::Resize(size_t n) {
+    const char fillByte = 0;
+    m_data.resize(n, fillByte);
+}
+
+// returns the number of bytes currently stored
+size_t ByteArray::Size(void) const {
+    const size_t numBytes = m_data.size();
+    return numBytes;
+}
+
+// swaps our storage with a fresh copy of the same bytes
+// (copy-and-swap, typically used to shed excess capacity)
+void ByteArray::Squeeze(void) {
+    vector<char>(m_data).swap(m_data);
+}
--- /dev/null
+// ***************************************************************************
+// ByteArray_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic, variable-length byte buffer
+// ***************************************************************************
+
+#ifndef BYTEARRAY_P_H
+#define BYTEARRAY_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// provides a wrapper around a byte vector (std::vector<char>), with a few search & edit helpers
+class ByteArray {
+
+ // constructors, destructor & copy assignment
+ public:
+ ByteArray(void);
+ ByteArray(const std::string& value);
+ ByteArray(const std::vector<char>& value);
+ ByteArray(const char* value, size_t n); // copies the first n bytes at value
+ ByteArray(const ByteArray& other);
+ ~ByteArray(void);
+
+ ByteArray& operator=(const ByteArray& other);
+
+ // ByteArray interface
+ public:
+
+ // data access
+ const char* ConstData(void) const; // address of the first byte
+ char* Data(void); // address of the first byte
+ const char& operator[](size_t i) const; // unchecked element access
+ char& operator[](size_t i); // unchecked element access
+
+ // byte array manipulation
+ void Clear(void);
+ size_t IndexOf(const char c, const size_t from = 0, const size_t to = 0) const; // to == 0 means 'search to end'; returns Size() when not found
+ ByteArray& Remove(size_t from, size_t n); // removes n bytes starting at from; returns *this
+ void Resize(size_t n); // new bytes are zero-filled
+ size_t Size(void) const;
+ void Squeeze(void); // replaces the storage with a fresh copy of itself
+
+ // data members
+ private:
+ std::vector<char> m_data;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BYTEARRAY_P_H
--- /dev/null
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/io
+# ==========================
+
+# directory holding the internal IO sources (relative to the parent's InternalDir)
+set ( InternalIODir "${InternalDir}/io" )
+
+#--------------------------
+# platform-independent IO
+#--------------------------
+set ( CommonIOSources
+ ${InternalIODir}/BamDeviceFactory_p.cpp
+ ${InternalIODir}/BamFile_p.cpp
+ ${InternalIODir}/BamFtp_p.cpp
+ ${InternalIODir}/BamHttp_p.cpp
+ ${InternalIODir}/BamPipe_p.cpp
+ ${InternalIODir}/BgzfStream_p.cpp
+ ${InternalIODir}/ByteArray_p.cpp
+ ${InternalIODir}/HostAddress_p.cpp
+ ${InternalIODir}/HostInfo_p.cpp
+ ${InternalIODir}/HttpHeader_p.cpp
+ ${InternalIODir}/ILocalIODevice_p.cpp
+ ${InternalIODir}/RollingBuffer_p.cpp
+ ${InternalIODir}/TcpSocket_p.cpp
+ ${InternalIODir}/TcpSocketEngine_p.cpp
+)
+
+#------------------------
+# platform-dependent IO
+#------------------------
+# NOTE: the CMake platform variable is WIN32 - '_WIN32' is the C/C++ preprocessor
+# macro, which CMake never defines, so testing it always selected the unix engine
+if ( WIN32 )
+ set ( PlatformIOSources
+ ${InternalIODir}/TcpSocketEngine_win_p.cpp
+ )
+else ( WIN32 )
+ set ( PlatformIOSources
+ ${InternalIODir}/TcpSocketEngine_unix_p.cpp
+ )
+endif ( WIN32 )
+
+#---------------------------
+# make build-specific list
+#---------------------------
+set ( InternalIOSources
+ ${CommonIOSources}
+ ${PlatformIOSources}
+
+ PARENT_SCOPE # <-- leave this last
+)
+
--- /dev/null
+// ***************************************************************************
+// HostAddress_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic IP address container
+// ***************************************************************************
+
+#include "api/internal/io/HostAddress_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cctype>
+#include <cstdlib>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace BamTools {
+namespace Internal {
+
+// splits @source into fields on the @delim character, returning them in order
+// (empty fields are kept, except that a trailing empty field is not reported)
+static inline
+vector<string> Split(const string& source, char delim) {
+    vector<string> tokens;
+    stringstream input(source);
+    for ( string token; getline(input, token, delim); )
+        tokens.push_back(token);
+    return tokens;
+}
+
+// returns the number of occurrences of @pattern in @source
+// (the search resumes one character after each hit, so overlapping hits are counted;
+//  the counter is only 8 bits wide)
+static inline
+uint8_t CountHits(const string& source, const string& pattern) {
+    uint8_t numHits(0);
+    for ( size_t pos = source.find(pattern);
+          pos != string::npos;
+          pos = source.find(pattern, pos+1) )
+    {
+        ++numHits;
+    }
+    return numHits;
+}
+
+// parses @address as a dotted-quad IPv4 address ("a.b.c.d", each part 0-255, decimal)
+//
+// on success stores the address in @maybeIp4 (first part in the most-significant
+// byte) and returns true; on failure returns false & leaves @maybeIp4 untouched
+static
+bool ParseIp4(const string& address, uint32_t& maybeIp4 ) {
+
+    // Split() does not report a trailing empty field, so "1.2.3.4." has to be
+    // caught explicitly
+    if ( address.empty() || address[address.size()-1] == '.' )
+        return false;
+
+    // split IP address into string fields
+    const vector<string> addressFields = Split(address, '.');
+    if ( addressFields.size() != 4 )
+        return false;
+
+    // convert each field to integer value
+    uint32_t ipv4(0);
+    for ( size_t i = 0; i < 4; ++i ) {
+
+        // an empty field ("1..2.3") is not a number
+        const string& field = addressFields[i];
+        const size_t fieldSize = field.size();
+        if ( fieldSize == 0 )
+            return false;
+
+        // accumulate digit by digit, bailing out as soon as the value leaves
+        // the 0-255 range (so arbitrarily long digit runs cannot overflow)
+        unsigned int value = 0;
+        for ( size_t j = 0; j < fieldSize; ++j ) {
+
+            // cast first: isdigit() is undefined for negative char values
+            if ( !isdigit(static_cast<unsigned char>(field[j])) )
+                return false;
+
+            value = value*10 + static_cast<unsigned int>(field[j] - '0');
+            if ( value > 255 )
+                return false;
+        }
+
+        // append byte value
+        ipv4 = (ipv4 << 8) | value;
+    }
+
+    // store 32-bit IP address & return success
+    maybeIp4 = ipv4;
+    return true;
+}
+
+// parses one ':'-separated run of IPv6 groups (either one side of a "::", or a
+// whole uncompressed address), appending each 16-bit group to @groups
+//
+// @allowIp4Tail - whether the final field may be a dotted-quad IPv4 address
+//                 (mixed notation), which contributes two groups
+// returns false if the run is malformed; an empty run is valid & adds nothing
+static
+bool ParseIp6Groups(const string& part, const bool allowIp4Tail, vector<uint16_t>& groups) {
+
+    // nothing to parse (e.g. the empty side of a leading or trailing "::")
+    if ( part.empty() )
+        return true;
+
+    // a stray leading or trailing ':' is malformed
+    // (checked explicitly, since Split() does not report a trailing empty field)
+    if ( part[0] == ':' || part[part.size()-1] == ':' )
+        return false;
+
+    const vector<string> fields = Split(part, ':');
+    const size_t numFields = fields.size();
+    for ( size_t i = 0; i < numFields; ++i ) {
+        const string& field = fields[i];
+
+        // an empty field here can only come from an extra "::"
+        if ( field.empty() )
+            return false;
+
+        // mixed IPv4/6 notation: dotted quad, only allowed as the very last field
+        if ( field.find('.') != string::npos ) {
+            if ( !allowIp4Tail || i != numFields - 1 )
+                return false;
+            uint32_t ip4(0);
+            if ( !ParseIp4(field, ip4) )
+                return false;
+            groups.push_back( static_cast<uint16_t>((ip4 >> 16) & 0xffff) );
+            groups.push_back( static_cast<uint16_t>(ip4 & 0xffff) );
+            continue;
+        }
+
+        // plain group: 1-4 hex digits and nothing else
+        // (checked by hand, as strtoul() would also accept signs, blanks & "0x")
+        const size_t fieldSize = field.size();
+        if ( fieldSize > 4 )
+            return false;
+        for ( size_t j = 0; j < fieldSize; ++j ) {
+            if ( !isxdigit(static_cast<unsigned char>(field[j])) )
+                return false;
+        }
+        groups.push_back( static_cast<uint16_t>( strtoul(field.c_str(), 0, 16) ) );
+    }
+    return true;
+}
+
+// parses @address as a textual IPv6 address: full or "::"-compressed form,
+// optionally ending in a dotted-quad IPv4 part, optionally followed by a
+// '%'-introduced zone index (which is ignored)
+//
+// on success writes the 16 address bytes (network byte order) to @maybeIp6 and
+// returns true; on failure returns false & leaves @maybeIp6 untouched
+static
+bool ParseIp6(const string& address, uint8_t* maybeIp6 ) {
+
+    string tmp = address;
+
+    // look for '%' char (if found, lop off that part of address)
+    // we're going to ignore any link-local zone index, for now at least
+    const size_t percentFound = tmp.rfind('%');
+    if ( percentFound != string::npos )
+        tmp = tmp.substr(0, percentFound);
+
+    // every IPv6 address contains at least one ':'
+    // (this also rejects plain hostnames & bare IPv4 addresses right away)
+    if ( tmp.find(':') == string::npos )
+        return false;
+
+    // locate the zero-compression marker; a second one is caught below,
+    // where it shows up as an empty or ':'-led field in the tail
+    const size_t compressAt = tmp.find("::");
+    const bool isCompressed = ( compressAt != string::npos );
+
+    // parse the groups before & after the marker (or the whole address)
+    vector<uint16_t> head;
+    vector<uint16_t> tail;
+    if ( isCompressed ) {
+        if ( !ParseIp6Groups(tmp.substr(0, compressAt), false, head) )
+            return false;
+        if ( !ParseIp6Groups(tmp.substr(compressAt + 2), true, tail) )
+            return false;
+    } else {
+        if ( !ParseIp6Groups(tmp, true, head) )
+            return false;
+    }
+
+    // an uncompressed address has exactly 8 groups;
+    // "::" must stand in for at least one group
+    const size_t numGroups = head.size() + tail.size();
+    if ( isCompressed ? (numGroups > 7) : (numGroups != 8) )
+        return false;
+
+    // lay out: leading groups, zero fill, trailing groups
+    uint16_t groups[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+    for ( size_t i = 0; i < head.size(); ++i )
+        groups[i] = head[i];
+    const size_t tailStart = 8 - tail.size();
+    for ( size_t i = 0; i < tail.size(); ++i )
+        groups[tailStart + i] = tail[i];
+
+    // store in network byte order (high byte of each group first)
+    for ( size_t i = 0; i < 8; ++i ) {
+        maybeIp6[2*i]     = static_cast<uint8_t>( (groups[i] >> 8) & 0xff );
+        maybeIp6[2*i + 1] = static_cast<uint8_t>( groups[i] & 0xff );
+    }
+
+    // parsed OK, return success
+    return true;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// ----------------------------
+// HostAddress implementation
+// ----------------------------
+
+HostAddress::HostAddress(void) // default: unknown protocol, zero IPv4 address
+ : m_protocol(HostAddress::UnknownNetworkProtocol)
+ , m_ip4Address(0)
+ , m_hasIpAddress(true)
+{ }
+
+HostAddress::HostAddress(const uint32_t ip4Address) // starts from defaults, then delegates to SetAddress()
+ : m_protocol(HostAddress::UnknownNetworkProtocol)
+ , m_ip4Address(0)
+ , m_hasIpAddress(true)
+{
+ SetAddress(ip4Address);
+}
+
+HostAddress::HostAddress(const uint8_t* ip6Address) // starts from defaults, then delegates to SetAddress()
+ : m_protocol(HostAddress::UnknownNetworkProtocol)
+ , m_ip4Address(0)
+ , m_hasIpAddress(true)
+{
+ SetAddress(ip6Address);
+}
+
+HostAddress::HostAddress(const IPv6Address& ip6Address) // starts from defaults, then delegates to SetAddress()
+ : m_protocol(HostAddress::UnknownNetworkProtocol)
+ , m_ip4Address(0)
+ , m_hasIpAddress(true)
+{
+ SetAddress(ip6Address);
+}
+
+HostAddress::HostAddress(const std::string& address) // NOTE(review): m_hasIpAddress is not initialized in this list; presumably SetAddress(string) assigns it - confirm
+ : m_protocol(HostAddress::UnknownNetworkProtocol)
+ , m_ip4Address(0)
+{
+ SetAddress(address);
+}
+
+HostAddress::HostAddress(const HostAddress& other) // member-wise copy of all five members
+ : m_protocol(other.m_protocol)
+ , m_ip4Address(other.m_ip4Address)
+ , m_ip6Address(other.m_ip6Address)
+ , m_ipString(other.m_ipString)
+ , m_hasIpAddress(other.m_hasIpAddress)
+{ }
+
+HostAddress::~HostAddress(void) { }
+
+// Equality: two addresses match when they use the same protocol and, for
+// IPv4/IPv6, carry the same address value. Two 'unknown protocol' addresses
+// compare equal regardless of any stored string.
+bool HostAddress::operator==(const HostAddress& other) const {
+
+    // different protocols can never match
+    if ( m_protocol != other.m_protocol )
+        return false;
+
+    // same protocol on both sides, so compare the payload that protocol uses
+    switch ( m_protocol ) {
+
+        case HostAddress::IPv4Protocol :
+            return m_ip4Address == other.m_ip4Address;
+
+        case HostAddress::IPv6Protocol :
+            return memcmp(&m_ip6Address, &other.m_ip6Address, sizeof(IPv6Address)) == 0;
+
+        // both unknown: nothing further to compare
+        default :
+            return true;
+    }
+}
+
+// Strict ordering used by ordered containers (e.g. std::set<HostAddress>).
+//  - two IPv4 addresses are ordered by numeric value
+//  - two IPv6 addresses are ordered by byte-wise comparison
+//  - anything else is ordered by protocol enum value
+bool HostAddress::operator<(const HostAddress& other) const {
+
+    // if self is IPv4
+    if ( m_protocol == HostAddress::IPv4Protocol ) {
+        if ( other.m_protocol == HostAddress::IPv4Protocol )
+            return m_ip4Address < other.m_ip4Address;  // must compare against 'other', not against self
+    }
+
+    // if self is IPv6
+    else if ( m_protocol == HostAddress::IPv6Protocol ) {
+        if ( other.m_protocol == HostAddress::IPv6Protocol )
+            return (memcmp(&m_ip6Address, &other.m_ip6Address, sizeof(IPv6Address)) < 0);
+    }
+
+    // otherwise compare protocol types
+    return m_protocol < other.m_protocol;
+}
+
+// Resets this object to the same state a default-constructed address has.
+void HostAddress::Clear(void) {
+
+    // wipe every stored representation of the address
+    m_ipString.clear();
+    memset(&m_ip6Address, 0, sizeof(IPv6Address));
+    m_ip4Address = 0;
+    m_protocol   = HostAddress::UnknownNetworkProtocol;
+
+    // The flag is deliberately left 'true' here: a cleared IP (equivalent to
+    // '0.0.0.0') is technically valid, and validity is not what the flag tracks.
+    // It is false *iff* the string passed in was a plain-text hostname (www.foo.bar).
+    m_hasIpAddress = true;
+}
+
+bool HostAddress::HasIPAddress(void) const { // false only after SetAddress(string) could not parse its input as an IP
+ return m_hasIpAddress;
+}
+
+bool HostAddress::IsNull(void) const { // true while no IPv4/IPv6 protocol has been assigned
+ return m_protocol == HostAddress::UnknownNetworkProtocol;
+}
+
+uint32_t HostAddress::GetIPv4Address(void) const { // returns the stored numeric IPv4 value (no protocol check is made)
+ return m_ip4Address;
+}
+
+IPv6Address HostAddress::GetIPv6Address(void) const { // returns a copy of the stored IPv6 bytes (no protocol check is made)
+ return m_ip6Address;
+}
+
+// Formats the stored address as text:
+//  - IPv4: dotted decimal, most significant byte first ("a.b.c.d")
+//  - IPv6: eight colon-separated 16-bit groups in hex (no zero compression)
+//  - unknown protocol: empty string
+std::string HostAddress::GetIPString(void) const {
+
+    stringstream text("");
+
+    switch ( m_protocol ) {
+
+        case HostAddress::IPv4Protocol :
+            // walk the four bytes from the top of the 32-bit value downwards
+            for ( int shift = 24; shift >= 0; shift -= 8 ) {
+                if ( shift != 24 )
+                    text << '.';
+                text << ( (m_ip4Address >> shift) & 0xff );
+            }
+            break;
+
+        case HostAddress::IPv6Protocol :
+            // 'hex' is sticky, so setting it once covers every group
+            text << hex;
+            for ( size_t group = 0; group < 8; ++group ) {
+                if ( group != 0 )
+                    text << ':';
+                const uint16_t high = m_ip6Address[2*group];
+                const uint16_t low  = m_ip6Address[2*group + 1];
+                text << ( (high << 8) | low );
+            }
+            break;
+
+        // unknown protocol: leave the stream empty
+        default :
+            break;
+    }
+
+    return text.str();
+}
+
+HostAddress::NetworkProtocol HostAddress::GetProtocol(void) const { // returns the protocol currently assigned to this address
+ return m_protocol;
+}
+
+// Tries to interpret m_ipString as an IP address, IPv6 first and then IPv4.
+// On success the parsed address and its protocol are stored and true is returned.
+// On failure the protocol is reset to 'unknown' and false is returned; the
+// string is then assumed to be a plain host name that needs a later lookup.
+bool HostAddress::ParseAddress(void) {
+
+    string candidate(m_ipString);
+
+    // every IPv6 address contains at least one ':'
+    if ( candidate.find(':') != string::npos ) {
+        uint8_t ip6Bytes[16];
+        if ( ParseIp6(candidate, ip6Bytes) ) {
+            SetAddress(ip6Bytes);
+            m_protocol = HostAddress::IPv6Protocol;
+            return true;
+        }
+    }
+
+    // every IPv4 address contains at least one '.'
+    if ( candidate.find('.') != string::npos ) {
+        uint32_t ip4Value(0);
+        if ( ParseIp4(candidate, ip4Value) ) {
+            SetAddress(ip4Value);
+            m_protocol = HostAddress::IPv4Protocol;
+            return true;
+        }
+    }
+
+    // neither form parsed: likely a host name such as "www.foo.bar"
+    m_protocol = HostAddress::UnknownNetworkProtocol;
+    return false;
+}
+
+void HostAddress::SetAddress(const uint32_t ip4Address) { // stores a numeric IPv4 value and marks this address as IPv4
+ m_ip4Address = ip4Address;
+ m_protocol = HostAddress::IPv4Protocol;
+ m_hasIpAddress = true; // a numeric address is an IP by definition
+}
+
+// Stores an IPv6 address given as raw bytes and marks this address as IPv6.
+// Exactly 16 bytes are read from 'ip6Address'; the caller must supply that many.
+void HostAddress::SetAddress(const uint8_t* ip6Address) {
+    for ( uint8_t i = 0; i < 16; ++i )
+        m_ip6Address[i] = ip6Address[i];
+
+    // drop any IPv4 value left over from an earlier assignment, matching
+    // what the SetAddress(const IPv6Address&) overload does
+    m_ip4Address = 0;
+
+    m_protocol = HostAddress::IPv6Protocol;
+    m_hasIpAddress = true;
+}
+
+void HostAddress::SetAddress(const IPv6Address& ip6Address) { // stores an IPv6 container and marks this address as IPv6
+ m_ip6Address = ip6Address;
+ m_ip4Address = 0; // discard any previously stored IPv4 value
+ m_protocol = HostAddress::IPv6Protocol;
+ m_hasIpAddress = true;
+}
+
+void HostAddress::SetAddress(const std::string& address) { // stores the text, then tries to parse it as an IP address
+ m_ipString = address;
+ m_hasIpAddress = ParseAddress(); // false when the text is not an IP string (e.g. a plain host name)
+}
--- /dev/null
+// ***************************************************************************
+// HostAddress_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic IP address container
+// ***************************************************************************
+
+#ifndef HOSTADDRESS_P_H
+#define HOSTADDRESS_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include <cstring>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+struct IPv6Address { // fixed-size container for the 16 bytes of an IPv6 address
+
+ // ctor - zero-fills all 16 bytes
+ inline IPv6Address(void) { memset(&data, 0, sizeof(uint8_t)*16); }
+
+ // data access (no bounds checking) - index must be in [0, 15]
+ inline uint8_t& operator[](size_t index) { return data[index]; }
+ inline uint8_t operator[](size_t index) const { return data[index]; }
+
+ // data - raw address bytes
+ uint8_t data[16];
+};
+
+class HostAddress { // holds one network address: IPv4 value, IPv6 bytes, or the original text when it is not an IP
+
+ // enums
+ public:
+ enum NetworkProtocol { UnknownNetworkProtocol = -1
+ , IPv4Protocol = 0
+ , IPv6Protocol
+ };
+
+ // ctors & dtor
+ public:
+ HostAddress(void); // unknown protocol, no address
+ explicit HostAddress(const uint32_t ip4Address); // from numeric IPv4 value
+ explicit HostAddress(const uint8_t* ip6Address); // from 16 raw IPv6 bytes
+ explicit HostAddress(const IPv6Address& ip6Address); // from IPv6 container
+ explicit HostAddress(const std::string& address); // from text; parsed as IPv6, then IPv4
+ HostAddress(const HostAddress& other);
+ ~HostAddress(void);
+
+ // HostAddress interface
+ public:
+ void Clear(void); // resets to the default-constructed state
+ bool HasIPAddress(void) const; // returns whether string address could be converted to IP address
+ bool IsNull(void) const; // true while protocol is UnknownNetworkProtocol
+
+ uint32_t GetIPv4Address(void) const; // stored IPv4 value (not checked against protocol)
+ IPv6Address GetIPv6Address(void) const; // copy of stored IPv6 bytes (not checked against protocol)
+ std::string GetIPString(void) const; // text form of the address; empty if protocol is unknown
+ HostAddress::NetworkProtocol GetProtocol(void) const;
+
+ void SetAddress(const uint32_t ip4Address); // sets IPv4 value & protocol
+ void SetAddress(const uint8_t* ip6Address); // sets IPv6 bytes (reads 16) & protocol
+ void SetAddress(const IPv6Address& ip6Address); // sets IPv6 bytes & protocol
+ void SetAddress(const std::string& address); // stores text and attempts to parse it as an IP
+
+ // HostAddress comparison operators
+ public:
+ bool operator==(const HostAddress& other) const;
+ bool operator!=(const HostAddress& other) const { return !( operator==(other) ); }
+ bool operator<(const HostAddress& other) const; // ordering for use in sorted containers
+
+ // internal methods
+ private:
+ bool ParseAddress(void); // parses m_ipString; returns true if it was an IP string
+
+ // data members
+ private:
+ HostAddress::NetworkProtocol m_protocol;
+ uint32_t m_ip4Address;
+ IPv6Address m_ip6Address;
+ std::string m_ipString; // text last passed to SetAddress(string)
+ bool m_hasIpAddress; // true until string passed in, then signifies whether string was an IP
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // HOSTADDRESS_P_H
--- /dev/null
+// ***************************************************************************
+// HostInfo_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides DNS lookup functionality for hostname & its discovered addresses
+// ***************************************************************************
+
+#include "api/internal/io/HostInfo_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+// platform-specifics
+#ifdef _WIN32
+# include "api/internal/io/NetWin_p.h"
+#else
+# include "api/internal/io/NetUnix_p.h"
+#endif
+
+// standard C++ includes
+#include <cstdlib>
+#include <cstring>
+#include <set>
+using namespace std;
+
+// -------------------------
+// HostInfo implementation
+// -------------------------
+
+HostInfo::HostInfo(void) // default ctor: empty host name/address list, error state NoError
+ : m_error(HostInfo::NoError)
+{ }
+
+HostInfo::HostInfo(const HostInfo& other) // copy ctor: member-wise copy of every data member
+ : m_hostName(other.m_hostName)
+ , m_addresses(other.m_addresses)
+ , m_error(other.m_error)
+ , m_errorString(other.m_errorString)
+{ }
+
+HostInfo::~HostInfo(void) { } // nothing to release
+
+vector<HostAddress> HostInfo::Addresses(void) const { // returns a copy of the stored address list
+ return m_addresses;
+}
+
+HostInfo::ErrorType HostInfo::GetError(void) const { // returns the stored error code (NoError unless SetError was called)
+ return m_error;
+}
+
+string HostInfo::GetErrorString(void) const { // returns the stored error description (empty unless SetErrorString was called)
+ return m_errorString;
+}
+
+string HostInfo::HostName(void) const { // returns the stored host name
+ return m_hostName;
+}
+
+void HostInfo::SetAddresses(const std::vector<HostAddress>& addresses) { // replaces the stored address list
+ m_addresses = addresses;
+}
+
+void HostInfo::SetError(const HostInfo::ErrorType error) { // replaces the stored error code
+ m_error = error;
+}
+
+void HostInfo::SetErrorString(const std::string& errorString) { // replaces the stored error description
+ m_errorString = errorString;
+}
+
+void HostInfo::SetHostName(const string& name) { // replaces the stored host name
+ m_hostName = name;
+}
+
+// ---------------------------------
+// HostInfo::Lookup(host, port)
+// - the real "heavy-lifter" here
+// ---------------------------------
+
+// Resolves 'hostname' (an IP string or a domain name) for the given 'port'.
+//
+//  - IP string: attempts a reverse lookup for a host name via getnameinfo(),
+//    falling back to the IP string itself, and stores the parsed address.
+//  - domain name: runs getaddrinfo() and stores every unique IPv4/IPv6
+//    address it returns.
+//
+// Failures are reported through the returned object's GetError() and
+// GetErrorString(); this method does not throw on lookup failure.
+HostInfo HostInfo::Lookup(const string& hostname, const string& port) {
+
+    HostInfo result;
+    set<HostAddress> uniqueAddresses;
+
+#ifdef _WIN32
+    WindowsSockInit init;
+#endif
+
+    HostAddress address;
+    address.SetAddress(hostname);
+
+    // if hostname is an IP string ('0.0.0.0' or IPv6 format)
+    // do reverse lookup for host domain name
+    //
+    // TODO: might just remove this... not sure if proper 'hostname' from IP string is needed
+    //
+    //       so far, haven't been able to successfully fetch a domain name with reverse DNS
+    //       getnameinfo() on test sites just returns original IP string. BUT this is likely a rare
+    //       case that client code tries to use an IP string and the connection should work fine
+    //       anyway. GetHostName() just won't quite show what I was hoping for. :(
+    if ( address.HasIPAddress() ) {
+
+        const uint16_t portNum = static_cast<uint16_t>( atoi(port.c_str()) );
+
+        sockaddr_in  sa4;
+        sockaddr_in6 sa6;
+        sockaddr* sa = 0;
+        BT_SOCKLEN_T saSize = 0;
+
+        // IPv4
+        if ( address.GetProtocol() == HostAddress::IPv4Protocol ) {
+            sa = (sockaddr*)&sa4;
+            saSize = sizeof(sa4);
+            memset(&sa4, 0, sizeof(sa4));
+            sa4.sin_family = AF_INET;
+            sa4.sin_addr.s_addr = htonl(address.GetIPv4Address());
+            sa4.sin_port = htons(portNum);
+        }
+
+        // IPv6 (must test for the IPv6 protocol; testing IPv4 again made this branch unreachable)
+        else if ( address.GetProtocol() == HostAddress::IPv6Protocol ) {
+            sa = (sockaddr*)&sa6;
+            saSize = sizeof(sa6);
+            memset(&sa6, 0, sizeof(sa6));
+            sa6.sin6_family = AF_INET6;
+            memcpy(sa6.sin6_addr.s6_addr, address.GetIPv6Address().data, sizeof(sa6.sin6_addr.s6_addr));
+            sa6.sin6_port = htons(portNum);
+        }
+
+        // unknown (should be unreachable)
+        else BT_ASSERT_X(false, "HostInfo::Lookup: unknown network protocol");
+
+        // lookup name for IP
+        char hbuf[NI_MAXHOST];
+        char serv[NI_MAXSERV];
+        if ( sa && (getnameinfo(sa, saSize, hbuf, sizeof(hbuf), serv, sizeof(serv), 0) == 0) )
+            result.SetHostName(string(hbuf));
+
+        // if no domain name found, just use the original address's IP string
+        if ( result.HostName().empty() )
+            result.SetHostName(address.GetIPString());
+
+        // store address in HostInfo
+        uniqueAddresses.insert(address);
+    }
+
+    // otherwise, hostname is a domain name ('www.foo.bar')
+    // do 'normal' lookup
+    else {
+
+        // setup address lookup 'hints'
+        addrinfo hints;
+        memset(&hints, 0, sizeof(hints));
+        hints.ai_family   = AF_UNSPEC;   // allow either IPv4 or IPv6
+        hints.ai_socktype = SOCK_STREAM; // for TCP
+        hints.ai_protocol = IPPROTO_TCP;
+
+        // fetch addresses for requested hostname/port
+        // ('res' starts out null so the cleanup below is safe if the lookup fails)
+        addrinfo* res = 0;
+        int status = getaddrinfo(hostname.c_str(), port.c_str(), &hints, &res );
+
+        // if everything OK
+        if ( status == 0 ) {
+
+            // iterate over all IP addresses found
+            for ( addrinfo* p = res; p != NULL; p = p->ai_next ) {
+
+                // IPv4
+                if ( p->ai_family == AF_INET ) {
+                    sockaddr_in* ipv4 = (sockaddr_in*)p->ai_addr;
+                    HostAddress a( ntohl(ipv4->sin_addr.s_addr) );
+                    uniqueAddresses.insert(a);
+                }
+
+                // IPv6
+                else if ( p->ai_family == AF_INET6 ) {
+                    sockaddr_in6* ipv6 = (sockaddr_in6*)p->ai_addr;
+                    HostAddress a(ipv6->sin6_addr.s6_addr);
+                    uniqueAddresses.insert(a);
+                }
+            }
+
+            // if we iterated, but no addresses were stored
+            if ( uniqueAddresses.empty() ) {
+                result.SetError(HostInfo::UnknownError);
+                result.SetErrorString("HostInfo: unknown address types found");
+            }
+        }
+
+        // handle error cases
+        else if (
+#ifndef _WIN32
+                     status == EAI_NONAME
+                  || status == EAI_FAIL
+#  ifdef EAI_NODATA
+                  || status == EAI_NODATA  // officially deprecated, but just in case we happen to hit it
+#  endif // EAI_NODATA
+
+#else  // _WIN32
+                     WSAGetLastError() == WSAHOST_NOT_FOUND
+                  || WSAGetLastError() == WSANO_DATA
+                  || WSAGetLastError() == WSANO_RECOVERY
+#endif // _WIN32
+                )
+        {
+            result.SetError(HostInfo::HostNotFound);
+            result.SetErrorString("HostInfo: host not found");
+        }
+        else {
+            result.SetError(HostInfo::UnknownError);
+            result.SetErrorString("HostInfo: unknown error encountered");
+        }
+
+        // cleanup - only release a list that getaddrinfo() actually produced
+        if ( res )
+            freeaddrinfo(res);
+    }
+
+    // store fetched addresses (converting set -> vector) in result & return
+    result.SetAddresses( vector<HostAddress>(uniqueAddresses.begin(), uniqueAddresses.end()) );
+    return result;
+}
--- /dev/null
+// ***************************************************************************
+// HostInfo_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides DNS lookup functionality for hostname/IP addresses
+// ***************************************************************************
+
+#ifndef HOSTINFO_P_H
+#define HOSTINFO_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/HostAddress_p.h"
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+class HostInfo { // result of a host lookup: host name, its addresses, and any error encountered
+
+ public:
+ enum ErrorType { NoError = 0
+ , HostNotFound
+ , UnknownError
+ };
+
+ // ctors & dtor
+ public:
+ HostInfo(void); // empty result with error state NoError
+ HostInfo(const HostInfo& other);
+ ~HostInfo(void);
+
+ // HostInfo interface
+ public:
+ std::string HostName(void) const;
+ void SetHostName(const std::string& name);
+
+ std::vector<HostAddress> Addresses(void) const; // returns a copy of the address list
+ void SetAddresses(const std::vector<HostAddress>& addresses);
+
+ HostInfo::ErrorType GetError(void) const;
+ std::string GetErrorString(void) const;
+
+ // internal methods
+ private:
+ void SetError(const HostInfo::ErrorType error);
+ void SetErrorString(const std::string& errorString);
+
+ // static methods
+ public:
+ static HostInfo Lookup(const std::string& hostname, // IP string or domain name
+ const std::string& port); // port/service, passed to the resolver as text
+
+ // data members
+ private:
+ std::string m_hostName;
+ std::vector<HostAddress> m_addresses;
+ HostInfo::ErrorType m_error;
+ std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // HOSTINFO_P_H
--- /dev/null
+// ***************************************************************************
+// HttpHeader_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic interface for parsing/generating HTTP headers, along
+// with specialized request & response header types
+// ***************************************************************************
+
+#include "api/internal/io/HttpHeader_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdlib>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+namespace BamTools {
+
+// -----------
+// constants
+// -----------
+
+namespace Constants { // character & string constants used when parsing/writing HTTP header text
+
+static const char CAR_RET_CHAR = '\r'; // stripped from the end of each header line while parsing
+static const char COLON_CHAR = ':'; // separates a field's key from its value
+static const char DOT_CHAR = '.'; // separates major/minor version numbers
+static const char NEWLINE_CHAR = '\n'; // header text is split into lines on this
+static const char SPACE_CHAR = ' '; // separates the tokens of a request/status line
+static const char TAB_CHAR = '\t';
+
+static const string FIELD_NEWLINE = "\r\n"; // line terminator written after each generated line
+static const string FIELD_SEPARATOR = ": "; // written between key and value
+static const string HTTP_STRING = "HTTP/"; // prefix of the version token
+
+} // namespace Constants
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace Internal {
+
+// Returns true if 'c' counts as whitespace for header parsing:
+// NUL, the ASCII control whitespace range (tab, newline, vertical tab,
+// form feed, carriage return), or the ordinary space character.
+static inline
+bool IsSpace(const char c) {
+    const int n = static_cast<int>(c);
+    return ( n == 0 || n == 32 || (n <= 13 && n >= 9) );  // 32 == ' ', previously not recognized
+}
+
+// split on hitting single char delim
+// Splits 'source' into the pieces found between occurrences of 'delim'.
+// Empty pieces between adjacent delimiters are kept; an empty piece after a
+// trailing delimiter is not produced. An empty 'source' yields no pieces.
+static vector<string> Split(const string& source, const char delim) {
+    vector<string> pieces;
+    stringstream stream(source);
+    for ( string piece; getline(stream, piece, delim); )
+        pieces.push_back(piece);
+    return pieces;
+}
+
+// Returns a copy of 'source' with leading and trailing whitespace (as defined
+// by IsSpace) removed. Interior whitespace is left untouched. An empty or
+// all-whitespace input yields an empty string.
+//
+// This version handles two inputs the earlier pointer/length arithmetic got
+// wrong: a lone whitespace character (was returned untrimmed) and a run of
+// three or more whitespace characters (the computed length wrapped around).
+static string Trim(const string& source) {
+
+    const size_t size = source.size();
+
+    // skip leading whitespace
+    size_t start = 0;
+    while ( (start < size) && IsSpace(source[start]) )
+        ++start;
+
+    // nothing but whitespace (or nothing at all)
+    if ( start == size )
+        return string();
+
+    // back up over trailing whitespace; 'end' is one past the last kept char
+    // (source[start] is known to be non-space, so this cannot pass 'start')
+    size_t end = size;
+    while ( IsSpace(source[end-1]) )
+        --end;
+
+    // return result
+    return source.substr(start, end - start);
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+// ---------------------------
+// HttpHeader implementation
+// ---------------------------
+
+HttpHeader::HttpHeader(void) // default ctor: valid, empty header using HTTP version 1.1
+ : m_isValid(true)
+ , m_majorVersion(1)
+ , m_minorVersion(1)
+{ }
+
+HttpHeader::HttpHeader(const string& s) // builds a header by parsing raw header text
+ : m_isValid(true)
+ , m_majorVersion(1)
+ , m_minorVersion(1)
+{
+ Parse(s); // clears m_isValid if any line fails to parse
+}
+
+HttpHeader::~HttpHeader(void) { } // nothing to release
+
+bool HttpHeader::ContainsKey(const string& key) const { // true if a field with exactly this key is stored
+ return ( m_fields.find(key) != m_fields.end() );
+}
+
+int HttpHeader::GetMajorVersion(void) const { // returns the stored HTTP major version number
+ return m_majorVersion;
+}
+
+int HttpHeader::GetMinorVersion(void) const { // returns the stored HTTP minor version number
+ return m_minorVersion;
+}
+
+// Returns the value stored for 'key', or an empty string if no such field exists.
+string HttpHeader::GetValue(const string& key) const {
+
+    // single lookup: find() both tests for presence and yields the value
+    const map<string, string>::const_iterator found = m_fields.find(key);
+    return ( found != m_fields.end() ) ? found->second : string();
+}
+
+bool HttpHeader::IsValid(void) const { // false once Parse() hit a line it could not parse, or after SetValid(false)
+ return m_isValid;
+}
+
+// Parses raw header text, one line at a time.
+//
+// The text is trimmed and split on '\n'. Empty lines are skipped, and one
+// trailing '\r' is removed from each remaining line before it is handed to
+// ParseLine() together with its 0-based index among the kept lines.
+// Parsing stops at the first line ParseLine() rejects, and the header is
+// then marked invalid.
+void HttpHeader::Parse(const string& s) {
+
+    const vector<string> lines = Split(Trim(s), Constants::NEWLINE_CHAR);
+
+    int lineNumber = 0;
+    for ( size_t i = 0; i < lines.size(); ++i ) {
+
+        // lines that arrive empty are not counted
+        string line = lines[i];
+        if ( line.empty() )
+            continue;
+
+        // drop a windows-style carriage return left over from the split
+        const size_t lastIndex = line.size() - 1;
+        if ( line[lastIndex] == Constants::CAR_RET_CHAR )
+            line.resize(lastIndex);
+
+        // first rejected line invalidates the whole header
+        if ( !ParseLine(line, lineNumber) ) {
+            m_isValid = false;
+            return;
+        }
+        ++lineNumber;
+    }
+}
+
+// Parses one "key: value" header line and stores it as a field.
+// The line number is ignored here. Returns false if the line has no ':'.
+// Everything before the first ':' becomes the key and everything after it
+// the value; both are passed through Trim() before being stored.
+bool HttpHeader::ParseLine(const string& line, int) {
+
+    const size_t separator = line.find(Constants::COLON_CHAR);
+    if ( separator != string::npos ) {
+        const string fieldName  = Trim( line.substr(0, separator) );
+        const string fieldValue = Trim( line.substr(separator + 1) );
+        m_fields[fieldName] = fieldValue;
+        return true;
+    }
+
+    // no separator, so this is not a header field
+    return false;
+}
+
+void HttpHeader::RemoveField(const string& key) { // deletes the field with this key, if present
+ m_fields.erase(key);
+}
+
+void HttpHeader::SetField(const string& key, const string& value) { // adds the field, or overwrites its value if the key exists
+ m_fields[key] = value;
+}
+
+void HttpHeader::SetValid(bool ok) { // overrides the validity flag reported by IsValid()
+ m_isValid = ok;
+}
+
+void HttpHeader::SetVersion(int major, int minor) { // stores the HTTP version numbers (no range checking)
+ m_majorVersion = major;
+ m_minorVersion = minor;
+}
+
+// Renders the stored fields as "key: value\r\n" lines, in the map's key order.
+// Returns an empty string if the header is marked invalid.
+string HttpHeader::ToString(void) const {
+
+    string text("");
+    if ( !m_isValid )
+        return text;
+
+    typedef map<string, string>::const_iterator FieldIterator;
+    for ( FieldIterator it = m_fields.begin(); it != m_fields.end(); ++it ) {
+        text += it->first;
+        text += Constants::FIELD_SEPARATOR;
+        text += it->second;
+        text += Constants::FIELD_NEWLINE;
+    }
+    return text;
+}
+
+// ----------------------------------
+// HttpRequestHeader implementation
+// ----------------------------------
+
+HttpRequestHeader::HttpRequestHeader(const string& method, // request method text, stored as given
+ const string& resource, // requested resource text, stored as given
+ int majorVersion, // HTTP major version
+ int minorVersion) // HTTP minor version
+ : HttpHeader()
+ , m_method(method)
+ , m_resource(resource)
+{
+ SetVersion(majorVersion, minorVersion); // replaces the base class's default version
+}
+
+HttpRequestHeader::~HttpRequestHeader(void) { } // nothing to release
+
+string HttpRequestHeader::GetMethod(void) const { // returns the stored request method
+ return m_method;
+}
+
+string HttpRequestHeader::GetResource(void) const { // returns the stored resource
+ return m_resource;
+}
+
+// Parses one header line.
+//
+// Line 0 is the request line ("GET /path/to/resource HTTP/1.1"): its method,
+// resource and version are extracted. Every other line is handed to the base
+// class as a "key: value" field.
+//
+// Returns false if the request line is malformed. In that case no member is
+// modified, because all validation happens before anything is stored.
+bool HttpRequestHeader::ParseLine(const string& line, int lineNumber) {
+
+    // if not 'request line', just let base class parse
+    if ( lineNumber != 0 )
+        return HttpHeader::ParseLine(line, lineNumber);
+
+    // fail if empty line
+    if ( line.empty() )
+        return false;
+
+    // walk through request line, storing positions
+    //    GET /path/to/resource HTTP/1.1
+    //    ^  ^^                ^^
+    const size_t foundMethod = line.find_first_not_of(Constants::SPACE_CHAR); // skip any leading whitespace
+    if ( foundMethod == string::npos ) return false;
+    const size_t foundFirstSpace = line.find(Constants::SPACE_CHAR, foundMethod+1);
+    if ( foundFirstSpace == string::npos ) return false;
+    const size_t foundResource = line.find_first_not_of(Constants::SPACE_CHAR, foundFirstSpace+1);
+    if ( foundResource == string::npos ) return false;
+    const size_t foundSecondSpace = line.find(Constants::SPACE_CHAR, foundResource+1);
+    if ( foundSecondSpace == string::npos ) return false;
+    const size_t foundVersion = line.find_first_not_of(Constants::SPACE_CHAR, foundSecondSpace+1);
+    if ( foundVersion == string::npos ) return false;
+
+    // version token must be exactly "HTTP/<digit>.<digit>"
+    const string version = line.substr(foundVersion);
+    if ( (version.find(Constants::HTTP_STRING) != 0) || (version.size() != 8) )
+        return false;
+    const char majorChar = version[5];
+    const char minorChar = version[7];
+    if ( version[6] != Constants::DOT_CHAR ||
+         majorChar < '0' || majorChar > '9' ||
+         minorChar < '0' || minorChar > '9' )
+    {
+        return false;
+    }
+
+    // everything checked out - store method, resource & version
+    m_method   = line.substr(foundMethod,   foundFirstSpace  - foundMethod);
+    m_resource = line.substr(foundResource, foundSecondSpace - foundResource);
+    SetVersion( static_cast<int>(majorChar - '0'), static_cast<int>(minorChar - '0') );
+
+    // if we get here, return success
+    return true;
+}
+
+// Renders the full request header: the request line
+// ("<method> <resource> HTTP/<major>.<minor>"), then the base class's field
+// lines, then the blank line that terminates the header.
+string HttpRequestHeader::ToString(void) const {
+
+    stringstream out("");
+
+    // request line
+    out << m_method   << Constants::SPACE_CHAR;
+    out << m_resource << Constants::SPACE_CHAR;
+    out << Constants::HTTP_STRING
+        << GetMajorVersion() << Constants::DOT_CHAR << GetMinorVersion();
+    out << Constants::FIELD_NEWLINE;
+
+    // header fields, followed by the terminating empty line
+    out << HttpHeader::ToString();
+    out << Constants::FIELD_NEWLINE;
+
+    return out.str();
+}
+
+// -----------------------------------
+// HttpResponseHeader implementation
+// -----------------------------------
+
+HttpResponseHeader::HttpResponseHeader(const int statusCode, // status code, stored as given
+ const string& reason, // reason phrase, stored as given
+ int majorVersion, // HTTP major version
+ int minorVersion) // HTTP minor version
+
+ : HttpHeader()
+ , m_statusCode(statusCode)
+ , m_reason(reason)
+{
+ SetVersion(majorVersion, minorVersion); // replaces the base class's default version
+}
+
+HttpResponseHeader::HttpResponseHeader(const string& s) // builds a response header by parsing raw header text
+ : HttpHeader()
+ , m_statusCode(0)
+{
+ Parse(s); // status code stays 0 if the status line is never parsed successfully
+}
+
+HttpResponseHeader::~HttpResponseHeader(void) { } // nothing to release
+
+string HttpResponseHeader::GetReason(void) const { // returns the stored reason phrase
+ return m_reason;
+}
+
+int HttpResponseHeader::GetStatusCode(void) const { // returns the stored status code
+ return m_statusCode;
+}
+
+// Parses one header line.
+//
+// Line 0 is the status line ("HTTP/1.1 200 OK"): its version, status code and
+// reason phrase are extracted. Every other line is handed to the base class
+// as a "key: value" field.
+//
+// The version must be "HTTP/<digit>.<digit>" and the status code must be
+// exactly three digits. The reason phrase may be absent, in which case an
+// empty reason is stored.
+//
+// Returns false if the status line is malformed. In that case no member is
+// modified, because all validation happens before anything is stored.
+bool HttpResponseHeader::ParseLine(const string& line, int lineNumber) {
+
+    // if not 'status line', just let base class
+    if ( lineNumber != 0 )
+        return HttpHeader::ParseLine(line, lineNumber);
+
+    // fail if empty line
+    if ( line.empty() )
+        return false;
+
+    // walk through status line, storing positions
+    //    HTTP/1.1 200 OK
+    //    ^       ^^  ^^
+    const size_t foundVersion = line.find_first_not_of(Constants::SPACE_CHAR); // skip any leading whitespace
+    if ( foundVersion == string::npos ) return false;
+    const size_t foundFirstSpace = line.find(Constants::SPACE_CHAR, foundVersion+1);
+    if ( foundFirstSpace == string::npos ) return false;
+    const size_t foundStatusCode = line.find_first_not_of(Constants::SPACE_CHAR, foundFirstSpace+1);
+    if ( foundStatusCode == string::npos ) return false;
+
+    // may be npos: the reason phrase (and the space before it) is optional
+    const size_t foundSecondSpace = line.find(Constants::SPACE_CHAR, foundStatusCode+1);
+
+    // version token must be exactly "HTTP/<digit>.<digit>"
+    const string version = line.substr(foundVersion, foundFirstSpace - foundVersion);
+    if ( (version.find(Constants::HTTP_STRING) != 0) || (version.size() != 8) )
+        return false;
+    const char majorChar = version[5];
+    const char minorChar = version[7];
+    if ( version[6] != Constants::DOT_CHAR ||
+         majorChar < '0' || majorChar > '9' ||
+         minorChar < '0' || minorChar > '9' )
+    {
+        return false;
+    }
+
+    // status code must be exactly three digits
+    const string code = ( foundSecondSpace == string::npos )
+                      ? line.substr(foundStatusCode)
+                      : line.substr(foundStatusCode, foundSecondSpace - foundStatusCode);
+    if ( code.size() != 3 ) return false;
+    for ( size_t i = 0; i < code.size(); ++i ) {
+        if ( code[i] < '0' || code[i] > '9' )
+            return false;
+    }
+
+    // reason phrase is everything else left (possibly nothing)
+    string reason;
+    if ( foundSecondSpace != string::npos ) {
+        const size_t foundReason = line.find_first_not_of(Constants::SPACE_CHAR, foundSecondSpace+1);
+        if ( foundReason != string::npos )
+            reason = line.substr(foundReason);
+    }
+
+    // everything checked out - store version, status code & reason
+    SetVersion( static_cast<int>(majorChar - '0'), static_cast<int>(minorChar - '0') );
+    m_statusCode = atoi( code.c_str() );
+    m_reason     = reason;
+
+    // if we get here, return success
+    return true;
+}
+
+// Renders the full response header: the status line
+// ("HTTP/<major>.<minor> <status code> <reason>"), then the base class's
+// field lines, then the blank line that terminates the header.
+string HttpResponseHeader::ToString(void) const {
+
+    stringstream out("");
+
+    // status line
+    out << Constants::HTTP_STRING
+        << GetMajorVersion() << Constants::DOT_CHAR << GetMinorVersion();
+    out << Constants::SPACE_CHAR << m_statusCode;
+    out << Constants::SPACE_CHAR << m_reason;
+    out << Constants::FIELD_NEWLINE;
+
+    // header fields, followed by the terminating empty line
+    out << HttpHeader::ToString();
+    out << Constants::FIELD_NEWLINE;
+
+    return out.str();
+}
--- /dev/null
+// ***************************************************************************
+// HttpHeader_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a generic interface for parsing/generating HTTP headers, along
+// with specialized request & response header types
+// ***************************************************************************
+
+#ifndef HTTP_HEADER_P_H
+#define HTTP_HEADER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include <map>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class HttpHeader { // generic HTTP header: a key=>value field map plus HTTP version and a validity flag
+
+ // ctors & dtor
+ public:
+ HttpHeader(void); // empty, valid header with version 1.1
+ HttpHeader(const std::string& s); // parses 's' as raw header text
+ virtual ~HttpHeader(void);
+
+ // HttpHeader interface
+ public:
+
+ // header field=>value access
+ bool ContainsKey(const std::string& key) const;
+ std::string GetValue(const std::string& key) const; // empty string if key is not present
+ void RemoveField(const std::string& key);
+ void SetField(const std::string& key, const std::string& value); // adds or overwrites
+
+ // get formatted header string
+ virtual std::string ToString(void) const;
+
+ // query HTTP version used
+ int GetMajorVersion(void) const;
+ int GetMinorVersion(void) const;
+
+ // see if header was parsed OK
+ bool IsValid(void) const;
+
+ // internal methods
+ protected:
+ void Parse(const std::string& s); // splits 's' into lines and calls ParseLine() on each
+ virtual bool ParseLine(const std::string& line, int lineNumber); // returns false if 'line' cannot be parsed
+ void SetValid(bool ok);
+ void SetVersion(int major, int minor);
+
+ // data members
+ private:
+ std::map<std::string, std::string> m_fields;
+
+ bool m_isValid; // should usually be true, only false if error processing a header line
+ int m_majorVersion;
+ int m_minorVersion;
+};
+
+class HttpRequestHeader : public HttpHeader { // HTTP header whose first line is a request line (method, resource, version)
+
+ // ctor & dtor
+ public:
+ HttpRequestHeader(const std::string& method, // "GET", "PUT", etc
+ const std::string& resource, // filename
+ int majorVersion = 1, // version info
+ int minorVersion = 1);
+ ~HttpRequestHeader(void);
+
+ // HttpRequestHeader interface
+ public:
+ std::string GetMethod(void) const;
+ std::string GetResource(void) const;
+
+ // HttpHeader implementation
+ public:
+ std::string ToString(void) const; // request line + fields + terminating blank line
+ protected:
+ bool ParseLine(const std::string& line, int lineNumber); // line 0 is parsed as the request line
+
+ // data members
+ private:
+ std::string m_method;
+ std::string m_resource;
+};
+
+class HttpResponseHeader : public HttpHeader { // HTTP header whose first line is a status line (version, status code, reason)
+
+ // ctor & dtor
+ public:
+ HttpResponseHeader(const int statusCode, // 200, 404, etc
+ const std::string& reason = std::string(), // 'reason phrase' for code
+ int majorVersion = 1, // version info
+ int minorVersion = 1);
+ HttpResponseHeader(const std::string& s); // parses 's' as raw response header text
+ ~HttpResponseHeader(void);
+
+ // HttpResponseHeader interface
+ public:
+ std::string GetReason(void) const;
+ int GetStatusCode(void) const;
+
+ // HttpHeader implementation
+ public:
+ std::string ToString(void) const; // status line + fields + terminating blank line
+ protected:
+ bool ParseLine(const std::string& line, int lineNumber); // line 0 is parsed as the status line
+
+ // data members
+ private:
+ int m_statusCode;
+ std::string m_reason;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // HTTP_HEADER_P_H
--- /dev/null
+// ***************************************************************************
+// ILocalIODevice_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides shared behavior for files & pipes
+// ***************************************************************************
+
+#include "api/internal/io/ILocalIODevice_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+using namespace std;
+
+ILocalIODevice::ILocalIODevice(void) // starts with no stream attached
+ : IBamIODevice()
+ , m_stream(0)
+{ }
+
+ILocalIODevice::~ILocalIODevice(void) { // closes the stream (if open) on destruction
+ Close();
+}
+
+// Flushes and closes the underlying FILE*, then marks the device as not open.
+// Does nothing if the device is not currently open.
+void ILocalIODevice::Close(void) {
+
+    if ( IsOpen() ) {
+
+        // flush & close FILE*
+        fflush(m_stream);
+        fclose(m_stream);
+        m_stream = 0;
+
+        // reset other device state
+        m_mode = IBamIODevice::NotOpen;
+    }
+}
+
+// Reads up to 'numBytes' bytes from the stream into 'data'.
+// Returns the number of bytes fread() reported as read.
+// Asserts that a stream is attached and that the device is in read-only mode.
+int64_t ILocalIODevice::Read(char* data, const unsigned int numBytes) {
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Read: trying to read from null stream" );
+    BT_ASSERT_X( (m_mode == IBamIODevice::ReadOnly), "ILocalIODevice::Read: device not in read-only mode");
+
+    const size_t numBytesRead = fread(data, sizeof(char), numBytes, m_stream);
+    return static_cast<int64_t>(numBytesRead);
+}
+
+// Returns the current position in the stream, as reported by ftell64().
+// Asserts that a stream is attached.
+int64_t ILocalIODevice::Tell(void) const {
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Tell: trying to get file position from null stream" );
+    return ftell64(m_stream);
+}
+
+// Writes 'numBytes' bytes from 'data' to the stream.
+// Returns the number of bytes fwrite() reported as written.
+// Asserts that a stream is attached and that the device is in write-only mode.
+int64_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) {
+    BT_ASSERT_X( m_stream, "ILocalIODevice::Write: trying to write to null stream" );
+    BT_ASSERT_X( (m_mode == IBamIODevice::WriteOnly), "ILocalIODevice::Write: device not in write-only mode" );
+    return static_cast<int64_t>( fwrite(data, sizeof(char), numBytes, m_stream) );
+}
--- /dev/null
+// ***************************************************************************
+// ILocalIODevice_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides shared behavior for files & pipes
+// ***************************************************************************
+
+#ifndef ILOCALIODEVICE_P_H
+#define ILOCALIODEVICE_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+
+namespace BamTools {
+namespace Internal {
+
+class ILocalIODevice : public IBamIODevice {
+ // shared IBamIODevice behavior for devices (files & pipes) that sit on top of a C stdio FILE*
+ // ctor & dtor
+ public:
+ ILocalIODevice(void);
+ virtual ~ILocalIODevice(void);
+
+ // IBamIODevice implementation
+ public:
+ virtual void Close(void); // flushes & closes m_stream (no-op if device is not open)
+ virtual int64_t Read(char* data, const unsigned int numBytes); // fread()s up to @numBytes bytes into @data
+ virtual int64_t Tell(void) const; // current position in m_stream
+ virtual int64_t Write(const char* data, const unsigned int numBytes); // fwrite()s @numBytes bytes from @data
+
+ // data members
+ protected:
+ FILE* m_stream; // underlying stream; 0 while no stream is attached
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // ILOCALIODEVICE_P_H
--- /dev/null
+// ***************************************************************************
+// NetUnix_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides common networking-related includes, etc. for all UNIX-like systems
+// ***************************************************************************
+
+#ifndef NETUNIX_P_H
+#define NETUNIX_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#ifndef _WIN32 // <-- source files only include the proper Net*_p.h, but this is a double-check
+
+#include <arpa/inet.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <netdb.h>
+#include <unistd.h>
+
+#ifndef BT_SOCKLEN_T
+# define BT_SOCKLEN_T socklen_t
+#endif
+
+#endif // _WIN32
+#endif // NETUNIX_P_H
--- /dev/null
+// ***************************************************************************
+// NetWin_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides common networking-related includes, etc. for Windows systems
+//
+// Note: only supports XP and later
+// ***************************************************************************
+
+#ifndef NETWIN_P_H
+#define NETWIN_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#ifdef _WIN32 // <-- source files only include the proper Net*_p.h, but this is a double-check
+
+#include <winsock2.h> // <-- should bring 'windows.h' along with it
+#include <Ws2tcpip.h>
+
+#ifndef BT_SOCKLEN_T
+# define BT_SOCKLEN_T int
+#endif
+
+#ifdef _MSC_VER
+# pragma comment(lib, "ws2_32.lib")
+#endif
+
+namespace BamTools {
+namespace Internal {
+
+// use RAII to ensure WSA is initialized before use & cleaned up afterwards
+// Starts up Winsock (requesting version 2.2) on construction and releases it
+// again on destruction. WSACleanup() is only called if WSAStartup() actually
+// succeeded, so a failed startup is never "balanced" by a stray cleanup.
+class WindowsSockInit {
+    public:
+        WindowsSockInit(void)
+            : m_isInitialized(false)
+        {
+            // WSAStartup() returns 0 on success, an error code otherwise
+            WSAData wsadata;
+            m_isInitialized = ( WSAStartup(MAKEWORD(2,2), &wsadata) == 0 );
+        }
+
+        ~WindowsSockInit(void) {
+            if ( m_isInitialized )
+                WSACleanup();
+        }
+
+        // returns true if Winsock was started successfully by this object
+        bool IsInitialized(void) const {
+            return m_isInitialized;
+        }
+
+    private:
+        bool m_isInitialized;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // _WIN32
+
+#endif // NETWIN_P_H
+
--- /dev/null
+// ***************************************************************************
+// RollingBuffer_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic I/O FIFO byte queue, which removes bytes as they are
+// read from the front of the buffer and grows to accept bytes being written
+// to buffer end.
+//
+// implementation note: basically a 'smart' wrapper around 1..* ByteArrays
+// ***************************************************************************
+
+#include "api/internal/io/RollingBuffer_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <climits>
+#include <cstring>
+#include <algorithm>
+#include <string>
+using namespace std;
+
+// ------------------------------
+// RollingBuffer implementation
+// ------------------------------
+
+// creates an empty buffer; @growth is remembered as the usual size for new byte arrays
+RollingBuffer::RollingBuffer(size_t growth)
+    : m_bufferGrowth(growth)
+{
+    // Clear() works on the first byte array, so one (still empty) array has to exist up front
+    ByteArray firstArray = ByteArray();
+    m_data.push_back(firstArray);
+
+    // put head/tail/size markers into their initial state
+    Clear();
+}
+
+// nothing to release by hand
+RollingBuffer::~RollingBuffer(void)
+{ }
+
+// returns the number of bytes that can be read from the first byte array
+size_t RollingBuffer::BlockSize(void) const
+{
+    // several byte arrays: everything from 'head' to the end of the first array
+    if ( m_tailBufferIndex != 0 )
+        return m_data.front().Size() - m_head;
+
+    // single byte array: the data sits between 'head' and 'tail'
+    return m_tail - m_head;
+}
+
+// returns true if a newline character is currently stored in the buffer
+bool RollingBuffer::CanReadLine(void) const
+{
+    const size_t newlineIndex = IndexOf('\n');
+    return ( newlineIndex != string::npos );
+}
+
+// removes @n bytes from the END of the buffer
+// (asking for more bytes than are stored simply empties the buffer)
+void RollingBuffer::Chop(size_t n) {
+
+    // update buffer size, bottoming out at zero
+    if ( n > m_totalBufferSize )
+        m_totalBufferSize = 0;
+    else
+        m_totalBufferSize -= n;
+
+    // loop until target case hit
+    for ( ; ; ) {
+
+        // if only one array is left, pull 'tail' back towards 'head'
+        if ( m_tailBufferIndex == 0 ) {
+
+            // the markers are unsigned, so test BEFORE subtracting: a plain
+            // 'm_tail -= n' would wrap around whenever n > m_tail
+            if ( n >= m_tail || (m_tail - n) <= m_head ) {
+                // all data chopped
+                m_head = 0;
+                m_tail = 0;
+            }
+            else
+                m_tail -= n;
+            break;
+        }
+
+        // if there's room in last byte array to 'chop', just decrement tail
+        if ( n <= m_tail ) {
+            m_tail -= n;
+            break;
+        }
+
+        // otherwise the chop reaches into the previous byte array:
+        // account for the data held in the last byte array ...
+        n -= m_tail;
+
+        // ... then remove that array & set tail to the end of its predecessor
+        m_data.pop_back();
+        --m_tailBufferIndex;
+        m_tail = m_data.at(m_tailBufferIndex).Size();
+    }
+
+    // if buffer is now empty, reset state & clear up memory
+    // (the loop is left via 'break' so that this step is actually reached)
+    if ( IsEmpty() )
+        Clear();
+}
+
+// empties the buffer: keeps only the first byte array (shrunk to size 0)
+// and rewinds all position/size markers
+void RollingBuffer::Clear(void)
+{
+    // throw away every byte array after the first one
+    while ( m_data.size() > 1 )
+        m_data.pop_back();
+
+    // empty out the byte array that remains
+    ByteArray& first = m_data[0];
+    first.Resize(0);
+    first.Squeeze();
+
+    // markers back to their initial values
+    m_totalBufferSize = 0;
+    m_tailBufferIndex = 0;
+    m_tail = 0;
+    m_head = 0;
+}
+
+// removes @n bytes from the FRONT of the buffer
+void RollingBuffer::Free(size_t n)
+{
+    // adjust stored byte count, bottoming out at zero
+    m_totalBufferSize = ( (n > m_totalBufferSize) ? 0 : (m_totalBufferSize - n) );
+
+    bool done = false;
+    while ( !done ) {
+
+        const size_t available = BlockSize();
+
+        if ( n < available ) {
+
+            // request ends inside the first byte array - just move 'head' forward
+            m_head += n;
+
+            // a single byte array that has been read completely gets its markers rewound
+            if ( m_tailBufferIndex == 0 && m_head == m_tail ) {
+                m_head = 0;
+                m_tail = 0;
+            }
+            done = true;
+        }
+        else {
+
+            // the first byte array is used up entirely
+            n -= available;
+
+            if ( m_data.size() == 1 ) {
+                // it is the only byte array: keep it, restore the default size, rewind markers
+                if ( m_data.at(0).Size() != m_bufferGrowth )
+                    m_data[0].Resize(m_bufferGrowth);
+                m_head = 0;
+                m_tail = 0;
+                m_tailBufferIndex = 0;
+                done = true;
+            }
+            else {
+                // drop it & carry on with the next byte array
+                m_data.pop_front();
+                --m_tailBufferIndex;
+                m_head = 0;
+            }
+        }
+    }
+
+    // an emptied buffer is reset completely
+    if ( IsEmpty() )
+        Clear();
+}
+
+// returns the offset of the first @c, counted from the current read position,
+// or string::npos if @c is not stored in the buffer
+size_t RollingBuffer::IndexOf(char c) const
+{
+    size_t offset = 0;
+
+    const size_t arrayCount = m_data.size();
+    for ( size_t i = 0; i < arrayCount; ++i ) {
+        const ByteArray& bytes = m_data.at(i);
+
+        // searched range: the first array starts at 'head', the last one stops at 'tail'
+        const size_t first = ( (i == 0) ? m_head : 0 );
+        const size_t last  = ( (i == m_tailBufferIndex) ? m_tail : bytes.Size() );
+
+        const char* data = bytes.ConstData();
+        for ( size_t j = first; j < last; ++j, ++offset ) {
+            if ( data[j] == c )
+                return offset;
+        }
+    }
+
+    // @c is not in the buffer
+    return string::npos;
+}
+
+// returns true if only the first byte array is in use and its 'tail' sits at 0
+bool RollingBuffer::IsEmpty(void) const
+{
+    if ( m_tailBufferIndex != 0 )
+        return false;
+    return ( m_tail == 0 );
+}
+
+// moves up to @max bytes from the front of the buffer into @dest
+// (with a null @dest the bytes are removed without being copied)
+// returns the number of bytes taken from the buffer
+size_t RollingBuffer::Read(char* dest, size_t max)
+{
+    const size_t wanted = std::min(Size(), max);
+
+    size_t copied = 0;
+    while ( copied < wanted ) {
+
+        // never go past the end of the first byte array in one step
+        const size_t chunk = std::min( wanted - copied, BlockSize() );
+        if ( dest )
+            memcpy(dest + copied, ReadPointer(), chunk);
+
+        Free(chunk);
+        copied += chunk;
+    }
+
+    return copied;
+}
+
+// reads one line (up to & including its newline) from the front of the buffer
+// into @dest and null-terminates it
+//
+// at most @max-1 bytes are read, leaving room for the terminator
+// returns the number of bytes read; 0 (with @dest untouched) if @max is 0
+// or no newline is stored in the buffer
+size_t RollingBuffer::ReadLine(char* dest, size_t max) {
+
+    // if max is 0 or we can't read a line
+    if ( max == 0 )
+        return 0;
+
+    // locate the newline ONCE, relative to the current read position
+    // (re-scanning after each Free() yields an index relative to the *new*
+    //  read position, which must not be compared with the running byte count)
+    const size_t index = IndexOf('\n');
+    if ( index == string::npos )
+        return 0;
+
+    // total number of bytes to hand out: line incl. newline, limited by @max-1
+    const size_t bytesToRead = std::min( index+1, max-1 );
+
+    // copy block by block, since the line may span several byte arrays
+    size_t bytesReadSoFar = 0;
+    while ( bytesReadSoFar < bytesToRead ) {
+
+        const char* readPtr = ReadPointer();
+        const size_t blockBytes = std::min( bytesToRead - bytesReadSoFar, BlockSize() );
+
+        // defensive: stop rather than spin if no progress can be made
+        if ( blockBytes == 0 )
+            break;
+
+        memcpy(dest+bytesReadSoFar, readPtr, blockBytes);
+        bytesReadSoFar += blockBytes;
+        Free(blockBytes);
+    }
+
+    // null terminate 'dest' & return numBytesRead
+    dest[bytesReadSoFar] = '\0';
+    return bytesReadSoFar;
+}
+
+// returns a pointer to the current read position within the first byte array
+// (null if there is no byte array at all)
+const char* RollingBuffer::ReadPointer(void) const
+{
+    if ( !m_data.empty() )
+        return m_data.front().ConstData() + m_head;
+
+    return 0;
+}
+
+// makes room for @n more bytes at the end of the buffer & counts them as stored
+// returns a write-able pointer to the start of that room
+char* RollingBuffer::Reserve(size_t n)
+{
+    // nothing stored yet: size the first byte array & hand out its start
+    if ( m_totalBufferSize == 0 ) {
+        m_data[0].Resize( std::max(m_bufferGrowth, n) );
+        m_totalBufferSize += n;
+        m_tail = n;
+        return m_data[m_tailBufferIndex].Data();
+    }
+
+    // the new bytes count as stored from here on
+    m_totalBufferSize += n;
+
+    // can the last byte array take the new bytes as it is?
+    bool appendInPlace = ( (m_tail + n) <= m_data.at(m_tailBufferIndex).Size() );
+
+    // if not, but it is less than half full, simply resize it so they fit
+    if ( !appendInPlace && (m_tail < m_data.at(m_tailBufferIndex).Size()/2) ) {
+        m_data[m_tailBufferIndex].Resize(m_tail + n);
+        appendInPlace = true;
+    }
+
+    // write position is the current 'tail', which then moves on by @n
+    if ( appendInPlace ) {
+        char* writePtr = m_data[m_tailBufferIndex].Data() + m_tail;
+        m_tail += n;
+        return writePtr;
+    }
+
+    // otherwise trim the last byte array to what is actually used ...
+    m_data[m_tailBufferIndex].Resize(m_tail);
+
+    // ... and continue in a newly appended byte array
+    m_data.push_back( ByteArray() );
+    ++m_tailBufferIndex;
+    m_data[m_tailBufferIndex].Resize( std::max(m_bufferGrowth, n) );
+    m_tail = n;
+
+    return m_data[m_tailBufferIndex].Data();
+}
+
+// returns the number of bytes currently stored in the buffer
+size_t RollingBuffer::Size(void) const
+{
+    return m_totalBufferSize;
+}
+
+// appends @n bytes from @src to the end of the buffer
+void RollingBuffer::Write(const char* src, size_t n)
+{
+    // Reserve() provides the room, the bytes are then copied straight into it
+    memcpy( Reserve(n), src, n );
+}
--- /dev/null
+// ***************************************************************************
+// RollingBuffer_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a dynamic I/O FIFO byte queue, which removes bytes as they are
+// read from the front of the buffer and grows to accept bytes being written
+// to buffer end.
+//
+// implementation note: basically a 'smart' wrapper around 1..* ByteArrays
+// ***************************************************************************
+
+#ifndef ROLLINGBUFFER_P_H
+#define ROLLINGBUFFER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/api_global.h"
+#include "api/internal/io/ByteArray_p.h"
+#include <deque>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class RollingBuffer {
+ // FIFO byte queue made of one or more ByteArrays: data is appended at the tail & read (then freed) from the head
+ // ctors & dtor
+ public:
+ RollingBuffer(size_t growth); // @growth: usual size for newly added byte arrays
+ ~RollingBuffer(void);
+
+ // RollingBuffer interface
+ public:
+
+ // returns number of bytes that can be read from the first byte array
+ size_t BlockSize(void) const;
+ // returns true if buffer contains a newline character
+ bool CanReadLine(void) const;
+ // removes @n bytes from end of buffer
+ void Chop(size_t n);
+ // empties buffer & resets all position/size markers
+ void Clear(void);
+ // removes @n bytes from front of buffer
+ void Free(size_t n);
+ // returns offset of first @c from current read position, or std::string::npos if not found
+ size_t IndexOf(char c) const;
+ // returns true if buffer holds no data
+ bool IsEmpty(void) const;
+ // reads up to @max bytes into @dest (if @dest is null, bytes are removed without being copied)
+ // returns exactly how many bytes were read from buffer
+ size_t Read(char* dest, size_t max);
+ // reads until newline (or up to @max-1 bytes) into @dest, then null-terminates @dest
+ // returns exactly how many bytes were read from buffer (0 if no newline is stored or @max is 0)
+ size_t ReadLine(char* dest, size_t max);
+
+ const char* ReadPointer(void) const; // returns a C-fxn compatible char* to byte data
+ char* Reserve(size_t n); // ensures that buffer contains space for @n incoming bytes, returns write-able char*
+ size_t Size(void) const; // returns current number of bytes stored in buffer
+ void Write(const char* src, size_t n); // reserves space for @n bytes, then appends contents of @src to buffer
+
+ // data members
+ private:
+ size_t m_head; // index into current data (next char)
+ size_t m_tail; // index into last data position
+ size_t m_tailBufferIndex; // m_data::size() - 1
+ size_t m_totalBufferSize; // total buffer size
+ size_t m_bufferGrowth; // new buffers are typically initialized with this size
+ std::deque<ByteArray> m_data; // basic 'buffer of buffers'
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // ROLLINGBUFFER_P_H
--- /dev/null
+// ***************************************************************************
+// TcpSocketEngine_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O
+// ***************************************************************************
+
+// N.B. - this file contains the top-level, platform-independent logic. "Native" methods
+// are called as needed from the TcpSocketEngine_<X>.cpp files. Selection of the proper
+// native method file should have been handled at build-time by CMake.
+
+#include "api/internal/io/HostInfo_p.h"
+#include "api/internal/io/TcpSocketEngine_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+// creates an engine without a socket: invalid descriptor (-1), unconnected state
+// (local port tracking is currently disabled)
+TcpSocketEngine::TcpSocketEngine(void)
+    : m_socketDescriptor(-1),
+      m_remotePort(0),
+      m_socketError(TcpSocket::UnknownSocketError),
+      m_socketState(TcpSocket::UnconnectedState)
+{
+}
+
+// member-wise copy of @other (local address/port tracking is currently disabled)
+// NOTE(review): the descriptor VALUE is copied, so both engines refer to the same
+// socket afterwards and each destructor calls Close() - confirm this is intended
+TcpSocketEngine::TcpSocketEngine(const TcpSocketEngine& other)
+    : m_socketDescriptor(other.m_socketDescriptor),
+      m_remoteAddress(other.m_remoteAddress),
+      m_remotePort(other.m_remotePort),
+      m_socketError(other.m_socketError),
+      m_socketState(other.m_socketState),
+      m_errorString(other.m_errorString)
+{
+}
+
+// closes the socket (if one is held) when the engine goes away
+TcpSocketEngine::~TcpSocketEngine(void)
+{
+    Close();
+}
+
+// closes the socket (if a valid descriptor is held) & resets the connection info
+// (local address/port tracking is currently disabled)
+void TcpSocketEngine::Close(void)
+{
+    // only a valid descriptor is handed to the native close
+    const bool hasSocket = ( m_socketDescriptor != -1 );
+    if ( hasSocket ) {
+        nativeClose();
+        m_socketDescriptor = -1;
+    }
+
+    // forget everything about the previous connection
+    m_remotePort = 0;
+    m_remoteAddress.Clear();
+    m_socketState = TcpSocket::UnconnectedState;
+}
+
+// connects the (already initialized) socket to @address on @port
+// returns false if there is no valid socket, if the engine is already connected,
+// or if the native connect fails
+bool TcpSocketEngine::Connect(const HostAddress& address, const uint16_t port)
+{
+    // TODO: set error string
+    if ( !IsValid() )
+        return false;
+
+    // TODO: set error string
+    if ( m_socketState == TcpSocket::ConnectedState )
+        return false;
+
+    // TODO: set error string (on failure)
+    const bool connected = nativeConnect(address, port);
+    if ( connected ) {
+        // remember who we are talking to
+        // TODO: (later) fetch proxied remote & local host/port here
+        m_remoteAddress = address;
+        m_remotePort = port;
+    }
+    return connected;
+}
+
+std::string TcpSocketEngine::GetErrorString(void) const {
+ return m_errorString;
+}
+
+//HostAddress TcpSocketEngine::GetLocalAddress(void) const {
+// return m_localAddress;
+//}
+
+//uint16_t TcpSocketEngine::GetLocalPort(void) const {
+// return m_localPort;
+//}
+
+// returns the address stored by the last successful Connect() (cleared by Close())
+HostAddress TcpSocketEngine::GetRemoteAddress(void) const
+{
+    return m_remoteAddress;
+}
+
+// returns the port stored by the last successful Connect() (0 after Close())
+uint16_t TcpSocketEngine::GetRemotePort(void) const
+{
+    return m_remotePort;
+}
+
+// returns the native socket descriptor (-1 if no socket is held)
+int TcpSocketEngine::GetSocketDescriptor(void) const
+{
+    return m_socketDescriptor;
+}
+
+TcpSocket::SocketError TcpSocketEngine::GetSocketError(void) {
+ return m_socketError;
+}
+
+// returns the current connection state
+TcpSocket::SocketState TcpSocketEngine::GetSocketState(void)
+{
+    return m_socketState;
+}
+
+// (re-)creates the socket for @protocol; a socket that is currently held is closed first
+// returns the result of the native socket creation
+bool TcpSocketEngine::Initialize(HostAddress::NetworkProtocol protocol)
+{
+    // start from a clean slate
+    if ( IsValid() )
+        Close();
+
+    const bool created = nativeCreateSocket(protocol);
+    return created;
+}
+
+// returns true if a socket descriptor is held (i.e. it is not -1)
+bool TcpSocketEngine::IsValid(void) const
+{
+    const bool hasDescriptor = ( m_socketDescriptor != -1 );
+    return hasDescriptor;
+}
+
+// returns the number of bytes reported as ready by the native layer,
+// or -1 if there is no valid socket
+int64_t TcpSocketEngine::NumBytesAvailable(void) const
+{
+    // ask the socket only if we actually have one
+    if ( IsValid() )
+        return nativeNumBytesAvailable();
+
+    // TODO: set error string
+    return -1;
+}
+
+// reads up to @max bytes from the socket into @dest
+// returns the native read result, or -1 unless the socket is valid & connected
+int64_t TcpSocketEngine::Read(char* dest, size_t max)
+{
+    const bool canRead = ( IsValid() && (m_socketState == TcpSocket::ConnectedState) );
+    if ( canRead )
+        return nativeRead(dest, max);
+
+    return -1;
+}
+
+// waits up to @msec milliseconds for the socket to become ready for reading
+//
+// @timedOut (optional, may be 0) is set to true if the wait ended because the
+// time ran out, false otherwise
+// returns true if the socket is ready for reading; false on time-out, on error,
+// or if there is no valid socket to wait on
+bool TcpSocketEngine::WaitForRead(int msec, bool* timedOut) {
+
+    // reset timedOut flag (callers not interested in it may pass a null pointer)
+    if ( timedOut )
+        *timedOut = false;
+
+    // without a valid descriptor there is nothing to wait on
+    // (the native select must not be handed a descriptor of -1)
+    if ( !IsValid() )
+        return false;
+
+    // need to wait for our socket to be ready to read
+    const int ret = nativeSelect(msec, true);
+
+    // if timed out
+    if ( ret == 0 ) {
+        if ( timedOut )
+            *timedOut = true;
+        m_socketError = TcpSocket::SocketTimeoutError;
+        m_errorString = "socket timed out";
+    }
+
+    // return if any sockets available for reading
+    return ( ret > 0 );
+}
+
+// waits up to @msec milliseconds for the socket to become ready for writing
+//
+// @timedOut (optional, may be 0) is set to true if the wait ended because the
+// time ran out, false otherwise
+// returns true if the socket is ready for writing; false on time-out, on error,
+// or if there is no valid socket to wait on
+bool TcpSocketEngine::WaitForWrite(int msec, bool* timedOut) {
+
+    // reset timedOut flag (callers not interested in it may pass a null pointer)
+    if ( timedOut )
+        *timedOut = false;
+
+    // without a valid descriptor there is nothing to wait on
+    // (the native select must not be handed a descriptor of -1)
+    if ( !IsValid() )
+        return false;
+
+    // need to wait for our socket to be ready to write
+    const int ret = nativeSelect(msec, false);
+
+    // if timed out
+    if ( ret == 0 ) {
+        if ( timedOut )
+            *timedOut = true;
+        m_socketError = TcpSocket::SocketTimeoutError;
+        m_errorString = "socket timed out";
+    }
+
+    // return if any sockets available for writing
+    return ( ret > 0 );
+}
+
+// writes @length bytes from @data to the socket
+// returns the native write result, or -1 unless the socket is valid & connected
+int64_t TcpSocketEngine::Write(const char* data, size_t length)
+{
+    const bool canWrite = ( IsValid() && (m_socketState == TcpSocket::ConnectedState) );
+    if ( canWrite )
+        return nativeWrite(data, length);
+
+    // TODO: set error string
+    return -1;
+}
--- /dev/null
+// ***************************************************************************
+// TcpSocketEngine_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O
+// ***************************************************************************
+
+#ifndef TCPSOCKETENGINE_P_H
+#define TCPSOCKETENGINE_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/internal/io/HostAddress_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+
+namespace BamTools {
+namespace Internal {
+
+struct TcpSocketEngine {
+ // platform-independent front end for TCP socket I/O; the native*() methods supply the OS-specific parts
+ // ctors & dtor
+ public:
+ TcpSocketEngine(void);
+ TcpSocketEngine(const TcpSocketEngine& other);
+ ~TcpSocketEngine(void);
+
+ // TcpSocketEngine interface
+ public:
+
+ // connection-related methods
+ void Close(void); // closes socket (if valid) & resets connection info
+ bool Connect(const HostAddress& address, const uint16_t port); // fails if socket is invalid or already connected
+ bool Initialize(HostAddress::NetworkProtocol protocol); // closes any held socket, then creates a new one
+ bool IsValid(void) const; // true if a socket descriptor is held
+
+ // IO-related methods
+ int64_t NumBytesAvailable(void) const; // -1 if socket is invalid
+ int64_t Read(char* dest, size_t max); // -1 unless socket is valid & connected
+ int64_t Write(const char* data, size_t length); // -1 unless socket is valid & connected
+ // wait up to @msec milliseconds for socket to become ready; @timedOut is set to true on time-out
+ bool WaitForRead(int msec, bool* timedOut);
+ bool WaitForWrite(int msec, bool* timedOut);
+
+ // query connection state
+// HostAddress GetLocalAddress(void) const;
+// uint16_t GetLocalPort(void) const;
+ HostAddress GetRemoteAddress(void) const;
+ uint16_t GetRemotePort(void) const;
+
+ int GetSocketDescriptor(void) const;
+ TcpSocket::SocketError GetSocketError(void);
+ TcpSocket::SocketState GetSocketState(void);
+
+ std::string GetErrorString(void) const;
+
+ // platform-dependent internal methods
+ // provided in the corresponding TcpSocketEngine_<OS>_p.cpp
+ private:
+ void nativeClose(void);
+ bool nativeConnect(const HostAddress& address, const uint16_t port);
+ bool nativeCreateSocket(HostAddress::NetworkProtocol protocol);
+ void nativeDisconnect(void);
+ int64_t nativeNumBytesAvailable(void) const;
+ int64_t nativeRead(char* dest, size_t max);
+ int nativeSelect(int msecs, bool isRead) const;
+ int64_t nativeWrite(const char* data, size_t length);
+
+ // data members
+ private:
+ int m_socketDescriptor; // native socket handle; -1 while no socket is held
+
+// HostAddress m_localAddress;
+ HostAddress m_remoteAddress;
+// uint16_t m_localPort;
+ uint16_t m_remotePort;
+
+ TcpSocket::SocketError m_socketError;
+ TcpSocket::SocketState m_socketState;
+ std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // TCPSOCKETENGINE_P_H
--- /dev/null
+// ***************************************************************************
+// TcpSocketEngine_unix_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 15 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O for all UNIX-like systems
+// ***************************************************************************
+
+#include "api/internal/io/TcpSocketEngine_p.h"
+#include "api/internal/io/NetUnix_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cerrno>
+#include <ctime>
+#include <iostream>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace BamTools {
+namespace Internal {
+
+} // namespace Internal
+} // namespace BamTools
+
+// --------------------------------
+// TcpSocketEngine implementation
+// --------------------------------
+
+// hands the socket descriptor to close()
+void TcpSocketEngine::nativeClose(void)
+{
+    ::close(m_socketDescriptor);
+}
+
+// connects the socket to @address on @port
+//
+// on success (or if the socket turns out to be connected already) the state is
+// set to 'connected' and true is returned; otherwise the state is 'unconnected',
+// error code & message are set for the recognized errno values, and false is returned
+bool TcpSocketEngine::nativeConnect(const HostAddress& address, const uint16_t port) {
+
+    // setup connection parameters from address/port
+    sockaddr_in  sockAddrIPv4;
+    sockaddr_in6 sockAddrIPv6;
+    sockaddr*    sockAddrPtr = 0;
+    BT_SOCKLEN_T sockAddrSize = 0;
+
+    // IPv6
+    if ( address.GetProtocol() == HostAddress::IPv6Protocol ) {
+
+        memset(&sockAddrIPv6, 0, sizeof(sockAddrIPv6));
+        sockAddrIPv6.sin6_family = AF_INET6;
+        sockAddrIPv6.sin6_port   = htons(port);
+
+        IPv6Address ip6 = address.GetIPv6Address();
+        memcpy(&sockAddrIPv6.sin6_addr.s6_addr, &ip6, sizeof(ip6));
+
+        sockAddrSize = sizeof(sockAddrIPv6);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv6;
+    }
+
+    // IPv4
+    else if ( address.GetProtocol() == HostAddress::IPv4Protocol ) {
+
+        memset(&sockAddrIPv4, 0, sizeof(sockAddrIPv4));
+        sockAddrIPv4.sin_family      = AF_INET;
+        sockAddrIPv4.sin_port        = htons(port);
+        sockAddrIPv4.sin_addr.s_addr = htonl(address.GetIPv4Address());
+
+        sockAddrSize = sizeof(sockAddrIPv4);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv4;
+    }
+
+    // unknown (should be unreachable)
+    else {
+        BT_ASSERT_X(false, "TcpSocketEngine::nativeConnect() : unknown network protocol");
+
+        // in case the assertion does not stop us: never call connect() with a
+        // null address - report the failure instead
+        m_socketState = TcpSocket::UnconnectedState;
+        m_socketError = TcpSocket::UnsupportedSocketOperationError;
+        m_errorString = "unknown network protocol";
+        return false;
+    }
+
+    // attempt connection
+    const int connectResult = connect(m_socketDescriptor, sockAddrPtr, sockAddrSize);
+
+    // if failed, handle error
+    if ( connectResult == -1 ) {
+
+        // take a copy of errno right away, before anything else can overwrite it
+        const int errorCode = errno; // <-- potential thread issues later? but can't get error type from connectResult
+
+        m_socketState = TcpSocket::UnconnectedState;
+
+        // set error type/message depending on error code
+        switch ( errorCode ) {
+
+            case EISCONN:
+                m_socketState = TcpSocket::ConnectedState; // socket was already connected
+                break;
+            case ECONNREFUSED:
+            case EINVAL:
+                m_socketError = TcpSocket::ConnectionRefusedError;
+                m_errorString = "connection refused";
+                break;
+            case ETIMEDOUT:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "connection timed out";
+                break;
+            case EHOSTUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "host unreachable";
+                break;
+            case ENETUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "network unreachable";
+                break;
+            case EADDRINUSE:
+                m_socketError = TcpSocket::SocketResourceError;
+                m_errorString = "address already in use";
+                break;
+            case EACCES:
+            case EPERM:
+                m_socketError = TcpSocket::SocketAccessError;
+                m_errorString = "permission denied";
+                break;
+            default:
+                break;
+        }
+
+        // double check that we're not in 'connected' state; if so, return failure
+        if ( m_socketState != TcpSocket::ConnectedState )
+            return false;
+    }
+
+    // otherwise, we should be good
+    // update state & return success
+    m_socketState = TcpSocket::ConnectedState;
+    return true;
+}
+
+// creates a TCP stream socket for @protocol (IPv6 if requested, IPv4 otherwise)
+//
+// on success the new descriptor is stored & true is returned; on failure error
+// code & message are set for the recognized errno values and false is returned
+bool TcpSocketEngine::nativeCreateSocket(HostAddress::NetworkProtocol protocol) {
+
+    // get protocol value for requested protocol type
+    const int protocolNum = ( (protocol == HostAddress::IPv6Protocol) ? AF_INET6
+                                                                      : AF_INET );
+
+    // attempt to create socket
+    const int socketFd = socket(protocolNum, SOCK_STREAM, IPPROTO_TCP);
+
+    // socket() signals failure with -1 only; 0 is a perfectly valid descriptor
+    // and must not be mistaken for an error (it would be leaked otherwise)
+    if ( socketFd == -1 ) {
+
+        // see what error we got
+        switch ( errno ) {
+            case EPROTONOSUPPORT:
+            case EAFNOSUPPORT:
+            case EINVAL:
+                m_socketError = TcpSocket::UnsupportedSocketOperationError;
+                m_errorString = "protocol not supported";
+                break;
+            case ENFILE:
+            case EMFILE:
+            case ENOBUFS:
+            case ENOMEM:
+                m_socketError = TcpSocket::SocketResourceError;
+                m_errorString = "out of resources";
+                break;
+            case EACCES:
+                m_socketError = TcpSocket::SocketAccessError;
+                m_errorString = "permission denied";
+                break;
+            default:
+                break;
+        }
+
+        // return failure
+        return false;
+    }
+
+    // otherwise, store our socket FD & return success
+    m_socketDescriptor = socketFd;
+    return true;
+}
+
+// asks the socket (ioctl/FIONREAD) how many bytes are waiting to be read
+// returns that count, or -1 if the query fails
+int64_t TcpSocketEngine::nativeNumBytesAvailable(void) const
+{
+    int pending = 0;
+    const int status = ioctl(m_socketDescriptor, FIONREAD, (char*)&pending);
+    if ( status < 0 )
+        return -1;
+
+    return static_cast<int64_t>(pending);
+}
+
+// reads up to @max bytes from the socket into @dest
+// returns the number of bytes read, or on failure:
+//    0  if the connection was reset (ECONNRESET)
+//   -2  if no data was available for reading (EAGAIN)
+//   -1  for an invalid socket or any other error
+int64_t TcpSocketEngine::nativeRead(char* dest, size_t max)
+{
+    if ( !IsValid() )
+        return -1;
+
+    const ssize_t numRead = read(m_socketDescriptor, dest, max);
+    if ( numRead >= 0 )
+        return static_cast<int64_t>(numRead);
+
+    // read failed - translate the error into our return codes
+    const int errorCode = errno;
+    if ( errorCode == EAGAIN )
+        return -2;
+    if ( errorCode == ECONNRESET )
+        return 0;
+    return -1;
+}
+
+// negative value for msecs will block (forever) until ready
+// select()s on our socket for reading (@isRead) or writing, for up to @msecs milliseconds
+// returns the result of select()
+int TcpSocketEngine::nativeSelect(int msecs, bool isRead) const
+{
+    // our socket is the only descriptor of interest
+    fd_set fds;
+    FD_ZERO(&fds);
+    FD_SET(m_socketDescriptor, &fds);
+
+    // split the timeout into seconds & microseconds
+    timeval tv;
+    tv.tv_sec  = msecs / 1000;
+    tv.tv_usec = (msecs % 1000) * 1000;
+
+    // a null timeout makes select() block; used for negative @msecs
+    timeval* timeout = ( (msecs < 0) ? 0 : &tv );
+
+    // the set is passed either as read set or as write set
+    fd_set* readFds  = ( isRead ? &fds : 0 );
+    fd_set* writeFds = ( isRead ? 0 : &fds );
+
+    return select(m_socketDescriptor + 1, readFds, writeFds, 0, timeout);
+}
+
+// writes @length bytes from @data to the socket
+// returns the number of bytes written; 0 if the write would have to wait (EAGAIN)
+// if the remote side is gone (EPIPE/ECONNRESET) the error is recorded, the
+// socket is closed & -1 is returned; other errors pass on write()'s negative result
+int64_t TcpSocketEngine::nativeWrite(const char* data, size_t length)
+{
+    ssize_t numWritten = write(m_socketDescriptor, data, length);
+
+    if ( numWritten < 0 ) {
+        const int errorCode = errno;
+
+        if ( errorCode == EPIPE || errorCode == ECONNRESET ) {
+            numWritten = -1;
+            m_socketError = TcpSocket::RemoteHostClosedError;
+            m_errorString = "remote host closed connection";
+            Close();
+        }
+        else if ( errorCode == EAGAIN )
+            numWritten = 0;
+    }
+
+    return static_cast<int64_t>(numWritten);
+}
--- /dev/null
+// ***************************************************************************
+// TcpSocketEngine_win_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 15 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides low-level implementation of TCP I/O for all Windows systems
+// ***************************************************************************
+
+#include "api/internal/io/TcpSocketEngine_p.h"
+#include "api/internal/io/NetWin_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstring>
+#include <iostream>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+namespace BamTools {
+namespace Internal {
+
+
+} // namespace Internal
+} // namespace BamTools
+
+// --------------------------------
+// TcpSocketEngine implementation
+// --------------------------------
+
+// hands the socket descriptor to closesocket()
+void TcpSocketEngine::nativeClose(void)
+{
+    ::closesocket(m_socketDescriptor);
+}
+
+// Attempts to connect the engine's socket to 'address' on 'port'.
+//
+// Builds an IPv4 or IPv6 sockaddr from the host address and calls WSAConnect().
+// On failure, m_socketError & m_errorString are set for the recognized WSA error
+// codes. Returns true if the socket ends up in the connected state (this includes
+// the case where WSAConnect() reports WSAEISCONN), false otherwise.
+bool TcpSocketEngine::nativeConnect(const HostAddress& address, const uint16_t port) {
+
+    // setup connection parameters from address/port
+    sockaddr_in  sockAddrIPv4;
+    sockaddr_in6 sockAddrIPv6;
+    sockaddr*    sockAddrPtr  = 0;
+    BT_SOCKLEN_T sockAddrSize = 0;
+
+    // IPv6
+    if ( address.GetProtocol() == HostAddress::IPv6Protocol ) {
+
+        memset(&sockAddrIPv6, 0, sizeof(sockAddrIPv6));
+        sockAddrIPv6.sin6_family = AF_INET6;
+        sockAddrIPv6.sin6_port   = htons(port);
+
+        IPv6Address ip6 = address.GetIPv6Address();
+        memcpy(&sockAddrIPv6.sin6_addr.s6_addr, &ip6, sizeof(ip6));
+
+        sockAddrSize = sizeof(sockAddrIPv6);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv6;
+    }
+
+    // IPv4
+    else if ( address.GetProtocol() == HostAddress::IPv4Protocol ) {
+
+        memset(&sockAddrIPv4, 0, sizeof(sockAddrIPv4));
+        sockAddrIPv4.sin_family      = AF_INET;
+        sockAddrIPv4.sin_port        = htons(port);
+        sockAddrIPv4.sin_addr.s_addr = htonl(address.GetIPv4Address());
+
+        sockAddrSize = sizeof(sockAddrIPv4);
+        sockAddrPtr  = (sockaddr*)&sockAddrIPv4;
+    }
+
+    // unknown (should be unreachable)
+    else {
+        BT_ASSERT_X(false, "TcpSocketEngine::nativeConnect() : unknown network protocol");
+
+        // in case the assert does not abort (e.g. it is compiled out), fail cleanly
+        // here instead of handing a null sockaddr pointer to WSAConnect()
+        m_socketState = TcpSocket::UnconnectedState;
+        m_socketError = TcpSocket::UnsupportedSocketOperationError;
+        m_errorString = "unknown network protocol";
+        return false;
+    }
+
+    // attempt connection
+    const int connectResult = WSAConnect(m_socketDescriptor, sockAddrPtr, sockAddrSize, 0, 0, 0, 0);
+
+    // if failed, handle error
+    if ( connectResult == SOCKET_ERROR ) {
+
+        // ensure state is set before checking error code
+        m_socketState = TcpSocket::UnconnectedState;
+
+        // set error type/message depending on errorCode
+        const int errorCode = WSAGetLastError();
+        switch ( errorCode ) {
+            case WSANOTINITIALISED:
+                m_socketError = TcpSocket::UnknownSocketError;
+                m_errorString = "Windows socket functionality not properly initialized";
+                break;
+            case WSAEISCONN:
+                m_socketState = TcpSocket::ConnectedState; // socket already connected
+                break;
+            case WSAECONNREFUSED:
+            case WSAEINVAL:
+                m_socketError = TcpSocket::ConnectionRefusedError;
+                m_errorString = "connection refused";
+                break;
+            case WSAETIMEDOUT:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "connection timed out";
+                break;
+            case WSAEHOSTUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "host unreachable";
+                break;
+            case WSAENETUNREACH:
+                m_socketError = TcpSocket::NetworkError;
+                m_errorString = "network unreachable";
+                break;
+            case WSAEADDRINUSE:
+                m_socketError = TcpSocket::SocketResourceError;
+                m_errorString = "address already in use";
+                break;
+            case WSAEACCES:
+                m_socketError = TcpSocket::SocketAccessError;
+                m_errorString = "permission denied";
+                break;
+            default:
+                break;
+        }
+
+        // double check that we're not in 'connected' state; if so, return failure
+        if ( m_socketState != TcpSocket::ConnectedState )
+            return false;
+    }
+
+    // otherwise, we should be good
+    // update state & return success
+    m_socketState = TcpSocket::ConnectedState;
+    return true;
+}
+
+// Creates a TCP stream socket for the requested protocol family.
+//
+// IPv6Protocol selects AF_INET6; any other value selects AF_INET.
+// On success the new descriptor is stored in m_socketDescriptor and true is
+// returned. On failure m_socketError/m_errorString are set for the recognized
+// WSA error codes and false is returned.
+bool TcpSocketEngine::nativeCreateSocket(HostAddress::NetworkProtocol protocol) {
+
+    // map requested protocol type onto an address family
+    int addressFamily = AF_INET;
+    if ( protocol == HostAddress::IPv6Protocol )
+        addressFamily = AF_INET6;
+
+    // attempt to create socket
+    const SOCKET newSocket = WSASocket(addressFamily, SOCK_STREAM, IPPROTO_TCP, 0, 0, WSA_FLAG_OVERLAPPED);
+
+    // success: store our socket FD
+    if ( newSocket != INVALID_SOCKET ) {
+        m_socketDescriptor = static_cast<int>(newSocket);
+        return true;
+    }
+
+    // failure: set error type/message depending on error code
+    switch ( WSAGetLastError() ) {
+
+        case WSANOTINITIALISED:
+            m_socketError = TcpSocket::UnknownSocketError;
+            m_errorString = "Windows socket functionality not properly initialized";
+            break;
+
+        case WSAEAFNOSUPPORT:
+        case WSAESOCKTNOSUPPORT:
+        case WSAEPROTOTYPE:
+        case WSAEINVAL:
+            m_socketError = TcpSocket::UnsupportedSocketOperationError;
+            m_errorString = "protocol not supported";
+            break;
+
+        case WSAEMFILE:
+        case WSAENOBUFS:
+            m_socketError = TcpSocket::SocketResourceError;
+            m_errorString = "out of resources";
+            break;
+
+        default:
+            break;
+    }
+    return false;
+}
+
+// Returns the number of bytes reported as readable by the FIONREAD ioctl on
+// this engine's socket, or -1 if the ioctl call fails.
+int64_t TcpSocketEngine::nativeNumBytesAvailable(void) const {
+
+    // FIONREAD stores its result in an unsigned long, so the I/O buffers use
+    // that exact type instead of a wider integer
+    unsigned long numBytes(0);
+    unsigned long dummy(0);
+    DWORD bytesWritten(0);
+
+    const int ioctlResult = WSAIoctl( m_socketDescriptor, FIONREAD
+                                    , &dummy, sizeof(dummy)
+                                    , &numBytes, sizeof(numBytes)
+                                    , &bytesWritten, 0, 0
+                                    );
+    if ( ioctlResult == SOCKET_ERROR )
+        return -1;
+    return static_cast<int64_t>(numBytes);
+}
+
+// Reads up to 'max' bytes from the socket into 'dest' using WSARecv().
+//
+// Returns the number of bytes read, -2 if the last WSA error code is
+// WSAEWOULDBLOCK (nothing read this time, but more coming later), or -1 if the
+// socket is invalid or any other error occurred.
+int64_t TcpSocketEngine::nativeRead(char* dest, size_t max) {
+
+    // skip if invalid socket
+    if ( !IsValid() )
+        return -1;
+
+    // describe the destination buffer for WSA
+    WSABUF recvBuffer;
+    recvBuffer.buf = dest;
+    recvBuffer.len = max;
+
+    // attempt to read bytes
+    DWORD recvFlags   = 0;
+    DWORD numReceived = 0;
+    const int recvStatus = WSARecv(m_socketDescriptor, &recvBuffer, 1, &numReceived, &recvFlags, 0, 0);
+
+    // error encountered: 'would block' is reported separately from real errors
+    if ( recvStatus == SOCKET_ERROR )
+        return ( WSAGetLastError() == WSAEWOULDBLOCK ? -2 : -1 );
+
+    // check if nothing was read this time, but more is coming
+    if ( WSAGetLastError() == WSAEWOULDBLOCK )
+        return -2;
+
+    // otherwise return number of bytes read
+    return static_cast<int64_t>(numReceived);
+}
+
+// negative value for msecs will block (forever) until the socket becomes ready
+// Waits on this engine's socket with select().
+//
+// msecs  : timeout in milliseconds; a negative value passes a null timeout to select()
+// isRead : true to wait for readability, false to wait for writability
+// Returns the value returned by select().
+int TcpSocketEngine::nativeSelect(int msecs, bool isRead) const {
+
+    // single-socket descriptor set
+    fd_set socketSet;
+    FD_ZERO(&socketSet);
+    FD_SET(m_socketDescriptor, &socketSet);
+
+    // split timeout into seconds + microseconds
+    timeval timeout;
+    timeout.tv_sec  = msecs / 1000;
+    timeout.tv_usec = (msecs % 1000) * 1000;
+    timeval* timeoutPtr = ( msecs < 0 ? 0 : &timeout );
+
+    // place the set in the read or write slot, as requested
+    fd_set* readSet  = ( isRead ? &socketSet : 0 );
+    fd_set* writeSet = ( isRead ? 0 : &socketSet );
+
+    // do 'select'
+    return select(0, readSet, writeSet, 0, timeoutPtr);
+}
+
+// Writes up to 'length' bytes from 'data' to the socket using WSASend().
+//
+// Returns the number of bytes written, 0 if the call would block, or -1 on any
+// other error. For a reset/aborted connection, m_socketError and m_errorString
+// are set as well.
+int64_t TcpSocketEngine::nativeWrite(const char* data, size_t length) {
+
+    // describe the source buffer for WSA (WSABUF wants a non-const pointer)
+    WSABUF sendBuffer;
+    sendBuffer.buf = const_cast<char*>(data);
+    sendBuffer.len = length;
+
+    // attempt to write bytes
+    DWORD sendFlags = 0;
+    DWORD numSent   = 0;
+    const int sendStatus = WSASend(m_socketDescriptor, &sendBuffer, 1, &numSent, sendFlags, 0, 0);
+
+    // no error: return number of bytes written
+    if ( sendStatus != SOCKET_ERROR )
+        return static_cast<int64_t>(numSent);
+
+    // error encountered
+    const int errorCode = WSAGetLastError();
+    if ( errorCode == WSAEWOULDBLOCK )
+        return 0;
+
+    if ( errorCode == WSAECONNRESET || errorCode == WSAECONNABORTED ) {
+        m_socketError = TcpSocket::NetworkError;
+        m_errorString = "connection reset or aborted";
+    }
+    return -1;
+}
--- /dev/null
+// ***************************************************************************
+// TcpSocket_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic TCP I/O interface
+// ***************************************************************************
+
+#include "api/internal/io/ByteArray_p.h"
+#include "api/internal/io/TcpSocket_p.h"
+#include "api/internal/io/TcpSocketEngine_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// ------------------------------------
+// static utility methods & constants
+// ------------------------------------
+
+namespace BamTools {
+namespace Internal {
+
+// constants
+// 0x4000 = 16384; passed to the read buffer's constructor and used as the
+// growth step when TcpSocket::ReadLine(int64_t) reads incrementally
+static const size_t DEFAULT_BUFFER_SIZE = 0x4000;
+
+} // namespace Internal
+} // namespace BamTools
+
+// --------------------------
+// TcpSocket implementation
+// --------------------------
+
+// Constructs a socket in the unconnected state: mode NotOpen, remote port 0,
+// no socket engine, cached descriptor -1, error UnknownSocketError.
+// The read buffer is constructed with DEFAULT_BUFFER_SIZE.
+TcpSocket::TcpSocket(void)
+    : m_mode(IBamIODevice::NotOpen)
+//    , m_localPort(0)
+    , m_remotePort(0)
+    , m_engine(0)
+    , m_cachedSocketDescriptor(-1)
+    , m_readBuffer(DEFAULT_BUFFER_SIZE)
+    , m_error(TcpSocket::UnknownSocketError)
+    , m_state(TcpSocket::UnconnectedState)
+{
+}
+
+// Disconnects (if connected) and releases the socket engine.
+TcpSocket::~TcpSocket(void) {
+
+    if ( m_state == TcpSocket::ConnectedState )
+        DisconnectFromHost();
+
+    // an engine can still exist while unconnected (InitializeSocketEngine() allocates
+    // one for every connection attempt, including failed ones), so always release it;
+    // ResetSocketEngine() does nothing to the engine if there is none
+    ResetSocketEngine();
+}
+
+// Returns the current size of the internal read buffer.
+size_t TcpSocket::BufferBytesAvailable(void) const
+{
+    return m_readBuffer.Size();
+}
+
+// Returns whether the internal read buffer reports that a line can be read.
+bool TcpSocket::CanReadLine(void) const
+{
+    return m_readBuffer.CanReadLine();
+}
+
+// Clears the internal read buffer.
+void TcpSocket::ClearBuffer(void)
+{
+    m_readBuffer.Clear();
+}
+
+// Tries each address in 'hostInfo' in order until one accepts a connection on 'port'.
+//
+// hostInfo : candidate addresses (and host name) for the remote host
+// port     : port number as text; converted with a stringstream extraction
+// mode     : I/O mode recorded in m_mode
+// Returns true on the first successful connection. Returns false (with m_error and
+// m_errorString set) if already connected, if no addresses are available, or if
+// no address could be connected to.
+bool TcpSocket::ConnectImpl(const HostInfo& hostInfo,
+                            const std::string& port,
+                            IBamIODevice::OpenMode mode)
+{
+    // refuse to connect twice
+    if ( m_state == TcpSocket::ConnectedState ) {
+        m_error = TcpSocket::SocketResourceError;
+        m_errorString = "socket already connected";
+        return false;
+    }
+
+    // start from a clean slate
+    m_hostName   = hostInfo.HostName();
+    m_mode       = mode;
+    m_state      = TcpSocket::UnconnectedState;
+    m_error      = TcpSocket::UnknownSocketError;
+//    m_localPort  = 0;
+    m_remotePort = 0;
+//    m_localAddress.Clear();
+    m_remoteAddress.Clear();
+    m_readBuffer.Clear();
+
+    // we need at least one candidate address for the requested host
+    vector<HostAddress> candidates = hostInfo.Addresses();
+    if ( candidates.empty() ) {
+        m_error = TcpSocket::HostNotFoundError;
+        m_errorString = "no IP addresses found for host";
+        return false;
+    }
+
+    // convert port string to integer
+    uint16_t portNumber(0);
+    stringstream portStream(port);
+    portStream >> portNumber;
+
+    // walk the candidate addresses in order
+    const size_t numCandidates = candidates.size();
+    for ( size_t i = 0; i < numCandidates; ++i ) {
+        const HostAddress& candidate = candidates[i];
+
+        // a failure to initialize the engine or to connect is OK here;
+        // we just move on to the next available address
+        const bool connected = InitializeSocketEngine(candidate.GetProtocol()) &&
+                               m_engine->Connect(candidate, portNumber);
+        if ( !connected )
+            continue;
+
+        // connection successful, update our state & return true
+        m_mode = mode;
+//        m_localAddress = m_engine->GetLocalAddress();
+//        m_localPort    = m_engine->GetLocalPort();
+        m_remoteAddress = m_engine->GetRemoteAddress();
+        m_remotePort    = m_engine->GetRemotePort();
+        m_cachedSocketDescriptor = m_engine->GetSocketDescriptor();
+        m_state = TcpSocket::ConnectedState;
+        return true;
+    }
+
+    // every candidate failed
+    m_error = TcpSocket::HostNotFoundError;
+    m_errorString = "could not connect to any host addresses";
+    return false;
+}
+
+// Convenience overload: formats the numeric port as text and forwards to the
+// string-port overload of ConnectToHost().
+bool TcpSocket::ConnectToHost(const string& hostName,
+                              uint16_t port,
+                              IBamIODevice::OpenMode mode)
+{
+    ostringstream portText;
+    portText << port;
+    const string portString = portText.str();
+    return ConnectToHost(hostName, portString, mode);
+}
+
+// Connects to 'hostName' on 'port' (given as text).
+//
+// If the host name is recognized as an IP address it is used directly as the only
+// candidate; otherwise the candidate addresses come from HostInfo::Lookup().
+// Returns the result of ConnectImpl().
+bool TcpSocket::ConnectToHost(const string& hostName,
+                              const string& port,
+                              IBamIODevice::OpenMode mode)
+{
+    // create new address object with requested host name
+    HostAddress requestedAddress;
+    requestedAddress.SetAddress(hostName);
+
+    HostInfo candidates;
+    if ( !requestedAddress.HasIPAddress() ) {
+        // host name was 'plain-text' ("www.foo.bar"): look up its IP address(es)
+        candidates = HostInfo::Lookup(hostName, port);
+    } else {
+        // host name was an IP address ("x.x.x.x" or IPv6 format): use it as-is
+        candidates.SetAddresses( vector<HostAddress>(1, requestedAddress) );
+    }
+
+    // attempt connection on requested port
+    return ConnectImpl(candidates, port, mode);
+}
+
+// Closes the connection (if any) and resets all connection-related state:
+// socket engine, remote address/port, host name, cached descriptor and read buffer.
+void TcpSocket::DisconnectFromHost(void) {
+
+    // close socket engine & delete
+    // (done regardless of connection state: a failed connection attempt leaves an
+    //  engine allocated while m_state is still 'unconnected', and it would otherwise
+    //  never be released; ResetSocketEngine() leaves a missing engine alone)
+    ResetSocketEngine();
+
+    // reset connection state
+//    m_localPort = 0;
+    m_remotePort = 0;
+//    m_localAddress.Clear();
+    m_remoteAddress.Clear();
+    m_hostName.clear();
+    m_cachedSocketDescriptor = -1;
+
+    // for future, make sure there's outgoing data that needs to be flushed
+    m_readBuffer.Clear();
+}
+
+TcpSocket::SocketError TcpSocket::GetError(void) const {
+ return m_error;
+}
+
+std::string TcpSocket::GetErrorString(void) const {
+ return m_errorString;
+}
+
+// Returns a copy of the stored host name (m_hostName).
+std::string TcpSocket::GetHostName(void) const
+{
+    return m_hostName;
+}
+
+//HostAddress TcpSocket::GetLocalAddress(void) const {
+// return m_localAddress;
+//}
+
+//uint16_t TcpSocket::GetLocalPort(void) const {
+// return m_localPort;
+//}
+
+// Returns a copy of the stored remote address (m_remoteAddress).
+HostAddress TcpSocket::GetRemoteAddress(void) const
+{
+    return m_remoteAddress;
+}
+
+// Returns the stored remote port (m_remotePort).
+uint16_t TcpSocket::GetRemotePort(void) const
+{
+    return m_remotePort;
+}
+
+// Returns the socket's current connection state (m_state).
+TcpSocket::SocketState TcpSocket::GetState(void) const
+{
+    return m_state;
+}
+
+// Discards any existing socket engine, allocates a fresh one, and initializes
+// it for 'protocol'. Returns the result of the engine's Initialize() call.
+bool TcpSocket::InitializeSocketEngine(HostAddress::NetworkProtocol protocol) {
+
+    // drop whatever engine we had before
+    ResetSocketEngine();
+
+    // create & initialize the replacement
+    m_engine = new TcpSocketEngine;
+    const bool initialized = m_engine->Initialize(protocol);
+    return initialized;
+}
+
+// Returns true only if a socket engine exists, the engine reports itself valid,
+// and the socket is in the connected state.
+bool TcpSocket::IsConnected(void) const {
+    return ( m_engine != 0 ) &&
+           m_engine->IsValid() &&
+           ( m_state == TcpSocket::ConnectedState );
+}
+
+// may be called in a loop until desired data amount has been read
+// returns: number of bytes read, or -1 if error
+// Reads up to 'numBytes' bytes into 'data'.
+//
+// If the read buffer already holds data, bytes are served from it and the count
+// is returned. If the buffer is empty, one fetch from the socket is attempted
+// instead: 0 is returned when the fetch succeeds (the bytes are handed out on the
+// caller's next call), -1 when it fails or when there is no socket engine.
+int64_t TcpSocket::Read(char* data, const unsigned int numBytes) {
+
+    // buffer is empty: we need to fetch data from the socket
+    if ( m_readBuffer.IsEmpty() ) {
+
+        // first make sure we have a valid socket engine
+        // TODO: set error string/state?
+        if ( m_engine == 0 )
+            return -1;
+
+        // since this should be called in a loop, the actual bytes get pulled
+        // from the buffer on the next iteration
+        if ( ReadFromSocket() )
+            return 0;
+        return -1;
+    }
+
+    // buffer has data: just return it
+    const size_t numBytesCopied = m_readBuffer.Read(data, numBytes);
+    return static_cast<int64_t>(numBytesCopied);
+}
+
+// Pulls whatever data is currently available on the socket into the read buffer.
+//
+// Waits up to 5000 ms for the socket to become readable, asks the engine how many
+// bytes are available (falling back to 4096 when it reports 0), reserves that much
+// buffer space and reads into it. Any reserved space that was not filled is handed
+// back to the buffer afterwards.
+//
+// Returns false (with m_errorString set) if there is no usable engine, on timeout,
+// or on any wait/read error; true otherwise - including the "no data right now"
+// case, in which the buffer is left unchanged.
+bool TcpSocket::ReadFromSocket(void) {
+
+    // callers such as WaitForReadLine() can get here without an engine
+    // (never connected, or engine already reset)
+    if ( m_engine == 0 ) {
+        m_errorString = "TcpSocket::ReadFromSocket - socket disconnected";
+        return false;
+    }
+
+    // check for any socket engine errors
+    if ( !m_engine->IsValid() ) {
+        m_errorString = "TcpSocket::ReadFromSocket - socket disconnected";
+        ResetSocketEngine();
+        return false;
+    }
+
+    // wait for ready read
+    bool timedOut;
+    bool isReadyRead = m_engine->WaitForRead(5000, &timedOut);
+
+    // if not ready
+    if ( !isReadyRead ) {
+
+        // if we simply timed out
+        if ( timedOut ) {
+            m_errorString = "TcpSocket::ReadFromSocket - timed out waiting for ready read";
+            // get error from engine ?
+            return false;
+        }
+
+        // otherwise, there was an error
+        else {
+            m_errorString = "TcpSocket::ReadFromSocket - encountered error while waiting for ready read";
+            // get error from engine ?
+            return false;
+        }
+    }
+
+    // get number of bytes available from socket
+    // (if 0, still try to read some data so we don't trigger any OS event behavior
+    //  that respond to repeated access to a remote closed socket)
+    int64_t bytesToRead = m_engine->NumBytesAvailable();
+    if ( bytesToRead < 0 ) {
+        m_errorString = "TcpSocket::ReadFromSocket - encountered error while determining numBytesAvailable";
+        // get error from engine ?
+        return false;
+    }
+    else if ( bytesToRead == 0 )
+        bytesToRead = 4096;
+
+    // make space in buffer & read from socket
+    char* buffer = m_readBuffer.Reserve(bytesToRead);
+    const int64_t numBytesRead = m_engine->Read(buffer, bytesToRead);
+
+    // hand back the part of the reserved space that was not filled, so it is never
+    // served to readers as data; this covers short reads, the "no data, but not an
+    // error" result (-2) and read errors (-1)
+    // NOTE(review): assumes Reserve(n) grows the buffer by n bytes and Chop(n)
+    // removes n bytes from its end, as the pre-existing handling of -2 implies
+    const int64_t bytesStored = ( numBytesRead > 0 ? numBytesRead : 0 );
+    if ( bytesStored < bytesToRead )
+        m_readBuffer.Chop(bytesToRead - bytesStored);
+
+    // if error while reading
+    if ( numBytesRead == -1 ) {
+        m_errorString = "TcpSocket::ReadFromSocket - encountered error while reading bytes";
+        // get error from engine ?
+        return false;
+    }
+
+    // return success
+    return true;
+}
+
+// Reads one line from the socket and returns it as a string.
+//
+// max : maximum number of bytes to use for the line buffer (this includes the
+//       terminating null written by the char* overload). A value of 0 - or any
+//       value that cannot be represented as a usable buffer size - means "no
+//       limit": the line is then read incrementally in DEFAULT_BUFFER_SIZE steps.
+// Returns the line read, or an empty string if nothing could be read.
+string TcpSocket::ReadLine(int64_t max) {
+
+    // prep result byte buffer
+    ByteArray result;
+
+    // translate the caller's limit into a buffer size (0 => no usable limit)
+    // the range check is done on unsigned values: casting string::npos to a signed
+    // 64-bit integer yields -1 where size_t is 64 bits wide, which would make every
+    // non-negative 'max' look out of range
+    size_t bufferMax = 0;
+    if ( max > 0 && static_cast<uint64_t>(max) < static_cast<uint64_t>(string::npos) )
+        bufferMax = static_cast<size_t>(max);
+    if ( bufferMax != 0 )
+        result.Resize(bufferMax);
+
+    // read data
+    int64_t readBytes(0);
+    if ( result.Size() == 0 ) {
+
+        // no fixed-size buffer: grow it step by step until the line is complete
+        if ( bufferMax == 0 )
+            bufferMax = string::npos;
+
+        // the first iteration needs to leave an extra byte for the terminating null
+        result.Resize(1);
+
+        int64_t readResult;
+        do {
+            result.Resize( static_cast<size_t>(std::min(bufferMax, result.Size() + DEFAULT_BUFFER_SIZE)) );
+            readResult = ReadLine(result.Data()+readBytes, result.Size()-readBytes);
+            if ( readResult > 0 || readBytes == 0 )
+                readBytes += readResult;
+        } while ( readResult == static_cast<int64_t>(DEFAULT_BUFFER_SIZE) &&
+                  result[static_cast<size_t>(readBytes-1)] != '\n' );
+
+    } else
+        readBytes = ReadLine(result.Data(), result.Size());
+
+    // clean up byte buffer
+    if ( readBytes <= 0 )
+        result.Clear();
+    else
+        result.Resize(static_cast<size_t>(readBytes));
+
+    // return byte buffer as string
+    return string( result.ConstData(), result.Size() );
+}
+
+// Reads one line into 'dest', which has room for 'max' bytes.
+//
+// Waits until the read buffer can supply a line, copies at most max-1 bytes,
+// converts a trailing "\r\n" into "\n", and null-terminates the result.
+// Returns the number of bytes stored (not counting the null terminator), or -1
+// if waiting for a line failed or 'max' is smaller than 2.
+int64_t TcpSocket::ReadLine(char* dest, size_t max) {
+
+    // wait for buffer to contain line contents
+    const bool lineAvailable = WaitForReadLine();
+    if ( !lineAvailable ) {
+        m_errorString = "TcpSocket::ReadLine - error waiting for read line";
+        return -1;
+    }
+
+    // need at least one data byte plus the null terminator
+    if ( max < 2 )
+        return -1;
+    const size_t usableLength = max - 1;
+
+    // read from buffer
+    int64_t lineLength = m_readBuffer.ReadLine(dest, usableLength);
+
+    // adjust for windows-style '\r\n': drop the '\r', keep the '\n'
+    const bool endsWithNewline = ( lineLength != 0 && dest[lineLength-1] == '\n' );
+    if ( endsWithNewline && lineLength > 1 && dest[lineLength-2] == '\r' ) {
+        --lineLength;
+        dest[lineLength-1] = '\n';
+    }
+
+    // null terminate & return number of bytes read
+    dest[lineLength] = '\0';
+    return lineLength;
+}
+
+// Closes and deletes the socket engine (if one exists), then marks the socket
+// as unconnected and clears the cached socket descriptor.
+void TcpSocket::ResetSocketEngine(void) {
+
+    // shut down socket engine
+    if ( m_engine != 0 ) {
+        m_engine->Close();
+        delete m_engine;
+        m_engine = 0;
+    }
+
+    // reset cached socket handle & our state
+    m_cachedSocketDescriptor = -1;
+    m_state = TcpSocket::UnconnectedState;
+}
+
+// Keeps fetching data from the socket until the read buffer can supply a line.
+// Returns true once a line is available (immediately, if one already is),
+// false as soon as a fetch from the socket fails.
+bool TcpSocket::WaitForReadLine(void) {
+
+    for ( ;; ) {
+
+        // done as soon as a line can be read
+        if ( CanReadLine() )
+            return true;
+
+        // otherwise pull more data; give up on failure
+        if ( !ReadFromSocket() )
+            return false;
+    }
+}
+
+// Single-shot attempt to write 'numBytes' bytes from 'data' to the socket.
+//
+// Not buffered: this method purely exists to send 'small' HTTP requests/FTP
+// commands from client to server.
+//
+// Returns the number of bytes the engine reports as written, 0 if the socket did
+// not become writable within 3000 ms (caller may simply try again), or -1 if
+// there is no socket engine or the engine reported an error while waiting.
+int64_t TcpSocket::Write(const char* data, const unsigned int numBytes) {
+
+    // make sure we have a socket engine to write through (same guard as Read())
+    if ( m_engine == 0 ) {
+        m_errorString = "TcpSocket::Write - socket not connected";
+        return -1;
+    }
+
+    int64_t bytesWritten(0);
+
+    // wait for our socket to be write-able
+    bool timedOut;
+    bool isReadyWrite = m_engine->WaitForWrite(3000, &timedOut);
+    if ( isReadyWrite )
+        bytesWritten = m_engine->Write(data, numBytes);
+    else {
+        // timeout is OK (with current setup), we'll just return 0 & try again
+        // but we need to report if engine encountered some other error
+        if ( !timedOut ) {
+            // TODO: set error string
+            bytesWritten = -1;
+        }
+    }
+
+    // return actual number of bytes written to socket
+    return bytesWritten;
+}
--- /dev/null
+// ***************************************************************************
+// TcpSocket_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 November 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic TCP I/O interface
+// ***************************************************************************
+
+#ifndef TCPSOCKET_P_H
+#define TCPSOCKET_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/IBamIODevice.h"
+#include "api/internal/io/HostInfo_p.h"
+#include "api/internal/io/RollingBuffer_p.h"
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class TcpSocketEngine;
+
+// Client-side TCP socket with an internal read buffer.
+// The platform-specific socket work is delegated to a TcpSocketEngine.
+// NOTE(review): the class holds a raw TcpSocketEngine pointer and declares no
+// copy constructor/assignment operator - instances presumably must not be copied.
+class TcpSocket {
+
+ // enums
+ public:
+ // error categories, as reported by GetError()
+ enum SocketError { UnknownSocketError = -1
+ , ConnectionRefusedError = 0
+ , RemoteHostClosedError
+ , HostNotFoundError
+ , SocketAccessError
+ , SocketResourceError
+ , SocketTimeoutError
+ , NetworkError
+ , UnsupportedSocketOperationError
+ };
+
+ // connection states, as reported by GetState()
+ enum SocketState { UnconnectedState = 0
+ , ConnectedState
+ };
+
+ // ctor & dtor
+ public:
+ TcpSocket(void);
+ ~TcpSocket(void);
+
+ // TcpSocket interface
+ public:
+
+ // connection methods
+ // (both ConnectToHost overloads return true on success; the port may be given
+ //  either as a number or as text)
+ bool ConnectToHost(const std::string& hostName,
+ const uint16_t port, // Connect("host", 80)
+ IBamIODevice::OpenMode mode = IBamIODevice::ReadOnly);
+ bool ConnectToHost(const std::string& hostName,
+ const std::string& port, // Connect("host", "80")
+ IBamIODevice::OpenMode mode = IBamIODevice::ReadOnly);
+ void DisconnectFromHost(void);
+ bool IsConnected(void) const;
+
+ // I/O methods
+ // (Read, Write and the char* ReadLine overload return -1 on error)
+ size_t BufferBytesAvailable(void) const;
+ bool CanReadLine(void) const;
+ void ClearBuffer(void); // force buffer to clear (not a 'flush', just a 'discard')
+ int64_t Read(char* data, const unsigned int numBytes);
+ std::string ReadLine(int64_t max = 0);
+ int64_t ReadLine(char* dest, size_t max);
+ bool WaitForReadLine(void);
+ int64_t Write(const char* data, const unsigned int numBytes);
+
+ // connection values
+ std::string GetHostName(void) const;
+// HostAddress GetLocalAddress(void) const;
+// uint16_t GetLocalPort(void) const;
+ HostAddress GetRemoteAddress(void) const;
+ uint16_t GetRemotePort(void) const;
+
+ // connection status
+ TcpSocket::SocketError GetError(void) const;
+ TcpSocket::SocketState GetState(void) const;
+ std::string GetErrorString(void) const;
+
+ // internal methods
+ private:
+ bool ConnectImpl(const HostInfo& hostInfo,
+ const std::string& port,
+ IBamIODevice::OpenMode mode);
+ bool InitializeSocketEngine(HostAddress::NetworkProtocol protocol);
+ bool ReadFromSocket(void);
+ void ResetSocketEngine(void);
+
+ // data members
+ private:
+ // I/O mode recorded at connect time
+ IBamIODevice::OpenMode m_mode;
+
+ // remote endpoint description
+ std::string m_hostName;
+// uint16_t m_localPort;
+ uint16_t m_remotePort;
+// HostAddress m_localAddress;
+ HostAddress m_remoteAddress;
+
+ // socket engine (allocated with new, released in ResetSocketEngine) and the
+ // descriptor value copied from it after a successful connection
+ TcpSocketEngine* m_engine;
+ int m_cachedSocketDescriptor;
+
+ // incoming data is staged here before being handed to callers
+ RollingBuffer m_readBuffer;
+
+ // current error/state information
+ TcpSocket::SocketError m_error;
+ TcpSocket::SocketState m_state;
+ std::string m_errorString;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // TCPSOCKET_P_H
--- /dev/null
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/sam
+# ==========================
+
+# directory holding the internal SAM sources, relative to ${InternalDir}
+set ( InternalSamDir "${InternalDir}/sam" )
+
+# list of internal SAM source files; PARENT_SCOPE sets the variable in the
+# including (parent) scope rather than in this directory's scope
+set ( InternalSamSources
+ ${InternalSamDir}/SamFormatParser_p.cpp
+ ${InternalSamDir}/SamFormatPrinter_p.cpp
+ ${InternalSamDir}/SamHeaderValidator_p.cpp
+
+ PARENT_SCOPE # <-- leave this last
+)
+
--- /dev/null
+// ***************************************************************************
+// SamFormatParser.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for parsing SAM header text into SamHeader object
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamFormatParser_p.h"
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// Binds the parser to the header object that Parse() will populate.
+SamFormatParser::SamFormatParser(SamHeader& header)
+    : m_header(header)
+{
+}
+
+// Nothing to release: the parser only holds a reference to the header.
+SamFormatParser::~SamFormatParser(void)
+{
+}
+
+// Replaces the bound header's contents with the data parsed from 'headerText'.
+//
+// The header is cleared first. Empty text is valid and leaves the header empty;
+// otherwise each line of the text is handed to ParseSamLine().
+void SamFormatParser::Parse(const string& headerText) {
+
+    // clear header's prior contents
+    m_header.Clear();
+
+    // otherwise parse SAM lines, one at a time
+    // (an empty header is OK, there is just nothing to process)
+    if ( !headerText.empty() ) {
+        istringstream textStream(headerText);
+        for ( string currentLine(""); getline(textStream, currentLine); )
+            ParseSamLine(currentLine);
+    }
+}
+
+// Dispatches one header line to the parser for its record type.
+//
+// The first 3 characters select the record type; everything from index 4 on is
+// passed to the record-specific parser. Lines shorter than 5 characters are
+// ignored. Throws BamException if the leading token is not recognized.
+void SamFormatParser::ParseSamLine(const string& line) {
+
+    // skip if line is not long enough to contain true values
+    if ( line.length() < 5 )
+        return;
+
+    // split line into its leading token & the remaining fields
+    const string beginToken = line.substr(0,3);
+    const string fields     = line.substr(4);
+
+    // hand the fields to the matching record parser
+    if ( beginToken == Constants::SAM_HD_BEGIN_TOKEN ) { ParseHDLine(fields); return; }
+    if ( beginToken == Constants::SAM_SQ_BEGIN_TOKEN ) { ParseSQLine(fields); return; }
+    if ( beginToken == Constants::SAM_RG_BEGIN_TOKEN ) { ParseRGLine(fields); return; }
+    if ( beginToken == Constants::SAM_PG_BEGIN_TOKEN ) { ParsePGLine(fields); return; }
+    if ( beginToken == Constants::SAM_CO_BEGIN_TOKEN ) { ParseCOLine(fields); return; }
+
+    // no match
+    const string message = string("unknown token: ") + beginToken;
+    throw BamException("SamFormatParser::ParseSamLine", message);
+}
+
+// Parses the fields of an @HD line into the header's Version, SortOrder and
+// GroupOrder values.
+//
+// line : tab-separated "XX:value" fields (text after the "@HD" token)
+// Throws BamException on a malformed field, on an unknown tag, or if the header
+// has no version once all fields are processed.
+void SamFormatParser::ParseHDLine(const string& line) {
+
+    // split HD lines into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field must be at least "XX:" long; shorter ones would make substr(3)
+        // throw std::out_of_range instead of the parser's own exception type
+        if ( token.length() < 3 ) {
+            const string message = string("malformed HD field: ") + token;
+            throw BamException("SamFormatParser::ParseHDLine", message);
+        }
+
+        // get tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set header contents
+        if      ( tokenTag == Constants::SAM_HD_VERSION_TAG    ) m_header.Version    = tokenValue;
+        else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG  ) m_header.SortOrder  = tokenValue;
+        else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
+        else {
+            const string message = string("unknown HD tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseHDLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !m_header.HasVersion() )
+        throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
+}
+
+// Parses the fields of an @SQ line into a SamSequence and adds it to the
+// header's sequence list.
+//
+// line : tab-separated "XX:value" fields (text after the "@SQ" token)
+// Throws BamException on a malformed field, on an unknown tag, or if the
+// sequence has no name or no length once all fields are processed.
+void SamFormatParser::ParseSQLine(const string& line) {
+
+    SamSequence seq;
+
+    // split SQ line into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field must be at least "XX:" long; shorter ones would make substr(3)
+        // throw std::out_of_range instead of the parser's own exception type
+        if ( token.length() < 3 ) {
+            const string message = string("malformed SQ field: ") + token;
+            throw BamException("SamFormatParser::ParseSQLine", message);
+        }
+
+        // get tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set sequence contents
+        if      ( tokenTag == Constants::SAM_SQ_NAME_TAG       ) seq.Name       = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG     ) seq.Length     = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG   ) seq.Checksum   = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG    ) seq.Species    = tokenValue;
+        else if ( tokenTag == Constants::SAM_SQ_URI_TAG        ) seq.URI        = tokenValue;
+        else {
+            const string message = string("unknown SQ tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseSQLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !seq.HasName() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
+    if ( !seq.HasLength() )
+        throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
+
+    // store SAM sequence entry
+    m_header.Sequences.Add(seq);
+}
+
+// Parses the fields of an @RG line into a SamReadGroup and adds it to the
+// header's read group list.
+//
+// line : tab-separated "XX:value" fields (text after the "@RG" token)
+// Throws BamException on a malformed field, on an unknown tag, or if the read
+// group has no ID once all fields are processed.
+void SamFormatParser::ParseRGLine(const string& line) {
+
+    SamReadGroup rg;
+
+    // split string into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field must be at least "XX:" long; shorter ones would make substr(3)
+        // throw std::out_of_range instead of the parser's own exception type
+        if ( token.length() < 3 ) {
+            const string message = string("malformed RG field: ") + token;
+            throw BamException("SamFormatParser::ParseRGLine", message);
+        }
+
+        // get token tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set read group contents
+        if      ( tokenTag == Constants::SAM_RG_ID_TAG                  ) rg.ID                   = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG         ) rg.Description          = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG           ) rg.FlowOrder            = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG         ) rg.KeySequence          = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG             ) rg.Library              = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG        ) rg.PlatformUnit         = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize  = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG      ) rg.ProductionDate       = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG             ) rg.Program              = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG              ) rg.Sample               = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG           ) rg.SequencingCenter     = tokenValue;
+        else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG       ) rg.SequencingTechnology = tokenValue;
+        else {
+            const string message = string("unknown RG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParseRGLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !rg.HasID() )
+        throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
+
+    // store SAM read group entry
+    m_header.ReadGroups.Add(rg);
+}
+
+// Parses the fields of an @PG line into a SamProgram and adds it to the
+// header's program list.
+//
+// line : tab-separated "XX:value" fields (text after the "@PG" token)
+// Throws BamException on a malformed field, on an unknown tag, or if the program
+// record has no ID once all fields are processed.
+void SamFormatParser::ParsePGLine(const string& line) {
+
+    SamProgram pg;
+
+    // split string into tokens
+    vector<string> tokens = Split(line, Constants::SAM_TAB);
+
+    // iterate over tokens
+    vector<string>::const_iterator tokenIter = tokens.begin();
+    vector<string>::const_iterator tokenEnd  = tokens.end();
+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {
+        const string& token = (*tokenIter);
+
+        // a field must be at least "XX:" long; shorter ones would make substr(3)
+        // throw std::out_of_range instead of the parser's own exception type
+        if ( token.length() < 3 ) {
+            const string message = string("malformed PG field: ") + token;
+            throw BamException("SamFormatParser::ParsePGLine", message);
+        }
+
+        // get token tag/value
+        const string tokenTag   = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set program record contents
+        if      ( tokenTag == Constants::SAM_PG_ID_TAG              ) pg.ID                = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_NAME_TAG            ) pg.Name              = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG     ) pg.CommandLine       = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
+        else if ( tokenTag == Constants::SAM_PG_VERSION_TAG         ) pg.Version           = tokenValue;
+        else {
+            const string message = string("unknown PG tag: ") + tokenTag;
+            throw BamException("SamFormatParser::ParsePGLine", message);
+        }
+    }
+
+    // check for required tags
+    if ( !pg.HasID() )
+        throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
+
+    // store SAM program entry
+    m_header.Programs.Add(pg);
+}
+
+// Stores the text of an @CO line, unmodified, at the end of the header's
+// comment list.
+void SamFormatParser::ParseCOLine(const string& line)
+{
+    m_header.Comments.push_back(line);
+}
+
+// Splits 'line' into the pieces separated by 'delim'.
+//
+// Empty pieces between consecutive delimiters are kept; an empty line yields no
+// pieces, and a trailing delimiter does not produce a trailing empty piece.
+const vector<string> SamFormatParser::Split(const string& line, const char delim) {
+
+    vector<string> pieces;
+
+    // walk the line from delimiter to delimiter
+    string::size_type pieceBegin = 0;
+    while ( pieceBegin < line.size() ) {
+
+        const string::size_type delimPos = line.find(delim, pieceBegin);
+
+        // no further delimiter: the rest of the line is the last piece
+        if ( delimPos == string::npos ) {
+            pieces.push_back( line.substr(pieceBegin) );
+            break;
+        }
+
+        // store text up to (not including) the delimiter & skip past it
+        pieces.push_back( line.substr(pieceBegin, delimPos - pieceBegin) );
+        pieceBegin = delimPos + 1;
+    }
+    return pieces;
+}
--- /dev/null
+// ***************************************************************************
+// SamFormatParser.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 23 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for parsing SAM header text into SamHeader object
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PARSER_H
+#define SAM_FORMAT_PARSER_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+// Parses SAM header text and stores the results in the SamHeader object it was
+// constructed with. The header is held by reference.
+class SamFormatParser {
+
+ // ctor & dtor
+ public:
+ SamFormatParser(BamTools::SamHeader& header);
+ ~SamFormatParser(void);
+
+ // parse text & populate header data
+ public:
+ void Parse(const std::string& headerText);
+
+ // internal methods
+ private:
+ // dispatches a single header line to one of the record-specific parsers below
+ void ParseSamLine(const std::string& line);
+ void ParseHDLine(const std::string& line);
+ void ParseSQLine(const std::string& line);
+ void ParseRGLine(const std::string& line);
+ void ParsePGLine(const std::string& line);
+ void ParseCOLine(const std::string& line);
+ // splits 'line' on 'delim' and returns the resulting tokens
+ const std::vector<std::string> Split(const std::string& line, const char delim);
+
+ // data members
+ private:
+ // header being populated
+ SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PARSER_H
--- /dev/null
+// ***************************************************************************
+// SamFormatPrinter.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamFormatPrinter_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// ------------------------
+
+// Builds a single SAM header field of the form <TAB><tag><COLON><value>.
+static inline
+const string FormatTag(const string& tag, const string& value) {
+    string field;
+    field += Constants::SAM_TAB;
+    field += tag;
+    field += Constants::SAM_COLON;
+    field += value;
+    return field;
+}
+
+// ---------------------------------
+// SamFormatPrinter implementation
+// ---------------------------------
+
+// Binds the printer to 'header'. Only a reference is stored, so the header
+// must remain alive for as long as the printer is used.
+SamFormatPrinter::SamFormatPrinter(const SamHeader& header)
+ : m_header(header)
+{ }
+
+// nothing to release: the printer does not own the header
+SamFormatPrinter::~SamFormatPrinter(void) { }
+
+// Returns the complete SAM-formatted header text. Sections are appended in
+// the fixed order @HD, @SQ, @RG, @PG, @CO.
+const string SamFormatPrinter::ToString(void) const {
+
+    // start from an empty buffer; every Print* helper appends its own section
+    stringstream formatted;
+
+    PrintHD(formatted);
+    PrintSQ(formatted);
+    PrintRG(formatted);
+    PrintPG(formatted);
+    PrintCO(formatted);
+
+    return formatted.str();
+}
+
+// Appends the @HD line to 'out'. Nothing is written when the header has no version.
+void SamFormatPrinter::PrintHD(std::stringstream& out) const {
+
+    // the whole @HD line is skipped unless a version is present
+    if ( !m_header.HasVersion() )
+        return;
+
+    // @HD VN:<Version>
+    out << Constants::SAM_HD_BEGIN_TOKEN;
+    out << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version);
+
+    // SO:<SortOrder>
+    if ( m_header.HasSortOrder() ) {
+        out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder);
+    }
+
+    // GO:<GroupOrder>
+    if ( m_header.HasGroupOrder() ) {
+        out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder);
+    }
+
+    // terminate the line
+    out << endl;
+}
+
+// Appends one @SQ line to 'out' for every entry in the header's sequence dictionary.
+void SamFormatPrinter::PrintSQ(std::stringstream& out) const {
+
+    SamSequenceConstIterator current = m_header.Sequences.ConstBegin();
+    const SamSequenceConstIterator last = m_header.Sequences.ConstEnd();
+    while ( current != last ) {
+        const SamSequence& entry = (*current);
+
+        // fields always written: @SQ SN:<Name> LN:<Length>
+        out << Constants::SAM_SQ_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_SQ_NAME_TAG, entry.Name);
+        out << FormatTag(Constants::SAM_SQ_LENGTH_TAG, entry.Length);
+
+        // remaining tags are written only when the entry carries them
+
+        // AS:<AssemblyID>
+        if ( entry.HasAssemblyID() ) {
+            out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, entry.AssemblyID);
+        }
+
+        // M5:<Checksum>
+        if ( entry.HasChecksum() ) {
+            out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, entry.Checksum);
+        }
+
+        // SP:<Species>
+        if ( entry.HasSpecies() ) {
+            out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, entry.Species);
+        }
+
+        // UR:<URI>
+        if ( entry.HasURI() ) {
+            out << FormatTag(Constants::SAM_SQ_URI_TAG, entry.URI);
+        }
+
+        // terminate the line and move on
+        out << endl;
+        ++current;
+    }
+}
+
+// Appends one @RG line to 'out' for every entry in the header's read group dictionary.
+void SamFormatPrinter::PrintRG(std::stringstream& out) const {
+
+    SamReadGroupConstIterator current = m_header.ReadGroups.ConstBegin();
+    const SamReadGroupConstIterator last = m_header.ReadGroups.ConstEnd();
+    while ( current != last ) {
+        const SamReadGroup& group = (*current);
+
+        // field always written: @RG ID:<ID>
+        out << Constants::SAM_RG_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_RG_ID_TAG, group.ID);
+
+        // remaining tags are written only when the read group carries them
+
+        // CN:<SequencingCenter>
+        if ( group.HasSequencingCenter() ) {
+            out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, group.SequencingCenter);
+        }
+
+        // DS:<Description>
+        if ( group.HasDescription() ) {
+            out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, group.Description);
+        }
+
+        // DT:<ProductionDate>
+        if ( group.HasProductionDate() ) {
+            out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, group.ProductionDate);
+        }
+
+        // FO:<FlowOrder>
+        if ( group.HasFlowOrder() ) {
+            out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, group.FlowOrder);
+        }
+
+        // KS:<KeySequence>
+        if ( group.HasKeySequence() ) {
+            out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, group.KeySequence);
+        }
+
+        // LB:<Library>
+        if ( group.HasLibrary() ) {
+            out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, group.Library);
+        }
+
+        // PG:<Program>
+        if ( group.HasProgram() ) {
+            out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, group.Program);
+        }
+
+        // PI:<PredictedInsertSize>
+        if ( group.HasPredictedInsertSize() ) {
+            out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, group.PredictedInsertSize);
+        }
+
+        // PL:<SequencingTechnology>
+        if ( group.HasSequencingTechnology() ) {
+            out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, group.SequencingTechnology);
+        }
+
+        // PU:<PlatformUnit>
+        if ( group.HasPlatformUnit() ) {
+            out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, group.PlatformUnit);
+        }
+
+        // SM:<Sample>
+        if ( group.HasSample() ) {
+            out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, group.Sample);
+        }
+
+        // terminate the line and move on
+        out << endl;
+        ++current;
+    }
+}
+
+// Appends one @PG line to 'out' for every program record in the header.
+void SamFormatPrinter::PrintPG(std::stringstream& out) const {
+
+    SamProgramConstIterator current = m_header.Programs.ConstBegin();
+    const SamProgramConstIterator last = m_header.Programs.ConstEnd();
+    while ( current != last ) {
+        const SamProgram& program = (*current);
+
+        // field always written: @PG ID:<ID>
+        out << Constants::SAM_PG_BEGIN_TOKEN;
+        out << FormatTag(Constants::SAM_PG_ID_TAG, program.ID);
+
+        // remaining tags are written only when the record carries them
+
+        // PN:<Name>
+        if ( program.HasName() ) {
+            out << FormatTag(Constants::SAM_PG_NAME_TAG, program.Name);
+        }
+
+        // CL:<CommandLine>
+        if ( program.HasCommandLine() ) {
+            out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, program.CommandLine);
+        }
+
+        // PP:<PreviousProgramID>
+        if ( program.HasPreviousProgramID() ) {
+            out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, program.PreviousProgramID);
+        }
+
+        // VN:<Version>
+        if ( program.HasVersion() ) {
+            out << FormatTag(Constants::SAM_PG_VERSION_TAG, program.Version);
+        }
+
+        // terminate the line and move on
+        out << endl;
+        ++current;
+    }
+}
+
+// Appends one "@CO<TAB><comment>" line to 'out' for every header comment.
+void SamFormatPrinter::PrintCO(std::stringstream& out) const {
+
+    const vector<string>& comments = m_header.Comments;
+    const size_t numComments = comments.size();
+    for ( size_t i = 0; i < numComments; ++i ) {
+
+        // @CO <Comment>
+        out << Constants::SAM_CO_BEGIN_TOKEN;
+        out << Constants::SAM_TAB;
+        out << comments[i];
+        out << endl;
+    }
+}
--- /dev/null
+// ***************************************************************************
+// SamFormatPrinter_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PRINTER_H
+#define SAM_FORMAT_PRINTER_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+// Renders the data held in a SamHeader as SAM-formatted header text.
+class SamFormatPrinter {
+
+ // ctor & dtor
+ public:
+ // keeps a reference to 'header' (see m_header); the header must outlive the printer
+ SamFormatPrinter(const BamTools::SamHeader& header);
+ ~SamFormatPrinter(void);
+
+ // generates SAM-formatted string from header data
+ public:
+ const std::string ToString(void) const;
+
+ // internal methods
+ private:
+ // each helper appends its header section to 'out':
+ // @HD line, @SQ lines, @RG lines, @PG lines, @CO lines
+ void PrintHD(std::stringstream& out) const;
+ void PrintSQ(std::stringstream& out) const;
+ void PrintRG(std::stringstream& out) const;
+ void PrintPG(std::stringstream& out) const;
+ void PrintCO(std::stringstream& out) const;
+
+ // data members
+ private:
+ // header being printed (non-owning, read-only reference)
+ const SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PRINTER_H
--- /dev/null
+// ***************************************************************************
+// SamHeaderValidator.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for validating SamHeader data
+// ***************************************************************************
+
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/sam/SamHeaderValidator_p.h"
+#include "api/internal/sam/SamHeaderVersion_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cctype>
+#include <set>
+#include <sstream>
+using namespace std;
+
+// ------------------------
+// static utility methods
+// -------------------------
+
+// Returns true when 'lhs' and 'rhs' have the same length and every pair of
+// characters matches after conversion with toupper() (current C locale).
+// This is a *basic* per-byte comparison; it is not aware of multi-byte encodings.
+static
+bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
+
+    // strings of different lengths can never match
+    const size_t length = lhs.length();
+    if ( length != rhs.length() )
+        return false;
+
+    // Compare one character at a time. Each char is routed through
+    // unsigned char first: plain char may be signed, and handing toupper()
+    // a negative value (other than EOF) is undefined behavior.
+    for ( size_t i = 0; i < length; ++i ) {
+        const int lhsUpper = toupper( static_cast<unsigned char>(lhs[i]) );
+        const int rhsUpper = toupper( static_cast<unsigned char>(rhs[i]) );
+        if ( lhsUpper != rhsUpper )
+            return false;
+    }
+
+    // every character matched
+    return true;
+}
+
+// ------------------------------------------------------------------------
+// Allow validation rules to vary, as needed, between SAM header versions
+//
+// use SAM_VERSION_X_Y to tag important changes
+//
+// Together with SamHeaderVersion's comparison operators, they will allow
+// for comparisons like:
+//     if ( m_version < SAM_VERSION_2_0 ) {
+//         // use some older rule
+//     } else {
+//         // use rule introduced with version 2.0
+//     }
+
+// file-local version tags, one per SAM header version 1.0 through 1.4,
+// each built from its (major, minor) pair
+static const SamHeaderVersion SAM_VERSION_1_0 = SamHeaderVersion(1,0);
+static const SamHeaderVersion SAM_VERSION_1_1 = SamHeaderVersion(1,1);
+static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);
+static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);
+static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);
+
+// TODO: This functionality is currently unused.
+// Make validation "version-aware."
+//
+// ------------------------------------------------------------------------
+
+// message decoration: AddError()/AddWarning() wrap every stored message as
+// <prefix> + message + NEWLINE
+const string SamHeaderValidator::ERROR_PREFIX = "ERROR: ";
+const string SamHeaderValidator::WARN_PREFIX = "WARNING: ";
+const string SamHeaderValidator::NEWLINE = "\n";
+
+// Binds the validator to 'header'. Only a reference is stored, so the header
+// must remain alive for as long as the validator is used.
+SamHeaderValidator::SamHeaderValidator(const SamHeader& header)
+ : m_header(header)
+{ }
+
+// nothing to release: the validator does not own the header
+SamHeaderValidator::~SamHeaderValidator(void) { }
+
+void SamHeaderValidator::AddError(const string& message) {
+ m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);
+}
+
+// Records a warning as WARN_PREFIX + message + NEWLINE for later printing.
+void SamHeaderValidator::AddWarning(const string& message) {
+    string entry(WARN_PREFIX);
+    entry += message;
+    entry += NEWLINE;
+    m_warningMessages.push_back(entry);
+}
+
+void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
+
+ // skip if no error messages
+ if ( m_errorMessages.empty() )
+ return;
+
+ // print error header line
+ stream << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
+
+ // print each error message
+ vector<string>::const_iterator errorIter = m_errorMessages.begin();
+ vector<string>::const_iterator errorEnd = m_errorMessages.end();
+ for ( ; errorIter != errorEnd; ++errorIter )
+ stream << (*errorIter);
+}
+
+// Writes all recorded messages to 'stream': errors first, then warnings.
+void SamHeaderValidator::PrintMessages(ostream& stream) {
+ PrintErrorMessages(stream);
+ PrintWarningMessages(stream);
+}
+
+// Writes a summary line followed by every recorded warning to 'stream'.
+// Writes nothing at all when no warnings were recorded.
+void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
+
+    const size_t numWarnings = m_warningMessages.size();
+    if ( numWarnings == 0 )
+        return;
+
+    // summary line
+    stream << "* SAM header has " << numWarnings << " warnings:" << endl;
+
+    // stored messages already end with NEWLINE, so they are written as-is
+    for ( size_t i = 0; i < numWarnings; ++i )
+        stream << m_warningMessages[i];
+}
+
+// Entry point for validation. Every section is always checked (no
+// short-circuiting), so all problems are recorded in a single pass.
+// Returns true only if every section is valid.
+bool SamHeaderValidator::Validate(void) {
+    const bool metadataOk   = ValidateMetadata();
+    const bool sequencesOk  = ValidateSequenceDictionary();
+    const bool readGroupsOk = ValidateReadGroupDictionary();
+    const bool programsOk   = ValidateProgramChain();
+    return ( metadataOk && sequencesOk && readGroupsOk && programsOk );
+}
+
+// Checks the header 'metadata' tags (version, sort order, group order).
+// All three checks always run; returns true only if all of them pass.
+bool SamHeaderValidator::ValidateMetadata(void) {
+    const bool versionOk    = ValidateVersion();
+    const bool sortOrderOk  = ValidateSortOrder();
+    const bool groupOrderOk = ValidateGroupOrder();
+    return ( versionOk && sortOrderOk && groupOrderOk );
+}
+
+// Checks the header version tag (VN). A missing version only produces a
+// warning; a present version must look like "<digits>.<digits>".
+bool SamHeaderValidator::ValidateVersion(void) {
+
+    const string& version = m_header.Version;
+
+    // missing version: warn, but still treat the header as valid
+    if ( version.empty() ) {
+        AddWarning("Version (VN) missing. Not required, but strongly recommended");
+        return true;
+    }
+
+    // well-formed means: a period is present, and the text on each side of
+    // the first period is non-empty and made up of digits only
+    const size_t periodPos = version.find(Constants::SAM_PERIOD);
+    bool wellFormed = ( periodPos != string::npos );
+    if ( wellFormed ) {
+        const string majorPart = version.substr(0, periodPos);
+        const string minorPart = version.substr(periodPos + 1);
+        wellFormed = !majorPart.empty() && ContainsOnlyDigits(majorPart) &&
+                     !minorPart.empty() && ContainsOnlyDigits(minorPart);
+    }
+
+    if ( !wellFormed ) {
+        AddError("Invalid version (VN) format: " + version);
+        return false;
+    }
+
+    // TODO: check if version is not just syntactically OK,
+    //       but is also a valid SAM version ( 1.0 .. CURRENT )
+
+    // syntax checks passed
+    return true;
+}
+
+// Returns true when 's' holds no character outside Constants::SAM_DIGITS.
+// Callers are expected to pass a non-empty string (an empty one yields true).
+bool SamHeaderValidator::ContainsOnlyDigits(const string& s) {
+    return s.find_first_not_of(Constants::SAM_DIGITS) == string::npos;
+}
+
+// Checks the header sort order tag (SO). A missing value only produces a
+// warning; a present value must be one of the known sort order keywords.
+bool SamHeaderValidator::ValidateSortOrder(void) {
+
+    const string& sortOrder = m_header.SortOrder;
+
+    // missing sort order: warn, but still treat the header as valid
+    if ( sortOrder.empty() ) {
+        AddWarning("Sort order (SO) missing. Not required, but strongly recommended");
+        return true;
+    }
+
+    // compare against the accepted keywords
+    const bool isKnownKeyword = ( sortOrder == Constants::SAM_HD_SORTORDER_COORDINATE ||
+                                  sortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME  ||
+                                  sortOrder == Constants::SAM_HD_SORTORDER_UNSORTED );
+    if ( !isKnownKeyword ) {
+        AddError("Invalid sort order (SO): " + sortOrder);
+        return false;
+    }
+
+    return true;
+}
+
+// Checks the header group order tag (GO). The tag may be absent; a present
+// value must be one of the known group order keywords.
+bool SamHeaderValidator::ValidateGroupOrder(void) {
+
+    const string& groupOrder = m_header.GroupOrder;
+
+    // absent group order is acceptable (no warning either)
+    if ( groupOrder.empty() )
+        return true;
+
+    // compare against the accepted keywords
+    const bool isKnownKeyword = ( groupOrder == Constants::SAM_HD_GROUPORDER_NONE  ||
+                                  groupOrder == Constants::SAM_HD_GROUPORDER_QUERY ||
+                                  groupOrder == Constants::SAM_HD_GROUPORDER_REFERENCE );
+    if ( !isKnownKeyword ) {
+        AddError("Invalid group order (GO): " + groupOrder);
+        return false;
+    }
+
+    return true;
+}
+
+// Validates the sequence dictionary: names must be unique, and every entry
+// must pass ValidateSequence(). All entries are checked even after a failure.
+bool SamHeaderValidator::ValidateSequenceDictionary(void) {
+
+    // dictionary-wide check first
+    bool allValid = ContainsUniqueSequenceNames();
+
+    // then each entry on its own
+    const SamSequenceDictionary& dictionary = m_header.Sequences;
+    SamSequenceConstIterator current = dictionary.ConstBegin();
+    const SamSequenceConstIterator last = dictionary.ConstEnd();
+    while ( current != last ) {
+        const bool entryValid = ValidateSequence(*current);
+        allValid = allValid && entryValid;
+        ++current;
+    }
+
+    return allValid;
+}
+
+// make sure all SQ names are unique
+bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
+
+ bool isValid = true;
+ set<string> sequenceNames;
+ set<string>::iterator nameIter;
+
+ // iterate over sequences
+ const SamSequenceDictionary& sequences = m_header.Sequences;
+ SamSequenceConstIterator seqIter = sequences.ConstBegin();
+ SamSequenceConstIterator seqEnd = sequences.ConstEnd();
+ for ( ; seqIter != seqEnd; ++seqIter ) {
+ const SamSequence& seq = (*seqIter);
+
+ // lookup sequence name
+ const string& name = seq.Name;
+ nameIter = sequenceNames.find(name);
+
+ // error if found (duplicate entry)
+ if ( nameIter != sequenceNames.end() ) {
+ AddError("Sequence name (SN): " + name + " is not unique");
+ isValid = false;
+ }
+
+ // otherwise ok, store name
+ sequenceNames.insert(name);
+ }
+
+ // return validation state
+ return isValid;
+}
+
+// Validates one sequence entry: name format and length range. Both checks
+// always run so that each problem gets its own error message.
+bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
+    const bool nameOk   = CheckNameFormat(seq.Name);
+    const bool lengthOk = CheckLengthInRange(seq.Length);
+    return ( nameOk && lengthOk );
+}
+
+// Checks a sequence name (SN): it must be present and must not start with
+// one of the reserved characters Constants::SAM_EQUAL / Constants::SAM_STAR.
+bool SamHeaderValidator::CheckNameFormat(const string& name) {
+
+    // an empty name means the SN tag was never set
+    if ( name.empty() ) {
+        AddError("Sequence entry (@SQ) is missing SN tag");
+        return false;
+    }
+
+    // name is known to be non-empty here, so index 0 is valid
+    const char leadingChar = name[0];
+    const bool startsWithReserved = ( leadingChar == Constants::SAM_EQUAL ||
+                                      leadingChar == Constants::SAM_STAR );
+    if ( startsWithReserved ) {
+        AddError("Invalid sequence name (SN): " + name);
+        return false;
+    }
+
+    return true;
+}
+
+// Checks a sequence length (LN): it must be present, consist of digits only,
+// and lie within [Constants::SAM_SQ_LENGTH_MIN, Constants::SAM_SQ_LENGTH_MAX].
+// Records an error and returns false otherwise.
+bool SamHeaderValidator::CheckLengthInRange(const string& length) {
+
+    // an empty value means the LN tag was never set
+    if ( length.empty() ) {
+        AddError("Sequence entry (@SQ) is missing LN tag");
+        return false;
+    }
+
+    // reject anything that is not a plain run of digits up front; otherwise
+    // the numeric extraction below could fail outright (e.g. "abc") or
+    // silently accept a numeric prefix (e.g. "123abc" read as 123)
+    if ( !ContainsOnlyDigits(length) ) {
+        AddError("Invalid sequence length (LN): " + length);
+        return false;
+    }
+
+    // convert string length to numeric; initialized so that a failed
+    // extraction can never leave an indeterminate value behind
+    stringstream lengthStream(length);
+    unsigned int sequenceLength = 0;
+    lengthStream >> sequenceLength;
+
+    // with digits-only input, a failed extraction means the value does not
+    // fit in unsigned int, which is necessarily outside the accepted range
+    if ( lengthStream.fail() ||
+         sequenceLength < Constants::SAM_SQ_LENGTH_MIN ||
+         sequenceLength > Constants::SAM_SQ_LENGTH_MAX )
+    {
+        AddError("Sequence length (LN): " + length + " out of range");
+        return false;
+    }
+
+    // otherwise OK
+    return true;
+}
+
+// Validates the read group dictionary: IDs and platform units must be unique,
+// and every entry must pass ValidateReadGroup(). All entries are checked even
+// after a failure.
+bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
+
+    // dictionary-wide check first
+    bool allValid = ContainsUniqueIDsAndPlatformUnits();
+
+    // then each entry on its own
+    const SamReadGroupDictionary& dictionary = m_header.ReadGroups;
+    SamReadGroupConstIterator current = dictionary.ConstBegin();
+    const SamReadGroupConstIterator last = dictionary.ConstEnd();
+    while ( current != last ) {
+        const bool entryValid = ValidateReadGroup(*current);
+        allValid = allValid && entryValid;
+        ++current;
+    }
+
+    return allValid;
+}
+
+// Checks that no read group ID occurs more than once, and that no platform
+// unit (PU) value occurs more than once among the read groups that have one.
+// One error is recorded for every repeated occurrence.
+bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
+
+    bool isValid = true;
+    set<string> readGroupIds;
+    set<string> platformUnits;
+
+    // iterate over read groups
+    const SamReadGroupDictionary& readGroups = m_header.ReadGroups;
+    SamReadGroupConstIterator rgIter = readGroups.ConstBegin();
+    SamReadGroupConstIterator rgEnd  = readGroups.ConstEnd();
+    for ( ; rgIter != rgEnd; ++rgIter ) {
+        const SamReadGroup& rg = (*rgIter);
+
+        // --------------------------------
+        // check for unique ID
+
+        // insert() reports through '.second' whether the ID was new;
+        // false means a duplicate entry
+        const string& id = rg.ID;
+        if ( !readGroupIds.insert(id).second ) {
+            AddError("Read group ID (ID): " + id + " is not unique");
+            isValid = false;
+        }
+
+        // --------------------------------
+        // check for unique platform unit
+
+        // An empty PU means the read group has no platform unit at all.
+        // Such entries must not take part in the uniqueness check, or two
+        // read groups without PU would be reported as duplicates of "".
+        const string& pu = rg.PlatformUnit;
+        if ( pu.empty() )
+            continue;
+
+        if ( !platformUnits.insert(pu).second ) {
+            AddError("Platform unit (PU): " + pu + " is not unique");
+            isValid = false;
+        }
+    }
+
+    // return validation state
+    return isValid;
+}
+
+// Validates one read group entry: ID presence and sequencing technology.
+// Both checks always run so that each problem gets its own error message.
+bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
+    const bool idOk         = CheckReadGroupID(rg.ID);
+    const bool technologyOk = CheckSequencingTechnology(rg.SequencingTechnology);
+    return ( idOk && technologyOk );
+}
+
+// make sure RG ID exists
+bool SamHeaderValidator::CheckReadGroupID(const string& id) {
+
+ // invalid if empty
+ if ( id.empty() ) {
+ AddError("Read group entry (@RG) is missing ID tag");
+ return false;
+ }
+
+ // otherwise OK
+ return true;
+}
+
+// Checks the read group sequencing technology (PL). The tag may be absent;
+// a present value must match one of the accepted keywords, ignoring case.
+bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
+
+    // absent technology is acceptable
+    if ( technology.empty() )
+        return true;
+
+    // accepted keywords, tried in this order until one matches
+    const string acceptedKeywords[] = {
+        Constants::SAM_RG_SEQTECHNOLOGY_CAPILLARY,
+        Constants::SAM_RG_SEQTECHNOLOGY_HELICOS,
+        Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA,
+        Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT,
+        Constants::SAM_RG_SEQTECHNOLOGY_LS454,
+        Constants::SAM_RG_SEQTECHNOLOGY_PACBIO,
+        Constants::SAM_RG_SEQTECHNOLOGY_SOLID
+    };
+    const size_t numKeywords = sizeof(acceptedKeywords) / sizeof(acceptedKeywords[0]);
+
+    for ( size_t i = 0; i < numKeywords; ++i ) {
+        if ( caseInsensitiveCompare(technology, acceptedKeywords[i]) )
+            return true;
+    }
+
+    // no keyword matched
+    AddError("Invalid read group sequencing platform (PL): " + technology);
+    return false;
+}
+
+// Validates the program records: IDs must be unique and every PP tag must
+// refer to a known ID. Both checks always run.
+bool SamHeaderValidator::ValidateProgramChain(void) {
+    const bool idsUnique     = ContainsUniqueProgramIds();
+    const bool previousIdsOk = ValidatePreviousProgramIds();
+    return ( idsUnique && previousIdsOk );
+}
+
+// make sure all PG IDs are unique
+bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
+
+ bool isValid = true;
+ set<string> programIds;
+ set<string>::iterator pgIdIter;
+
+ // iterate over program records
+ const SamProgramChain& programs = m_header.Programs;
+ SamProgramConstIterator pgIter = programs.ConstBegin();
+ SamProgramConstIterator pgEnd = programs.ConstEnd();
+ for ( ; pgIter != pgEnd; ++pgIter ) {
+ const SamProgram& pg = (*pgIter);
+
+ // lookup program ID
+ const string& pgId = pg.ID;
+ pgIdIter = programIds.find(pgId);
+
+ // error if found (duplicate entry)
+ if ( pgIdIter != programIds.end() ) {
+ AddError("Program ID (ID): " + pgId + " is not unique");
+ isValid = false;
+ }
+
+ // otherwise ok, store ID
+ programIds.insert(pgId);
+ }
+
+ // return validation state
+ return isValid;
+}
+
+// Checks that every non-empty PreviousProgramID (PP) names an ID that the
+// program chain contains. One error is recorded per unknown reference.
+bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
+
+    bool allKnown = true;
+
+    const SamProgramChain& chain = m_header.Programs;
+    SamProgramConstIterator current = chain.ConstBegin();
+    const SamProgramConstIterator last = chain.ConstEnd();
+    for ( ; current != last; ++current ) {
+        const string& previousId = current->PreviousProgramID;
+
+        // records without a PP tag have nothing to verify;
+        // otherwise the referenced ID must exist somewhere in the chain
+        if ( !previousId.empty() && !chain.Contains(previousId) ) {
+            AddError("PreviousProgramID (PP): " + previousId + " is not a known ID");
+            allKnown = false;
+        }
+    }
+
+    return allKnown;
+}
--- /dev/null
+// ***************************************************************************
+// SamHeaderValidator_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for validating SamHeader data
+// ***************************************************************************
+
+#ifndef SAM_HEADER_VALIDATOR_P_H
+#define SAM_HEADER_VALIDATOR_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+class SamReadGroup;
+class SamSequence;
+
+namespace Internal {
+
+// Validates the data held in a SamHeader. Validate() runs the checks and
+// collects error/warning messages; PrintMessages() writes them out.
+class SamHeaderValidator {
+
+ // ctor & dtor
+ public:
+ // keeps a reference to 'header' (see m_header); the header must outlive the validator
+ SamHeaderValidator(const SamHeader& header);
+ ~SamHeaderValidator(void);
+
+ // SamHeaderValidator interface
+ public:
+
+ // prints error & warning messages
+ void PrintMessages(std::ostream& stream);
+
+ // validates SamHeader data, returns true/false accordingly
+ bool Validate(void);
+
+ // internal methods
+ private:
+
+ // validate header metadata
+ bool ValidateMetadata(void);
+ bool ValidateVersion(void);
+ bool ContainsOnlyDigits(const std::string& s);
+ bool ValidateSortOrder(void);
+ bool ValidateGroupOrder(void);
+
+ // validate sequence dictionary
+ bool ValidateSequenceDictionary(void);
+ bool ContainsUniqueSequenceNames(void);
+ bool CheckNameFormat(const std::string& name);
+ bool ValidateSequence(const SamSequence& seq);
+ bool CheckLengthInRange(const std::string& length);
+
+ // validate read group dictionary
+ bool ValidateReadGroupDictionary(void);
+ bool ContainsUniqueIDsAndPlatformUnits(void);
+ bool ValidateReadGroup(const SamReadGroup& rg);
+ bool CheckReadGroupID(const std::string& id);
+ bool CheckSequencingTechnology(const std::string& technology);
+
+ // validate program data
+ bool ValidateProgramChain(void);
+ bool ContainsUniqueProgramIds(void);
+ bool ValidatePreviousProgramIds(void);
+
+ // error reporting
+ // AddError/AddWarning store a message; the Print* methods write the stored messages
+ void AddError(const std::string& message);
+ void AddWarning(const std::string& message);
+ void PrintErrorMessages(std::ostream& stream);
+ void PrintWarningMessages(std::ostream& stream);
+
+ // data members
+ private:
+
+ // SamHeader being validated
+ const SamHeader& m_header;
+
+ // error reporting helpers
+ // (text placed before / after each stored message)
+ static const std::string ERROR_PREFIX;
+ static const std::string WARN_PREFIX;
+ static const std::string NEWLINE;
+
+ // error reporting messages
+ // (accumulate across calls; nothing in this class declaration clears them)
+ std::vector<std::string> m_errorMessages;
+ std::vector<std::string> m_warningMessages;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_HEADER_VALIDATOR_P_H
--- /dev/null
+// ***************************************************************************
+// SamHeaderVersion_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for comparing SAM header versions
+// *************************************************************************
+
+#ifndef SAM_HEADERVERSION_P_H
+#define SAM_HEADERVERSION_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/SamConstants.h"
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Holds a SAM header version as a (major, minor) pair of unsigned integers.
+class SamHeaderVersion {
+
+ // ctors & dtor
+ public:
+ // default version is 0.0
+ SamHeaderVersion(void)
+ : m_majorVersion(0)
+ , m_minorVersion(0)
+ { }
+
+ // starts from 0.0, then hands 'version' to SetVersion() for parsing
+ explicit SamHeaderVersion(const std::string& version)
+ : m_majorVersion(0)
+ , m_minorVersion(0)
+ {
+ SetVersion(version);
+ }
+
+ // builds a version directly from its numeric components
+ SamHeaderVersion(const unsigned int& major, const unsigned int& minor)
+ : m_majorVersion(major)
+ , m_minorVersion(minor)
+ { }
+
+ // resets both components to 0; no resources are held
+ ~SamHeaderVersion(void) {
+ m_majorVersion = 0;
+ m_minorVersion = 0;
+ }
+
+ // access data
+ public:
+ unsigned int MajorVersion(void) const { return m_majorVersion; }
+ unsigned int MinorVersion(void) const { return m_minorVersion; }
+
+ // SetVersion() parses a version string; ToString() formats major, period, minor
+ void SetVersion(const std::string& version);
+ std::string ToString(void) const;
+
+ // data members
+ private:
+ unsigned int m_majorVersion;
+ unsigned int m_minorVersion;
+};
+
+// Parses a "<major>.<minor>" string, splitting at the first period.
+// Each component is stored only if it is non-empty and consists of digits
+// only; otherwise that component keeps its current value. A string without
+// a period (including the empty string) changes nothing.
+inline
+void SamHeaderVersion::SetVersion(const std::string& version) {
+
+    // do nothing if period not found (this also covers an empty string)
+    const size_t periodFound = version.find(Constants::SAM_PERIOD);
+    if ( periodFound == std::string::npos )
+        return;
+
+    // Each component gets its own stream. Reusing one stream does not work:
+    // extracting the major number reads to the end of the buffer and sets
+    // eofbit, and str() does not clear state flags, so a second extraction
+    // from the same stream would fail and the minor version would be lost.
+
+    // store major version if non-empty and contains only digits
+    const std::string majorVersion = version.substr(0, periodFound);
+    if ( !majorVersion.empty() &&
+         majorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
+    {
+        std::stringstream majorStream(majorVersion);
+        majorStream >> m_majorVersion;
+    }
+
+    // store minor version if non-empty and contains only digits
+    const std::string minorVersion = version.substr(periodFound + 1);
+    if ( !minorVersion.empty() &&
+         minorVersion.find_first_not_of(Constants::SAM_DIGITS) == std::string::npos )
+    {
+        std::stringstream minorStream(minorVersion);
+        minorStream >> m_minorVersion;
+    }
+}
+
+// -----------------------------------------------------
+// printing
+
+// Formats the version as <major><Constants::SAM_PERIOD><minor>.
+inline std::string SamHeaderVersion::ToString(void) const {
+    std::stringstream text;
+    text << m_majorVersion;
+    text << Constants::SAM_PERIOD;
+    text << m_minorVersion;
+    return text.str();
+}
+
+// -----------------------------------------------------
+// comparison operators
+
+// two versions are equal when both components match
+inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    if ( lhs.MajorVersion() != rhs.MajorVersion() )
+        return false;
+    return lhs.MinorVersion() == rhs.MinorVersion();
+}
+
+// orders by major version first; minor version breaks ties
+inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    if ( lhs.MajorVersion() != rhs.MajorVersion() )
+        return lhs.MajorVersion() < rhs.MajorVersion();
+    return lhs.MinorVersion() < rhs.MinorVersion();
+}
+
+// remaining orderings, each expressed through operator< (directly or via operator>)
+inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return rhs < lhs; }
+inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs>rhs); }
+inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs<rhs); }
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_HEADERVERSION_P_H
--- /dev/null
+// ***************************************************************************
+// BamException_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 25 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#include "api/internal/utils/BamException_p.h"
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+// text placed between the 'where' and 'message' parts of an exception's error string
+const string BamException::SEPARATOR = ": ";
--- /dev/null
+// ***************************************************************************
+// BamException_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#ifndef BAMEXCEPTION_P_H
+#define BAMEXCEPTION_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <exception>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Basic exception type for BamTools internals. The error text is built once,
+// at construction, as: where + SEPARATOR + message.
+class BamException : public std::exception {
+
+ public:
+ // where   : origin of the error, placed before the separator
+ //           (presumably a class/method name - confirm at call sites)
+ // message : description of the error, placed after the separator
+ inline BamException(const std::string& where, const std::string& message)
+ : std::exception()
+ , m_errorString(where + SEPARATOR + message)
+ { }
+
+ inline ~BamException(void) throw() { }
+
+ // returns the stored error text; the pointer refers to m_errorString's
+ // buffer, so it is only usable while this exception object exists
+ inline const char* what(void) const throw() {
+ return m_errorString.c_str();
+ }
+
+ private:
+ std::string m_errorString;
+ static const std::string SEPARATOR;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMEXCEPTION_P_H
--- /dev/null
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2011 Derek Barnett
+#
+# src/api/internal/utils
+# ==========================
+
+# directory of the internal utils sources, built from ${InternalDir}
+# (InternalDir is not set in this fragment - presumably set by the including CMakeLists; confirm)
+set ( InternalUtilsDir "${InternalDir}/utils" )
+
+# source files in this directory; PARENT_SCOPE sets the variable in the
+# including scope rather than in this one
+set ( InternalUtilsSources
+ ${InternalUtilsDir}/BamException_p.cpp
+
+ PARENT_SCOPE # <-- leave this last
+)
+