From aace28299671f066bf2135ef28652f24fa1d8d26 Mon Sep 17 00:00:00 2001 From: derek Date: Mon, 7 Nov 2011 12:50:10 -0500 Subject: [PATCH] Implemented basic TCP support layer * buffered I/O * design should support future expansion of protocols, proxies, etc * so far, HTTP range requests working well (on plain HTML text tests, not yet BAM-tested) --- src/api/BamAux.h | 17 +- src/api/CMakeLists.txt | 38 +- src/api/IBamIODevice.h | 11 +- src/api/internal/CMakeLists.txt | 25 ++ src/api/internal/bam/CMakeLists.txt | 19 + src/api/internal/index/CMakeLists.txt | 17 + src/api/internal/io/BamFtp_p.cpp | 13 +- src/api/internal/io/BamFtp_p.h | 4 +- src/api/internal/io/BamHttp_p.cpp | 356 +++++++++++++++- src/api/internal/io/BamHttp_p.h | 37 +- src/api/internal/io/BamPipe_p.cpp | 8 +- src/api/internal/io/BgzfStream_p.cpp | 82 ++-- src/api/internal/io/BgzfStream_p.h | 12 +- src/api/internal/io/ByteArray_p.cpp | 102 +++++ src/api/internal/io/ByteArray_p.h | 50 +++ src/api/internal/io/CMakeLists.txt | 52 +++ src/api/internal/io/HostAddress_p.cpp | 386 ++++++++++++++++++ src/api/internal/io/HostAddress_p.h | 81 ++++ src/api/internal/io/HostInfo_p.cpp | 213 ++++++++++ src/api/internal/io/HostInfo_p.h | 57 +++ src/api/internal/io/ILocalIODevice_p.cpp | 8 +- src/api/internal/io/ILocalIODevice_p.h | 4 +- src/api/internal/io/IRemoteIODevice_p.cpp | 0 src/api/internal/io/IRemoteIODevice_p.h | 0 src/api/internal/io/TcpSocketEngine_p.cpp | 183 +++++++++ src/api/internal/io/TcpSocketEngine_p.h | 77 ++++ .../internal/io/TcpSocketEngine_unix_p.cpp | 303 ++++++++++++++ src/api/internal/io/TcpSocket_p.cpp | 342 ++++++++++++++++ src/api/internal/io/TcpSocket_p.h | 122 ++++++ src/api/internal/sam/CMakeLists.txt | 17 + src/api/internal/utils/CMakeLists.txt | 15 + 31 files changed, 2548 insertions(+), 103 deletions(-) create mode 100644 src/api/internal/CMakeLists.txt create mode 100644 src/api/internal/bam/CMakeLists.txt create mode 100644 src/api/internal/index/CMakeLists.txt create mode 100644 src/api/internal/io/ByteArray_p.cpp create mode 100644 src/api/internal/io/ByteArray_p.h create mode 100644 src/api/internal/io/CMakeLists.txt create mode 100644 src/api/internal/io/HostAddress_p.cpp create mode 100644 src/api/internal/io/HostAddress_p.h create mode 100644 src/api/internal/io/HostInfo_p.cpp create mode 100644 src/api/internal/io/HostInfo_p.h delete mode 100644 src/api/internal/io/IRemoteIODevice_p.cpp delete mode 100644 src/api/internal/io/IRemoteIODevice_p.h create mode 100644 src/api/internal/io/TcpSocketEngine_p.cpp create mode 100644 src/api/internal/io/TcpSocketEngine_p.h create mode 100644 src/api/internal/io/TcpSocketEngine_unix_p.cpp create mode 100644 src/api/internal/io/TcpSocket_p.cpp create mode 100644 src/api/internal/io/TcpSocket_p.h create mode 100644 src/api/internal/sam/CMakeLists.txt create mode 100644 src/api/internal/utils/CMakeLists.txt diff --git a/src/api/BamAux.h b/src/api/BamAux.h index f451125..0dd3e99 100644 --- a/src/api/BamAux.h +++ b/src/api/BamAux.h @@ -2,7 +2,7 @@ // BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) +// Last modified: 25 October 2011 (DB) // --------------------------------------------------------------------------- // Provides data structures & utility methods that are used throughout the API. // *************************************************************************** @@ -11,6 +11,7 @@ #define BAMAUX_H #include "api/api_global.h" +#include #include #include #include @@ -441,13 +442,25 @@ API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) { \internal */ struct RaiiBuffer { + + // data members + char* Buffer; + const size_t NumBytes; + + // ctor & dtor RaiiBuffer(const size_t n) : Buffer( new char[n]() ) + , NumBytes(n) { } + ~RaiiBuffer(void) { delete[] Buffer; } - char* Buffer; + + // add'l methods + void Clear(void) { + memset(Buffer, 0, NumBytes); + } }; } // namespace BamTools diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt index d6a5b10..4500564 100644 --- a/src/api/CMakeLists.txt +++ b/src/api/CMakeLists.txt @@ -12,7 +12,10 @@ include_directories( ${BamTools_SOURCE_DIR}/src ) add_definitions( -DBAMTOOLS_API_LIBRARY ) # (for proper exporting of library symbols) add_definitions( -fPIC ) # (attempt to force PIC compiling on CentOS, not being set on shared libs by CMake) -# list of all BamTools API source (.cpp) files +# fetch all internal source files +add_subdirectory ( internal ) + +# make list of all API source files set( BamToolsAPISources BamAlignment.cpp BamMultiReader.cpp @@ -25,26 +28,7 @@ set( BamToolsAPISources SamReadGroupDictionary.cpp SamSequence.cpp SamSequenceDictionary.cpp - internal/bam/BamHeader_p.cpp - internal/bam/BamMultiReader_p.cpp - internal/bam/BamRandomAccessController_p.cpp - internal/bam/BamReader_p.cpp - internal/bam/BamWriter_p.cpp - internal/index/BamIndexFactory_p.cpp - internal/index/BamStandardIndex_p.cpp - internal/index/BamToolsIndex_p.cpp - internal/io/BamDeviceFactory_p.cpp - internal/io/BamFile_p.cpp - internal/io/BamFtp_p.cpp - internal/io/BamHttp_p.cpp - internal/io/BamPipe_p.cpp - internal/io/BgzfStream_p.cpp - internal/io/ILocalIODevice_p.cpp - internal/io/IRemoteIODevice_p.cpp - internal/sam/SamFormatParser_p.cpp - internal/sam/SamFormatPrinter_p.cpp - internal/sam/SamHeaderValidator_p.cpp - internal/utils/BamException_p.cpp + ${InternalSources} ) # create main BamTools API shared library @@ -58,8 +42,14 @@ set_target_properties( BamTools-static PROPERTIES OUTPUT_NAME "bamtools" ) set_target_properties( BamTools-static PROPERTIES PREFIX "lib" ) # link libraries with zlib automatically -target_link_libraries( BamTools z ) -target_link_libraries( BamTools-static z ) +if ( _WIN32 ) + set( APILibs z ws2_32 ) +else ( _WIN32 ) + set( APILibs z ) +endif ( _WIN32 ) + +target_link_libraries( BamTools ${APILibs} ) +target_link_libraries( BamTools-static ${APILibs} ) # set library install destinations install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin") @@ -69,7 +59,7 @@ install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools") include(../ExportHeader.cmake) set(ApiIncludeDir "api") ExportHeader(APIHeaders api_global.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamAlgorithms.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamAlgorithms.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamAlignment.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamAux.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamConstants.h ${ApiIncludeDir}) diff --git a/src/api/IBamIODevice.h b/src/api/IBamIODevice.h index b34e449..8e14827 100644 --- a/src/api/IBamIODevice.h +++ b/src/api/IBamIODevice.h @@ -26,9 +26,10 @@ namespace BamTools { class API_EXPORT IBamIODevice { // enums - public: enum OpenMode { NotOpen = 0 - , ReadOnly - , WriteOnly + public: enum OpenMode { NotOpen = 0x0000 + , ReadOnly = 0x0001 + , WriteOnly = 0x0002 + , ReadWrite = ReadOnly | WriteOnly }; // ctor & dtor @@ -42,10 +43,10 @@ class API_EXPORT IBamIODevice { virtual void Close(void) =0; virtual bool IsRandomAccess(void) const =0; virtual bool Open(const OpenMode mode) =0; - virtual size_t Read(char* data, const unsigned int numBytes) =0; + virtual int64_t Read(char* data, const unsigned int numBytes) =0; virtual bool Seek(const int64_t& position) =0; virtual int64_t Tell(void) const =0; - virtual size_t Write(const char* data, const unsigned int numBytes) =0; + virtual int64_t Write(const char* data, const unsigned int numBytes) =0; // default implementation provided virtual std::string GetErrorString(void); diff --git a/src/api/internal/CMakeLists.txt b/src/api/internal/CMakeLists.txt new file mode 100644 index 0000000..1e7b8dd --- /dev/null +++ b/src/api/internal/CMakeLists.txt @@ -0,0 +1,25 @@ +# ========================== +# BamTools CMakeLists.txt +# (c) 2011 Derek Barnett +# +# src/api/internal +# ========================== + +set ( InternalDir "internal" ) + +add_subdirectory ( bam ) +add_subdirectory ( index ) +add_subdirectory ( io ) +add_subdirectory ( sam ) +add_subdirectory ( utils ) + +set ( InternalSources + ${InternalBamSources} + ${InternalIndexSources} + ${InternalIOSources} + ${InternalSamSources} + ${InternalUtilsSources} + + PARENT_SCOPE # <-- leave this last + ) + diff --git a/src/api/internal/bam/CMakeLists.txt b/src/api/internal/bam/CMakeLists.txt new file mode 100644 index 0000000..64d8534 --- /dev/null +++ b/src/api/internal/bam/CMakeLists.txt @@ -0,0 +1,19 @@ +# ========================== +# BamTools CMakeLists.txt +# (c) 2011 Derek Barnett +# +# src/api/internal/bam +# ========================== + +set ( InternalBamDir "${InternalDir}/bam" ) + +set ( InternalBamSources + ${InternalBamDir}/BamHeader_p.cpp + ${InternalBamDir}/BamMultiReader_p.cpp + ${InternalBamDir}/BamRandomAccessController_p.cpp + ${InternalBamDir}/BamReader_p.cpp + ${InternalBamDir}/BamWriter_p.cpp + + PARENT_SCOPE # <-- leave this last +) + diff --git a/src/api/internal/index/CMakeLists.txt b/src/api/internal/index/CMakeLists.txt new file mode 100644 index 0000000..1c78cb9 --- /dev/null +++ b/src/api/internal/index/CMakeLists.txt @@ -0,0 +1,17 @@ +# ========================== +# BamTools CMakeLists.txt +# (c) 2011 Derek Barnett +# +# src/api/internal/index +# ========================== + +set ( InternalIndexDir "${InternalDir}/index" ) + +set ( InternalIndexSources + ${InternalIndexDir}/BamIndexFactory_p.cpp + ${InternalIndexDir}/BamStandardIndex_p.cpp + ${InternalIndexDir}/BamToolsIndex_p.cpp + + PARENT_SCOPE # <-- leave this last +) + diff --git a/src/api/internal/io/BamFtp_p.cpp b/src/api/internal/io/BamFtp_p.cpp index f94d4ac..10181cb 100644 --- a/src/api/internal/io/BamFtp_p.cpp +++ b/src/api/internal/io/BamFtp_p.cpp @@ -30,11 +30,17 @@ bool BamFtp::IsRandomAccess(void) const { } bool BamFtp::Open(const IBamIODevice::OpenMode mode) { - (void) mode; + + if ( mode != IBamIODevice::ReadOnly ) { + SetErrorString("BamFtp::Open", "writing on this device is not supported"); + return false; + } + + return true; } -size_t BamFtp::Read(char* data, const unsigned int numBytes) { +int64_t BamFtp::Read(char* data, const unsigned int numBytes) { (void)data; (void)numBytes; return 0; @@ -49,8 +55,9 @@ int64_t BamFtp::Tell(void) const { return -1; } -size_t BamFtp::Write(const char* data, const unsigned int numBytes) { +int64_t BamFtp::Write(const char* data, const unsigned int numBytes) { (void)data; (void)numBytes; + BT_ASSERT_X(false, "BamFtp::Write : write-mode not supported on this device"); return 0; } diff --git a/src/api/internal/io/BamFtp_p.h b/src/api/internal/io/BamFtp_p.h index 1f5ee0f..d049a10 100644 --- a/src/api/internal/io/BamFtp_p.h +++ b/src/api/internal/io/BamFtp_p.h @@ -38,10 +38,10 @@ class BamFtp : public IBamIODevice { void Close(void); bool IsRandomAccess(void) const; bool Open(const IBamIODevice::OpenMode mode); - size_t Read(char* data, const unsigned int numBytes); + int64_t Read(char* data, const unsigned int numBytes); bool Seek(const int64_t& position); int64_t Tell(void) const; - size_t Write(const char* data, const unsigned int numBytes); + int64_t Write(const char* data, const unsigned int numBytes); // internal methods private: diff --git a/src/api/internal/io/BamHttp_p.cpp b/src/api/internal/io/BamHttp_p.cpp index 2892829..04fb8e2 100644 --- a/src/api/internal/io/BamHttp_p.cpp +++ b/src/api/internal/io/BamHttp_p.cpp @@ -2,27 +2,138 @@ // BamHttp_p.cpp (c) 2011 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 25 October 2011 (DB) +// Last modified: 7 November 2011 (DB) // --------------------------------------------------------------------------- // Provides reading/writing of BAM files on HTTP server // *************************************************************************** +#include "api/BamAux.h" #include "api/internal/io/BamHttp_p.h" +#include "api/internal/io/HttpHeader_p.h" +#include "api/internal/io/TcpSocket_p.h" using namespace BamTools; using namespace BamTools::Internal; +#include +#include +#include +#include using namespace std; +namespace BamTools { +namespace Internal { + +// ----------- +// constants +// ----------- + +static const string HTTP_PORT = "80"; +static const string HTTP_PREFIX = "http://"; +static const size_t HTTP_PREFIX_LENGTH = 7; +static const char COLON_CHAR = ':'; +static const char SLASH_CHAR = '/'; + +// ----------------- +// utility methods +// ----------------- + +static inline +string toLower(const string& s) { + string out; + const size_t sSize = s.size(); + out.reserve(sSize); + for ( size_t i = 0; i < sSize; ++i ) + out[i] = tolower(s[i]); + return out; +} + +} // namespace Internal +} // namespace BamTools + +// ------------------------ +// BamHttp implementation +// ------------------------ + BamHttp::BamHttp(const string& url) : IBamIODevice() + , m_socket(new TcpSocket) + , m_port(HTTP_PORT) + , m_request(0) + , m_response(0) + , m_isUrlParsed(false) + , m_filePosition(-1) + , m_endRangeFilePosition(-1) { - BT_ASSERT_X(false, "BamHttp not yet implemented"); + ParseUrl(url); } -BamHttp::~BamHttp(void) { } +BamHttp::~BamHttp(void) { + + // close connection & clean up + Close(); + if ( m_socket ) + delete m_socket; +} void BamHttp::Close(void) { - return ; + + // disconnect socket + m_socket->DisconnectFromHost(); + + // clean up request & response + if ( m_request ) { + delete m_request; + m_request = 0; + } + if ( m_response ) { + delete m_response; + m_response = 0; + } + + // reset state - necessary?? + m_isUrlParsed = false; + m_filePosition = -1; + m_endRangeFilePosition = -1; +} + +bool BamHttp::ConnectSocket(void) { + + BT_ASSERT_X(m_socket, "null socket?"); + + // any state checks, etc? + if ( !m_socket->ConnectToHost(m_hostname, m_port, m_mode) ) { + // TODO: set error string + return false; + } + + // attempt initial request + m_filePosition = 0; + m_endRangeFilePosition = -1; + if ( !SendRequest() ) { + // TODO: set error string + Close(); + return false; + } + + // wait for response from server + if ( !ReceiveResponse() ) { + // TODO: set error string + Close(); + return false; + } + + // return success + return true; +} + +bool BamHttp::EnsureSocketConnection(void) { + if ( m_socket->IsConnected() ) + return true; + else return ConnectSocket(); +} + +bool BamHttp::IsOpen(void) const { + return IBamIODevice::IsOpen() && m_isUrlParsed; } bool BamHttp::IsRandomAccess(void) const { @@ -30,27 +141,248 @@ bool BamHttp::IsRandomAccess(void) const { } bool BamHttp::Open(const IBamIODevice::OpenMode mode) { - (void) mode; + + // BamHttp only supports read-only access + if ( mode != IBamIODevice::ReadOnly ) { + SetErrorString("BamHttp::Open", "writing on this device is not supported"); + return false; + } + m_mode = mode; + + // attempt connection to socket + if ( !ConnectSocket() ) { + SetErrorString("BamHttp::Open", m_socket->GetErrorString()); + return false; + } + + // return success return true; } -size_t BamHttp::Read(char* data, const unsigned int numBytes) { - (void)data; - (void)numBytes; - return 0; +void BamHttp::ParseUrl(const string& url) { + + // make sure url starts with "http://", case-insensitive + string tempUrl(url); + toLower(tempUrl); + const size_t prefixFound = tempUrl.find(HTTP_PREFIX); + if ( prefixFound == string::npos ) + return; + + // find end of host name portion (first '/' hit after the prefix) + const size_t firstSlashFound = tempUrl.find(SLASH_CHAR, HTTP_PREFIX_LENGTH); + if ( firstSlashFound == string::npos ) { + ; // no slash found... no filename given along with host? + } + + // fetch hostname (check for proxy port) + string hostname = tempUrl.substr(HTTP_PREFIX_LENGTH, (firstSlashFound - HTTP_PREFIX_LENGTH)); + const size_t colonFound = hostname.find(COLON_CHAR); + if ( colonFound != string::npos ) { + ; // TODO: handle proxy port (later, just skip for now) + } else { + m_hostname = hostname; + m_port = HTTP_PORT; + } + + // store remainder of URL as filename (must be non-empty) + string filename = tempUrl.substr(firstSlashFound); + if ( filename.empty() ) + return; + m_filename = filename; + + // set parsed OK flag + m_isUrlParsed = true; +} + +int64_t BamHttp::Read(char* data, const unsigned int numBytes) { + + // if BamHttp not in a valid state + if ( !IsOpen() ) + return -1; + + // read until hit desired @numBytes + int64_t bytesReadSoFar = 0; + while ( bytesReadSoFar < numBytes ) { + + // calculate number of bytes we're going to try to read this iteration + const size_t remainingBytes = ( numBytes - bytesReadSoFar ); + + // if socket has access to entire file contents + // i.e. we received response with full data (status code == 200) + if ( !m_endRangeFilePosition >= 0 ) { + + // try to read 'remainingBytes' from socket + const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, remainingBytes); + if ( socketBytesRead < 0 ) + return -1; + bytesReadSoFar += socketBytesRead; + } + + // socket has access to a range of data (might already be in buffer) + // i.e. we received response with partial data (status code == 206) + else { + + // there is data left from last request + if ( m_endRangeFilePosition > m_filePosition ) { + + // try to read either the total 'remainingBytes' or whatever we have remaining from last request range + const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition; + const size_t bytesToRead = std::min(remainingBytes, rangeRemainingBytes); + const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead); + if ( socketBytesRead < 0 ) + return -1; + bytesReadSoFar += socketBytesRead; + } + + // otherwise, this is a 1st-time read OR we already read everything from the last GET request + else { + + // request for next range + if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) { + Close(); + return -1; + } + } + } + } + + // return actual number bytes successfully read + return bytesReadSoFar; +} + +int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) { + + // try to read 'remainingBytes' from socket + const int64_t numBytesRead = m_socket->Read(data, maxNumBytes); + if ( numBytesRead < 0 ) + return -1; + m_filePosition += numBytesRead; + return numBytesRead; +} + +bool BamHttp::ReceiveResponse(void) { + + // clear any prior response + if ( m_response ) + delete m_response; + + // make sure we're connected + if ( !EnsureSocketConnection() ) + return false; + + // read response header from socket + RaiiBuffer header(0x10000); + size_t l = 0; + while ( m_socket->Read(header.Buffer + l, 1) >= 0 ) { + if ( header.Buffer[l] == '\n' && l >= 3 ) { + if (strncmp(header.Buffer + l - 3, "\r\n\r\n", 4) == 0) + break; + } + ++l; + } + string responseHeader; + responseHeader.resize(l+1); + for ( size_t i = 0; i < l; ++i ) + responseHeader[i] = header.Buffer[i]; + + if ( responseHeader.empty() ) { + // TODO: set error string + Close(); + return false; + } + + // create response from header text + m_response = new HttpResponseHeader(responseHeader); + if ( !m_response->IsValid() ) { + // TODO: set error string + Close(); + return false; + } + + // if we got range response as requested + if ( m_response->GetStatusCode() == 206 ) + return true; + + // if we got the full file contents instead of range + else if ( m_response->GetStatusCode() == 200 ) { + + // skip up to current file position + RaiiBuffer tmp(0x8000); + int64_t numBytesRead = 0; + while ( numBytesRead < m_filePosition ) { + int64_t result = ReadFromSocket(tmp.Buffer, 0x8000); + if ( result < 0 ) { + Close(); + return false; + } + numBytesRead += result; + } + + // return success + return true; + } + + // on any other reponse status + // TODO: set error string + Close(); + return false; } bool BamHttp::Seek(const int64_t& position) { - (void)position; + + // if HTTP device not in a valid state + if ( !IsOpen() ) { + // TODO: set error string + return false; + } + + // discard socket's buffer contents, update positions, & return success + m_socket->ClearBuffer(); + m_filePosition = position; + m_endRangeFilePosition = -1; return true; } +bool BamHttp::SendRequest(const size_t numBytes) { + + // remove any currently active request + if ( m_request ) + delete m_request; + + // create range string + m_endRangeFilePosition = m_filePosition + numBytes; + stringstream range("bytes="); + range << m_filePosition << "-" << m_endRangeFilePosition; + + // make sure we're connected + if ( !EnsureSocketConnection() ) + return false; + + // create request + m_request = new HttpRequestHeader("GET", m_filename); + m_request->SetField("Host", m_hostname); + m_request->SetField("Range", range.str()); + + // write request to socket + const string requestHeader = m_request->ToString(); + const size_t headerSize = requestHeader.size(); + return ( WriteToSocket(requestHeader.c_str(), headerSize) == headerSize ); +} + int64_t BamHttp::Tell(void) const { - return -1; + return ( IsOpen() ? m_filePosition : -1 ); } -size_t BamHttp::Write(const char* data, const unsigned int numBytes) { +int64_t BamHttp::Write(const char* data, const unsigned int numBytes) { (void)data; (void)numBytes; + BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device"); return 0; } + +int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) { + if ( !EnsureSocketConnection() ) + return false; + m_socket->ClearBuffer(); + return m_socket->Write(data, numBytes); +} diff --git a/src/api/internal/io/BamHttp_p.h b/src/api/internal/io/BamHttp_p.h index 38e94b7..e48693e 100644 --- a/src/api/internal/io/BamHttp_p.h +++ b/src/api/internal/io/BamHttp_p.h @@ -2,7 +2,7 @@ // BamHttp_p.h (c) 2011 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) +// Last modified: 7 November 2011 (DB) // --------------------------------------------------------------------------- // Provides reading/writing of BAM files on HTTP server // *************************************************************************** @@ -26,6 +26,10 @@ namespace BamTools { namespace Internal { +class HttpRequestHeader; +class HttpResponseHeader; +class TcpSocket; + class BamHttp : public IBamIODevice { // ctor & dtor @@ -36,18 +40,45 @@ class BamHttp : public IBamIODevice { // IBamIODevice implementation public: void Close(void); + bool IsOpen(void) const ; bool IsRandomAccess(void) const; bool Open(const IBamIODevice::OpenMode mode); - size_t Read(char* data, const unsigned int numBytes); + int64_t Read(char* data, const unsigned int numBytes); bool Seek(const int64_t& position); int64_t Tell(void) const; - size_t Write(const char* data, const unsigned int numBytes); + int64_t Write(const char* data, const unsigned int numBytes); // internal methods private: + bool ConnectSocket(void); + bool EnsureSocketConnection(void); + void ParseUrl(const std::string& url); + int64_t ReadFromSocket(char* data, const unsigned int numBytes); + bool ReceiveResponse(void); + bool SendRequest(const size_t numBytes = 0); + int64_t WriteToSocket(const char* data, const unsigned int numBytes); // data members private: + + // our main socket + TcpSocket* m_socket; + + // our connection data + std::string m_hostname; + std::string m_port; + std::string m_filename; + + // our last (active) request & response info + HttpRequestHeader* m_request; + HttpResponseHeader* m_response; + + // internal state flags + bool m_isUrlParsed; + + // file position + int64_t m_filePosition; + int64_t m_endRangeFilePosition; }; } // namespace Internal diff --git a/src/api/internal/io/BamPipe_p.cpp b/src/api/internal/io/BamPipe_p.cpp index 40f1e10..92cf798 100644 --- a/src/api/internal/io/BamPipe_p.cpp +++ b/src/api/internal/io/BamPipe_p.cpp @@ -34,14 +34,18 @@ bool BamPipe::Open(const IBamIODevice::OpenMode mode) { else if ( mode == IBamIODevice::WriteOnly ) m_stream = freopen(0, "wb", stdout); else { - SetErrorString("BamPipe::Open", "unknown open mode requested"); + const string errorType = string( mode == IBamIODevice::ReadWrite ? "unsupported" + : "unknown" ); + const string message = errorType + " open mode requested"; + SetErrorString("BamPipe::Open", message); return false; } // check that we obtained a valid FILE* if ( m_stream == 0 ) { const string message_base = string("could not open handle on "); - const string message = message_base + ( (mode == IBamIODevice::ReadOnly) ? "stdin" : "stdout" ); + const string message = message_base + ( (mode == IBamIODevice::ReadOnly) ? "stdin" + : "stdout" ); SetErrorString("BamPipe::Open", message); return false; } diff --git a/src/api/internal/io/BgzfStream_p.cpp b/src/api/internal/io/BgzfStream_p.cpp index 8b1aff6..7f73d67 100644 --- a/src/api/internal/io/BgzfStream_p.cpp +++ b/src/api/internal/io/BgzfStream_p.cpp @@ -25,24 +25,6 @@ using namespace BamTools::Internal; #include using namespace std; -// ---------------------------- -// RaiiWrapper implementation -// ---------------------------- - -BgzfStream::RaiiWrapper::RaiiWrapper(void) { - CompressedBlock = new char[Constants::BGZF_MAX_BLOCK_SIZE]; - UncompressedBlock = new char[Constants::BGZF_DEFAULT_BLOCK_SIZE]; -} - -BgzfStream::RaiiWrapper::~RaiiWrapper(void) { - - // clean up buffers - delete[] CompressedBlock; - delete[] UncompressedBlock; - CompressedBlock = 0; - UncompressedBlock = 0; -} - // --------------------------- // BgzfStream implementation // --------------------------- @@ -54,6 +36,8 @@ BgzfStream::BgzfStream(void) , m_blockAddress(0) , m_isWriteCompressed(true) , m_device(0) + , m_uncompressedBlock(Constants::BGZF_DEFAULT_BLOCK_SIZE) + , m_compressedBlock(Constants::BGZF_MAX_BLOCK_SIZE) { } // destructor @@ -84,7 +68,7 @@ void BgzfStream::Close(void) { if ( m_device->IsOpen() && (m_device->Mode() == IBamIODevice::WriteOnly) ) { FlushBlock(); const size_t blockLength = DeflateBlock(); - m_device->Write(Resources.CompressedBlock, blockLength); + m_device->Write(m_compressedBlock.Buffer, blockLength); } // close device @@ -92,6 +76,10 @@ void BgzfStream::Close(void) { delete m_device; m_device = 0; + // ensure our buffers are cleared out + m_uncompressedBlock.Clear(); + m_compressedBlock.Clear(); + // reset state m_blockLength = 0; m_blockOffset = 0; @@ -103,7 +91,7 @@ void BgzfStream::Close(void) { size_t BgzfStream::DeflateBlock(void) { // initialize the gzip header - char* buffer = Resources.CompressedBlock; + char* buffer = m_compressedBlock.Buffer; memset(buffer, 0, 18); buffer[0] = Constants::GZIP_ID1; buffer[1] = Constants::GZIP_ID2; @@ -129,7 +117,7 @@ size_t BgzfStream::DeflateBlock(void) { z_stream zs; zs.zalloc = NULL; zs.zfree = NULL; - zs.next_in = (Bytef*)Resources.UncompressedBlock; + zs.next_in = (Bytef*)m_uncompressedBlock.Buffer; zs.avail_in = inputLength; zs.next_out = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH]; zs.avail_out = bufferSize - @@ -187,7 +175,7 @@ size_t BgzfStream::DeflateBlock(void) { // store the CRC32 checksum uint32_t crc = crc32(0, NULL, 0); - crc = crc32(crc, (Bytef*)Resources.UncompressedBlock, inputLength); + crc = crc32(crc, (Bytef*)m_uncompressedBlock.Buffer, inputLength); BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc); BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength); @@ -196,7 +184,7 @@ size_t BgzfStream::DeflateBlock(void) { if ( remaining > 0 ) { if ( remaining > inputLength ) throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large"); - memcpy(Resources.UncompressedBlock, Resources.UncompressedBlock + inputLength, remaining); + memcpy(m_uncompressedBlock.Buffer, m_uncompressedBlock.Buffer + inputLength, remaining); } // update block data @@ -218,8 +206,16 @@ void BgzfStream::FlushBlock(void) { const size_t blockLength = DeflateBlock(); // flush the data to our output device - const size_t numBytesWritten = m_device->Write(Resources.CompressedBlock, blockLength); - if ( numBytesWritten != blockLength ) { + const int64_t numBytesWritten = m_device->Write(m_compressedBlock.Buffer, blockLength); + + // check for device error + if ( numBytesWritten < 0 ) { + const string message = string("device error: ") + m_device->GetErrorString(); + throw BamException("BgzfStream::FlushBlock", message); + } + + // check that we wrote expected numBytes + if ( numBytesWritten != static_cast(blockLength) ) { stringstream s(""); s << "expected to write " << blockLength << " bytes during flushing, but wrote " << numBytesWritten; @@ -238,9 +234,9 @@ size_t BgzfStream::InflateBlock(const size_t& blockLength) { z_stream zs; zs.zalloc = NULL; zs.zfree = NULL; - zs.next_in = (Bytef*)Resources.CompressedBlock + 18; + zs.next_in = (Bytef*)m_compressedBlock.Buffer + 18; zs.avail_in = blockLength - 16; - zs.next_out = (Bytef*)Resources.UncompressedBlock; + zs.next_out = (Bytef*)m_uncompressedBlock.Buffer; zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE; // initialize @@ -319,7 +315,7 @@ size_t BgzfStream::Read(char* data, const size_t dataLength) { // copy data from uncompressed source buffer into data destination buffer const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable ); - memcpy(output, Resources.UncompressedBlock + m_blockOffset, copyLength); + memcpy(output, m_uncompressedBlock.Buffer + m_blockOffset, copyLength); // update counters m_blockOffset += copyLength; @@ -349,7 +345,13 @@ void BgzfStream::ReadBlock(void) { // read block header from file char header[Constants::BGZF_BLOCK_HEADER_LENGTH]; - size_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH); + int64_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH); + + // check for device error + if ( numBytesRead < 0 ) { + const string message = string("device error: ") + m_device->GetErrorString(); + throw BamException("BgzfStream::ReadBlock", message); + } // if block header empty if ( numBytesRead == 0 ) { @@ -358,7 +360,7 @@ void BgzfStream::ReadBlock(void) { } // if block header invalid size - if ( numBytesRead != Constants::BGZF_BLOCK_HEADER_LENGTH ) + if ( numBytesRead != static_cast(Constants::BGZF_BLOCK_HEADER_LENGTH) ) throw BamException("BgzfStream::ReadBlock", "invalid block header size"); // validate block header contents @@ -367,22 +369,30 @@ void BgzfStream::ReadBlock(void) { // copy header contents to compressed buffer const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1; - memcpy(Resources.CompressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH); + memcpy(m_compressedBlock.Buffer, header, Constants::BGZF_BLOCK_HEADER_LENGTH); // read remainder of block const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH; - numBytesRead = m_device->Read(&Resources.CompressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining); - if ( numBytesRead != remaining ) + numBytesRead = m_device->Read(&m_compressedBlock.Buffer[Constants::BGZF_BLOCK_HEADER_LENGTH], remaining); + + // check for device error + if ( numBytesRead < 0 ) { + const string message = string("device error: ") + m_device->GetErrorString(); + throw BamException("BgzfStream::ReadBlock", message); + } + + // check that we read in expected numBytes + if ( numBytesRead != static_cast(remaining) ) throw BamException("BgzfStream::ReadBlock", "could not read data from block"); // decompress block data - numBytesRead = InflateBlock(blockLength); + const size_t newBlockLength = InflateBlock(blockLength); // update block data if ( m_blockLength != 0 ) m_blockOffset = 0; m_blockAddress = blockAddress; - m_blockLength = numBytesRead; + m_blockLength = newBlockLength; } // seek to position in BGZF file @@ -442,7 +452,7 @@ size_t BgzfStream::Write(const char* data, const size_t dataLength) { // copy data contents to uncompressed output buffer unsigned int copyLength = min(blockLength - m_blockOffset, dataLength - numBytesWritten); - char* buffer = Resources.UncompressedBlock; + char* buffer = m_uncompressedBlock.Buffer; memcpy(buffer + m_blockOffset, input, copyLength); // update counter diff --git a/src/api/internal/io/BgzfStream_p.h b/src/api/internal/io/BgzfStream_p.h index 88d7472..47b3609 100644 --- a/src/api/internal/io/BgzfStream_p.h +++ b/src/api/internal/io/BgzfStream_p.h @@ -2,7 +2,7 @@ // BgzfStream_p.h (c) 2011 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 10 October 2011(DB) +// Last modified: 25 October 2011(DB) // --------------------------------------------------------------------------- // Based on BGZF routines developed at the Broad Institute. // Provides the basic functionality for reading & writing BGZF files @@ -23,6 +23,7 @@ // We mean it. #include "api/api_global.h" +#include "api/BamAux.h" #include "api/IBamIODevice.h" #include @@ -82,13 +83,8 @@ class BgzfStream { bool m_isWriteCompressed; IBamIODevice* m_device; - struct RaiiWrapper { - RaiiWrapper(void); - ~RaiiWrapper(void); - char* UncompressedBlock; - char* CompressedBlock; - }; - RaiiWrapper Resources; + RaiiBuffer m_uncompressedBlock; + RaiiBuffer m_compressedBlock; }; } // namespace Internal diff --git a/src/api/internal/io/ByteArray_p.cpp b/src/api/internal/io/ByteArray_p.cpp new file mode 100644 index 0000000..aa74f28 --- /dev/null +++ b/src/api/internal/io/ByteArray_p.cpp @@ -0,0 +1,102 @@ +#include "api/internal/io/ByteArray_p.h" +using namespace BamTools; +using namespace BamTools::Internal; + +#include +#include +using namespace std; + +// -------------------------- +// ByteArray implementation +// -------------------------- + +ByteArray::ByteArray(void) + : m_data() +{ } + +ByteArray::ByteArray(const string& value) + : m_data(value.begin(), value.end()) +{ } + +ByteArray::ByteArray(const vector& value) + : m_data(value) +{ } + +ByteArray::ByteArray(const char* value, size_t n) { + const string s(value, n); + m_data.assign(s.begin(), s.end()); +} + +ByteArray::ByteArray(const ByteArray& other) + : m_data(other.m_data) +{ } + +ByteArray::~ByteArray(void) { } + +ByteArray& ByteArray::operator=(const ByteArray& other) { + m_data = other.m_data; + return *this; +} + +void ByteArray::Clear(void) { + m_data.clear(); +} + +const char* ByteArray::ConstData(void) const { + return &m_data[0]; +} + +char* ByteArray::Data(void) { + return &m_data[0]; +} + +const char& ByteArray::operator[](size_t i) const { + return m_data[i]; +} + +char& ByteArray::operator[](size_t i) { + return m_data[i]; +} + +size_t ByteArray::IndexOf(const char c, const size_t from, const size_t to) const { + const size_t size = ( (to == 0 ) ? m_data.size() : to); + for ( size_t i = from; i < size; ++i ) { + if ( m_data.at(i) == c ) + return i; + } + return m_data.size(); +} + +ByteArray& ByteArray::Remove(size_t from, size_t n) { + + // if 'from' outside range, just return + const size_t originalSize = m_data.size(); + if ( from >= originalSize ) + return *this; + + // if asked to clip from 'from' to end (or beyond), simply resize + if ( from + n >= originalSize ) + Resize(from); + + // otherwise, shift data & resize + else { + memmove( &m_data[from], &m_data[from+n], (originalSize-from-n) ); + Resize(originalSize - n); + } + + // return reference to modified byte array + return *this; +} + +void ByteArray::Resize(size_t n) { + m_data.resize(n, 0); +} + +size_t ByteArray::Size(void) const { + return m_data.size(); +} + +void ByteArray::Squeeze(void) { + vector t(m_data); + t.swap(m_data); +} diff --git a/src/api/internal/io/ByteArray_p.h b/src/api/internal/io/ByteArray_p.h new file mode 100644 index 0000000..89d9e0f --- /dev/null +++ b/src/api/internal/io/ByteArray_p.h @@ -0,0 +1,50 @@ +#ifndef BYTEARRAY_P_H +#define BYTEARRAY_P_H + +#include "api/api_global.h" +#include +#include + +namespace BamTools { +namespace Internal { + +// provides a wrapper around a byte vector +class ByteArray { + + // ctors & dtor + public: + ByteArray(void); + ByteArray(const std::string& value); + ByteArray(const std::vector& value); + ByteArray(const char* value, size_t n); + ByteArray(const ByteArray& other); + ~ByteArray(void); + + ByteArray& operator=(const ByteArray& other); + + // ByteArray interface + public: + + // data access + const char* ConstData(void) const; + char* Data(void); + const char& operator[](size_t i) const; + char& operator[](size_t i); + + // byte array manipulation + void Clear(void); + size_t IndexOf(const char c, const size_t from = 0, const size_t to = 0) const; + ByteArray& Remove(size_t from, size_t n); + void Resize(size_t n); + size_t Size(void) const; + void Squeeze(void); + + // data members + private: + std::vector m_data; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BYTEARRAY_P_H diff --git a/src/api/internal/io/CMakeLists.txt b/src/api/internal/io/CMakeLists.txt new file mode 100644 index 0000000..d9da416 --- /dev/null +++ b/src/api/internal/io/CMakeLists.txt @@ -0,0 +1,52 @@ +# ========================== +# BamTools CMakeLists.txt +# (c) 2011 Derek Barnett +# +# src/api/internal/io +# ========================== + +set ( InternalIODir "${InternalDir}/io" ) + +#-------------------------- +# platform-independent IO +#-------------------------- +set ( CommonIOSources + ${InternalIODir}/BamDeviceFactory_p.cpp + ${InternalIODir}/BamFile_p.cpp + ${InternalIODir}/BamFtp_p.cpp + ${InternalIODir}/BamHttp_p.cpp + ${InternalIODir}/BamPipe_p.cpp + ${InternalIODir}/BgzfStream_p.cpp + ${InternalIODir}/ByteArray_p.cpp + ${InternalIODir}/HostAddress_p.cpp + ${InternalIODir}/HostInfo_p.cpp + ${InternalIODir}/HttpHeader_p.cpp + ${InternalIODir}/ILocalIODevice_p.cpp + ${InternalIODir}/RollingBuffer_p.cpp + ${InternalIODir}/TcpSocket_p.cpp + ${InternalIODir}/TcpSocketEngine_p.cpp +) + +#------------------------ +# platform-dependent IO +#------------------------ +if ( _WIN32 ) + set ( PlatformIOSources + ${InternalIODir}/TcpSocketEngine_win_p.cpp + ) +else ( _WIN32 ) + set ( PlatformIOSources + ${InternalIODir}/TcpSocketEngine_unix_p.cpp + ) +endif ( _WIN32 ) + +#--------------------------- +# make build-specific list +#--------------------------- +set ( InternalIOSources + ${CommonIOSources} + ${PlatformIOSources} + + PARENT_SCOPE # <-- leave this last +) + diff --git a/src/api/internal/io/HostAddress_p.cpp b/src/api/internal/io/HostAddress_p.cpp new file mode 100644 index 0000000..aa3c9a3 --- /dev/null +++ b/src/api/internal/io/HostAddress_p.cpp @@ -0,0 +1,386 @@ +#include "api/internal/io/HostAddress_p.h" +using namespace BamTools; +using namespace BamTools::Internal; + +#include +#include +#include +#include +using namespace std; + +// ------------------------ +// static utility methods +// ------------------------ + +namespace BamTools { +namespace Internal { + +// convenience 'isalpha' wrapper +static inline +bool isAlpha(char c) { + return ( isalpha(c) != 0 ); +} + +// split a string into fields, on delimiter character +static inline +vector split(const string& source, char delim) { + stringstream ss(source); + string field; + vector fields; + while ( getline(ss, field, delim) ) + fields.push_back(field); + return fields; +} + +// return number of occurrences of @pattern in @source +static inline +uint8_t countHits(const string& source, const string& pattern) { + + uint8_t count(0); + size_t found = source.find(pattern); + while ( found != string::npos ) { + ++count; + found = source.find(pattern, found+1); + } + return count; +} + +static +bool parseIp4(const string& address, uint32_t& maybeIp4 ) { + + // split IP address into string fields + vector addressFields = split(address, '.'); + if ( addressFields.size() != 4 ) + return false; + + // convert each field to integer value + uint32_t ipv4(0); + for ( uint8_t i = 0; i < 4; ++i ) { + + int value = atoi( addressFields.at(i).c_str() ); + if ( value < 0 || value > 255 ) + return false; + + // append byte value + ipv4 <<= 8; + ipv4 += value; + } + + // store 32-bit IP address & return success + maybeIp4 = ipv4; + return true; +} + +static +bool parseIp6(const string& address, uint8_t* maybeIp6 ) { + + string tmp = address; + + // look for '%' char (if found, lop off that part of address) + // we're going to ignore any link-local zone index, for now at least + const size_t percentFound = tmp.rfind('%'); + if ( percentFound != string::npos ) + tmp = tmp.substr(0, percentFound); + + // split IP address into string fields + vector fields = split(tmp, ':'); + const uint8_t numFields = fields.size(); + if ( numFields < 3 || numFields > 8 ) + return false; + + // get number of '::' separators + const uint8_t numColonColons = countHits(tmp, "::"); + if ( numFields == 8 && numColonColons > 1 ) + return false; + + // check valid IPv6 'compression' + // must be valid 'pure' IPv6 or mixed IPv4/6 notation + const size_t dotFound = tmp.find('.'); + const bool isMixed = ( dotFound != string::npos ); + if ( numColonColons != 1 && (numFields < (isMixed ? 7 : 8)) ) + return false; + + // iterate over provided fields + size_t index = 16; + size_t fillCount = 9 - numFields; + for ( int8_t i = numFields - 1; i >= 0; --i ) { + if ( index == 0 ) + return false; + const string& field = fields.at(i); + + // if field empty + if ( field.empty() ) { + + // if last field empty + if ( i == numFields - 1 ) { + const string& previousField = fields.at(i-1); + if ( previousField.empty() ) + return false; + maybeIp6[--index] = 0; + maybeIp6[--index] = 0; + } + + // if first field empty + else if ( i == 0 ) { + // make sure ':' isn't first character + const string& nextField = fields.at(i+1); + if ( nextField.empty() ) return false; + maybeIp6[--index] = 0; + maybeIp6[--index] = 0; + } + + // fill in 'compressed' 0s + else { + for ( uint8_t j = 0; j < fillCount; ++j ) { + if ( index == 0 ) return false; + maybeIp6[--index] = 0; + maybeIp6[--index] = 0; + } + } + } + + // field has data + else { + uint32_t value = static_cast( strtoul(field.c_str(), 0, 16) ); + + if ( value <= 0xffff ) { + maybeIp6[--index] = value & 0xff; + maybeIp6[--index] = (value >> 8) & 0xff; + } + + // possible mixed IPv4/6 notation + else { + + // mixed field must be last + if ( i != numFields - 1 ) + return false; + + // parse the IPv4 section + uint32_t maybeIp4; + if ( !parseIp4(field, maybeIp4) ) + return false; + + // store IPv4 fields in IPv6 container + maybeIp6[--index] = maybeIp4 & 0xff; + maybeIp6[--index] = (maybeIp4 >> 8) & 0xff; + maybeIp6[--index] = (maybeIp4 >> 16) & 0xff; + maybeIp6[--index] = (maybeIp4 >> 24) & 0xff; + --fillCount; + } + } + } + + // should have parsed OK, return success + return true; +} + +} // namespace Internal +} // namespace BamTools + +// ---------------------------- +// HostAddress implementation +// ---------------------------- + +HostAddress::HostAddress(void) + : m_protocol(HostAddress::UnknownNetworkProtocol) + , m_ip4Address(0) + , m_hasIpAddress(true) +{ } + +HostAddress::HostAddress(const uint32_t ip4Address) + : m_protocol(HostAddress::UnknownNetworkProtocol) + , m_ip4Address(0) + , m_hasIpAddress(true) +{ + SetAddress(ip4Address); +} + +HostAddress::HostAddress(const uint8_t* ip6Address) + : m_protocol(HostAddress::UnknownNetworkProtocol) + , m_ip4Address(0) + , m_hasIpAddress(true) +{ + SetAddress(ip6Address); +} + +HostAddress::HostAddress(const IPv6Address& ip6Address) + : m_protocol(HostAddress::UnknownNetworkProtocol) + , m_ip4Address(0) + , m_hasIpAddress(true) +{ + SetAddress(ip6Address); +} + +HostAddress::HostAddress(const std::string& address) + : m_protocol(HostAddress::UnknownNetworkProtocol) + , m_ip4Address(0) +{ + SetAddress(address); +} + +HostAddress::HostAddress(const HostAddress& other) + : m_protocol(other.m_protocol) + , m_ip4Address(other.m_ip4Address) + , m_ip6Address(other.m_ip6Address) + , m_ipString(other.m_ipString) + , m_hasIpAddress(other.m_hasIpAddress) +{ } + +HostAddress::~HostAddress(void) { } + +bool HostAddress::operator==(const HostAddress& other) const { + + // if self is IPv4 + if ( m_protocol == HostAddress::IPv4Protocol ) { + return ( other.m_protocol == HostAddress::IPv4Protocol && + m_ip4Address == other.m_ip4Address + ); + } + + // if self is IPv6 + else if ( m_protocol == HostAddress::IPv6Protocol ) { + return ( other.m_protocol == HostAddress::IPv6Protocol && + memcmp(&m_ip6Address, &other.m_ip6Address, sizeof(IPv6Address)) == 0 + ); + } + + // otherwise compare protocols + else return m_protocol == other.m_protocol; +} + +bool HostAddress::operator<(const HostAddress& other) const { + + // if self is IPv4 + if ( m_protocol == HostAddress::IPv4Protocol ) { + if ( other.m_protocol == HostAddress::IPv4Protocol ) + return m_ip4Address < m_ip4Address; + } + + // if self is IPv6 + else if ( m_protocol == HostAddress::IPv6Protocol ) { + if ( other.m_protocol == HostAddress::IPv6Protocol ) + return (memcmp(&m_ip6Address, &other.m_ip6Address, sizeof(IPv6Address)) < 0); + } + + // otherwise compare protocol types + return m_protocol < other.m_protocol; +} + +void HostAddress::Clear(void) { + + m_protocol = HostAddress::UnknownNetworkProtocol; + m_ip4Address = 0; + memset(&m_ip6Address, 0, sizeof(IPv6Address)); + m_ipString.clear(); + + // this may feel funny, but cleared IP value (equivalent to '0.0.0.0') is technically valid IP + // and that's not really what this flag is checking + // + // this flag is only false iff the string passed in is a 'plain-text' hostname (www.foo.bar) + m_hasIpAddress = true; +} + +bool HostAddress::HasIPAddress(void) const { + return m_hasIpAddress; +} + +bool HostAddress::IsNull(void) const { + return m_protocol == HostAddress::UnknownNetworkProtocol; +} + +uint32_t HostAddress::GetIPv4Address(void) const { + return m_ip4Address; +} + +IPv6Address HostAddress::GetIPv6Address(void) const { + return m_ip6Address; +} + +std::string HostAddress::GetIPString(void) const { + + stringstream ss(""); + + // IPv4 format + if ( m_protocol == HostAddress::IPv4Protocol ) { + ss << ( (m_ip4Address>>24) & 0xff ) << '.' + << ( (m_ip4Address>>16) & 0xff ) << '.' + << ( (m_ip4Address>> 8) & 0xff ) << '.' + << ( m_ip4Address & 0xff ); + + } + + // IPv6 format + else if ( m_protocol == HostAddress::IPv6Protocol ) { + for ( uint8_t i = 0; i < 8; ++i ) { + if ( i != 0 ) + ss << ':'; + ss << hex << ( (uint16_t(m_ip6Address[2*i]) << 8) | + (uint16_t(m_ip6Address[2*i+1])) + ); + } + } + + // return result (empty string if unknown protocol) + return ss.str(); +} + +HostAddress::NetworkProtocol HostAddress::GetProtocol(void) const { + return m_protocol; +} + +bool HostAddress::ParseAddress(void) { + + // all IPv6 addresses should have a ':' + string s = m_ipString; + size_t found = s.find(':'); + if ( found != string::npos ) { + // try parse IP6 address + uint8_t maybeIp6[16]; + if ( parseIp6(s, maybeIp6) ) { + SetAddress(maybeIp6); + m_protocol = HostAddress::IPv6Protocol; + return true; + } + } + + // all IPv4 addresses should have a '.' + found = s.find('.'); + if ( found != string::npos ) { + uint32_t maybeIp4(0); + if ( parseIp4(s, maybeIp4) ) { + SetAddress(maybeIp4); + m_protocol = HostAddress::IPv4Protocol; + return true; + } + } + + // else likely just a plain-text host name "www.foo.bar" + // will need to look up IP address info later + m_protocol = HostAddress::UnknownNetworkProtocol; + return false; +} + +void HostAddress::SetAddress(const uint32_t ip4Address) { + m_ip4Address = ip4Address; + m_protocol = HostAddress::IPv4Protocol; + m_hasIpAddress = true; +} + +void HostAddress::SetAddress(const uint8_t* ip6Address) { + for ( uint8_t i = 0; i < 16; ++i ) + m_ip6Address[i] = ip6Address[i]; + m_protocol = HostAddress::IPv6Protocol; + m_hasIpAddress = true; +} + +void HostAddress::SetAddress(const IPv6Address& ip6Address) { + m_ip6Address = ip6Address; + m_ip4Address = 0; + m_protocol = HostAddress::IPv6Protocol; + m_hasIpAddress = true; +} + +void HostAddress::SetAddress(const std::string& address) { + m_ipString = address; + m_hasIpAddress = ParseAddress(); +} diff --git a/src/api/internal/io/HostAddress_p.h b/src/api/internal/io/HostAddress_p.h new file mode 100644 index 0000000..7542b67 --- /dev/null +++ b/src/api/internal/io/HostAddress_p.h @@ -0,0 +1,81 @@ +#ifndef HOSTADDRESS_P_H +#define HOSTADDRESS_P_H + +#include "api/api_global.h" +#include +#include + +namespace BamTools { +namespace Internal { + +struct IPv6Address { + + // ctor + inline IPv6Address(void) { memset(&data, 0, sizeof(uint8_t)*16); } + + // data access (no bounds checking) + inline uint8_t& operator[](size_t index) { return data[index]; } + inline uint8_t operator[](size_t index) const { return data[index]; } + + // data + uint8_t data[16]; +}; + +class HostAddress { + + // enums + public: + enum NetworkProtocol { UnknownNetworkProtocol = -1 + , IPv4Protocol = 0 + , IPv6Protocol + }; + + // ctors & dtor + public: + HostAddress(void); + explicit HostAddress(const uint32_t ip4Address); + explicit HostAddress(const uint8_t* ip6Address); + explicit HostAddress(const IPv6Address& ip6Address); + explicit HostAddress(const std::string& address); + HostAddress(const HostAddress& other); + ~HostAddress(void); + + // HostAddress interface + public: + void Clear(void); + bool HasIPAddress(void) const; // returns whether string address could be converted to IP address + bool IsNull(void) const; + + uint32_t GetIPv4Address(void) const; + IPv6Address GetIPv6Address(void) const; + std::string GetIPString(void) const; + HostAddress::NetworkProtocol GetProtocol(void) const; + + void SetAddress(const uint32_t ip4Address); + void SetAddress(const uint8_t* ip6Address); + void SetAddress(const IPv6Address& ip6Address); + void SetAddress(const std::string& address); + + // HostAddress comparison operators + public: + bool operator==(const HostAddress& other) const; + bool operator!=(const HostAddress& other) const { return !( operator==(other) ); } + bool operator<(const HostAddress& other) const; + + // internal methods + private: + bool ParseAddress(void); + + // data members + private: + HostAddress::NetworkProtocol m_protocol; + uint32_t m_ip4Address; + IPv6Address m_ip6Address; + std::string m_ipString; + bool m_hasIpAddress; // true until string passed in, then signifies whether string was an IP +}; + +} // namespace Internal +} // namespace BamTools + +#endif // HOSTADDRESS_P_H diff --git a/src/api/internal/io/HostInfo_p.cpp b/src/api/internal/io/HostInfo_p.cpp new file mode 100644 index 0000000..2bb0187 --- /dev/null +++ b/src/api/internal/io/HostInfo_p.cpp @@ -0,0 +1,213 @@ +#include "api/internal/io/HostInfo_p.h" +using namespace BamTools; +using namespace BamTools::Internal; + +// platorm-specifics +#ifdef _WIN32 +# include "api/internal/io/NetWin_p.h" +#else +# include "api/internal/io/NetUnix_p.h" +#endif + +// standard C++ includes +#include +#include +#include +using namespace std; + +// ------------------------- +// HostInfo basics +// ------------------------- + +HostInfo::HostInfo(void) + : m_error(HostInfo::NoError) +{ } + +HostInfo::HostInfo(const HostInfo& other) + : m_hostName(other.m_hostName) + , m_addresses(other.m_addresses) + , m_error(other.m_error) + , m_errorString(other.m_errorString) +{ } + +HostInfo::~HostInfo(void) { } + +vector HostInfo::Addresses(void) const { + return m_addresses; +} + +HostInfo::ErrorType HostInfo::GetError(void) const { + return m_error; +} + +string HostInfo::GetErrorString(void) const { + return m_errorString; +} + +string HostInfo::HostName(void) const { + return m_hostName; +} + +void HostInfo::SetAddresses(const std::vector& addresses) { + m_addresses = addresses; +} + +void HostInfo::SetError(const HostInfo::ErrorType error) { + m_error = error; +} + +void HostInfo::SetErrorString(const std::string& errorString) { + m_errorString = errorString; +} + +void HostInfo::SetHostName(const string& name) { + m_hostName = name; +} + +// ------------------------------ +// HostInfo::Lookup(host, port) +// ------------------------------ + +HostInfo HostInfo::Lookup(const string& hostname, const string& port) { + + HostInfo result; + set uniqueAddresses; + +#ifdef _WIN32 + WindowsSockInit init; +#endif + + HostAddress address; + address.SetAddress(hostname); + + // if hostname is an IP string ('0.0.0.0' or IPv6 format) + // do reverse lookup for host domain name + // + // TODO: might just remove this... not sure if proper 'hostname' from IP string is needed + // + // so far, haven't been able to successfully fetch a domain name with reverse DNS + // getnameinfo() on test sites just returns original IP string. BUT this is likely a rare + // case that client code tries to use an IP string and the connection should work fine + // anyway. GetHostName() just won't quite show what I was hoping for. :( + if ( address.HasIPAddress() ) { + + const uint16_t portNum = static_cast( atoi(port.c_str()) ); + + sockaddr_in sa4; + sockaddr_in6 sa6; + sockaddr* sa = 0; + BT_SOCKLEN_T saSize = 0; + + // IPv4 + if ( address.GetProtocol() == HostAddress::IPv4Protocol ) { + sa = (sockaddr*)&sa4; + saSize = sizeof(sa4); + memset(&sa4, 0, sizeof(sa4)); + sa4.sin_family = AF_INET; + sa4.sin_addr.s_addr = htonl(address.GetIPv4Address()); + sa4.sin_port = htons(portNum); + } + + // IPv6 + else if ( address.GetProtocol() == HostAddress::IPv4Protocol ){ + sa = (sockaddr*)&sa6; + saSize = sizeof(sa6); + memset(&sa6, 0, sizeof(sa6)); + sa6.sin6_family = AF_INET6; + memcpy(sa6.sin6_addr.s6_addr, address.GetIPv6Address().data, sizeof(sa6.sin6_addr.s6_addr)); + sa6.sin6_port = htons(portNum); + } + + // unknown (should be unreachable) + else BT_ASSERT_X(false, "HostInfo::Lookup: unknown network protocol"); + + // lookup name for IP + char hbuf[NI_MAXHOST]; + char serv[NI_MAXSERV]; + if ( sa && (getnameinfo(sa, saSize, hbuf, sizeof(hbuf), serv, sizeof(serv), 0) == 0) ) + result.SetHostName(string(hbuf)); + + // if no domain name found, just use the original address's IP string + if ( result.HostName().empty() ) + result.SetHostName(address.GetIPString()); + + // store address in HostInfo + uniqueAddresses.insert(address); + } + + // otherwise, hostname is a domain name ('www.foo.bar') + // do 'normal' lookup + else { + + // setup address lookup 'hints' + addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; // allow either IPv4 or IPv6 + hints.ai_socktype = SOCK_STREAM; // for TCP + hints.ai_protocol = IPPROTO_TCP; + + // fetch addresses for requested hostname/port + addrinfo* res; + int status = getaddrinfo(hostname.c_str(), port.c_str(), &hints, &res ); + + // if everything OK + if ( status == 0 ) { + + // iterate over all IP addresses found + addrinfo* p = res; + for ( ; p != NULL; p = p->ai_next ) { + + // IPv4 + if ( p->ai_family == AF_INET ) { + sockaddr_in* ipv4 = (sockaddr_in*)p->ai_addr; + HostAddress a( ntohl(ipv4->sin_addr.s_addr) ); + uniqueAddresses.insert(a); + } + + // IPv6 + else if ( p->ai_family == AF_INET6 ) { + sockaddr_in6* ipv6 = (sockaddr_in6*)p->ai_addr; + HostAddress a(ipv6->sin6_addr.s6_addr); + uniqueAddresses.insert(a); + } + } + + // if we iterated, but no addresses were stored + if ( uniqueAddresses.empty() && (p == NULL) ) { + result.SetError(HostInfo::UnknownError); + result.SetErrorString("HostInfo: unknown address types found"); + } + } + + // handle error cases + else if ( +#ifndef _WIN32 + status == EAI_NONAME + || status == EAI_FAIL +# ifdef EAI_NODATA + || status == EAI_NODATA // officially deprecated, but just in case we run into it +# endif // EAI_NODATA + +#else // _WIN32 + WSAGetLastError() == WSAHOST_NOT_FOUND + || WSAGetLastError() == WSANO_DATA + || WSAGetLastError() == WSANO_RECOVERY +#endif // _WIN32 + ) + { + result.SetError(HostInfo::HostNotFound); + result.SetErrorString("HostInfo: host not found"); + } + else { + result.SetError(HostInfo::UnknownError); + result.SetErrorString("HostInfo: unknown error encountered"); + } + + // cleanup + freeaddrinfo(res); + } + + // store fetched addresses (converting set -> vector) in result & return + result.SetAddresses( vector(uniqueAddresses.begin(), uniqueAddresses.end()) ); + return result; +} diff --git a/src/api/internal/io/HostInfo_p.h b/src/api/internal/io/HostInfo_p.h new file mode 100644 index 0000000..5660a38 --- /dev/null +++ b/src/api/internal/io/HostInfo_p.h @@ -0,0 +1,57 @@ +#ifndef HOSTINFO_P_H +#define HOSTINFO_P_H + +#include "api/internal/io/HostAddress_p.h" +#include +#include + +namespace BamTools { +namespace Internal { + +class HostInfo { + + public: + enum ErrorType { NoError = 0 + , HostNotFound + , UnknownError + }; + + // ctors & dtor + public: + HostInfo(void); + HostInfo(const HostInfo& other); + ~HostInfo(void); + + // HostInfo interface + public: + std::string HostName(void) const; + void SetHostName(const std::string& name); + + std::vector Addresses(void) const; + void SetAddresses(const std::vector& addresses); + + HostInfo::ErrorType GetError(void) const; + std::string GetErrorString(void) const; + + // internal methods + private: + void SetError(const HostInfo::ErrorType error); + void SetErrorString(const std::string& errorString); + + // static methods + public: + static HostInfo Lookup(const std::string& hostname, + const std::string& port); + + // data members + private: + std::string m_hostName; + std::vector m_addresses; + HostInfo::ErrorType m_error; + std::string m_errorString; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // HOSTINFO_P_H diff --git a/src/api/internal/io/ILocalIODevice_p.cpp b/src/api/internal/io/ILocalIODevice_p.cpp index 63a3bee..19cc1da 100644 --- a/src/api/internal/io/ILocalIODevice_p.cpp +++ b/src/api/internal/io/ILocalIODevice_p.cpp @@ -38,10 +38,10 @@ void ILocalIODevice::Close(void) { m_mode = IBamIODevice::NotOpen; } -size_t ILocalIODevice::Read(char* data, const unsigned int numBytes) { +int64_t ILocalIODevice::Read(char* data, const unsigned int numBytes) { BT_ASSERT_X( m_stream, "ILocalIODevice::Read: trying to read from null stream" ); BT_ASSERT_X( (m_mode == IBamIODevice::ReadOnly), "ILocalIODevice::Read: device not in read-only mode"); - return fread(data, sizeof(char), numBytes, m_stream); + return static_cast( fread(data, sizeof(char), numBytes, m_stream) ); } int64_t ILocalIODevice::Tell(void) const { @@ -49,8 +49,8 @@ int64_t ILocalIODevice::Tell(void) const { return ftell64(m_stream); } -size_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) { +int64_t ILocalIODevice::Write(const char* data, const unsigned int numBytes) { BT_ASSERT_X( m_stream, "ILocalIODevice::Write: tryint to write to null stream" ); BT_ASSERT_X( (m_mode == IBamIODevice::WriteOnly), "ILocalIODevice::Write: device not in write-only mode" ); - return fwrite(data, sizeof(char), numBytes, m_stream); + return static_cast( fwrite(data, sizeof(char), numBytes, m_stream) ); } diff --git a/src/api/internal/io/ILocalIODevice_p.h b/src/api/internal/io/ILocalIODevice_p.h index a71f378..cf01f90 100644 --- a/src/api/internal/io/ILocalIODevice_p.h +++ b/src/api/internal/io/ILocalIODevice_p.h @@ -35,9 +35,9 @@ class ILocalIODevice : public IBamIODevice { // IBamIODevice implementation public: virtual void Close(void); - virtual size_t Read(char* data, const unsigned int numBytes); + virtual int64_t Read(char* data, const unsigned int numBytes); virtual int64_t Tell(void) const; - virtual size_t Write(const char* data, const unsigned int numBytes); + virtual int64_t Write(const char* data, const unsigned int numBytes); // data members protected: diff --git a/src/api/internal/io/IRemoteIODevice_p.cpp b/src/api/internal/io/IRemoteIODevice_p.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/api/internal/io/IRemoteIODevice_p.h b/src/api/internal/io/IRemoteIODevice_p.h deleted file mode 100644 index e69de29..0000000 diff --git a/src/api/internal/io/TcpSocketEngine_p.cpp b/src/api/internal/io/TcpSocketEngine_p.cpp new file mode 100644 index 0000000..d13b0bc --- /dev/null +++ b/src/api/internal/io/TcpSocketEngine_p.cpp @@ -0,0 +1,183 @@ +#include "api/internal/io/HostInfo_p.h" +#include "api/internal/io/TcpSocketEngine_p.h" + +using namespace BamTools; +using namespace BamTools::Internal; + +TcpSocketEngine::TcpSocketEngine(void) + : m_socketDescriptor(-1) +// , m_localPort(0) + , m_remotePort(0) + , m_socketError(TcpSocket::UnknownSocketError) + , m_socketState(TcpSocket::UnconnectedState) +{ } + +TcpSocketEngine::TcpSocketEngine(const TcpSocketEngine& other) + : m_socketDescriptor(other.m_socketDescriptor) +// , m_localAddress(other.m_localAddress) + , m_remoteAddress(other.m_remoteAddress) +// , m_localPort(other.m_localPort) + , m_remotePort(other.m_remotePort) + , m_socketError(other.m_socketError) + , m_socketState(other.m_socketState) + , m_errorString(other.m_errorString) +{ } + +TcpSocketEngine::~TcpSocketEngine(void) { + Close(); +} + +void TcpSocketEngine::Close(void) { + + // close socket if we have valid FD + if ( m_socketDescriptor != -1 ) { + nativeClose(); + m_socketDescriptor = -1; + } + + // reset state + m_socketState = TcpSocket::UnconnectedState; +// m_localAddress.Clear(); + m_remoteAddress.Clear(); +// m_localPort = 0; + m_remotePort = 0; +} + +bool TcpSocketEngine::Connect(const HostAddress& address, const uint16_t port) { + + // return failure if invalid FD or already connected + if ( !IsValid() || (m_socketState == TcpSocket::ConnectedState) ) { + // TODO: set error string + return false; + } + + // attempt to connect to host address on requested port + if ( !nativeConnect(address, port) ) { + // TODO: set error string + return false; + } + + // if successful, store remote host address port & return success + // TODO: (later) fetch proxied remote & local host/port here + m_remoteAddress = address; + m_remotePort = port; + return true; +} + +std::string TcpSocketEngine::GetErrorString(void) const { + return m_errorString; +} + +//HostAddress TcpSocketEngine::GetLocalAddress(void) const { +// return m_localAddress; +//} + +//uint16_t TcpSocketEngine::GetLocalPort(void) const { +// return m_localPort; +//} + +HostAddress TcpSocketEngine::GetRemoteAddress(void) const { + return m_remoteAddress; +} + +uint16_t TcpSocketEngine::GetRemotePort(void) const { + return m_remotePort; +} + +int TcpSocketEngine::GetSocketDescriptor(void) const { + return m_socketDescriptor; +} + +TcpSocket::SocketError TcpSocketEngine::GetSocketError(void) { + return m_socketError; +} + +TcpSocket::SocketState TcpSocketEngine::GetSocketState(void) { + return m_socketState; +} + +bool TcpSocketEngine::Initialize(HostAddress::NetworkProtocol protocol) { + + // close current socket if we have one open + if ( IsValid() ) + Close(); + + // attempt to create new socket + return nativeCreateSocket(protocol); +} + +bool TcpSocketEngine::IsValid(void) const { + return (m_socketDescriptor != -1); +} + +int64_t TcpSocketEngine::NumBytesAvailable(void) const { + + // return 0 if socket FD is invalid + if ( !IsValid() ) { + // TODO: set error string + return -1; + } + + // otherwise check socket to see how much is ready + return nativeNumBytesAvailable(); +} + +int64_t TcpSocketEngine::Read(char* dest, size_t max) { + + // return failure if can't read + if ( !IsValid() || (m_socketState != TcpSocket::ConnectedState) ) + return -1; + + // otherwise return number of bytes read + return nativeRead(dest, max); +} + +bool TcpSocketEngine::WaitForRead(int msec, bool* timedOut) { + + // reset timedOut flag + *timedOut = false; + + // need to wait for our socket to be ready to read + int ret = nativeSelect(msec, true); + + // if timed out + if ( ret == 0 ) { + *timedOut = true; + m_socketError = TcpSocket::SocketTimeoutError; + m_errorString = "socket timed out"; + } + + // return if any sockets available for reading + return ( ret > 0 ); +} + +bool TcpSocketEngine::WaitForWrite(int msec, bool* timedOut) { + + // reset timedOut flag + *timedOut = false; + + // need to wait for our socket to be ready to write + int ret = nativeSelect(msec, false); + + // if timed out + if ( ret == 0 ) { + *timedOut = true; + m_socketError = TcpSocket::SocketTimeoutError; + m_errorString = "socket timed out"; + } + + // return if any sockets available for reading + return ( ret > 0 ); +} + +int64_t TcpSocketEngine::Write(const char* data, size_t length) { + + // return failure if can't write + if ( !IsValid() || (m_socketState != TcpSocket::ConnectedState) ) { + // TODO: set error string + return -1; + } + + // otherwise return number of bytes written + return nativeWrite(data, length); +} diff --git a/src/api/internal/io/TcpSocketEngine_p.h b/src/api/internal/io/TcpSocketEngine_p.h new file mode 100644 index 0000000..a87eafe --- /dev/null +++ b/src/api/internal/io/TcpSocketEngine_p.h @@ -0,0 +1,77 @@ +#ifndef TCPSOCKETENGINE_P_H +#define TCPSOCKETENGINE_P_H + +#include "api/internal/io/HostAddress_p.h" +#include "api/internal/io/TcpSocket_p.h" + +namespace BamTools { +namespace Internal { + +struct TcpSocketEngine { + + // ctors & dtor + public: + TcpSocketEngine(void); + TcpSocketEngine(const TcpSocketEngine& other); + ~TcpSocketEngine(void); + + // TcpSocketEngine interface + public: + + // connection-related methods + void Close(void); + bool Connect(const HostAddress& address, const uint16_t port); + bool Initialize(HostAddress::NetworkProtocol protocol); + bool IsValid(void) const; + + // IO-related methods + int64_t NumBytesAvailable(void) const; + int64_t Read(char* dest, size_t max); + int64_t Write(const char* data, size_t length); + + bool WaitForRead(int msec, bool* timedOut); + bool WaitForWrite(int msec, bool* timedOut); + + // query connection state +// HostAddress GetLocalAddress(void) const; +// uint16_t GetLocalPort(void) const; + HostAddress GetRemoteAddress(void) const; + uint16_t GetRemotePort(void) const; + + int GetSocketDescriptor(void) const; + TcpSocket::SocketError GetSocketError(void); + TcpSocket::SocketState GetSocketState(void); + + std::string GetErrorString(void) const; + + // platform-dependent internal methods + // provided in the corresponding TcpSocketEngine__p.cpp + private: + void nativeClose(void); + bool nativeConnect(const HostAddress& address, const uint16_t port); + bool nativeCreateSocket(HostAddress::NetworkProtocol protocol); + void nativeDisconnect(void); + bool nativeFetchConnectionParameters(void); + int64_t nativeNumBytesAvailable(void) const; + int64_t nativeRead(char* dest, size_t max); + int nativeSelect(int msecs, bool isRead) const; + int64_t nativeWrite(const char* data, size_t length); + + // data members + private: + int m_socketDescriptor; + +// HostAddress m_localAddress; + HostAddress m_remoteAddress; +// uint16_t m_localPort; + uint16_t m_remotePort; + + TcpSocket::SocketError m_socketError; + TcpSocket::SocketState m_socketState; + std::string m_errorString; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // TCPSOCKETENGINE_P_H diff --git a/src/api/internal/io/TcpSocketEngine_unix_p.cpp b/src/api/internal/io/TcpSocketEngine_unix_p.cpp new file mode 100644 index 0000000..d24bfb9 --- /dev/null +++ b/src/api/internal/io/TcpSocketEngine_unix_p.cpp @@ -0,0 +1,303 @@ +#include "api/internal/io/TcpSocketEngine_p.h" +#include "api/internal/io/NetUnix_p.h" +using namespace BamTools; +using namespace BamTools::Internal; + +#include +#include +#include +using namespace std; + +// ------------------------ +// static utility methods +// ------------------------ + +namespace BamTools { +namespace Internal { + +//static inline +//void getPortAndAddress(const sockaddr* s, uint16_t& port, HostAddress& address) { + +// // IPv6 +// if (s->sa_family == AF_INET6) { +// sockaddr_in6* ip6 = (sockaddr_in6*)s; +// port = ntohs(ip6->sin6_port); +// IPv6Address tmp; +// memcpy(&tmp.data, &(ip6->sin6_addr.s6_addr), sizeof(tmp)); +// address.SetAddress(tmp); +// return; +// } + +// // IPv4 +// if ( s->sa_family == AF_INET ) { +// sockaddr_in* ip4 = (sockaddr_in*)s; +// port = ntohl(ip4->sin_port); +// address.SetAddress( ntohl(ip4->sin_addr.s_addr) ); +// return; +// } + +// // should be unreachable +// BT_ASSERT_X(false, "TcpSocketEngine::getPortAndAddress() : unknown network protocol "); +//} + +} // namespace Internal +} // namespace BamTools + +// -------------------------------- +// TcpSocketEngine implementation +// -------------------------------- + +void TcpSocketEngine::nativeClose(void) { + close(m_socketDescriptor); +} + +bool TcpSocketEngine::nativeConnect(const HostAddress& address, const uint16_t port) { + + // setup connection parameters from address/port + sockaddr_in sockAddrIPv4; + sockaddr_in6 sockAddrIPv6; + sockaddr* sockAddrPtr = 0; + BT_SOCKLEN_T sockAddrSize = 0; + + // IPv6 + if ( address.GetProtocol() == HostAddress::IPv6Protocol ) { + + memset(&sockAddrIPv6, 0, sizeof(sockAddrIPv6)); + sockAddrIPv6.sin6_family = AF_INET6; + sockAddrIPv6.sin6_port = htons(port); + + IPv6Address ip6 = address.GetIPv6Address(); + memcpy(&sockAddrIPv6.sin6_addr.s6_addr, &ip6, sizeof(ip6)); + + sockAddrSize = sizeof(sockAddrIPv6); + sockAddrPtr = (sockaddr*)&sockAddrIPv6; + } + + // IPv4 + else if ( address.GetProtocol() == HostAddress::IPv4Protocol ) { + + memset(&sockAddrIPv4, 0, sizeof(sockAddrIPv4)); + sockAddrIPv4.sin_family = AF_INET; + sockAddrIPv4.sin_port = htons(port); + sockAddrIPv4.sin_addr.s_addr = htonl(address.GetIPv4Address()); + + sockAddrSize = sizeof(sockAddrIPv4); + sockAddrPtr = (sockaddr*)&sockAddrIPv4; + } + + // unknown (should be unreachable) + else BT_ASSERT_X(false, "TcpSocketEngine::nativeConnect() : unknown network protocol"); + + // attempt connection + int connectResult = connect(m_socketDescriptor, sockAddrPtr, sockAddrSize); + + // if hit error + if ( connectResult == -1 ) { + + // see what error was encountered + switch ( errno ) { + + case EISCONN: + m_socketState = TcpSocket::ConnectedState; + break; + case ECONNREFUSED: + case EINVAL: + m_socketError = TcpSocket::ConnectionRefusedError; + m_socketState = TcpSocket::UnconnectedState; + m_errorString = "connection refused"; + break; + case ETIMEDOUT: + m_socketError = TcpSocket::NetworkError; + m_errorString = "connection timed out"; + break; + case EHOSTUNREACH: + m_socketError = TcpSocket::NetworkError; + m_socketState = TcpSocket::UnconnectedState; + m_errorString = "host unreachable"; + break; + case ENETUNREACH: + m_socketError = TcpSocket::NetworkError; + m_socketState = TcpSocket::UnconnectedState; + m_errorString = "network unreachable"; + break; + case EADDRINUSE: + m_socketError = TcpSocket::NetworkError; + m_errorString = "address already in use"; + break; + case EACCES: + case EPERM: + m_socketError = TcpSocket::SocketAccessError; + m_socketState = TcpSocket::UnconnectedState; + m_errorString = "permission denied"; + case EAFNOSUPPORT: + case EBADF: + case EFAULT: + case ENOTSOCK: + m_socketState = TcpSocket::UnconnectedState; + default: + break; + } + + if ( m_socketState != TcpSocket::ConnectedState ) + return false; + } + + // otherwise, we should be good + // update state & return success + m_socketState = TcpSocket::ConnectedState; + return true; +} + +bool TcpSocketEngine::nativeCreateSocket(HostAddress::NetworkProtocol protocol) { + + // get protocol value for requested protocol type + const int protocolNum = ( (protocol == HostAddress::IPv6Protocol) ? AF_INET6 : AF_INET ); + + // attempt to create socket + int socketFd = socket(protocolNum, SOCK_STREAM, IPPROTO_TCP); + + // if we fetched an invalid socket descriptor + if ( socketFd <= 0 ) { + + // see what error we got + switch ( errno ) { + case EPROTONOSUPPORT: + case EAFNOSUPPORT: + case EINVAL: + m_socketError = TcpSocket::UnsupportedSocketOperationError; + m_errorString = "protocol not supported"; + break; + case ENFILE: + case EMFILE: + case ENOBUFS: + case ENOMEM: + m_socketError = TcpSocket::SocketResourceError; + m_errorString = "out of resources"; + break; + case EACCES: + m_socketError = TcpSocket::SocketAccessError; + m_errorString = "permission denied"; + break; + default: + break; + } + + // return failure + return false; + } + + // otherwise, store our socket FD & return success + m_socketDescriptor = socketFd; + return true; +} + +//bool TcpSocketEngine::nativeFetchConnectionParameters(void) { + +// // reset addresses/ports +//// m_localAddress.Clear(); +// m_remoteAddress.Clear(); +//// m_localPort = 0; +// m_remotePort = 0; + +// // skip (return failure) if invalid socket FD +// if ( m_socketDescriptor == -1 ) +// return false; + +// sockaddr sa; +// BT_SOCKLEN_T sockAddrSize = sizeof(sa); + +// // fetch local address info +// memset(&sa, 0, sizeof(sa)); +// if ( getsockname(m_socketDescriptor, &sa, &sockAddrSize) == 0 ) +// getPortAndAddress(&sa, m_localPort, m_localAddress); +// else if ( errno == EBADF ) { +// m_socketError = TcpSocket::UnsupportedSocketOperationError; +// m_errorString = "invalid socket descriptor"; +// return false; +// } + +// // fetch remote address +// if ( getpeername(m_socketDescriptor, &sa, &sockAddrSize) == 0 ) +// getPortAndAddress(&sa, m_remotePort, m_remoteAddress); + +// // return success +// return true; +//} + +int64_t TcpSocketEngine::nativeNumBytesAvailable(void) const { + + // fetch number of bytes, return 0 on error + int numBytes(0); + if ( ioctl(m_socketDescriptor, FIONREAD, (char*)&numBytes) < 0 ) + return -1; + return static_cast(numBytes); +} + +int64_t TcpSocketEngine::nativeRead(char* dest, size_t max) { + + if ( !IsValid() ) + return -1; + + ssize_t ret = read(m_socketDescriptor, dest, max); + if ( ret < 0 ) { + ret = -1; + switch ( errno ) { + case EAGAIN : + // No data was available for reading + ret = -2; + break; + case ECONNRESET : + ret = 0; + break; + default: + break; + } + } + + return static_cast(ret); +} + +// negative value for msecs will block (forever) until +int TcpSocketEngine::nativeSelect(int msecs, bool isRead) const { + + // set up FD set + fd_set fds; + FD_ZERO(&fds); + FD_SET(m_socketDescriptor, &fds); + + // setup our timeout + timeval tv; + tv.tv_sec = msecs / 1000; + tv.tv_usec = (msecs % 1000) * 1000; + + // do 'select' + int ret; + if ( isRead ) + ret = select(m_socketDescriptor + 1, &fds, 0, 0, (msecs < 0 ? 0 : &tv)); + else + ret = select(m_socketDescriptor + 1, 0, &fds, 0, (msecs < 0 ? 0 : &tv)); + return ret; +} + +int64_t TcpSocketEngine::nativeWrite(const char* data, size_t length) { + + ssize_t writtenBytes = write(m_socketDescriptor, data, length); + if ( writtenBytes < 0 ) { + switch (errno) { + case EPIPE: + case ECONNRESET: + writtenBytes = -1; + m_socketError = TcpSocket::RemoteHostClosedError; + m_errorString = "remote host closed connection"; + Close(); + break; + case EAGAIN: + writtenBytes = 0; + break; + default: + break; + } + } + + return static_cast(writtenBytes); +} diff --git a/src/api/internal/io/TcpSocket_p.cpp b/src/api/internal/io/TcpSocket_p.cpp new file mode 100644 index 0000000..eb9e760 --- /dev/null +++ b/src/api/internal/io/TcpSocket_p.cpp @@ -0,0 +1,342 @@ +// *************************************************************************** +// TcpSocket_p.cpp (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// --------------------------------------------------------------------------- +// Last modified: 25 October 2011 (DB) +// --------------------------------------------------------------------------- +// Provides generic TCP socket (buffered) I/O +// *************************************************************************** + +#include "api/internal/io/TcpSocket_p.h" +#include "api/internal/io/TcpSocketEngine_p.h" +using namespace BamTools; +using namespace BamTools::Internal; + +#include +#include +using namespace std; + +// ------------------------------------ +// static utility methods & constants +// ------------------------------------ + +namespace BamTools { +namespace Internal { + +// constants +static const size_t DEFAULT_BUFFER_SIZE = 0x8000; + +} // namespace Internal +} // namespace BamTools + +// -------------------------- +// TcpSocket implementation +// -------------------------- + +TcpSocket::TcpSocket(void) + : m_mode(IBamIODevice::NotOpen) +// , m_localPort(0) + , m_remotePort(0) + , m_engine(0) + , m_cachedSocketDescriptor(-1) + , m_readBuffer(DEFAULT_BUFFER_SIZE) + , m_error(TcpSocket::UnknownSocketError) + , m_state(TcpSocket::UnconnectedState) +{ } + +TcpSocket::~TcpSocket(void) { + if ( m_state == TcpSocket::ConnectedState ) + DisconnectFromHost(); +} + +size_t TcpSocket::BufferBytesAvailable(void) const { + return m_readBuffer.Size(); +} + +bool TcpSocket::CanReadLine(void) const { + return m_readBuffer.CanReadLine(); +} + +void TcpSocket::ClearBuffer(void) { + m_readBuffer.Clear(); +} + +bool TcpSocket::ConnectImpl(const HostInfo& hostInfo, + const std::string& port, + IBamIODevice::OpenMode mode) +{ + // skip if we're already connected + if ( m_state == TcpSocket::ConnectedState ) { + m_error = TcpSocket::SocketResourceError; + return false; + } + + // reset socket state + m_mode = mode; + m_hostName = hostInfo.HostName(); + m_state = TcpSocket::UnconnectedState; + m_error = TcpSocket::UnknownSocketError; +// m_localPort = 0; + m_remotePort = 0; +// m_localAddress.Clear(); + m_remoteAddress.Clear(); + m_readBuffer.Clear(); + + // fetch candidate addresses for requested host + vector addresses = hostInfo.Addresses(); + if ( addresses.empty() ) { + m_error = TcpSocket::HostNotFoundError; + return false; + } + + // convert port string to integer + stringstream ss(port); + uint16_t portNumber(0); + ss >> portNumber; + + // iterate through adddresses + vector::const_iterator addrIter = addresses.begin(); + vector::const_iterator addrEnd = addresses.end(); + for ( ; addrIter != addrEnd; ++addrIter) { + const HostAddress& addr = (*addrIter); + + // try to initialize socket engine with this address + if ( !InitializeSocketEngine(addr.GetProtocol()) ) { + // failure to initialize is OK here + // we'll just try the next available address + continue; + } + + // attempt actual connection + if ( m_engine->Connect(addr, portNumber) ) { + + // if connection successful, update our state & return true + m_mode = mode; +// m_localAddress = m_engine->GetLocalAddress(); +// m_localPort = m_engine->GetLocalPort(); + m_remoteAddress = m_engine->GetRemoteAddress(); + m_remotePort = m_engine->GetRemotePort(); + m_cachedSocketDescriptor = m_engine->GetSocketDescriptor(); + m_state = TcpSocket::ConnectedState; + return true; + } + } + + // if we get here, no connection could be made + m_error = TcpSocket::HostNotFoundError; + return false; +} + +bool TcpSocket::ConnectToHost(const string& hostName, + uint16_t port, + IBamIODevice::OpenMode mode) +{ + stringstream ss(""); + ss << port; + return ConnectToHost(hostName, ss.str(), mode); + +} + +bool TcpSocket::ConnectToHost(const string& hostName, + const string& port, + IBamIODevice::OpenMode mode) +{ + // create new address object with requested host name + HostAddress hostAddress; + hostAddress.SetAddress(hostName); + + HostInfo info; + // if host name was IP address ("x.x.x.x" or IPv6 format) + // otherwise host name was 'plain-text' ("www.foo.bar") + // we need to look up IP address(es) + if ( hostAddress.HasIPAddress() ) + info.SetAddresses( vector(1, hostAddress) ); + else + info = HostInfo::Lookup(hostName, port); + + // attempt connection on requested port + return ConnectImpl(info, port, mode); +} + +void TcpSocket::DisconnectFromHost(void) { + + // close socket engine & delete + if ( m_state == TcpSocket::ConnectedState ) + ResetSocketEngine(); + + // reset connection state +// m_localPort = 0; + m_remotePort = 0; +// m_localAddress.Clear(); + m_remoteAddress.Clear(); + m_hostName.clear(); + m_cachedSocketDescriptor = -1; +} + +TcpSocket::SocketError TcpSocket::GetError(void) const { + return m_error; +} + +std::string TcpSocket::GetErrorString(void) const { + return m_errorString; +} + +std::string TcpSocket::GetHostName(void) const { + return m_hostName; +} + +//HostAddress TcpSocket::GetLocalAddress(void) const { +// return m_localAddress; +//} + +//uint16_t TcpSocket::GetLocalPort(void) const { +// return m_localPort; +//} + +HostAddress TcpSocket::GetRemoteAddress(void) const { + return m_remoteAddress; +} + +uint16_t TcpSocket::GetRemotePort(void) const { + return m_remotePort; +} + +TcpSocket::SocketState TcpSocket::GetState(void) const { + return m_state; +} + +bool TcpSocket::InitializeSocketEngine(HostAddress::NetworkProtocol protocol) { + ResetSocketEngine(); + m_engine = new TcpSocketEngine; + return m_engine->Initialize(protocol); +} + +bool TcpSocket::IsConnected(void) const { + if ( m_engine == 0 ) + return false; + return ( m_engine->IsValid() && (m_state == TcpSocket::ConnectedState) ); +} + +// may be read in a look until desired data amount has been read +// returns: number of bytes read, or -1 if error +int64_t TcpSocket::Read(char* data, const unsigned int numBytes) { + + // if we have data in buffer, just return it + if ( !m_readBuffer.IsEmpty() ) { + const size_t bytesRead = m_readBuffer.Read(data, numBytes); + return static_cast(bytesRead); + } + + // otherwise, we'll need to fetch data from socket + // first make sure we have a valid socket engine + if ( m_engine == 0 ) { + // TODO: set error string/state? + return -1; + } + + // fetch data from socket, return 0 for success, -1 for failure + // since this should be called in a loop, we'll pull the actual bytes on next iteration + return ( ReadFromSocket() ? 0 : -1 ); +} + +bool TcpSocket::ReadFromSocket(void) { + + // wait for ready read + bool timedOut; + bool isReadyRead = m_engine->WaitForRead(5000, &timedOut); + + // if not ready + if ( !isReadyRead ) { + + // if we simply timed out + if ( timedOut ) { + // TODO: set error string + return false; + } + + // otherwise, there was an error + else { + // TODO: set error string + return false; + } + } + + // ######################################################################### + // clean this up - smells funky, but it's a key step so it has to be right + // ######################################################################### + // get number of bytes available from socket + // (if 0, still try to read some data so we don't trigger any OS event behavior + // that respond to repeated access to a remote closed socket) + int64_t bytesToRead = m_engine->NumBytesAvailable(); + if ( bytesToRead < 0 ) + return false; + else if ( bytesToRead == 0 ) + bytesToRead = 4096; + + // make space in buffer & read from socket + char* buffer = m_readBuffer.Reserve(bytesToRead); + int64_t numBytesRead = m_engine->Read(buffer, bytesToRead); + + // (Qt uses -2 for no data, not error) + // squeeze buffer back down & return success + if ( numBytesRead == -2 ) { + m_readBuffer.Chop(bytesToRead); + return true; + } + // ######################################################################### + + // check for any socket engine errors + if ( !m_engine->IsValid() ) { + // TODO: set error string + ResetSocketEngine(); + return false; + } + + // return success + return true; +} + +string TcpSocket::ReadLine(void) { + if ( m_readBuffer.CanReadLine() ) + return m_readBuffer.ReadLine(); + return string(); +} + +void TcpSocket::ResetSocketEngine(void) { + + // shut down socket engine + if ( m_engine ) { + m_engine->Close(); + delete m_engine; + m_engine = 0; + } + + // reset our state & cached socket handle + m_state = TcpSocket::UnconnectedState; + m_cachedSocketDescriptor = -1; +} + +int64_t TcpSocket::Write(const char* data, const unsigned int numBytes) { + + // single-shot attempt at write (not buffered, just try to shove the data through socket) + // this method purely exists to send 'small' HTTP requests/FTP commands from client to server + + int64_t bytesWritten(0); + + // wait for our socket to be write-able + bool timedOut; + bool isReadyWrite = m_engine->WaitForWrite(3000, &timedOut); + if ( isReadyWrite ) + bytesWritten = m_engine->Write(data, numBytes); + else { + // timeout is OK (with current setup), we'll just return 0 & try again + // but we need to report if engine encountered some other error + if ( !timedOut ) { + // TODO: set error string + bytesWritten = -1; + } + } + + // return actual number of bytes written to socket + return bytesWritten; +} diff --git a/src/api/internal/io/TcpSocket_p.h b/src/api/internal/io/TcpSocket_p.h new file mode 100644 index 0000000..4cd1f1a --- /dev/null +++ b/src/api/internal/io/TcpSocket_p.h @@ -0,0 +1,122 @@ +// *************************************************************************** +// TcpSocket_p.h (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// --------------------------------------------------------------------------- +// Last modified: 25 October 2011 (DB) +// --------------------------------------------------------------------------- +// Provides TCP socket I/O +// *************************************************************************** + +#ifndef TCPSOCKET_P_H +#define TCPSOCKET_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to version +// without notice, or even be removed. +// +// We mean it. + +#include "api/IBamIODevice.h" +#include "api/internal/io/HostInfo_p.h" +#include "api/internal/io/RollingBuffer_p.h" +#include + +namespace BamTools { +namespace Internal { + +class TcpSocketEngine; + +class TcpSocket { + + // enums + public: + enum SocketError { UnknownSocketError = -1 + , ConnectionRefusedError = 0 + , RemoteHostClosedError + , HostNotFoundError + , SocketAccessError + , SocketResourceError + , SocketTimeoutError + , NetworkError + , UnsupportedSocketOperationError + }; + + enum SocketState { UnconnectedState = 0 + , ConnectedState + }; + + // ctor & dtor + public: + TcpSocket(void); + ~TcpSocket(void); + + // TcpSocket interface + public: + + // connection methods + bool ConnectToHost(const std::string& hostName, + const uint16_t port, // Connect("host", 80) + IBamIODevice::OpenMode mode = IBamIODevice::ReadOnly); + bool ConnectToHost(const std::string& hostName, + const std::string& port, // Connect("host", "80") + IBamIODevice::OpenMode mode = IBamIODevice::ReadOnly); + void DisconnectFromHost(void); + bool IsConnected(void) const; + + // I/O methods + size_t BufferBytesAvailable(void) const; + bool CanReadLine(void) const; + void ClearBuffer(void); // force buffer to clear (not a 'flush', just a 'discard') + int64_t Read(char* data, const unsigned int numBytes); + std::string ReadLine(void); + int64_t Write(const char* data, const unsigned int numBytes); + + // connection values + std::string GetHostName(void) const; +// HostAddress GetLocalAddress(void) const; +// uint16_t GetLocalPort(void) const; + HostAddress GetRemoteAddress(void) const; + uint16_t GetRemotePort(void) const; + + // connection status + TcpSocket::SocketError GetError(void) const; + TcpSocket::SocketState GetState(void) const; + std::string GetErrorString(void) const; + + // internal methods + private: + bool ConnectImpl(const HostInfo& hostInfo, + const std::string& port, + IBamIODevice::OpenMode mode); + bool InitializeSocketEngine(HostAddress::NetworkProtocol protocol); + bool ReadFromSocket(void); + void ResetSocketEngine(void); + + // data members + private: + IBamIODevice::OpenMode m_mode; + + std::string m_hostName; +// uint16_t m_localPort; + uint16_t m_remotePort; +// HostAddress m_localAddress; + HostAddress m_remoteAddress; + + TcpSocketEngine* m_engine; + int m_cachedSocketDescriptor; + + RollingBuffer m_readBuffer; + + TcpSocket::SocketError m_error; + TcpSocket::SocketState m_state; + std::string m_errorString; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // TCPSOCKET_P_H diff --git a/src/api/internal/sam/CMakeLists.txt b/src/api/internal/sam/CMakeLists.txt new file mode 100644 index 0000000..4b2bce2 --- /dev/null +++ b/src/api/internal/sam/CMakeLists.txt @@ -0,0 +1,17 @@ +# ========================== +# BamTools CMakeLists.txt +# (c) 2011 Derek Barnett +# +# src/api/internal/sam +# ========================== + +set ( InternalSamDir "${InternalDir}/sam" ) + +set ( InternalSamSources + ${InternalSamDir}/SamFormatParser_p.cpp + ${InternalSamDir}/SamFormatPrinter_p.cpp + ${InternalSamDir}/SamHeaderValidator_p.cpp + + PARENT_SCOPE # <-- leave this last +) + diff --git a/src/api/internal/utils/CMakeLists.txt b/src/api/internal/utils/CMakeLists.txt new file mode 100644 index 0000000..38a6957 --- /dev/null +++ b/src/api/internal/utils/CMakeLists.txt @@ -0,0 +1,15 @@ +# ========================== +# BamTools CMakeLists.txt +# (c) 2011 Derek Barnett +# +# src/api/internal/utils +# ========================== + +set ( InternalUtilsDir "${InternalDir}/utils" ) + +set ( InternalUtilsSources + ${InternalUtilsDir}/BamException_p.cpp + + PARENT_SCOPE # <-- leave this last +) + -- 2.39.2