// BamHttp_p.cpp (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 November 2011 (DB)
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Provides reading/writing of BAM files on HTTP server
// ***************************************************************************
#include <cassert>
#include <cctype>
+#include <cstdlib>
#include <algorithm>
#include <sstream>
using namespace std;
+// HTTP port & URL scheme prefix; HTTP_PREFIX_LENGTH is the length of "http://"
static const string HTTP_PORT = "80";
static const string HTTP_PREFIX = "http://";
static const size_t HTTP_PREFIX_LENGTH = 7;
-static const char COLON_CHAR = ':';
-static const char SLASH_CHAR = '/';
+
+// text that marks the end of a response header, as tested in ReceiveResponse()
+static const string DOUBLE_NEWLINE = "\n\n";
+
+// request methods, header field names & the prefix of a Range header value
+static const string GET_METHOD = "GET";
+static const string HEAD_METHOD = "HEAD";
+static const string HOST_HEADER = "Host";
+static const string RANGE_HEADER = "Range";
+static const string BYTES_PREFIX = "bytes=";
+static const string CONTENT_LENGTH_HEADER = "Content-Length";
+
+// URL delimiters searched for in ParseUrl()
+static const char HOST_SEPARATOR = '/';
+static const char PROXY_SEPARATOR = ':';
// -----------------
// utility methods
// -----------------
+// Returns true if @source ends with @pattern.
+// An empty @pattern matches any @source; a @pattern longer than @source never matches.
+static inline
+bool endsWith(const string& source, const string& pattern) {
+
+    // guard first: the unsigned subtraction below would wrap around otherwise
+    if ( pattern.length() > source.length() )
+        return false;
+
+    // compare only the tail, so an earlier occurrence of @pattern cannot hide a match at the end
+    const size_t tailStart = source.length() - pattern.length();
+    return ( source.compare(tailStart, pattern.length(), pattern) == 0 );
+}
+
static inline
string toLower(const string& s) {
string out;
, m_response(0)
, m_isUrlParsed(false)
, m_filePosition(-1)
- , m_endRangeFilePosition(-1)
+ , m_fileEndPosition(-1)
+ , m_rangeEndPosition(-1)
{
ParseUrl(url);
}
delete m_socket;
}
-void BamHttp::Close(void) {
-
- // disconnect socket
- m_socket->DisconnectFromHost();
-
- // clean up request & response
- if ( m_request ) {
- delete m_request;
- m_request = 0;
- }
+// Deletes the currently held HTTP response (if any) and resets the pointer to null.
+void BamHttp::ClearResponse(void) {
if ( m_response ) {
delete m_response;
m_response = 0;
}
+}
- // reset state - necessary??
+// Closes the device: disconnects the socket and releases the request/response objects
+// (both done by DisconnectSocket), then resets the URL-parsed flag, the file/range
+// positions and the open mode.
+void BamHttp::Close(void) {
+
+ // disconnect socket & clear related resources
+ DisconnectSocket();
+
+ // reset state
m_isUrlParsed = false;
- m_filePosition = -1;
- m_endRangeFilePosition = -1;
+ m_filePosition = -1;
+ m_fileEndPosition = -1;
+ m_rangeEndPosition = -1;
+ m_mode = IBamIODevice::NotOpen;
}
+// Connects the socket to m_hostname on m_port using the current m_mode.
+// Returns true on success; on failure records the socket's error string and returns false.
+// No HTTP request is sent here.
bool BamHttp::ConnectSocket(void) {
// any state checks, etc?
if ( !m_socket->ConnectToHost(m_hostname, m_port, m_mode) ) {
- // TODO: set error string
- return false;
- }
-
- // attempt initial request
- m_filePosition = 0;
- m_endRangeFilePosition = -1;
- if ( !SendRequest() ) {
- // TODO: set error string
- Close();
- return false;
- }
-
- // wait for response from server
- if ( !ReceiveResponse() ) {
- // TODO: set error string
- Close();
+ SetErrorString("BamHttp::ConnectSocket", m_socket->GetErrorString());
return false;
}
return true;
}
+// Drops the socket connection and frees the response/request objects tied to it.
+void BamHttp::DisconnectSocket(void) {
+
+    // tear down the connection first
+    m_socket->DisconnectFromHost();
+
+    // release whatever response is still held
+    ClearResponse();
+
+    // release the request; deleting a null pointer is a no-op, so no guard is needed
+    delete m_request;
+    m_request = 0;
+}
+
+// Returns true if the socket is already connected; otherwise attempts a connection
+// and returns the result of ConnectSocket().
bool BamHttp::EnsureSocketConnection(void) {
if ( m_socket->IsConnected() )
return true;
- else return ConnectSocket();
+ return ConnectSocket();
}
bool BamHttp::IsOpen(void) const {
return false;
}
+ // initialize our file positions
+ m_filePosition = 0;
+ m_fileEndPosition = 0;
+ m_rangeEndPosition = 0;
+
+ // attempt to send initial request (just 'HEAD' to check connection)
+ if ( !SendHeadRequest() ) {
+ SetErrorString("BamHttp::Open", m_socket->GetErrorString());
+ return false;
+ }
+
+ // clear response from HEAD request, not needed
+ ClearResponse();
+
// return success
return true;
}
void BamHttp::ParseUrl(const string& url) {
+ // clear flag to start
+ m_isUrlParsed = false;
+
// make sure url starts with "http://", case-insensitive
string tempUrl(url);
toLower(tempUrl);
return;
// find end of host name portion (first '/' hit after the prefix)
- const size_t firstSlashFound = tempUrl.find(SLASH_CHAR, HTTP_PREFIX_LENGTH);
+ const size_t firstSlashFound = tempUrl.find(HOST_SEPARATOR, HTTP_PREFIX_LENGTH);
if ( firstSlashFound == string::npos ) {
; // no slash found... no filename given along with host?
}
// fetch hostname (check for proxy port)
string hostname = tempUrl.substr(HTTP_PREFIX_LENGTH, (firstSlashFound - HTTP_PREFIX_LENGTH));
- const size_t colonFound = hostname.find(COLON_CHAR);
+ const size_t colonFound = hostname.find(PROXY_SEPARATOR);
if ( colonFound != string::npos ) {
; // TODO: handle proxy port (later, just skip for now)
} else {
if ( !IsOpen() )
return -1;
- // read until hit desired @numBytes
- int64_t bytesReadSoFar = 0;
- while ( bytesReadSoFar < numBytes ) {
-
- // calculate number of bytes we're going to try to read this iteration
- const size_t remainingBytes = ( numBytes - bytesReadSoFar );
+ int64_t numBytesReadSoFar = 0;
+ while ( numBytesReadSoFar < numBytes ) {
- // if socket has access to entire file contents
- // i.e. we received response with full data (status code == 200)
- if ( !m_endRangeFilePosition >= 0 ) {
+ const size_t remaining = static_cast<size_t>( numBytes - numBytesReadSoFar );
- // try to read 'remainingBytes' from socket
- const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, remainingBytes);
- if ( socketBytesRead < 0 )
+ // if we're not holding a valid GET response, get one
+ if ( m_response == 0 ) {
+ if ( !SendGetRequest(remaining) )
return -1;
- bytesReadSoFar += socketBytesRead;
}
+ BT_ASSERT_X(m_response, "null HTTP response");
- // socket has access to a range of data (might already be in buffer)
- // i.e. we received response with partial data (status code == 206)
- else {
+ // check response status code
+ const int statusCode = m_response->GetStatusCode();
- // there is data left from last request
- if ( m_endRangeFilePosition > m_filePosition ) {
+ // if we received full file contents in response
+ if ( statusCode == 200 ) {
- // try to read either the total 'remainingBytes' or whatever we have remaining from last request range
- const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition;
- const size_t bytesToRead = std::min(remainingBytes, rangeRemainingBytes);
- const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead);
- if ( socketBytesRead < 0 )
+ // try to read 'remaining' bytes from socket
+ const int64_t socketBytesRead = ReadFromSocket(data+numBytesReadSoFar, remaining);
+
+ // if error
+ if ( socketBytesRead < 0 ) {
+ SetErrorString("BamHttp::Read", m_socket->GetErrorString());
+ return -1;
+ }
+
+ // EOF
+ else if ( socketBytesRead == 0 )
+ return numBytesReadSoFar;
+
+ // update counters
+ numBytesReadSoFar += socketBytesRead;
+ m_filePosition += socketBytesRead;
+
+ }
+
+ // else if we received a range of bytes in response
+ else if ( statusCode == 206 ) {
+
+ // if we've exhausted the last request
+ if ( m_filePosition == m_rangeEndPosition ) {
+ if ( !SendGetRequest(remaining) )
return -1;
- bytesReadSoFar += socketBytesRead;
}
- // otherwise, this is a 1st-time read OR we already read everything from the last GET request
else {
- // request for next range
- if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) {
- Close();
+ // try to read 'remaining' bytes from socket
+ const int64_t socketBytesRead = ReadFromSocket(data+numBytesReadSoFar, remaining);
+
+ // if error
+ if ( socketBytesRead < 0 ) {
+ SetErrorString("BamHttp::Read", m_socket->GetErrorString());
return -1;
}
+
+ // maybe EOF
+ else if ( socketBytesRead == 0 ) {
+
+ // if we know we're not at end position, fire off a new request
+ if ( m_fileEndPosition > 0 && m_filePosition < m_fileEndPosition ) {
+ if ( !SendGetRequest() )
+ return -1;
+ } else
+ return numBytesReadSoFar;
+ }
+
+ // update counters
+ numBytesReadSoFar += socketBytesRead;
+ m_filePosition += socketBytesRead;
}
}
+
+
+ // else some other HTTP status
+ else {
+ SetErrorString("BamHttp::Read", "unsupported status code in response");
+ return -1;
+ }
}
- // return actual number bytes successfully read
- return bytesReadSoFar;
+ // return actual number of bytes read
+ return numBytesReadSoFar;
}
+// Reads up to @maxNumBytes from the socket into @data and returns the socket's result
+// unchanged. m_filePosition is not touched here; Read() advances it after each call.
int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
-
- // try to read 'remainingBytes' from socket
- const int64_t numBytesRead = m_socket->Read(data, maxNumBytes);
- if ( numBytesRead < 0 )
- return -1;
- m_filePosition += numBytesRead;
- return numBytesRead;
+ return m_socket->Read(data, maxNumBytes);
}
+// Reads the HTTP response header from the socket, one line at a time, until the
+// accumulated text ends with DOUBLE_NEWLINE, then parses it into m_response.
+//
+// Returns false if a line cannot be waited for, or - after calling Close() - if the
+// header text is empty or does not parse. The status code is not inspected here.
+//
+// Any previously held m_response is NOT deleted here; SendGetRequest() and
+// SendHeadRequest() call ClearResponse() before reaching this method.
+//
+// NOTE(review): the loop only terminates on "\n\n", so it presumably relies on
+// ReadLine() returning lines that end in '\n' without a preceding '\r' - confirm.
bool BamHttp::ReceiveResponse(void) {
- // clear any prior response
- if ( m_response )
- delete m_response;
+ // fetch header, up until double new line
+ string responseHeader;
+ do {
- // make sure we're connected
- if ( !EnsureSocketConnection() )
- return false;
+ // make sure we can read a line
+ if ( !m_socket->WaitForReadLine() )
+ return false;
- // read response header from socket
- RaiiBuffer header(0x10000);
- size_t l = 0;
- while ( m_socket->Read(header.Buffer + l, 1) >= 0 ) {
- if ( header.Buffer[l] == '\n' && l >= 3 ) {
- if (strncmp(header.Buffer + l - 3, "\r\n\r\n", 4) == 0)
- break;
- }
- ++l;
- }
- string responseHeader;
- responseHeader.resize(l+1);
- for ( size_t i = 0; i < l; ++i )
- responseHeader[i] = header.Buffer[i];
+ // read line & append to full header
+ const string headerLine = m_socket->ReadLine();
+ responseHeader += headerLine;
+ } while ( !endsWith(responseHeader, DOUBLE_NEWLINE) );
+
+ // sanity check
if ( responseHeader.empty() ) {
- // TODO: set error string
+ SetErrorString("BamHttp::ReceiveResponse", "empty HTTP response");
Close();
return false;
}
// create response from header text
m_response = new HttpResponseHeader(responseHeader);
if ( !m_response->IsValid() ) {
- // TODO: set error string
+ SetErrorString("BamHttp::ReceiveResponse", "could not parse HTTP response");
Close();
return false;
}
- // if we got range response as requested
- if ( m_response->GetStatusCode() == 206 )
- return true;
-
- // if we got the full file contents instead of range
- else if ( m_response->GetStatusCode() == 200 ) {
-
- // skip up to current file position
- RaiiBuffer tmp(0x8000);
- int64_t numBytesRead = 0;
- while ( numBytesRead < m_filePosition ) {
- int64_t result = ReadFromSocket(tmp.Buffer, 0x8000);
- if ( result < 0 ) {
- Close();
- return false;
- }
- numBytesRead += result;
- }
-
- // return success
- return true;
- }
-
- // on any other reponse status
- // TODO: set error string
- Close();
- return false;
+ // if we get here, success
+ return true;
}
+// Moves the logical file position.
+//
+// @position - offset to apply
+// @origin   - SEEK_SET (absolute) or SEEK_CUR (relative to the current position);
+//             any other value is rejected
+//
+// The socket is disconnected and reconnected first; DisconnectSocket() also drops the
+// held request/response. Returns false if the device is not open, the reconnect fails,
+// or @origin is unsupported.
+//
+// NOTE(review): the connection is reset before @origin is validated, so an unsupported
+// origin still costs a reconnect.
-bool BamHttp::Seek(const int64_t& position) {
+bool BamHttp::Seek(const int64_t& position, const int origin) {
// if HTTP device not in a valid state
if ( !IsOpen() ) {
- // TODO: set error string
+ SetErrorString("BamHttp::Seek", "cannot seek on unopen connection");
return false;
}
- // discard socket's buffer contents, update positions, & return success
- m_socket->ClearBuffer();
- m_filePosition = position;
- m_endRangeFilePosition = -1;
+ // reset the connection
+ DisconnectSocket();
+ if ( !ConnectSocket() ) {
+ SetErrorString("BamHttp::Seek", m_socket->GetErrorString());
+ return false;
+ }
+
+ // update file position
+ switch ( origin ) {
+ case SEEK_CUR : m_filePosition += position; break;
+ case SEEK_SET : m_filePosition = position; break;
+ default :
+ SetErrorString("BamHttp::Seek", "unsupported seek origin");
+ return false;
+ }
+
+ // return success
return true;
}
+// Sends a ranged GET request starting at the current file position and waits for the
+// response header.
+//
+// @numBytes - number of bytes the caller wants; the requested range spans at least
+//             0x10000 bytes
+//
+// On a 206 response, m_rangeEndPosition is set from Content-Length (when present).
+// On a 200 response (full contents), data is read & discarded up to m_filePosition.
+// Returns true if response data is ready to be read; false otherwise.
-bool BamHttp::SendRequest(const size_t numBytes) {
+bool BamHttp::SendGetRequest(const size_t numBytes) {
- // remove any currently active request
+ // clear previous data
+ ClearResponse();
if ( m_request )
delete m_request;
+ // null the pointer: an early return below must not leave it dangling, or the
+ // next DisconnectSocket()/Close() would delete it a second time
+ m_request = 0;
+ m_socket->ClearBuffer();
+
+ // make sure we're connected
+ if ( !EnsureSocketConnection() )
+ return false;
// create range string
- m_endRangeFilePosition = m_filePosition + numBytes;
- stringstream range("bytes=");
- range << m_filePosition << "-" << m_endRangeFilePosition;
+ const int64_t endPosition = m_filePosition + std::max(static_cast<size_t>(0x10000), numBytes);
+ stringstream range("");
+ range << BYTES_PREFIX << m_filePosition << '-' << endPosition;
+
+ // create request
+ m_request = new HttpRequestHeader(GET_METHOD, m_filename);
+ m_request->SetField(HOST_HEADER, m_hostname);
+ m_request->SetField(RANGE_HEADER, range.str());
+
+ // send request
+ const string requestHeader = m_request->ToString();
+ const size_t headerSize = requestHeader.size();
+ if ( WriteToSocket(requestHeader.c_str(), headerSize) != headerSize ) {
+ SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
+ return false;
+ }
+
+ // ensure clean buffer
+ m_socket->ClearBuffer();
+
+ // wait for response
+ if ( !ReceiveResponse() ) {
+ SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
+ Close();
+ return false;
+ }
+ BT_ASSERT_X(m_response, "BamHttp::SendGetRequest : null HttpResponse");
+ BT_ASSERT_X(m_response->IsValid(), "BamHttp::SendGetRequest : invalid HttpResponse");
+
+ // check response status code
+ const int statusCode = m_response->GetStatusCode();
+ switch ( statusCode ) {
+
+ // ranged response, as requested
+ case 206 :
+ // get content length if available
+ if ( m_response->ContainsKey(CONTENT_LENGTH_HEADER) ) {
+ const string contentLengthString = m_response->GetValue(CONTENT_LENGTH_HEADER);
+
+ // parse as a 64-bit value (atoi is limited to int); stays 0 if unparsable
+ stringstream contentLengthStream(contentLengthString);
+ int64_t contentLength = 0;
+ contentLengthStream >> contentLength;
+ m_rangeEndPosition = m_filePosition + contentLength;
+ }
+ return true;
+
+ // full contents, not range
+ case 200 :
+ {
+ // skip up to current file position
+ RaiiBuffer tmp(0x8000);
+ int64_t numBytesRead = 0;
+ while ( numBytesRead < m_filePosition ) {
+
+ // read data from response
+ const int64_t remaining = m_filePosition - numBytesRead;
+ const size_t bytesToRead = static_cast<size_t>( (remaining > 0x8000) ? 0x8000 : remaining );
+ const int64_t socketBytesRead = ReadFromSocket(tmp.Buffer, bytesToRead);
+
+ // if error
+ if ( socketBytesRead < 0 ) {
+ SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
+ Close();
+ return false;
+ }
+
+ // else if EOF
+ else if ( socketBytesRead == 0 && m_socket->BufferBytesAvailable() == 0 )
+ break;
+
+ // update byte counter
+ numBytesRead += socketBytesRead;
+ }
+
+ // success only if we managed to skip all the way to the current position
+ return ( numBytesRead == m_filePosition);
+ }
+
+ // any other status codes
+ default:
+ break;
+ }
+
+ // fail on unexpected status code
+ SetErrorString("BamHttp::SendGetRequest", "unsupported status code in response");
+ Close();
+ return false;
+}
+
+// Sends a HEAD request for m_filename and waits for the response header.
+//
+// If the response carries Content-Length, m_fileEndPosition is set to (length - 1).
+// Returns true only if the request was written, a valid response was received, and the
+// socket reports no error. The response is left in m_response for the caller to clear.
+bool BamHttp::SendHeadRequest(void) {
+
+ // ensure clean slate
+ ClearResponse();
+ if ( m_request )
+ delete m_request;
+ // null the pointer: an early return below must not leave it dangling, or the
+ // next DisconnectSocket()/Close() would delete it a second time
+ m_request = 0;
+ m_socket->ClearBuffer();
// make sure we're connected
if ( !EnsureSocketConnection() )
return false;
// create request
- m_request = new HttpRequestHeader("GET", m_filename);
- m_request->SetField("Host", m_hostname);
- m_request->SetField("Range", range.str());
+ m_request = new HttpRequestHeader(HEAD_METHOD, m_filename);
+ m_request->SetField(HOST_HEADER, m_hostname);
- // write request to socket
+ // send request
const string requestHeader = m_request->ToString();
const size_t headerSize = requestHeader.size();
- return ( WriteToSocket(requestHeader.c_str(), headerSize) == headerSize );
+ if ( WriteToSocket(requestHeader.c_str(), headerSize) != headerSize ) {
+ SetErrorString("BamHttp::SendHeadRequest", m_socket->GetErrorString());
+ return false;
+ }
+
+ m_socket->ClearBuffer();
+
+ // wait for response from server
+ if ( !ReceiveResponse() ) {
+ SetErrorString("BamHttp::SendHeadRequest", m_socket->GetErrorString());
+ Close();
+ return false;
+ }
+ BT_ASSERT_X(m_response, "BamHttp::SendHeadRequest : null HttpResponse");
+ BT_ASSERT_X(m_response->IsValid(), "BamHttp::SendHeadRequest : invalid HttpResponse");
+
+ // get content length if available
+ if ( m_response->ContainsKey(CONTENT_LENGTH_HEADER) ) {
+ const string contentLengthString = m_response->GetValue(CONTENT_LENGTH_HEADER);
+
+ // parse as a 64-bit value: this is the size of the whole file, which can exceed
+ // what atoi's int result can hold; stays 0 if unparsable
+ stringstream contentLengthStream(contentLengthString);
+ int64_t contentLength = 0;
+ contentLengthStream >> contentLength;
+ m_fileEndPosition = contentLength - 1;
+ }
+
+ // return whether we found any errors
+ return m_socket->GetError() == TcpSocket::NoError;
}
int64_t BamHttp::Tell(void) const {
(void)data;
(void)numBytes;
BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device");
- return 0;
+ SetErrorString("BamHttp::Write", "write-mode not supported on this device");
+ return -1;
}
+// Writes @numBytes of @data to the socket and returns the socket's result.
+// Returns -1 without writing if the socket is not connected; no reconnect is attempted here.
+// The socket's buffer is cleared before the write.
int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) {
- if ( !EnsureSocketConnection() )
- return false;
+ if ( !m_socket->IsConnected() )
+ return -1;
m_socket->ClearBuffer();
return m_socket->Write(data, numBytes);
}