1 // ***************************************************************************
2 // BamHttp_p.cpp (c) 2011 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 24 July 2013 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides reading/writing of BAM files on HTTP server
8 // ***************************************************************************
10 #include "api/BamAux.h"
11 #include "api/internal/io/BamHttp_p.h"
12 #include "api/internal/io/HttpHeader_p.h"
13 #include "api/internal/io/TcpSocket_p.h"
14 using namespace BamTools;
15 using namespace BamTools::Internal;
// HTTP port number, kept as a string.
31 static const string HTTP_PORT = "80";
// URL scheme prefix that ParseUrl() searches for.
32 static const string HTTP_PREFIX = "http://";
// Number of characters in HTTP_PREFIX; maintained by hand, so keep the two in sync.
33 static const size_t HTTP_PREFIX_LENGTH = 7;
// Text that marks the end of a response header; ReceiveResponse() keeps reading
// lines until the accumulated header ends with it.
35 static const string DOUBLE_NEWLINE = "\n\n";
// HTTP request method names used when building request headers.
37 static const string GET_METHOD = "GET";
38 static const string HEAD_METHOD = "HEAD";
// Header field names set on requests / looked up on responses.
39 static const string HOST_HEADER = "Host";
40 static const string RANGE_HEADER = "Range";
// Leading text of the Range header value (followed by "<start>-<end>").
41 static const string BYTES_PREFIX = "bytes=";
42 static const string CONTENT_LENGTH_HEADER = "Content-Length";
// Separators used by ParseUrl(): the first '/' after the prefix ends the host
// portion; a ':' inside the host portion indicates a port.
44 static const char HOST_SEPARATOR = '/';
45 static const char PROXY_SEPARATOR = ':';
// Returns true if 'source' ends with 'pattern'.
//
// Only the tail of 'source' is compared, so an earlier occurrence of 'pattern'
// inside 'source' does not affect the result. A pattern longer than the source
// never matches (the length check also prevents the unsigned subtraction below
// from wrapping around). An empty pattern matches any source.
bool endsWith(const std::string& source, const std::string& pattern) {
    if ( pattern.length() > source.length() )
        return false;
    const std::string::size_type tailStart = source.length() - pattern.length();
    return ( source.compare(tailStart, pattern.length(), pattern) == 0 );
}
// Returns a lower-cased copy of 's'; the argument itself is not modified, so
// callers must use the return value.
//
// Each character is converted to unsigned char before being handed to
// tolower(): passing a negative char value (any byte >= 0x80 where char is
// signed) to tolower() is undefined behaviour.
std::string toLower(const std::string& s) {
    std::string out(s);
    const std::string::size_type outSize = out.size();
    for ( std::string::size_type i = 0; i < outSize; ++i )
        out[i] = static_cast<char>( tolower(static_cast<unsigned char>(out[i])) );
    return out;
}
66 } // namespace Internal
67 } // namespace BamTools
69 // ------------------------
70 // BamHttp implementation
71 // ------------------------
// Constructs an HTTP device for the given URL.
// The initializers visible here allocate a new TcpSocket, clear the
// "URL parsed" flag, and set the file-end and range-end positions to -1.
// NOTE(review): 'url' is presumably passed to ParseUrl() in the body — confirm.
73 BamHttp::BamHttp(const string& url)
75 , m_socket(new TcpSocket)
79 , m_isUrlParsed(false)
81 , m_fileEndPosition(-1)
82 , m_rangeEndPosition(-1)
// Destructor: closes the connection and cleans up.
// NOTE(review): m_socket is allocated with 'new' in the constructor; confirm
// that it is released here.
87 BamHttp::~BamHttp(void) {
89 // close connection & clean up
// NOTE(review): presumably discards the currently held HTTP response
// (m_response), judging by the name and by Open(), which clears the HEAD
// response it does not need — confirm against the body.
95 void BamHttp::ClearResponse(void) {
// Closes the device: disconnects the socket and resets device state.
// The "URL parsed" flag is cleared, the file-end and range-end positions go
// back to -1, and the mode becomes IBamIODevice::NotOpen.
// NOTE(review): IsOpen() also requires m_isUrlParsed, so after Close() the
// device reports "not open" until the URL is parsed again — confirm that
// re-opening a closed BamHttp is not expected to work.
102 void BamHttp::Close(void) {
104 // disconnect socket & clear related resources
108 m_isUrlParsed = false;
110 m_fileEndPosition = -1;
111 m_rangeEndPosition = -1;
112 m_mode = IBamIODevice::NotOpen;
// Connects m_socket to m_hostname on m_port, passing along the current device
// mode. Asserts that the socket object exists.
// If the connection attempt fails, the socket's error string is recorded under
// "BamHttp::ConnectSocket".
// NOTE(review): callers test the result as a bool — presumably false on
// failure and true on success.
115 bool BamHttp::ConnectSocket(void) {
117 BT_ASSERT_X(m_socket, "null socket?");
119 // any state checks, etc?
120 if ( !m_socket->ConnectToHost(m_hostname, m_port, m_mode) ) {
121 SetErrorString("BamHttp::ConnectSocket", m_socket->GetErrorString());
// Disconnects the socket from the remote host.
// NOTE(review): Close() describes this step as also clearing related
// resources — presumably the stored request/response objects; confirm.
129 void BamHttp::DisconnectSocket(void) {
131 // disconnect socket & clean up
132 m_socket->DisconnectFromHost();
// Makes sure the socket is connected before a request is sent.
// If the socket is not currently connected, a new connection is attempted and
// the result of ConnectSocket() is returned.
// NOTE(review): presumably returns true straight away when already connected.
140 bool BamHttp::EnsureSocketConnection(void) {
141 if ( m_socket->IsConnected() )
143 return ConnectSocket();
// Returns true only when the base device reports open AND the URL was parsed
// successfully.
146 bool BamHttp::IsOpen(void) const {
147 return IBamIODevice::IsOpen() && m_isUrlParsed;
// Reports whether this device supports random access.
// NOTE(review): Seek() is implemented for this device, so this presumably
// returns true — confirm against the body.
150 bool BamHttp::IsRandomAccess(void) const {
// Opens the device in the requested mode.
// Only IBamIODevice::ReadOnly is accepted; any other mode records an error.
// Steps: connect the socket, reset the file positions to 0, then send a HEAD
// request to check the connection. The HEAD response itself is not kept.
// Every failure records an error string under "BamHttp::Open".
// NOTE(review): ConnectSocket() hands m_mode to the socket, so m_mode is
// presumably set from 'mode' before connecting — confirm.
154 bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
156 // BamHttp only supports read-only access
157 if ( mode != IBamIODevice::ReadOnly ) {
158 SetErrorString("BamHttp::Open", "writing on this device is not supported");
163 // attempt connection to socket
164 if ( !ConnectSocket() ) {
165 SetErrorString("BamHttp::Open", m_socket->GetErrorString());
169 // initialize our file positions
171 m_fileEndPosition = 0;
172 m_rangeEndPosition = 0;
174 // attempt to send initial request (just 'HEAD' to check connection)
175 if ( !SendHeadRequest() ) {
176 SetErrorString("BamHttp::Open", m_socket->GetErrorString());
180 // clear response from HEAD request, not needed
// Splits 'url' into host name and file name and records whether that worked.
// m_isUrlParsed is cleared on entry and only set to true at the very end, so
// any early exit leaves the device flagged as "URL not parsed".
// The host name is the text between the "http://" prefix and the first '/'
// after it; the file name is everything from that '/' onwards.
187 void BamHttp::ParseUrl(const string& url) {
189 // clear flag to start
190 m_isUrlParsed = false;
192 // make sure url starts with "http://", case-insensitive
// NOTE(review): find() accepts the prefix anywhere in the string, not only at
// position 0, while the offsets below assume it is at the start.
195 const size_t prefixFound = tempUrl.find(HTTP_PREFIX);
196 if ( prefixFound == string::npos )
199 // find end of host name portion (first '/' hit after the prefix)
200 const size_t firstSlashFound = tempUrl.find(HOST_SEPARATOR, HTTP_PREFIX_LENGTH);
// NOTE(review): the "no slash" case is detected but not handled here. With
// firstSlashFound == npos, the substr(firstSlashFound) call further down would
// throw std::out_of_range — confirm and consider returning early.
201 if ( firstSlashFound == string::npos ) {
202 ; // no slash found... no filename given along with host?
205 // fetch hostname (check for proxy port)
206 string hostname = tempUrl.substr(HTTP_PREFIX_LENGTH, (firstSlashFound - HTTP_PREFIX_LENGTH));
207 const size_t colonFound = hostname.find(PROXY_SEPARATOR);
// An explicit port ("host:port") is not supported yet.
208 if ( colonFound != string::npos ) {
209 ; // TODO: handle proxy port (later, just skip for now)
211 m_hostname = hostname;
215 // store remainder of URL as filename (must be non-empty)
216 string filename = tempUrl.substr(firstSlashFound);
217 if ( filename.empty() )
219 m_filename = filename;
221 // set parsed OK flag
222 m_isUrlParsed = true;
// Reads up to 'numBytes' bytes from the remote file into 'data'.
// Returns the number of bytes actually stored in 'data', which can be less
// than requested when the socket has no more data to deliver.
// The loop keeps going until the request is satisfied; how it fetches data
// depends on the status code of the HTTP response currently held:
//   200 - the server is sending the whole file; just keep reading the socket.
//   206 - the server sent a byte range; issue a new GET whenever the current
//         range has been used up.
//   any other status - recorded as an error under "BamHttp::Read".
// m_filePosition advances by every byte read.
225 int64_t BamHttp::Read(char* data, const unsigned int numBytes) {
227 // if BamHttp not in a valid state
231 int64_t numBytesReadSoFar = 0;
232 while ( numBytesReadSoFar < numBytes ) {
// bytes still owed to the caller
234 const size_t remaining = static_cast<size_t>( numBytes - numBytesReadSoFar );
236 // if we're not holding a valid GET response, get one
237 if ( m_response == 0 ) {
238 if ( !SendGetRequest(remaining) )
241 BT_ASSERT_X(m_response, "null HTTP response");
243 // check response status code
244 const int statusCode = m_response->GetStatusCode();
246 // if we received full file contents in response
247 if ( statusCode == 200 ) {
249 // try to read 'remaining' bytes from socket
250 const int64_t socketBytesRead = ReadFromSocket(data+numBytesReadSoFar, remaining);
// negative result: socket error
253 if ( socketBytesRead < 0 ) {
254 SetErrorString("BamHttp::Read", m_socket->GetErrorString());
// nothing more to read: hand back what we have so far
259 else if ( socketBytesRead == 0 )
260 return numBytesReadSoFar;
263 numBytesReadSoFar += socketBytesRead;
264 m_filePosition += socketBytesRead;
268 // else if we received a range of bytes in response
269 else if ( statusCode == 206 ) {
271 // if we've exhausted the last request
272 if ( m_filePosition == m_rangeEndPosition ) {
273 if ( !SendGetRequest(remaining) )
279 // try to read 'remaining' bytes from socket
280 const int64_t socketBytesRead = ReadFromSocket(data+numBytesReadSoFar, remaining);
// negative result: socket error
283 if ( socketBytesRead < 0 ) {
284 SetErrorString("BamHttp::Read", m_socket->GetErrorString());
289 else if ( socketBytesRead == 0 ) {
291 // if we know we're not at end position, fire off a new request
// (only possible when the file length is known, i.e. m_fileEndPosition > 0)
292 if ( m_fileEndPosition > 0 && m_filePosition < m_fileEndPosition ) {
293 if ( !SendGetRequest() )
296 return numBytesReadSoFar;
300 numBytesReadSoFar += socketBytesRead;
301 m_filePosition += socketBytesRead;
306 // else some other HTTP status
308 SetErrorString("BamHttp::Read", "unsupported status code in response");
313 // return actual number of bytes read
314 return numBytesReadSoFar;
// Reads at most 'maxNumBytes' bytes from the socket into 'data' and returns
// the socket's result unchanged. Callers treat a negative result as an error
// and 0 as "no data".
317 int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
318 return m_socket->Read(data, maxNumBytes);
// Reads the HTTP response header from the socket and stores it in m_response.
// Lines are read one at a time and appended until the accumulated text ends
// with DOUBLE_NEWLINE. An empty header or one that HttpResponseHeader cannot
// parse is recorded as an error under "BamHttp::ReceiveResponse".
// NOTE(review): m_response is assigned a freshly allocated object here
// without releasing a previous one; callers presumably clear the old response
// first — confirm.
321 bool BamHttp::ReceiveResponse(void) {
323 // fetch header, up until double new line
324 string responseHeader;
327 // make sure we can read a line
328 if ( !m_socket->WaitForReadLine() )
331 // read line & append to full header
332 const string headerLine = m_socket->ReadLine();
333 responseHeader += headerLine;
335 } while ( !endsWith(responseHeader, DOUBLE_NEWLINE) );
// nothing was received at all
338 if ( responseHeader.empty() ) {
339 SetErrorString("BamHttp::ReceiveResponse", "empty HTTP response");
344 // create response from header text
345 m_response = new HttpResponseHeader(responseHeader);
346 if ( !m_response->IsValid() ) {
347 SetErrorString("BamHttp::ReceiveResponse", "could not parse HTTP response");
352 // if we get here, success
// Moves the logical file position.
// 'origin' may be SEEK_CUR (position is relative to the current file position)
// or SEEK_SET (position is absolute); any other origin is recorded as an
// error. The connection is re-established before the position is updated.
// Seeking on a device that is not open is an error.
356 bool BamHttp::Seek(const int64_t& position, const int origin) {
358 // if HTTP device not in a valid state
360 SetErrorString("BamHttp::Seek", "cannot seek on unopen connection");
364 // reset the connection
366 if ( !ConnectSocket() ) {
367 SetErrorString("BamHttp::Seek", m_socket->GetErrorString());
371 // update file position
373 case SEEK_CUR : m_filePosition += position; break;
374 case SEEK_SET : m_filePosition = position; break;
376 SetErrorString("BamHttp::Seek", "unsupported seek origin");
// Sends a ranged GET request for data starting at m_filePosition and reads the
// response header.
// 'numBytes' is the amount the caller wants; the requested range always covers
// at least 0x10000 bytes.
// Handling of the response depends on its status code:
//   ranged response - m_rangeEndPosition is updated from Content-Length, if
//                     the server supplied one;
//   full contents   - the server ignored the range, so the bytes before
//                     m_filePosition are read and thrown away; succeeds only
//                     if exactly that many bytes could be skipped;
//   anything else   - recorded as an error under "BamHttp::SendGetRequest".
384 bool BamHttp::SendGetRequest(const size_t numBytes) {
386 // clear previous data
390 m_socket->ClearBuffer();
392 // make sure we're connected
393 if ( !EnsureSocketConnection() )
396 // create range string
// request at least 0x10000 bytes, more if the caller needs more
397 const int64_t endPosition = m_filePosition + std::max(static_cast<size_t>(0x10000), numBytes);
398 stringstream range("");
399 range << BYTES_PREFIX << m_filePosition << '-' << endPosition;
// build the request header: GET <filename>, plus Host and Range fields
402 m_request = new HttpRequestHeader(GET_METHOD, m_filename);
403 m_request->SetField(HOST_HEADER, m_hostname);
404 m_request->SetField(RANGE_HEADER, range.str());
// send the request; anything other than a complete write is a failure
// NOTE(review): WriteToSocket() returns a signed int64_t that is compared with
// an unsigned size_t here — confirm how a negative (error) result behaves.
407 const string requestHeader = m_request->ToString();
408 const size_t headerSize = requestHeader.size();
409 if ( WriteToSocket(requestHeader.c_str(), headerSize) != headerSize ) {
// NOTE(review): this error is labelled "BamHttp::SendHeadRequest" although it
// is raised in SendGetRequest — looks like a copy/paste slip.
410 SetErrorString("BamHttp::SendHeadRequest", m_socket->GetErrorString());
414 // ensure clean buffer
415 m_socket->ClearBuffer();
// wait for the response header
418 if ( !ReceiveResponse() ) {
419 SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
423 BT_ASSERT_X(m_response, "BamHttp::SendGetRequest : null HttpResponse");
424 BT_ASSERT_X(m_response->IsValid(), "BamHttp::SendGetRequest : invalid HttpResponse");
426 // check response status code
427 const int statusCode = m_response->GetStatusCode();
428 switch ( statusCode ) {
430 // ranged response, as requested
432 // get content length if available
433 if ( m_response->ContainsKey(CONTENT_LENGTH_HEADER) ) {
434 const string contentLengthString = m_response->GetValue(CONTENT_LENGTH_HEADER);
// NOTE(review): atoi() yields an int, so a Content-Length above INT_MAX
// cannot be represented — consider a 64-bit conversion.
435 m_rangeEndPosition = m_filePosition + atoi( contentLengthString.c_str() );
439 // full contents, not range
442 // skip up to current file position
// data is discarded through a 0x8000-byte scratch buffer
443 RaiiBuffer tmp(0x8000);
444 int64_t numBytesRead = 0;
445 while ( numBytesRead < m_filePosition ) {
447 // read data from response
448 const int64_t remaining = m_filePosition - numBytesRead;
449 const size_t bytesToRead = static_cast<size_t>( (remaining > 0x8000) ? 0x8000 : remaining );
450 const int64_t socketBytesRead = ReadFromSocket(tmp.Buffer, bytesToRead);
// negative result: socket error
453 if ( socketBytesRead < 0 ) {
454 SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
// nothing read and nothing buffered: no more data is coming
460 else if ( socketBytesRead == 0 && m_socket->BufferBytesAvailable() == 0 )
463 // update byte counter
464 numBytesRead += socketBytesRead;
// success only if the whole prefix was skipped
468 return ( numBytesRead == m_filePosition);
471 // any other status codes
476 // fail on unexpected status code
477 SetErrorString("BamHttp::SendGetRequest", "unsupported status code in response");
// Sends a HEAD request for m_filename and reads the response header.
// If the response carries a Content-Length, m_fileEndPosition is set to that
// length minus one (the offset of the last byte).
// Returns true when the socket reports no error afterwards.
482 bool BamHttp::SendHeadRequest(void) {
484 // ensure clean slate
488 m_socket->ClearBuffer();
490 // make sure we're connected
491 if ( !EnsureSocketConnection() )
// build the request header: HEAD <filename>, plus the Host field
495 m_request = new HttpRequestHeader(HEAD_METHOD, m_filename);
496 m_request->SetField(HOST_HEADER, m_hostname);
// send the request; anything other than a complete write is a failure
499 const string requestHeader = m_request->ToString();
500 const size_t headerSize = requestHeader.size();
501 if ( WriteToSocket(requestHeader.c_str(), headerSize) != headerSize ) {
502 SetErrorString("BamHttp::SendHeadRequest", m_socket->GetErrorString());
506 m_socket->ClearBuffer();
508 // wait for response from server
509 if ( !ReceiveResponse() ) {
510 SetErrorString("BamHttp::SendHeadRequest", m_socket->GetErrorString());
514 BT_ASSERT_X(m_response, "BamHttp::SendHeadRequest : null HttpResponse");
515 BT_ASSERT_X(m_response->IsValid(), "BamHttp::SendHeadRequest : invalid HttpResponse");
517 // get content length if available
518 if ( m_response->ContainsKey(CONTENT_LENGTH_HEADER) ) {
519 const string contentLengthString = m_response->GetValue(CONTENT_LENGTH_HEADER);
// NOTE(review): atoi() yields an int, so files larger than INT_MAX bytes
// cannot be represented here — consider a 64-bit conversion.
520 m_fileEndPosition = atoi( contentLengthString.c_str() ) - 1;
523 // return whether we found any errors
524 return m_socket->GetError() == TcpSocket::NoError;
// Returns the current file position, or -1 if the device is not open.
527 int64_t BamHttp::Tell(void) const {
528 return ( IsOpen() ? m_filePosition : -1 );
// Writing is not supported on this device.
// Any call trips the assertion and records an error string under
// "BamHttp::Write"; 'data' and 'numBytes' are not used by the visible code.
531 int64_t BamHttp::Write(const char* data, const unsigned int numBytes) {
534 BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device");
535 SetErrorString("BamHttp::Write", "write-mode not supported on this device");
// Writes 'numBytes' bytes from 'data' to the socket and returns the socket's
// result; callers compare it with the number of bytes they meant to send.
// The socket's buffer is cleared before writing.
// NOTE(review): when the socket is not connected nothing is written —
// presumably 0 is returned; confirm.
539 int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) {
540 if ( !m_socket->IsConnected() )
542 m_socket->ClearBuffer();
543 return m_socket->Write(data, numBytes);