1 // ***************************************************************************
2 // BamHttp_p.cpp (c) 2011 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 8 December 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides read-only access to BAM files on an HTTP server (write mode is not supported)
8 // ***************************************************************************
10 #include "api/BamAux.h"
11 #include "api/internal/io/BamHttp_p.h"
12 #include "api/internal/io/HttpHeader_p.h"
13 #include "api/internal/io/TcpSocket_p.h"
14 using namespace BamTools;
15 using namespace BamTools::Internal;
// File-local constants used for URL parsing and for building HTTP requests.
// NOTE(review): HTTP_PORT is not referenced in the visible lines; presumably the
// default port assigned while parsing the URL - confirm.
30 static const string HTTP_PORT = "80";
// scheme prefix searched for in ParseUrl()
31 static const string HTTP_PREFIX = "http://";
// number of characters in HTTP_PREFIX ("http://" is 7 characters); keep the two in sync
32 static const size_t HTTP_PREFIX_LENGTH = 7;
// ReceiveResponse() keeps reading lines until the accumulated header ends with this
34 static const string DOUBLE_NEWLINE = "\n\n";
// request method and header field names used by SendRequest()
36 static const string GET_METHOD = "GET";
37 static const string HOST_HEADER = "Host";
38 static const string RANGE_HEADER = "Range";
// leading text of the Range header value built in SendRequest()
39 static const string BYTES_PREFIX = "bytes=";
// first occurrence after the prefix marks the end of the host name in ParseUrl()
41 static const char HOST_SEPARATOR = '/';
// searched for inside the host name in ParseUrl() (port handling is still a TODO there)
42 static const char PROXY_SEPARATOR = ':';
// Returns true if @source ends with @pattern, false otherwise.
//
// The comparison is anchored at the tail of @source, so an earlier occurrence
// of @pattern inside @source does not affect the result. A @pattern longer
// than @source never matches (this also avoids unsigned wrap-around in the
// length subtraction). An empty @pattern matches any @source.
bool endsWith(const std::string& source, const std::string& pattern) {

    const std::string::size_type sourceLength  = source.length();
    const std::string::size_type patternLength = pattern.length();

    // pattern cannot fit - bail out before computing the (unsigned) offset
    if ( patternLength > sourceLength )
        return false;

    // compare only the last 'patternLength' characters of source
    return ( source.compare(sourceLength - patternLength, patternLength, pattern) == 0 );
}
// Returns a lower-cased copy of @s; the input string is left untouched.
//
// Each character is converted with tolower(). The character is passed as
// unsigned char because tolower() is only defined for values representable
// as unsigned char (or EOF); passing a negative plain char is undefined.
std::string toLower(const std::string& s) {

    // start from a full copy so the output always has the right size
    std::string out(s);
    const std::string::size_type outSize = out.size();
    for ( std::string::size_type i = 0; i < outSize; ++i )
        out[i] = static_cast<char>( tolower(static_cast<unsigned char>(out[i])) );
    return out;
}
63 } // namespace Internal
64 } // namespace BamTools
66 // ------------------------
67 // BamHttp implementation
68 // ------------------------
// Constructs a BamHttp device for @url.
// NOTE(review): only part of the initializer list (and none of the body) is
// visible in this excerpt; presumably @url is handed to ParseUrl() - confirm.
70 BamHttp::BamHttp(const string& url)
// socket object is allocated here with 'new'
72 , m_socket(new TcpSocket)
// no URL has been parsed successfully yet
76 , m_isUrlParsed(false)
// negative value: Read() treats this as "no byte range outstanding"
78 , m_endRangeFilePosition(-1)
// Destructor.
// NOTE(review): the statements that perform the clean-up are not visible in
// this excerpt; presumably they call Close() and release m_socket - confirm.
83 BamHttp::~BamHttp(void) {
85 // close connection & clean up
// Disconnects the socket and resets the device's parsing/range state.
91 void BamHttp::Close(void) {
94 m_socket->DisconnectFromHost();
96 // clean up request & response
106 // reset state - necessary??
// after this, IsOpen() reports false until the URL is parsed again
107 m_isUrlParsed = false;
// back to "no byte range outstanding"
109 m_endRangeFilePosition = -1;
// Connects the socket to the stored host/port, then sends the initial request
// and waits for the server's response.
// NOTE(review): each failure branch only carries a TODO; no error string is
// set at these points in the visible lines.
112 bool BamHttp::ConnectSocket(void) {
114 BT_ASSERT_X(m_socket, "null socket?");
116 // any state checks, etc?
117 if ( !m_socket->ConnectToHost(m_hostname, m_port, m_mode) ) {
118 // TODO: set error string
122 // attempt initial request
// reset the range marker before the first request is issued
124 m_endRangeFilePosition = -1;
125 if ( !SendRequest() ) {
126 // TODO: set error string
131 // wait for response from server
132 if ( !ReceiveResponse() ) {
133 // TODO: set error string
// Makes sure the socket is connected: if it is not, falls back to
// ConnectSocket() and returns its result.
142 bool BamHttp::EnsureSocketConnection(void) {
143 if ( m_socket->IsConnected() )
// not connected - try a fresh connection (this also sends the initial request)
145 else return ConnectSocket();
// Returns true only if the base device reports open AND the URL was parsed
// successfully.
148 bool BamHttp::IsOpen(void) const {
149 return IBamIODevice::IsOpen() && m_isUrlParsed;
// Reports whether this device supports random access.
// NOTE(review): the returned value is not visible in this excerpt.
152 bool BamHttp::IsRandomAccess(void) const {
// Opens the device in the requested @mode.
// Only IBamIODevice::ReadOnly is accepted; any other mode sets an error string.
// On a failed connection the socket's own error string is forwarded.
156 bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
158 // BamHttp only supports read-only access
159 if ( mode != IBamIODevice::ReadOnly ) {
160 SetErrorString("BamHttp::Open", "writing on this device is not supported");
165 // attempt connection to socket
166 if ( !ConnectSocket() ) {
167 SetErrorString("BamHttp::Open", m_socket->GetErrorString());
// Splits @url into a host name (m_hostname) and a remainder (m_filename).
// m_isUrlParsed is cleared on entry and set to true only when the end of the
// function is reached.
175 void BamHttp::ParseUrl(const string& url) {
177 // clear flag to start
178 m_isUrlParsed = false;
180 // make sure url starts with "http://", case-insensitive
// NOTE(review): find() succeeds if the prefix occurs anywhere in the string,
// while the offsets below assume it sits at position 0 - confirm intent.
183 const size_t prefixFound = tempUrl.find(HTTP_PREFIX);
184 if ( prefixFound == string::npos )
187 // find end of host name portion (first '/' hit after the prefix)
188 const size_t firstSlashFound = tempUrl.find(HOST_SEPARATOR, HTTP_PREFIX_LENGTH);
189 if ( firstSlashFound == string::npos ) {
190 ; // no slash found... no filename given along with host?
193 // fetch hostname (check for proxy port)
194 string hostname = tempUrl.substr(HTTP_PREFIX_LENGTH, (firstSlashFound - HTTP_PREFIX_LENGTH));
195 const size_t colonFound = hostname.find(PROXY_SEPARATOR);
196 if ( colonFound != string::npos ) {
197 ; // TODO: handle proxy port (later, just skip for now)
199 m_hostname = hostname;
203 // store remainder of URL as filename (must be non-empty)
// NOTE(review): if no '/' was found above, firstSlashFound is npos here and
// substr() throws std::out_of_range for a position past the end - unless an
// early return exists in lines not visible in this excerpt. Confirm.
204 string filename = tempUrl.substr(firstSlashFound);
205 if ( filename.empty() )
207 m_filename = filename;
209 // set parsed OK flag
210 m_isUrlParsed = true;
// Reads up to @numBytes bytes into @data, advancing m_filePosition by the
// number of bytes actually read.
//
// Two modes, selected by m_endRangeFilePosition:
//   - negative : the socket delivers the whole file; read straight from it
//   - otherwise: data arrives in requested byte ranges; read what is left of
//                the current range, and request the next range when exhausted
//
// Returns the number of bytes stored in @data; this may be less than @numBytes
// if the socket reports EOF (a read of 0 bytes).
// NOTE(review): the values returned on error are not visible in this excerpt.
213 int64_t BamHttp::Read(char* data, const unsigned int numBytes) {
215 // if BamHttp not in a valid state
219 // read until hit desired @numBytes
220 int64_t bytesReadSoFar = 0;
221 while ( bytesReadSoFar < numBytes ) {
223 // calculate number of bytes we're going to try to read this iteration
224 const size_t remainingBytes = ( numBytes - bytesReadSoFar );
226 // if socket has access to entire file contents
227 // i.e. we received response with full data (status code == 200)
228 if ( m_endRangeFilePosition < 0 ) {
230 // try to read 'remainingBytes' from socket
231 const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, remainingBytes);
232 if ( socketBytesRead < 0 ) // error
234 else if ( socketBytesRead == 0 ) // EOF
235 return bytesReadSoFar;
236 bytesReadSoFar += socketBytesRead;
237 m_filePosition += socketBytesRead;
240 // socket has access to a range of data (might already be in buffer)
241 // i.e. we received response with partial data (status code == 206)
244 // there is data left from last request
245 if ( m_endRangeFilePosition > m_filePosition ) {
247 // try to read either the total 'remainingBytes' or
248 // whatever we have remaining from last request range
// m_endRangeFilePosition is used as an exclusive end position here
249 const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition;
250 const size_t bytesToRead = std::min(remainingBytes, rangeRemainingBytes);
251 const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead);
252 if ( socketBytesRead < 0 ) // error
254 else if ( socketBytesRead == 0 ) // EOF
255 return bytesReadSoFar;
256 bytesReadSoFar += socketBytesRead;
257 m_filePosition += socketBytesRead;
260 // otherwise, this is a 1st-time read or
261 // we already read everything from the last GET request
264 // request for next range
// the new range asks for exactly the bytes this call is still missing
265 if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) {
273 // return actual number bytes successfully read
274 return bytesReadSoFar;
// Reads at most @maxNumBytes bytes from the socket into @data by forwarding to
// m_socket->Read(). A negative result from the socket is checked for here.
// NOTE(review): what is returned in each case is not visible in this excerpt.
277 int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
279 // try to read 'remainingBytes' from socket
280 const int64_t numBytesRead = m_socket->Read(data, maxNumBytes);
281 if ( numBytesRead < 0 )
// Reads the server's response header from the socket, parses it into
// m_response and reacts to the status code:
//   - 206: the requested range was honoured
//   - 200: the server sent the whole file, so bytes are read and discarded
//          until the stream reaches m_filePosition
//   - anything else: falls through to the error path at the bottom
286 bool BamHttp::ReceiveResponse(void) {
288 // clear any prior response
292 // make sure we're connected
293 if ( !EnsureSocketConnection() )
296 // fetch header, up until double new line
297 string responseHeader;
299 // read line & append to full header
// NOTE(review): the loop ends only once the accumulated text ends with
// DOUBLE_NEWLINE; if ReadLine() keeps returning empty strings (e.g. the peer
// closed the connection) nothing visible here stops the loop - confirm
// ReadLine()'s behaviour on a closed socket.
300 const string headerLine = m_socket->ReadLine();
301 responseHeader += headerLine;
303 } while ( !endsWith(responseHeader, DOUBLE_NEWLINE) );
306 if ( responseHeader.empty() ) {
307 // TODO: set error string
312 // create response from header text
// allocated with 'new'; released elsewhere (not visible in this excerpt)
313 m_response = new HttpResponseHeader(responseHeader);
314 if ( !m_response->IsValid() ) {
315 // TODO: set error string
320 // if we got range response as requested
321 if ( m_response->GetStatusCode() == 206 )
324 // if we got the full file contents instead of range
325 else if ( m_response->GetStatusCode() == 200 ) {
327 // skip up to current file position
// scratch buffer of 0x8000 bytes; its contents are discarded
328 RaiiBuffer tmp(0x8000);
329 int64_t numBytesRead = 0;
330 while ( numBytesRead < m_filePosition ) {
// never ask for more than the scratch buffer can hold
332 const int64_t remaining = m_filePosition - numBytesRead;
333 const size_t bytesToRead = static_cast<size_t>( (remaining > 0x8000) ? 0x8000 : remaining );
334 const int64_t socketBytesRead = ReadFromSocket(tmp.Buffer, bytesToRead);
335 if ( socketBytesRead < 0 ) { // error
339 else if ( socketBytesRead == 0 ) // EOF
342 numBytesRead += socketBytesRead;
// success only if exactly m_filePosition bytes were skipped
346 return ( numBytesRead == m_filePosition);
349 // on any other response status
350 // TODO: set error string
// Moves the logical file position.
//   @position : offset to apply
//   @origin   : SEEK_CUR adds @position to the current position,
//               SEEK_SET replaces the current position with @position;
//               any other origin takes the (TODO) error path
// No request is sent here; the socket buffer is dropped and the range marker
// is reset so that the next Read() fetches data from the new position.
355 bool BamHttp::Seek(const int64_t& position, const int origin) {
357 // if HTTP device not in a valid state
359 // TODO: set error string
363 // discard socket's buffer contents, update positions, & return success
364 m_socket->ClearBuffer();
366 if ( origin == SEEK_CUR )
367 m_filePosition += position;
368 else if ( origin == SEEK_SET )
369 m_filePosition = position;
371 // TODO: set error string
// range end == position means "nothing left from the last request", which
// makes Read() take its "request for next range" branch
374 m_endRangeFilePosition = m_filePosition;
// Builds a GET request for m_filename with Host and Range headers and writes
// it to the socket.
//   @numBytes : number of bytes wanted, starting at m_filePosition
// Returns true if the whole request header was written.
378 bool BamHttp::SendRequest(const size_t numBytes) {
380 // remove any currently active request
384 // create range string
385 m_endRangeFilePosition = m_filePosition + numBytes;
386 stringstream range("");
// NOTE(review): HTTP byte ranges include both end positions, so this header
// asks for numBytes+1 bytes, while Read() treats m_endRangeFilePosition as an
// exclusive end - confirm whether the extra byte is intended.
387 range << BYTES_PREFIX << m_filePosition << '-' << m_endRangeFilePosition;
389 // make sure we're connected
390 if ( !EnsureSocketConnection() )
// allocated with 'new'; released elsewhere (not visible in this excerpt)
394 m_request = new HttpRequestHeader(GET_METHOD, m_filename);
395 m_request->SetField(HOST_HEADER, m_hostname);
396 m_request->SetField(RANGE_HEADER, range.str());
398 // write request to socket
399 const string requestHeader = m_request->ToString();
400 const size_t headerSize = requestHeader.size();
// a short or failed write counts as failure
401 return ( WriteToSocket(requestHeader.c_str(), headerSize) == headerSize );
// Returns the current file position, or -1 if the device is not open.
404 int64_t BamHttp::Tell(void) const {
405 return ( IsOpen() ? m_filePosition : -1 );
// Writing is not supported on this device: asserts and records an error string.
// NOTE(review): the value returned to the caller is not visible in this excerpt.
408 int64_t BamHttp::Write(const char* data, const unsigned int numBytes) {
411 BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device");
412 SetErrorString("BamHttp::Write", "write-mode not supported on this device");
// Writes @numBytes bytes from @data to the socket and returns the result of
// m_socket->Write().
// NOTE(review): the value returned when the socket is not connected is not
// visible in this excerpt.
416 int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) {
417 if ( !m_socket->IsConnected() )
// drop any buffered data before sending
419 m_socket->ClearBuffer();
420 return m_socket->Write(data, numBytes);