1 // ***************************************************************************
2 // BamHttp_p.cpp (c) 2011 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 8 December 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides reading/writing of BAM files on HTTP server
8 // ***************************************************************************
10 #include "api/BamAux.h"
11 #include "api/internal/io/BamHttp_p.h"
12 #include "api/internal/io/HttpHeader_p.h"
13 #include "api/internal/io/TcpSocket_p.h"
14 using namespace BamTools;
15 using namespace BamTools::Internal;
// port string handed to the socket when connecting over plain HTTP
static const std::string HTTP_PORT = "80";

// URL scheme expected at the start of every URL, and its length
// (derived from the literal so the two can never drift apart)
static const std::string HTTP_PREFIX = "http://";
static const std::size_t HTTP_PREFIX_LENGTH = HTTP_PREFIX.length();

// text that terminates the response header block
static const std::string DOUBLE_NEWLINE = "\n\n";

// request method, header field names & Range value prefix
static const std::string GET_METHOD = "GET";
static const std::string HOST_HEADER = "Host";
static const std::string RANGE_HEADER = "Range";
static const std::string BYTES_PREFIX = "bytes=";

// URL delimiters: end of the host portion, and host/port divider
static const char HOST_SEPARATOR = '/';
static const char PROXY_SEPARATOR = ':';
// Returns true if @source finishes with @pattern.
//
// The comparison is anchored at the end of @source, so an earlier occurrence
// of @pattern does not hide a match at the end. A pattern longer than the
// source never matches (this also avoids unsigned underflow when computing
// the suffix offset). An empty pattern matches any source.
bool endsWith(const std::string& source, const std::string& pattern) {
    if ( pattern.length() > source.length() )
        return false;
    const std::size_t suffixOffset = source.length() - pattern.length();
    return ( source.compare(suffixOffset, pattern.length(), pattern) == 0 );
}
// Returns a lower-cased copy of @s; @s itself is not modified.
//
// The result starts out as a full-size copy of the input, so every index
// written below is valid. Each character is passed to tolower() as an
// unsigned char, because passing a negative plain char is undefined.
std::string toLower(const std::string& s) {
    std::string out(s);
    const std::size_t outSize = out.size();
    for ( std::size_t i = 0; i < outSize; ++i )
        out[i] = static_cast<char>( std::tolower(static_cast<unsigned char>(out[i])) );
    return out;
}
63 } // namespace Internal
64 } // namespace BamTools
66 // ------------------------
67 // BamHttp implementation
68 // ------------------------
70 BamHttp::BamHttp(const string& url)
72 , m_socket(new TcpSocket)
76 , m_isUrlParsed(false)
78 , m_endRangeFilePosition(-1)
83 BamHttp::~BamHttp(void) {
85 // close connection & clean up
91 void BamHttp::Close(void) {
94 m_socket->DisconnectFromHost();
96 // clean up request & response
106 // reset state - necessary??
107 m_isUrlParsed = false;
109 m_endRangeFilePosition = -1;
112 bool BamHttp::ConnectSocket(void) {
114 BT_ASSERT_X(m_socket, "null socket?");
116 // any state checks, etc?
117 if ( !m_socket->ConnectToHost(m_hostname, m_port, m_mode) ) {
118 // TODO: set error string
122 // attempt initial request
124 m_endRangeFilePosition = -1;
125 if ( !SendRequest() ) {
126 // TODO: set error string
131 // wait for response from server
132 if ( !ReceiveResponse() ) {
133 // TODO: set error string
142 bool BamHttp::EnsureSocketConnection(void) {
143 if ( m_socket->IsConnected() )
145 else return ConnectSocket();
148 bool BamHttp::IsOpen(void) const {
149 return IBamIODevice::IsOpen() && m_isUrlParsed;
152 bool BamHttp::IsRandomAccess(void) const {
156 bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
158 // BamHttp only supports read-only access
159 if ( mode != IBamIODevice::ReadOnly ) {
160 SetErrorString("BamHttp::Open", "writing on this device is not supported");
165 // attempt connection to socket
166 if ( !ConnectSocket() ) {
167 SetErrorString("BamHttp::Open", m_socket->GetErrorString());
175 void BamHttp::ParseUrl(const string& url) {
177 // clear flag to start
178 m_isUrlParsed = false;
180 // make sure url starts with "http://", case-insensitive
183 const size_t prefixFound = tempUrl.find(HTTP_PREFIX);
184 if ( prefixFound == string::npos )
187 // find end of host name portion (first '/' hit after the prefix)
188 const size_t firstSlashFound = tempUrl.find(HOST_SEPARATOR, HTTP_PREFIX_LENGTH);
189 if ( firstSlashFound == string::npos ) {
190 ; // no slash found... no filename given along with host?
193 // fetch hostname (check for proxy port)
194 string hostname = tempUrl.substr(HTTP_PREFIX_LENGTH, (firstSlashFound - HTTP_PREFIX_LENGTH));
195 const size_t colonFound = hostname.find(PROXY_SEPARATOR);
196 if ( colonFound != string::npos ) {
197 ; // TODO: handle proxy port (later, just skip for now)
199 m_hostname = hostname;
203 // store remainder of URL as filename (must be non-empty)
204 string filename = tempUrl.substr(firstSlashFound);
205 if ( filename.empty() )
207 m_filename = filename;
209 // set parsed OK flag
210 m_isUrlParsed = true;
213 int64_t BamHttp::Read(char* data, const unsigned int numBytes) {
215 // if BamHttp not in a valid state
219 // read until hit desired @numBytes
220 int64_t bytesReadSoFar = 0;
221 while ( bytesReadSoFar < numBytes ) {
223 // calculate number of bytes we're going to try to read this iteration
224 const size_t remainingBytes = ( numBytes - bytesReadSoFar );
226 // if socket has access to entire file contents
227 // i.e. we received response with full data (status code == 200)
228 if ( m_endRangeFilePosition < 0 ) {
230 // try to read 'remainingBytes' from socket
231 const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, remainingBytes);
232 if ( socketBytesRead < 0 ) // error
234 else if ( socketBytesRead == 0 ) // EOF
235 return bytesReadSoFar;
236 bytesReadSoFar += socketBytesRead;
237 m_filePosition += socketBytesRead;
240 // socket has access to a range of data (might already be in buffer)
241 // i.e. we received response with partial data (status code == 206)
244 // there is data left from last request
245 if ( m_endRangeFilePosition > m_filePosition ) {
247 // try to read either the total 'remainingBytes' or
248 // whatever we have remaining from last request range
249 const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition;
250 const size_t bytesToRead = std::min(remainingBytes, rangeRemainingBytes);
251 const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead);
252 if ( socketBytesRead < 0 ) // error
254 else if ( socketBytesRead == 0 ) // EOF
255 return bytesReadSoFar;
256 bytesReadSoFar += socketBytesRead;
257 m_filePosition += socketBytesRead;
260 // otherwise, this is a 1st-time read or
261 // we already read everything from the last GET request
264 // request for next range
265 if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) {
273 // return actual number bytes successfully read
274 return bytesReadSoFar;
277 int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
278 return m_socket->Read(data, maxNumBytes);
281 bool BamHttp::ReceiveResponse(void) {
283 // clear any prior response
287 // make sure we're connected
288 if ( !EnsureSocketConnection() )
291 // fetch header, up until double new line
292 string responseHeader;
294 // read line & append to full header
295 const string headerLine = m_socket->ReadLine();
296 responseHeader += headerLine;
298 } while ( !endsWith(responseHeader, DOUBLE_NEWLINE) );
301 if ( responseHeader.empty() ) {
302 // TODO: set error string
307 // create response from header text
308 m_response = new HttpResponseHeader(responseHeader);
309 if ( !m_response->IsValid() ) {
310 // TODO: set error string
315 // if we got range response as requested
316 if ( m_response->GetStatusCode() == 206 )
319 // if we got the full file contents instead of range
320 else if ( m_response->GetStatusCode() == 200 ) {
322 // skip up to current file position
323 RaiiBuffer tmp(0x8000);
324 int64_t numBytesRead = 0;
325 while ( numBytesRead < m_filePosition ) {
327 const int64_t remaining = m_filePosition - numBytesRead;
328 const size_t bytesToRead = static_cast<size_t>( (remaining > 0x8000) ? 0x8000 : remaining );
329 const int64_t socketBytesRead = ReadFromSocket(tmp.Buffer, bytesToRead);
330 if ( socketBytesRead < 0 ) { // error
334 else if ( socketBytesRead == 0 ) // EOF
337 numBytesRead += socketBytesRead;
341 return ( numBytesRead == m_filePosition);
344 // on any other reponse status
345 // TODO: set error string
350 bool BamHttp::Seek(const int64_t& position, const int origin) {
352 // if HTTP device not in a valid state
354 // TODO: set error string
358 // discard socket's buffer contents, update positions, & return success
359 m_socket->ClearBuffer();
361 if ( origin == SEEK_CUR )
362 m_filePosition += position;
363 else if ( origin == SEEK_SET )
364 m_filePosition = position;
366 // TODO: set error string
369 m_endRangeFilePosition = m_filePosition;
373 bool BamHttp::SendRequest(const size_t numBytes) {
375 // remove any currently active request
379 // create range string
380 m_endRangeFilePosition = m_filePosition + numBytes;
381 stringstream range("");
382 range << BYTES_PREFIX << m_filePosition << '-' << m_endRangeFilePosition;
384 // make sure we're connected
385 if ( !EnsureSocketConnection() )
389 m_request = new HttpRequestHeader(GET_METHOD, m_filename);
390 m_request->SetField(HOST_HEADER, m_hostname);
391 m_request->SetField(RANGE_HEADER, range.str());
393 // write request to socket
394 const string requestHeader = m_request->ToString();
395 const size_t headerSize = requestHeader.size();
396 return ( WriteToSocket(requestHeader.c_str(), headerSize) == headerSize );
399 int64_t BamHttp::Tell(void) const {
400 return ( IsOpen() ? m_filePosition : -1 );
403 int64_t BamHttp::Write(const char* data, const unsigned int numBytes) {
406 BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device");
407 SetErrorString("BamHttp::Write", "write-mode not supported on this device");
411 int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) {
412 if ( !m_socket->IsConnected() )
414 m_socket->ClearBuffer();
415 return m_socket->Write(data, numBytes);