1 // ***************************************************************************
2 // BamHttp_p.cpp (c) 2011 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 November 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides read-only access to BAM files hosted on an HTTP server
8 // ***************************************************************************
10 #include "api/BamAux.h"
11 #include "api/internal/io/BamHttp_p.h"
12 #include "api/internal/io/HttpHeader_p.h"
13 #include "api/internal/io/TcpSocket_p.h"
14 using namespace BamTools;
15 using namespace BamTools::Internal;
// File-local constants used to parse URLs and to build HTTP requests.

// HTTP port number ("80"), kept as a string.
30 static const string HTTP_PORT = "80";
// URL scheme prefix that ParseUrl() searches for.
31 static const string HTTP_PREFIX = "http://";
// Number of characters in HTTP_PREFIX; must be kept in sync with the literal above.
32 static const size_t HTTP_PREFIX_LENGTH = 7;
// Terminator that ReceiveResponse() waits for at the end of the response header text.
34 static const string DOUBLE_NEWLINE = "\n\n";
// HTTP method name and header field names used by SendRequest().
36 static const string GET_METHOD = "GET";
37 static const string HOST_HEADER = "Host";
38 static const string RANGE_HEADER = "Range";
// Prefix of the Range header value; SendRequest() appends "<first>-<last>".
39 static const string BYTES_PREFIX = "bytes=";
// First occurrence after the scheme prefix ends the host portion of a URL.
41 static const char HOST_SEPARATOR = '/';
// Separates the host name from an explicit port inside the host portion of a URL.
42 static const char PROXY_SEPARATOR = ':';
// Returns true if 'source' ends with 'pattern'.
//
// The comparison is anchored at the tail of 'source', so an earlier occurrence of
// 'pattern' inside 'source' does not influence the result (a first-match search
// would wrongly answer "no" for "a\n\nb\n\n" vs "\n\n").
// A pattern longer than 'source' never matches; this is tested up front because
// the unsigned subtraction below would otherwise wrap around.
// An empty pattern matches every string.
bool endsWith(const std::string& source, const std::string& pattern) {
    const std::string::size_type sourceLength  = source.length();
    const std::string::size_type patternLength = pattern.length();
    if ( patternLength > sourceLength )
        return false;
    return ( source.compare(sourceLength - patternLength, patternLength, pattern) == 0 );
}
// Returns a copy of 's' with every character converted to lower case.
//
// The result is built from a copy of the input, so it always has the same length
// as 's'. Each character is passed to tolower() as an unsigned char: calling
// tolower() with a negative value (a plain char holding a non-ASCII byte on
// platforms where char is signed) is undefined behavior.
std::string toLower(const std::string& s) {
    std::string out(s);
    const std::string::size_type sSize = out.size();
    for ( std::string::size_type i = 0; i < sSize; ++i )
        out[i] = static_cast<char>( tolower(static_cast<unsigned char>(out[i])) );
    return out;
}
63 } // namespace Internal
64 } // namespace BamTools
66 // ------------------------
67 // BamHttp implementation
68 // ------------------------
// Constructs an HTTP-backed BAM I/O device for 'url'.
// The initializers visible here allocate the TcpSocket, clear the "URL parsed" flag,
// and set the range-end position to -1 (Read() treats a negative value as
// "the whole file is being streamed").
// NOTE(review): the remaining initializers and the constructor body are not visible
// here - presumably 'url' is handed to ParseUrl(); confirm.
70 BamHttp::BamHttp(const string& url)
72 , m_socket(new TcpSocket)
76 , m_isUrlParsed(false)
78 , m_endRangeFilePosition(-1)
// Destructor.
// NOTE(review): m_socket is allocated with 'new' in the constructor; confirm that it
// is released here once the connection has been closed.
83 BamHttp::~BamHttp(void) {
85 // close connection & clean up
// Disconnects the socket and resets the device state.
// Clearing m_isUrlParsed makes IsOpen() report false afterwards.
// NOTE(review): the statements that release m_request / m_response are not visible
// here; confirm both are deleted and nulled.
91 void BamHttp::Close(void) {
94 m_socket->DisconnectFromHost();
96 // clean up request & response
106 // reset state - necessary??
107 m_isUrlParsed = false;
// a negative range end is what Read() interprets as "entire file available"
109 m_endRangeFilePosition = -1;
// Connects the socket to m_hostname / m_port, sends the initial request and waits
// for the server's response. Each of the three steps has its own failure branch.
// NOTE(review): the failure branches presumably return false and the function true
// on success, but those statements are not visible here - confirm.
112 bool BamHttp::ConnectSocket(void) {
114 BT_ASSERT_X(m_socket, "null socket?");
116 // any state checks, etc?
117 if ( !m_socket->ConnectToHost(m_hostname, m_port, m_mode) ) {
118 // TODO: set error string
122 // attempt initial request
// NOTE(review): SendRequest() assigns m_endRangeFilePosition itself
// (m_filePosition + numBytes), so the -1 stored here is overwritten right away.
124 m_endRangeFilePosition = -1;
// SendRequest() is called without an argument, so its declaration presumably
// supplies a default for numBytes - confirm against the header.
125 if ( !SendRequest() ) {
126 // TODO: set error string
131 // wait for response from server
132 if ( !ReceiveResponse() ) {
133 // TODO: set error string
// Makes sure the socket is connected before it is used: an existing connection is
// kept, otherwise ConnectSocket() is called and its result returned.
// NOTE(review): the statement for the already-connected case is not visible here
// (presumably 'return true;') - confirm.
142 bool BamHttp::EnsureSocketConnection(void) {
143 if ( m_socket->IsConnected() )
145 else return ConnectSocket();
// Returns true only if the base-class device reports itself open AND the URL
// was parsed successfully (m_isUrlParsed is set by ParseUrl()).
148 bool BamHttp::IsOpen(void) const {
149 return IBamIODevice::IsOpen() && m_isUrlParsed;
// Reports whether this device supports random access.
// NOTE(review): the returned value is not visible here; Seek() is implemented via
// range requests, so this presumably returns true - confirm.
152 bool BamHttp::IsRandomAccess(void) const {
// Opens the device in the requested mode.
// Only IBamIODevice::ReadOnly is accepted; any other mode records an error string.
// The socket is then connected via ConnectSocket(); on failure the socket's own
// error string is forwarded through SetErrorString().
// NOTE(review): the return statements and the assignment of m_mode are not visible
// here. ConnectSocket() passes m_mode to ConnectToHost(), so confirm m_mode is set
// before that call.
156 bool BamHttp::Open(const IBamIODevice::OpenMode mode) {
158 // BamHttp only supports read-only access
159 if ( mode != IBamIODevice::ReadOnly ) {
160 SetErrorString("BamHttp::Open", "writing on this device is not supported");
165 // attempt connection to socket
166 if ( !ConnectSocket() ) {
167 SetErrorString("BamHttp::Open", m_socket->GetErrorString());
// Splits 'url' into host name and file name and stores them in m_hostname and
// m_filename. m_isUrlParsed is cleared first and set to true only when the end of
// the function is reached.
// NOTE(review): the definition of 'tempUrl' is not visible here; presumably it is a
// lower-cased copy of 'url'. If so, the stored file name is lower-cased as well,
// which may not match a case-sensitive path on the server - confirm.
175 void BamHttp::ParseUrl(const string& url) {
177 // clear flag to start
178 m_isUrlParsed = false;
180 // make sure url starts with "http://", case-insensitive
// NOTE(review): find() accepts the prefix anywhere in the string, not only at
// position 0, while the offsets used below assume it is at the very start.
183 const size_t prefixFound = tempUrl.find(HTTP_PREFIX);
184 if ( prefixFound == string::npos )
187 // find end of host name portion (first '/' hit after the prefix)
188 const size_t firstSlashFound = tempUrl.find(HOST_SEPARATOR, HTTP_PREFIX_LENGTH);
189 if ( firstSlashFound == string::npos ) {
// NOTE(review): this branch only holds an empty statement. If control continues
// past it, substr(firstSlashFound) further down is called with npos and throws
// std::out_of_range - confirm and bail out here if so.
190 ; // no slash found... no filename given along with host?
193 // fetch hostname (check for proxy port)
194 string hostname = tempUrl.substr(HTTP_PREFIX_LENGTH, (firstSlashFound - HTTP_PREFIX_LENGTH));
195 const size_t colonFound = hostname.find(PROXY_SEPARATOR);
196 if ( colonFound != string::npos ) {
197 ; // TODO: handle proxy port (later, just skip for now)
199 m_hostname = hostname;
203 // store remainder of URL as filename (must be non-empty)
// NOTE(review): the substring starts AT the slash, so once a slash was found the
// file name is never empty and the check below cannot reject a URL such as
// "http://host/".
204 string filename = tempUrl.substr(firstSlashFound);
205 if ( filename.empty() )
207 m_filename = filename;
209 // set parsed OK flag
210 m_isUrlParsed = true;
// Reads up to 'numBytes' bytes into 'data', looping until the request is satisfied.
// Two modes are distinguished by m_endRangeFilePosition:
//   <  0 : the socket streams the entire file (HTTP 200); read straight from it.
//   >= 0 : the server answers range requests (HTTP 206); consume what is left of
//          the current range, then request the next range.
// m_filePosition is advanced by every successful socket read.
// Returns the number of bytes stored in 'data'.
// NOTE(review): the early returns (invalid state, negative socket read, failed
// request) are not visible here; confirm what each one returns.
213 int64_t BamHttp::Read(char* data, const unsigned int numBytes) {
215 // if BamHttp not in a valid state
219 // read until hit desired @numBytes
220 int64_t bytesReadSoFar = 0;
// NOTE(review): only negative read results are tested below; confirm that a
// 0-byte read (e.g. connection closed by the server) cannot spin this loop forever.
221 while ( bytesReadSoFar < numBytes ) {
223 // calculate number of bytes we're going to try to read this iteration
224 const size_t remainingBytes = ( numBytes - bytesReadSoFar );
226 // if socket has access to entire file contents
227 // i.e. we received response with full data (status code == 200)
228 if ( m_endRangeFilePosition < 0 ) {
230 // try to read 'remainingBytes' from socket
// (size_t is narrowed to ReadFromSocket()'s 'unsigned int' parameter; the value
// never exceeds numBytes, which is itself an unsigned int)
231 const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, remainingBytes);
232 if ( socketBytesRead < 0 )
234 bytesReadSoFar += socketBytesRead;
235 m_filePosition += socketBytesRead;
238 // socket has access to a range of data (might already be in buffer)
239 // i.e. we received response with partial data (status code == 206)
242 // there is data left from last request
243 if ( m_endRangeFilePosition > m_filePosition ) {
245 // try to read either the total 'remainingBytes' or
246 // whatever we have remaining from last request range
247 const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition;
248 const size_t bytesToRead = std::min(remainingBytes, rangeRemainingBytes);
249 const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead);
250 if ( socketBytesRead < 0 )
252 bytesReadSoFar += socketBytesRead;
253 m_filePosition += socketBytesRead;
256 // otherwise, this is a 1st-time read or
257 // we already read everything from the last GET request
260 // request for next range
// the new range covers exactly what this Read() call still needs
261 if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) {
269 // return actual number bytes successfully read
270 return bytesReadSoFar;
// Thin wrapper around TcpSocket::Read(): asks the socket for at most 'maxNumBytes'
// bytes, written to 'data'.
// NOTE(review): the handling of a negative result and the final return statement are
// not visible here; callers treat a negative return value as failure.
273 int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
275 // try to read at most 'maxNumBytes' from socket
276 const int64_t numBytesRead = m_socket->Read(data, maxNumBytes);
277 if ( numBytesRead < 0 )
// Reads the server's response header from the socket and stores the parsed result
// in m_response, then acts on the status code:
//   206 - range response, as requested;
//   200 - the server is sending the whole file; bytes are read and discarded to
//         move towards the current file position;
//   anything else - treated as an error (see TODO at the end).
// NOTE(review): the return statements are not visible here; presumably false on
// every failure path and true otherwise - confirm.
282 bool BamHttp::ReceiveResponse(void) {
284 // clear any prior response
288 // make sure we're connected
289 if ( !EnsureSocketConnection() )
292 // fetch header, up until double new line
293 string responseHeader;
295 // read line & append to full header
// NOTE(review): confirm this loop terminates if the connection drops and
// ReadLine() keeps returning empty strings.
296 const string headerLine = m_socket->ReadLine();
297 responseHeader += headerLine;
299 } while ( !endsWith(responseHeader, DOUBLE_NEWLINE) );
// NOTE(review): the loop above only exits once the header ends with
// DOUBLE_NEWLINE, so this emptiness check looks unreachable unless a break
// exists in lines not visible here.
302 if ( responseHeader.empty() ) {
303 // TODO: set error string
308 // create response from header text
309 m_response = new HttpResponseHeader(responseHeader);
310 if ( !m_response->IsValid() ) {
311 // TODO: set error string
316 // if we got range response as requested
317 if ( m_response->GetStatusCode() == 206 )
320 // if we got the full file contents instead of range
321 else if ( m_response->GetStatusCode() == 200 ) {
323 // skip up to current file position
// NOTE(review): every iteration asks for a full 0x8000 bytes, even when fewer
// remain before m_filePosition, so the skip can consume data past the target
// position - consider capping the request at (m_filePosition - numBytesRead).
324 RaiiBuffer tmp(0x8000);
325 int64_t numBytesRead = 0;
326 while ( numBytesRead < m_filePosition ) {
327 int64_t result = ReadFromSocket(tmp.Buffer, 0x8000);
332 numBytesRead += result;
339 // on any other response status
340 // TODO: set error string
// Moves the logical file position without contacting the server.
// The socket's buffered data is discarded and m_filePosition is updated relative to
// the current position (SEEK_CUR) or set absolutely (SEEK_SET). Any other origin,
// including SEEK_END, takes the error branch marked with a TODO.
// Setting m_endRangeFilePosition equal to m_filePosition leaves "no data remaining
// in the current range", so the next Read() issues a fresh range request.
// NOTE(review): the validity check and the return statements are not visible here.
345 bool BamHttp::Seek(const int64_t& position, const int origin) {
347 // if HTTP device not in a valid state
349 // TODO: set error string
353 // discard socket's buffer contents, update positions, & return success
354 m_socket->ClearBuffer();
356 if ( origin == SEEK_CUR )
357 m_filePosition += position;
358 else if ( origin == SEEK_SET )
359 m_filePosition = position;
361 // TODO: set error string
364 m_endRangeFilePosition = m_filePosition;
// Builds a GET request for m_filename with Host and Range headers and writes it to
// the socket. The requested range starts at m_filePosition and m_endRangeFilePosition
// is updated to m_filePosition + numBytes.
// Returns true if the complete request header was written.
// NOTE(review): HTTP byte ranges are inclusive on both ends, so "first-last" with
// last = first + numBytes asks for numBytes + 1 bytes, whereas Read() treats
// (m_endRangeFilePosition - m_filePosition) as the number of bytes remaining.
// Confirm whether 'last' should be m_endRangeFilePosition - 1.
368 bool BamHttp::SendRequest(const size_t numBytes) {
370 // remove any currently active request
374 // create range string
375 m_endRangeFilePosition = m_filePosition + numBytes;
376 stringstream range("");
377 range << BYTES_PREFIX << m_filePosition << '-' << m_endRangeFilePosition;
379 // make sure we're connected
// NOTE(review): when the socket is not connected, EnsureSocketConnection() calls
// ConnectSocket(), which itself calls SendRequest() - check this cannot recurse
// unexpectedly.
380 if ( !EnsureSocketConnection() )
384 m_request = new HttpRequestHeader(GET_METHOD, m_filename);
385 m_request->SetField(HOST_HEADER, m_hostname);
386 m_request->SetField(RANGE_HEADER, range.str());
388 // write request to socket
389 const string requestHeader = m_request->ToString();
390 const size_t headerSize = requestHeader.size();
// the comparison mixes WriteToSocket()'s signed int64_t result with an unsigned
// size_t; a negative result converts to a very large value and compares unequal
391 return ( WriteToSocket(requestHeader.c_str(), headerSize) == headerSize );
// Returns the current logical file position, or -1 if the device is not open.
394 int64_t BamHttp::Tell(void) const {
395 return ( IsOpen() ? m_filePosition : -1 );
// Writing is not supported on this device: the call trips an assertion and records
// an error string.
// NOTE(review): the return statement is not visible here; presumably a negative
// value signals the failure - confirm.
398 int64_t BamHttp::Write(const char* data, const unsigned int numBytes) {
401 BT_ASSERT_X(false, "BamHttp::Write : write-mode not supported on this device");
402 SetErrorString("BamHttp::Write", "write-mode not supported on this device");
// Sends 'numBytes' bytes from 'data' over the socket and returns the result of
// TcpSocket::Write().
// The socket's buffer is cleared before writing, which discards any buffered data
// left over from an earlier response.
// NOTE(review): the statement executed when the socket is not connected is not
// visible here (presumably an error return) - confirm.
406 int64_t BamHttp::WriteToSocket(const char* data, const unsigned int numBytes) {
407 if ( !m_socket->IsConnected() )
409 m_socket->ClearBuffer();
410 return m_socket->Write(data, numBytes);