// ***************************************************************************
// bamtools_utilities.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 3 September 2010
+// Last modified: 8 October 2011
// ---------------------------------------------------------------------------
// Provides general utilities used by BamTools sub-tools.
// ***************************************************************************
+#include <api/BamMultiReader.h>
+#include <api/BamReader.h>
+#include <utils/bamtools_utilities.h>
+using namespace BamTools;
+
+#include <algorithm>
#include <cstdlib>
+#include <cstring>
#include <fstream>
#include <iostream>
-#include "bamtools_utilities.h"
-#include "BamReader.h"
-#include "BamMultiReader.h"
+#include <sstream>
using namespace std;
-using namespace BamTools;
+
+namespace BamTools {
+
+// Complement lookup table for upper-case IUPAC nucleotide codes.
+// Indexed by (base - 'A'); it holds exactly one entry per letter 'A'..'Z'.
+// Letters that are not nucleotide codes map to 0.
+// Populated pairs are symmetric: A<->T, C<->G, R<->Y, K<->M, B<->V, D<->H.
+// S, W, N and X map to themselves; U is complemented like T (to 'A').
+const char REVCOMP_LOOKUP[] = {'T', 'V', 'G', 'H',   // A B C D
+                                0,   0,  'C', 'D',   // E F G H
+                                0,   0,  'M',  0,    // I J K L
+                               'K', 'N',  0,   0,    // M N O P
+                                0,  'Y', 'S', 'A',   // Q R S T
+                               'A', 'B', 'W', 'X',   // U V W X
+                               'R',  0 };            // Y Z
+
+} // namespace BamTools
+
+// returns true if 'pattern' occurs anywhere within 'source'
+bool Utilities::Contains(const string& source, const string& pattern) {
+    const string::size_type position = source.find(pattern);
+    return position != string::npos;
+}
+
+// returns true if the character 'c' occurs anywhere within 'source'
+bool Utilities::Contains(const std::string &source, const char c) {
+    const string::size_type position = source.find(c);
+    return position != string::npos;
+}
+
+// returns true if 'source' ends with 'pattern'
+// (an empty 'pattern' is a suffix of every string, including the empty one)
+bool Utilities::EndsWith(const string& source, const string& pattern) {
+
+    // a pattern longer than the source can never be a suffix; checking this
+    // first also keeps the unsigned subtraction below from wrapping around
+    if ( pattern.length() > source.length() )
+        return false;
+
+    // compare only the tail of 'source', so earlier occurrences of 'pattern'
+    // cannot mask a match at the end
+    const string::size_type offset = source.length() - pattern.length();
+    return ( source.compare(offset, pattern.length(), pattern) == 0 );
+}
+
+// returns true if the last character of 'source' is 'c'
+// (always false for an empty 'source')
+bool Utilities::EndsWith(const std::string& source, const char c) {
+
+    // nothing to compare against; also avoids evaluating length() - 1 on 0
+    if ( source.empty() )
+        return false;
+
+    // look at the final character only, so an earlier 'c' cannot hide it
+    return ( source[source.length() - 1] == c );
+}
// check if a file exists
+// (tries to open 'filename' as an input stream and returns true when the
+//  stream is not in a failed state afterwards)
-bool Utilities::FileExists(const std::string& filename) {
+bool Utilities::FileExists(const string& filename) {
ifstream f(filename.c_str(), ifstream::in);
return !f.fail();
}
// Parses a region string, does validation (valid ID's, positions), stores in Region struct
// Returns success (true/false)
-bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, BamRegion& region) {
-
+bool Utilities::ParseRegionString(const string& regionString,
+ const BamReader& reader,
+ BamRegion& region)
+{
// -------------------------------
// parse region string
startChrom = regionString;
startPos = 0;
stopChrom = regionString;
- stopPos = -1;
+ stopPos = 0;
}
// colon found, so we at least have some sort of startPos requested
// if startRefID not found, return false
int startRefID = reader.GetReferenceID(startChrom);
- if ( startRefID == (int)references.size() ) return false;
+ if ( startRefID == -1 ) return false;
- // if startPos is larger than reference, return false
+ // startPos cannot be greater than or equal to reference length
const RefData& startReference = references.at(startRefID);
- if ( startPos > startReference.RefLength ) return false;
+ if ( startPos >= startReference.RefLength ) return false;
// if stopRefID not found, return false
int stopRefID = reader.GetReferenceID(stopChrom);
- if ( stopRefID == (int)references.size() ) return false;
+ if ( stopRefID == -1 ) return false;
- // if stopPosition larger than reference, return false
+ // stopPosition cannot be larger than reference length
const RefData& stopReference = references.at(stopRefID);
if ( stopPos > stopReference.RefLength ) return false;
// -------------------------------
// set up Region struct & return
- region.LeftRefID = startRefID;
- region.LeftPosition = startPos;
- region.RightRefID = stopRefID;;
+ region.LeftRefID = startRefID;
+ region.LeftPosition = startPos;
+ region.RightRefID = stopRefID;;
region.RightPosition = stopPos;
return true;
}
// Same as ParseRegionString() above, but accepts a BamMultiReader
-bool Utilities::ParseRegionString(const std::string& regionString, const BamMultiReader& reader, BamRegion& region) {
-
+bool Utilities::ParseRegionString(const string& regionString,
+ const BamMultiReader& reader,
+ BamRegion& region)
+{
// -------------------------------
// parse region string
// -------------------------------
// validate reference IDs & genomic positions
-
+
const RefVector references = reader.GetReferenceData();
-
+
// if startRefID not found, return false
int startRefID = reader.GetReferenceID(startChrom);
- if ( startRefID == (int)references.size() ) return false;
-
- // if startPos is larger than reference, return false
+ if ( startRefID == -1 ) return false;
+
+ // startPos cannot be greater than or equal to reference length
const RefData& startReference = references.at(startRefID);
- if ( startPos > startReference.RefLength ) return false;
-
+ if ( startPos >= startReference.RefLength ) return false;
+
// if stopRefID not found, return false
int stopRefID = reader.GetReferenceID(stopChrom);
- if ( stopRefID == (int)references.size() ) return false;
-
- // if stopPosition larger than reference, return false
+ if ( stopRefID == -1 ) return false;
+
+ // stopPosition cannot be larger than reference length
const RefData& stopReference = references.at(stopRefID);
if ( stopPos > stopReference.RefLength ) return false;
-
+
// if no stopPosition specified, set to reference end
- if ( stopPos == -1 ) stopPos = stopReference.RefLength;
-
+ if ( stopPos == -1 ) stopPos = stopReference.RefLength;
+
// -------------------------------
// set up Region struct & return
-
- region.LeftRefID = startRefID;
- region.LeftPosition = startPos;
- region.RightRefID = stopRefID;;
- region.RightPosition = stopPos;
+ region.LeftRefID = startRefID;
+ region.LeftPosition = startPos;
+ region.RightRefID = stopRefID;;
+ region.RightPosition = stopPos;
return true;
}
+
+// reverses the characters of 'sequence' in place
+void Utilities::Reverse(string& sequence) {
+
+    // nothing to swap in an empty string
+    if ( sequence.empty() )
+        return;
+
+    // walk inwards from both ends, exchanging characters until the indices meet
+    size_t left  = 0;
+    size_t right = sequence.length() - 1;
+    while ( left < right ) {
+        swap(sequence[left], sequence[right]);
+        ++left;
+        --right;
+    }
+}
+
+// Reverse-complements 'sequence' in place.
+// Each character in 'A'..'Z' is replaced by its REVCOMP_LOOKUP entry, then the
+// whole string is reversed. Characters outside the table's range (lower-case
+// letters, digits, punctuation) are left as they are instead of being used as
+// an out-of-bounds index.
+void Utilities::ReverseComplement(string& sequence) {
+
+    // number of entries in the complement table (indexed by base - 'A')
+    const size_t tableSize = sizeof(REVCOMP_LOOKUP) / sizeof(REVCOMP_LOOKUP[0]);
+
+    // do complement, in-place
+    const size_t seqLength = sequence.length();
+    for ( size_t i = 0; i < seqLength; ++i ) {
+
+        // anything below 'A' has no table slot; this also rejects negative
+        // values on platforms where char is signed
+        const char base = sequence[i];
+        if ( base < 'A' )
+            continue;
+
+        // anything past the last table slot is skipped as well
+        const size_t index = static_cast<size_t>(base - 'A');
+        if ( index >= tableSize )
+            continue;
+
+        sequence[i] = REVCOMP_LOOKUP[index];
+    }
+
+    // reverse it
+    Reverse(sequence);
+}
+
+// splits 'source' into fields separated by the single character 'delim'
+// (fields are read with getline, so empty fields between adjacent delimiters
+//  are kept)
+vector<string> Utilities::Split(const string& source, const char delim) {
+
+    vector<string> tokens;
+    stringstream input(source);
+
+    // pull one delimited field per iteration until the stream is exhausted
+    for ( string token; getline(input, token, delim); )
+        tokens.push_back(token);
+
+    return tokens;
+}
+
+// Splits 'source' into fields, treating every character in 'delims' as a
+// separator. Runs of consecutive separators are collapsed and leading or
+// trailing separators are ignored, so no empty fields are returned.
+// Works directly on the std::string: no temporary C buffer to manage and no
+// dependence on strtok's hidden static state.
+vector<string> Utilities::Split(const string& source, const string& delims) {
+
+    vector<string> fields;
+
+    // start of the first field (skips any leading separators)
+    string::size_type fieldBegin = source.find_first_not_of(delims);
+    while ( fieldBegin != string::npos ) {
+
+        // the field runs up to the next separator, or to the end of 'source'
+        const string::size_type fieldEnd = source.find_first_of(delims, fieldBegin);
+        if ( fieldEnd == string::npos ) {
+            fields.push_back(source.substr(fieldBegin));
+            break;
+        }
+        fields.push_back(source.substr(fieldBegin, fieldEnd - fieldBegin));
+
+        // skip the separator run that follows this field
+        fieldBegin = source.find_first_not_of(delims, fieldEnd);
+    }
+
+    return fields;
+}
+
+// returns true if 'source' starts with 'pattern'
+// (an empty 'pattern' is a prefix of every string)
+bool Utilities::StartsWith(const string& source, const string& pattern) {
+
+    // compare only the leading pattern.length() characters; a 'source' that is
+    // shorter than 'pattern' compares unequal, so no explicit length check is
+    // needed and the rest of 'source' is never scanned
+    return ( source.compare(0, pattern.length(), pattern) == 0 );
+}
+
+// returns true if the first character of 'source' is 'c'
+// (always false for an empty 'source')
+bool Utilities::StartsWith(const std::string &source, const char c) {
+
+    // inspect the first character only instead of searching the whole string
+    return ( !source.empty() && source[0] == c );
+}