X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=mothur.h;h=102b44d57030561244c8ba6205f7c02c831b9b36;hp=07bb971347d974066ab5449614172c5e68d4c2a6;hb=d1faab5efe1c28700890bdec5b4d8e817fa1dab2;hpb=a8f5a612bba54ceb74e17efc027d3a7f5aa93c9a diff --git a/mothur.h b/mothur.h index 07bb971..102b44d 100644 --- a/mothur.h +++ b/mothur.h @@ -1,7 +1,6 @@ #ifndef MOTHUR_H #define MOTHUR_H -using namespace std; /* @@ -22,6 +21,8 @@ using namespace std; #include #include #include +#include + //exception #include @@ -35,13 +36,59 @@ using namespace std; #include #include #include +#include //math #include #include #include +#include + +//misc +#include +#include +#include + +#ifdef USE_MPI + #include "mpi.h" +#endif +/***********************************************************************/ + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + #include + #include + #include + #include + #include + #include + + #ifdef USE_READLINE + #include + #include + #endif + +#else + #include //allows unbuffered screen capture from stdin + #include //get cwd + #include + #include + #include + #include + +#endif + +using namespace std; + +#define exp(x) (exp((double) x)) +#define sqrt(x) (sqrt((double) x)) +#define log10(x) (log10((double) x)) +#define log2(x) (log10(x)/log10(2)) +#define isnan(x) ((x) != (x)) +#define isinf(x) (fabs(x) == std::numeric_limits::infinity()) + -typedef unsigned long long ull; +typedef unsigned long ull; +typedef unsigned short intDist; struct IntNode { int lvalue; @@ -50,515 +97,296 @@ struct IntNode { int rcoef; IntNode* left; IntNode* right; -}; -/***********************************************************************/ - -// snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2 -// works for now, but there should be a way to do it without killing the whole program - -class BadConversion : public runtime_error { -public: - BadConversion(const string& s) : runtime_error(s){ } + IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {}; + IntNode() {}; }; -//********************************************************************************************************************** - -template -inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){ - istringstream i(s); - char c; - if (!(i >> x) || (failIfLeftoverChars && i.get(c))) - throw BadConversion(s); -} -//********************************************************************************************************************** - -template -inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){ - istringstream i(s); - char c; - if (!(i >> x) || (failIfLeftoverChars && i.get(c))) - { - return false; - } - return true; -} - -//********************************************************************************************************************** - -template -inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){ - istringstream i(s); - char c; - if (!(i >> x) || (failIfLeftoverChars && i.get(c))) - { - cout << "'" << s << "' is unable to be converted into an integer.\n"; - return false; - } - return true; -} - -//********************************************************************************************************************** - -template -string toString(const T&x){ - stringstream output; - output << x; - return output.str(); -} - -//********************************************************************************************************************** +struct ThreadNode { + int* pid; + IntNode* left; + IntNode* right; +}; -template -string toHex(const T&x){ - stringstream output; +struct diffPair { + float prob; + float reverseProb; - output << hex << x; - - return output.str(); -} -//********************************************************************************************************************** + diffPair() { + prob = 0; reverseProb = 0; + } + diffPair(float p, float rp) { + prob = p; + reverseProb = rp; + } +}; -template -string toString(const T&x, int i){ - stringstream output; +/**********************************************************/ +struct CommonHeader { + unsigned int magicNumber; + string version; + unsigned long long indexOffset; + unsigned int indexLength; + unsigned int numReads; + unsigned short headerLength; + unsigned short keyLength; + unsigned short numFlowsPerRead; + int flogramFormatCode; + string flowChars; //length depends on number flow reads + string keySequence; //length depends on key length - output.precision(i); - output << fixed << x; - - return output.str(); -} - - -/***********************************************************************/ - -inline void gobble(istream& f){ + CommonHeader(){ magicNumber=0; indexOffset=0; indexLength=0; numReads=0; headerLength=0; keyLength=0; numFlowsPerRead=0; flogramFormatCode='s'; } + ~CommonHeader() { } +}; +/**********************************************************/ +struct Header { + unsigned short headerLength; + unsigned short nameLength; + unsigned int numBases; + unsigned short clipQualLeft; + unsigned short clipQualRight; + unsigned short clipAdapterLeft; + unsigned short clipAdapterRight; + string name; //length depends on nameLength + string timestamp; + string region; + string xy; - char d; - while(isspace(d=f.get())) {;} - f.putback(d); + Header() { headerLength=0; nameLength=0; numBases=0; clipQualLeft=0; clipQualRight=0; clipAdapterLeft=0; clipAdapterRight=0; } + ~Header() { } +}; +/**********************************************************/ +struct seqRead { + vector flowgram; + vector flowIndex; + string bases; + vector qualScores; -} + seqRead() { } + ~seqRead() { } +}; /***********************************************************************/ - -inline float roundDist(float dist, int precision){ - - return int(dist * precision + 0.5)/float(precision); - -} - +struct PDistCell{ + ull index; + float dist; + PDistCell() : index(0), dist(0) {}; + PDistCell(ull c, float d) : index(c), dist(d) {} +}; /***********************************************************************/ - -inline int getNumNames(string names){ - - int count = 0; - - if(names != ""){ - count = 1; - for(int i=0;i right.index); } - -/**************************************************************************************************/ - -inline vector > binomial(int maxOrder){ - - vector > binomial(maxOrder+1); - - for(int i=0;i<=maxOrder;i++){ - binomial[i].resize(maxOrder+1); - binomial[i][0]=1; - binomial[0][i]=0; - } - binomial[0][0]=1; - - binomial[1][0]=1; - binomial[1][1]=1; - - for(int i=2;i<=maxOrder;i++){ - binomial[1][i]=0; - } - - for(int i=2;i<=maxOrder;i++){ - for(int j=1;j<=maxOrder;j++){ - if(i==j){ binomial[i][j]=1; } - if(j>i) { binomial[i][j]=0; } - else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; } - } - } - - return binomial; +//******************************************************************************************************************** +inline bool compareSpearman(spearmanRank left, spearmanRank right){ + return (left.score < right.score); } - -/***********************************************************************/ - -inline string getRootName(string longName){ +//******************************************************************************************************************** +inline double max(double left, double right){ + if (left > right) { return left; } + else { return right; } +} +//******************************************************************************************************************** +inline double max(int left, double right){ + double value = left; + if (left > right) { return value; } + else { return right; } +} +//******************************************************************************************************************** +inline double max(double left, int right){ + double value = right; + if (left > value) { return left; } + else { return value; } +} +//******************************************************************************************************************** +//sorts highest to lowest +inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right){ + if (left.numIdentical > right.numIdentical) { + return true; + }else if (left.numIdentical == right.numIdentical) { + if (left.seq > right.seq) { return true; } + else { return false; } + } + return false; +} - string rootName = longName; - - if(longName.find_last_of(".") != longName.npos){ - int pos = longName.find_last_of('.')+1; - rootName = longName.substr(0, pos); - } - - return rootName; +/************************************************************/ +//sorts lowest to highest +inline bool compareDistLinePairs(distlinePair left, distlinePair right){ + return (left.end < right.end); +} +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareSequenceDistance(seqDist left, seqDist right){ + return (left.dist < right.dist); +} +//******************************************************************************************************************** +//returns sign of double +inline double sign(double temp){ + //find sign + if (temp > 0) { return 1.0; } + else if (temp < 0) { return -1.0; } + return 0; } /***********************************************************************/ -inline string getSimpleName(string longName){ - - string simpleName = longName; - - if(longName.find_last_of("/") != longName.npos){ - int pos = longName.find_last_of('/')+1; - simpleName = longName.substr(pos, longName.length()); - } +// snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2 +// works for now, but there should be a way to do it without killing the whole program - return simpleName; -} -/***********************************************************************/ +class BadConversion : public runtime_error { +public: + BadConversion(const string& s) : runtime_error(s){ } +}; -inline int factorial(int num){ - int total = 1; +//********************************************************************************************************************** +template +void convert(const string& s, T& x, bool failIfLeftoverChars = true){ - for (int i = 1; i <= num; i++) { - total *= i; - } + istringstream i(s); + char c; + if (!(i >> x) || (failIfLeftoverChars && i.get(c))) + throw BadConversion(s); - return total; } -/************************************************************************************************** - -double min(double x, double y) -{ - if(x int sgn(T val){ return (val > T(0)) - (val < T(0)); } +//********************************************************************************************************************** -inline string getPathName(string longName){ - - string rootPathName = longName; +template +bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){ - if(longName.find_last_of("/") != longName.npos){ - int pos = longName.find_last_of('/')+1; - rootPathName = longName.substr(0, pos); - } - - return rootPathName; -} - -/***********************************************************************/ - -inline int openInputFile(string fileName, ifstream& fileHandle){ - - fileHandle.open(fileName.c_str()); - if(!fileHandle) { - cerr << "Error: Could not open " << fileName << endl; - return 1; - } - else { - return 0; - } + istringstream i(s); + char c; + if (!(i >> x) || (failIfLeftoverChars && i.get(c))) + { + return false; + } + return true; } -/***********************************************************************/ +//********************************************************************************************************************** -inline int openOutputFile(string fileName, ofstream& fileHandle){ +template +bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){ + + istringstream i(s); + char c; + if (!(i >> x) || (failIfLeftoverChars && i.get(c))) + { + return false; + } + return true; - fileHandle.open(fileName.c_str(), ios::trunc); - if(!fileHandle) { - cerr << "Error: Could not open " << fileName << endl; - return 1; - } - else { - return 0; - } - } -/***********************************************************************/ - -inline int openOutputFileAppend(string fileName, ofstream& fileHandle){ +//********************************************************************************************************************** +template +string toString(const T&x){ + + stringstream output; + output << x; + return output.str(); - fileHandle.open(fileName.c_str(), ios::app); - if(!fileHandle) { - cerr << "Error: Could not open " << fileName << endl; - return 1; - } - else { - return 0; - } - } +//********************************************************************************************************************** -/***********************************************************************/ - -//This function parses the estimator options and puts them in a vector -inline void splitAtDash(string& estim, vector& container) { - try { - string individual; +template +string toHex(const T&x){ + + stringstream output; - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); - container.push_back(individual); - } - } - //get last one - container.push_back(estim); - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } + output << hex << x; + return output.str(); + } +//********************************************************************************************************************** -/***********************************************************************/ -//This function parses the label options and puts them in a set -inline void splitAtDash(string& estim, set& container) { - try { - string individual; +template +string toString(const T&x, int i){ + + stringstream output; - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); - container.insert(individual); - } - } - //get last one - container.insert(estim); - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - -} -/***********************************************************************/ -//This function parses the line options and puts them in a set -inline void splitAtDash(string& estim, set& container) { - try { - string individual; - int lineNum; + output.precision(i); + output << fixed << x; - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); - convert(individual, lineNum); //convert the string to int - container.insert(lineNum); - } - } - //get last one - convert(estim, lineNum); //convert the string to int - container.insert(lineNum); - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - -} -/***********************************************************************/ -//This function parses the a string and puts peices in a vector -inline void splitAtComma(string& estim, vector& container) { - try { - string individual; - - while (estim.find_first_of(',') != -1) { - individual = estim.substr(0,estim.find_first_of(',')); - if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string - estim = estim.substr(estim.find_first_of(',')+1, estim.length()); - container.push_back(individual); - } - } - //get last one - container.push_back(estim); - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} -/***********************************************************************/ - -//This function splits up the various option parameters -inline void splitAtComma(string& prefix, string& suffix){ - try { - prefix = suffix.substr(0,suffix.find_first_of(',')); - if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string - suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length()); - string space = " "; - while(suffix.at(0) == ' ') - suffix = suffix.substr(1, suffix.length()); - } - - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - + return output.str(); + } -/***********************************************************************/ - -//This function separates the key value from the option value i.e. dist=96_... -inline void splitAtEquals(string& key, string& value){ - try { - if(value.find_first_of('=') != -1){ - key = value.substr(0,value.find_first_of('=')); - if ((value.find_first_of('=')+1) <= value.length()) { - value = value.substr(value.find_first_of('=')+1, value.length()); - } - }else{ - key = value; - value = 1; - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } +//********************************************************************************************************************** -} -/**************************************************************************************************/ - -inline bool inUsersGroups(string groupname, vector Groups) { - try { - for (int i = 0; i < Groups.size(); i++) { - if (groupname == Groups[i]) { return true; } - } - return false; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } +template +T fromString(const string& s){ + istringstream stream (s); + T t; + stream >> t; + return t; } -/***********************************************************************/ -//this function determines if the user has given us labels that are smaller than the given label. -//if so then it returns true so that the calling function can run the previous valid distance. -//it's a "smart" distance function. It also checks for invalid labels. -inline bool anyLabelsToProcess(string label, set& userLabels, string errorOff) { - try { - set::iterator it; - vector orderFloat; - map userMap; //the conversion process removes trailing 0's which we need to put back - map::iterator it2; - float labelFloat; - bool smaller = false; - - //unique is the smallest line - if (label == "unique") { return false; } - else { convert(label, labelFloat); } - - //go through users set and make them floats - for(it = userLabels.begin(); it != userLabels.end(); ++it) { - - float temp; - if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){ - convert(*it, temp); - orderFloat.push_back(temp); - userMap[*it] = temp; - }else if (*it == "unique") { - orderFloat.push_back(-1.0); - userMap["unique"] = -1.0; - }else { - if (errorOff == "") { cout << *it << " is not a valid label." << endl; } - userLabels.erase(*it); - it--; - } - } - - //sort order - sort(orderFloat.begin(), orderFloat.end()); - - /*************************************************/ - //is this label bigger than any of the users labels - /*************************************************/ - - //loop through order until you find a label greater than label - for (int i = 0; i < orderFloat.size(); i++) { - if (orderFloat[i] < labelFloat) { - smaller = true; - if (orderFloat[i] == -1) { - if (errorOff == "") { cout << "Your file does not include the label unique." << endl; } - userLabels.erase("unique"); - } - else { - if (errorOff == "") { cout << "Your file does not include the label "; } - string s = ""; - for (it2 = userMap.begin(); it2!= userMap.end(); it2++) { - if (it2->second == orderFloat[i]) { - s = it2->first; - //remove small labels - userLabels.erase(s); - break; - } - } - if (errorOff == "") { cout << s << ". I will use the next smallest distance. " << endl; } - } - //since they are sorted once you find a bigger one stop looking - }else { break; } - } - - return smaller; - - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - -} +//********************************************************************************************************************** -/**************************************************************************************************/ #endif