X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothur.h;h=c3f61b4268b613da53a567f0fa6239983a19311e;hb=0c78e45408d8c099bc51579225ebfc227f7523b2;hp=36d7b28ebf45dd3f4bf1d0555c74f4e99c841fe9;hpb=c5c7502f435e1413c19e373dab1dfebcaa67588d;p=mothur.git diff --git a/mothur.h b/mothur.h index 36d7b28..c3f61b4 100644 --- a/mothur.h +++ b/mothur.h @@ -1,7 +1,6 @@ #ifndef MOTHUR_H #define MOTHUR_H -using namespace std; /* @@ -15,12 +14,15 @@ using namespace std; /* This file contains all the standard incudes we use in the project as well as some common utilities. */ +//#include //io libraries #include #include #include #include +#include + //exception #include @@ -40,9 +42,80 @@ using namespace std; #include #include -typedef unsigned long long ull; +//misc +#include +#include +#include + +#ifdef USE_MPI + #include "mpi.h" +#endif +/***********************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #include + #include + + #ifdef USE_READLINE + #include + #include + #endif + +#else + #include //allows unbuffered screen capture from stdin + #include //get cwd +#endif +using namespace std; + +#define exp(x) (exp((double) x)) +#define sqrt(x) (sqrt((double) x)) +#define log10(x) (log10((double) x)) +#define log2(x) (log10(x)/log10(2)) +#define isnan(x) ((x) != (x)) +#define isinf(x) (fabs(x) == std::numeric_limits::infinity()) + +typedef unsigned long ull; + +struct IntNode { + int lvalue; + int rvalue; + int lcoef; + int rcoef; + IntNode* left; + IntNode* right; + + IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {}; + IntNode() {}; +}; + +struct ThreadNode { + int* pid; + IntNode* left; + IntNode* right; +}; + +/************************************************************/ +struct clusterNode { + int numSeq; + int parent; + int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node + clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {}; +}; +/************************************************************/ +struct seqDist { + int seq1; + int seq2; + float dist; + seqDist() {} + seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {} + ~seqDist() {} +}; +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareSequenceDistance(seqDist left, seqDist right){ + return (left.dist < right.dist); +} /***********************************************************************/ // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2 @@ -62,6 +135,20 @@ inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){ if (!(i >> x) || (failIfLeftoverChars && i.get(c))) throw BadConversion(s); } + +//********************************************************************************************************************** + +template +inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){ + istringstream i(s); + char c; + if (!(i >> x) || (failIfLeftoverChars && i.get(c))) + { + return false; + } + return true; +} + //********************************************************************************************************************** template @@ -70,7 +157,7 @@ inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){ char c; if (!(i >> x) || (failIfLeftoverChars && i.get(c))) { - cout << "'" << s << "' is unable to be converted into an integer.\n"; + cout << "unable to be converted into an integer.\n" << endl; return false; } return true; @@ -95,7 +182,6 @@ string toHex(const T&x){ return output.str(); } - //********************************************************************************************************************** template @@ -107,8 +193,19 @@ string toString(const T&x, int i){ return output.str(); } +/***********************************************************************/ +inline int openOutputFileAppend(string fileName, ofstream& fileHandle){ + + fileHandle.open(fileName.c_str(), ios::app); + if(!fileHandle) { + cout << "Error: Could not open " << fileName << endl; + return 1; + } + else { + return 0; + } - +} /***********************************************************************/ inline void gobble(istream& f){ @@ -118,6 +215,38 @@ inline void gobble(istream& f){ f.putback(d); } +/***********************************************************************/ + +inline string getline(ifstream& fileHandle) { + try { + + string line = ""; + + while (!fileHandle.eof()) { + //get next character + char c = fileHandle.get(); + + //are you at the end of the line + if ((c == '\n') || (c == '\r') || (c == '\f')){ break; } + else { line += c; } + } + + return line; + + } + catch(exception& e) { + cout << "Error in mothur function getline" << endl; + exit(1); + } +} + +/***********************************************************************/ + +inline bool isTrue(string f){ + + if ((f == "TRUE") || (f == "T") || (f == "true") || (f == "t")) { return true; } + else { return false; } +} /***********************************************************************/ @@ -125,6 +254,13 @@ inline float roundDist(float dist, int precision){ return int(dist * precision + 0.5)/float(precision); +} +/***********************************************************************/ + +inline float ceilDist(float dist, int precision){ + + return int(ceil(dist * precision))/float(precision); + } /***********************************************************************/ @@ -196,50 +332,289 @@ inline string getSimpleName(string longName){ string simpleName = longName; - if(longName.find_last_of("/") != longName.npos){ - int pos = longName.find_last_of('/')+1; - simpleName = longName.substr(pos, longName.length()); - } + size_t found; + found=longName.find_last_of("/\\"); + if(found != longName.npos){ + simpleName = longName.substr(found+1); + } + + //if(longName.find_last_of("/") != longName.npos){ + // int pos = longName.find_last_of('/')+1; + // simpleName = longName.substr(pos, longName.length()); + //} + return simpleName; } /***********************************************************************/ +inline int factorial(int num){ + int total = 1; + + for (int i = 1; i <= num; i++) { + total *= i; + } + + return total; +} +/************************************************************************************************** + +double min(double x, double y) +{ + if(x 0) { simpleCWD = cwd.substr(1); } + + //break apart the current working directory + vector dirs; + while (simpleCWD.find_first_of('/') != -1) { + string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/')); + simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length()); + dirs.push_back(dir); + } + //get last one // ex. ../../../filename = /user/work/desktop/filename + dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + + + int index = dirs.size()-1; + + while((pos = path.rfind("./")) != -1) { //while you don't have a complete path + if (pos == 0) { break; //you are at the end + }else if (path[(pos-1)] == '.') { //you want your parent directory ../ + path = path.substr(0, pos-1); + index--; + if (index == 0) { break; } + }else if (path[(pos-1)] == '/') { //you want the current working dir ./ + path = path.substr(0, pos); + }else if (pos == 1) { break; //you are at the end + }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + } + + for (int i = index; i >= 0; i--) { + newFileName = dirs[i] + "/" + newFileName; + } + + newFileName = "/" + newFileName; + return newFileName; + } + #else + if (path.find("~") != -1) { //go to home directory + string homeDir = getenv ("HOMEPATH"); + newFileName = homeDir + fileName.substr(fileName.find("~")+1); + return newFileName; + }else { //find path + if (path.rfind(".\\") == -1) { return fileName; } //already complete name + else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name + + char *cwdpath = NULL; + cwdpath = getcwd(NULL, 0); // or _getcwd + if ( cwdpath != NULL) { cwd = cwdpath; } + else { cwd = ""; } + + //break apart the current working directory + vector dirs; + while (cwd.find_first_of('\\') != -1) { + string dir = cwd.substr(0,cwd.find_first_of('\\')); + cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length()); + dirs.push_back(dir); + + } + //get last one + dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + + int index = dirs.size()-1; + + while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path + if (pos == 0) { break; //you are at the end + }else if (path[(pos-1)] == '.') { //you want your parent directory ../ + path = path.substr(0, pos-1); + index--; + if (index == 0) { break; } + }else if (path[(pos-1)] == '\\') { //you want the current working dir ./ + path = path.substr(0, pos); + }else if (pos == 1) { break; //you are at the end + }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + } + + for (int i = index; i >= 0; i--) { + newFileName = dirs[i] + "\\" + newFileName; + } + + return newFileName; + } + + #endif + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function getFullPathName. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} +/***********************************************************************/ + +inline int openInputFile(string fileName, ifstream& fileHandle, string m){ + + //get full path name + string completeFileName = getFullPathName(fileName); + + fileHandle.open(completeFileName.c_str()); + if(!fileHandle) { + return 1; + }else { + //check for blank file + gobble(fileHandle); + return 0; + } +} /***********************************************************************/ inline int openInputFile(string fileName, ifstream& fileHandle){ + + //get full path name + string completeFileName = getFullPathName(fileName); - fileHandle.open(fileName.c_str()); + fileHandle.open(completeFileName.c_str()); if(!fileHandle) { - cerr << "Error: Could not open " << fileName << endl; + cout << "Error: Could not open " << completeFileName << endl; return 1; } else { + //check for blank file + gobble(fileHandle); + if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; } + return 0; } } +/***********************************************************************/ + +inline int renameFile(string oldName, string newName){ + + ifstream inTest; + int exist = openInputFile(newName, inTest, ""); + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if (exist == 0) { //you could open it so you want to delete it + inTest.close(); + string command = "rm " + newName; + system(command.c_str()); + } + + string command = "mv " + oldName + " " + newName; + system(command.c_str()); +#else + remove(newName.c_str()); + int renameOk = rename(oldName.c_str(), newName.c_str()); +#endif + return 0; +} /***********************************************************************/ inline int openOutputFile(string fileName, ofstream& fileHandle){ - fileHandle.open(fileName.c_str(), ios::trunc); + string completeFileName = getFullPathName(fileName); + + fileHandle.open(completeFileName.c_str(), ios::trunc); if(!fileHandle) { - cerr << "Error: Could not open " << fileName << endl; + cout << "Error: Could not open " << completeFileName << endl; return 1; } else { @@ -250,6 +625,38 @@ inline int openOutputFile(string fileName, ofstream& fileHandle){ /***********************************************************************/ +inline int getNumSeqs(ifstream& file){ + + int numSeqs = count(istreambuf_iterator(file),istreambuf_iterator(), '>'); + file.seekg(0); + return numSeqs; + +} +/***********************************************************************/ +inline void getNumSeqs(ifstream& file, int& numSeqs){ + + string input; + numSeqs = 0; + while(!file.eof()){ + input = getline(file); + if (input.length() != 0) { + if(input[0] == '>'){ numSeqs++; } + } + } +} + +/***********************************************************************/ + +inline bool inVector(string member, vector group){ + + for (int i = 0; i < group.size(); i++) { + if (group[i] == member) { return true; } + } + + return false; +} +/***********************************************************************/ + //This function parses the estimator options and puts them in a vector inline void splitAtDash(string& estim, vector& container) { try { @@ -266,14 +673,9 @@ inline void splitAtDash(string& estim, vector& container) { container.push_back(estim); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } - + } } /***********************************************************************/ @@ -293,14 +695,9 @@ inline void splitAtDash(string& estim, set& container) { container.insert(estim); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } - + } } /***********************************************************************/ //This function parses the line options and puts them in a set @@ -322,14 +719,9 @@ inline void splitAtDash(string& estim, set& container) { container.insert(lineNum); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - + } } /***********************************************************************/ //This function parses the a string and puts peices in a vector @@ -348,13 +740,9 @@ inline void splitAtComma(string& estim, vector& container) { container.push_back(estim); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } + } } /***********************************************************************/ @@ -371,14 +759,9 @@ inline void splitAtComma(string& prefix, string& suffix){ } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } - + } } /***********************************************************************/ @@ -396,14 +779,9 @@ inline void splitAtEquals(string& key, string& value){ } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the mothur class function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtEquals. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } - + } } /**************************************************************************************************/ @@ -415,17 +793,277 @@ inline bool inUsersGroups(string groupname, vector Groups) { return false; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); + } +} +/**************************************************************************************************/ +//returns true if any of the strings in first vector are in second vector +inline bool inUsersGroups(vector groupnames, vector Groups) { + try { + + for (int i = 0; i < groupnames.size(); i++) { + if (inUsersGroups(groupnames[i], Groups)) { return true; } + } + return false; } - catch(...) { - cout << "An unknown error has occurred in the mothur class function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); + } +} +/***********************************************************************/ +//this function determines if the user has given us labels that are smaller than the given label. +//if so then it returns true so that the calling function can run the previous valid distance. +//it's a "smart" distance function. It also checks for invalid labels. +inline bool anyLabelsToProcess(string label, set& userLabels, string errorOff) { + try { + + set::iterator it; + vector orderFloat; + map userMap; //the conversion process removes trailing 0's which we need to put back + map::iterator it2; + float labelFloat; + bool smaller = false; + + //unique is the smallest line + if (label == "unique") { return false; } + else { convert(label, labelFloat); } + + //go through users set and make them floats + for(it = userLabels.begin(); it != userLabels.end(); ++it) { + + float temp; + if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){ + convert(*it, temp); + orderFloat.push_back(temp); + userMap[*it] = temp; + }else if (*it == "unique") { + orderFloat.push_back(-1.0); + userMap["unique"] = -1.0; + }else { + if (errorOff == "") { cout << *it << " is not a valid label." << endl; } + userLabels.erase(*it); + it--; + } + } + + //sort order + sort(orderFloat.begin(), orderFloat.end()); + + /*************************************************/ + //is this label bigger than any of the users labels + /*************************************************/ + + //loop through order until you find a label greater than label + for (int i = 0; i < orderFloat.size(); i++) { + if (orderFloat[i] < labelFloat) { + smaller = true; + if (orderFloat[i] == -1) { + if (errorOff == "") { cout << "Your file does not include the label unique." << endl; } + userLabels.erase("unique"); + } + else { + if (errorOff == "") { cout << "Your file does not include the label " << endl; } + string s = ""; + for (it2 = userMap.begin(); it2!= userMap.end(); it2++) { + if (it2->second == orderFloat[i]) { + s = it2->first; + //remove small labels + userLabels.erase(s); + break; + } + } + if (errorOff == "") {cout << s << ". I will use the next smallest distance. " << endl; } + } + //since they are sorted once you find a bigger one stop looking + }else { break; } + } + + return smaller; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function anyLabelsToProcess. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } } /**************************************************************************************************/ +inline void appendFiles(string temp, string filename) { + try{ + ofstream output; + ifstream input; + + //open output file in append mode + openOutputFileAppend(filename, output); + int ableToOpen = openInputFile(temp, input, "no error"); + + if (ableToOpen == 0) { //you opened it + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } + } + input.close(); + } + + output.close(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function appendFiles. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} +/**************************************************************************************************/ +inline string sortFile(string distFile, string outputDir){ + try { + + //if (outputDir == "") { outputDir += hasPath(distFile); } + string outfile = getRootName(distFile) + "sorted.dist"; + + //if you can, use the unix sort since its been optimized for years + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + string command = "sort -n -k +3 " + distFile + " -o " + outfile; + system(command.c_str()); + #else //you are stuck with my best attempt... + //windows sort does not have a way to specify a column, only a character in the line + //since we cannot assume that the distance will always be at the the same character location on each line + //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back. + + //read in file line by file and put distance first + string tempDistFile = distFile + ".temp"; + ifstream input; + ofstream output; + openInputFile(distFile, input); + openOutputFile(tempDistFile, output); + + string firstName, secondName; + float dist; + while (input) { + input >> firstName >> secondName >> dist; + output << dist << '\t' << firstName << '\t' << secondName << endl; + gobble(input); + } + input.close(); + output.close(); + + + //sort using windows sort + string tempOutfile = outfile + ".temp"; + string command = "sort " + tempDistFile + " /O " + tempOutfile; + system(command.c_str()); + + //read in sorted file and put distance at end again + ifstream input2; + openInputFile(tempOutfile, input2); + openOutputFile(outfile, output); + + while (input2) { + input2 >> dist >> firstName >> secondName; + output << firstName << '\t' << secondName << '\t' << dist << endl; + gobble(input2); + } + input2.close(); + output.close(); + + //remove temp files + remove(tempDistFile.c_str()); + remove(tempOutfile.c_str()); + #endif + + return outfile; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function sortfile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} +/**************************************************************************************************/ +inline vector setFilePosFasta(string filename, int& num) { + + vector positions; + ifstream inFASTA; + openInputFile(filename, inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); gobble(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + num = positions.size(); + + /*FILE * pFile; + long size; + + //get num bytes in file + pFile = fopen (filename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + }*/ + + long size = positions[(positions.size()-1)]; + ifstream in; + openInputFile(filename, in); + + in.seekg(size); + + while(char c = in.get()){ + if(in.eof()) { break; } + else { size++; } + } + in.close(); + + positions.push_back(size); + + return positions; +} +/**************************************************************************************************/ +inline vector setFilePosEachLine(string filename, int& num) { + + vector positions; + ifstream in; + openInputFile(filename, in); + + string input; + while(!in.eof()){ + long lastpos = in.tellg(); + input = getline(in); gobble(in); + if (input.length() != 0) { + long pos = in.tellg(); + if (pos != -1) { positions.push_back(pos - input.length() - 1); } + else { positions.push_back(lastpos); } + } + } + in.close(); + + num = positions.size(); + + FILE * pFile; + long size; + + //get num bytes in file + pFile = fopen (filename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + } + + positions.push_back(size); + + return positions; +} + +/**************************************************************************************************/ #endif