X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothur.h;h=a7946fde756a68a5eeeabec8aa63e72ec53d1df7;hb=cd985cf388dcc4c7de8251339206aec5f7e12f1e;hp=23b9f189c992f97e2e9a47823cacce62db363696;hpb=f5023c911c377e5320c5110c78af98dd8841ef58;p=mothur.git diff --git a/mothur.h b/mothur.h index 23b9f18..a7946fd 100644 --- a/mothur.h +++ b/mothur.h @@ -21,6 +21,8 @@ #include #include #include +#include + //exception #include @@ -45,10 +47,23 @@ #include #include +#ifdef USE_MPI + #include "mpi.h" +#endif +/***********************************************************************/ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) #include #include + + #ifdef USE_READLINE + #include + #include + #endif + +#else + #include //allows unbuffered screen capture from stdin + #include //get cwd #endif using namespace std; @@ -60,7 +75,6 @@ using namespace std; #define isnan(x) ((x) != (x)) #define isinf(x) (fabs(x) == std::numeric_limits::infinity()) - typedef unsigned long ull; struct IntNode { @@ -70,6 +84,9 @@ struct IntNode { int rcoef; IntNode* left; IntNode* right; + + IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {}; + IntNode() {}; }; struct ThreadNode { @@ -85,7 +102,20 @@ struct clusterNode { int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {}; }; - +/************************************************************/ +struct seqDist { + int seq1; + int seq2; + float dist; + seqDist() {} + seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {} + ~seqDist() {} +}; +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareSequenceDistance(seqDist left, seqDist right){ + return (left.dist < right.dist); +} /***********************************************************************/ // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2 @@ -164,12 +194,11 @@ string toString(const T&x, int i){ return output.str(); } /***********************************************************************/ - inline int openOutputFileAppend(string fileName, ofstream& fileHandle){ fileHandle.open(fileName.c_str(), ios::app); if(!fileHandle) { - cout << "Error: Could not open " << fileName << endl; + cout << "Error: Could not open " << fileName << endl; return 1; } else { @@ -186,7 +215,6 @@ inline void gobble(istream& f){ f.putback(d); } - /***********************************************************************/ inline string getline(ifstream& fileHandle) { @@ -212,77 +240,6 @@ inline string getline(ifstream& fileHandle) { } } -/**************************************************************************************************/ - -inline void mothurOut(string message) { - try{ - ofstream out; - string logFileName = "mothur.logFile"; - openOutputFileAppend(logFileName, out); - - cout << message; - out << message; - - out.close(); - } - catch(exception& e) { - cout << "Error in mothur class mothurOut" << endl; - exit(1); - } -} -/**************************************************************************************************/ - -inline void mothurOut(string message, string precision) { - try{ - ofstream out; - string logFileName = "mothur.logFile"; - openOutputFileAppend(logFileName, out); - - cout << precision << message; - out << precision << message; - - out.close(); - } - catch(exception& e) { - cout << "Error in mothur class mothurOut" << endl; - exit(1); - } -} - -/**************************************************************************************************/ - -inline void mothurOutEndLine() { - try { - ofstream out; - string logFileName = "mothur.logFile"; - openOutputFileAppend(logFileName, out); - - cout << endl; - out << endl; - - out.close(); - } - catch(exception& e) { - cout << "error in mothur mothurOutEndLine" << endl; - exit(1); - } -} - - -/**************************************************************************************************/ - -inline void errorOut(exception& e, string object, string function) { - - mothurOut("Error: "); - mothurOut(toString(e.what())); - mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry."); - mothurOutEndLine(); - -} - - - - /***********************************************************************/ inline bool isTrue(string f){ @@ -297,6 +254,13 @@ inline float roundDist(float dist, int precision){ return int(dist * precision + 0.5)/float(precision); +} +/***********************************************************************/ + +inline float ceilDist(float dist, int precision){ + + return int(ceil(dist * precision))/float(precision); + } /***********************************************************************/ @@ -368,11 +332,18 @@ inline string getSimpleName(string longName){ string simpleName = longName; - if(longName.find_last_of("/") != longName.npos){ - int pos = longName.find_last_of('/')+1; - simpleName = longName.substr(pos, longName.length()); - } + size_t found; + found=longName.find_last_of("/\\"); + if(found != longName.npos){ + simpleName = longName.substr(found+1); + } + + //if(longName.find_last_of("/") != longName.npos){ + // int pos = longName.find_last_of('/')+1; + // simpleName = longName.substr(pos, longName.length()); + //} + return simpleName; } @@ -401,13 +372,28 @@ inline string getPathName(string longName){ string rootPathName = longName; - if(longName.find_last_of('/') != longName.npos){ - int pos = longName.find_last_of('/')+1; + if(longName.find_last_of("/\\") != longName.npos){ + int pos = longName.find_last_of("/\\")+1; rootPathName = longName.substr(0, pos); } - + return rootPathName; } +/***********************************************************************/ + +inline string hasPath(string longName){ + + string path = ""; + + size_t found; + found=longName.find_last_of("~/\\"); + + if(found != longName.npos){ + path = longName.substr(0, found+1); + } + + return path; +} /***********************************************************************/ @@ -422,33 +408,213 @@ inline string getExtension(string longName){ return extension; } +/***********************************************************************/ +inline bool isBlank(string fileName){ + + ifstream fileHandle; + fileHandle.open(fileName.c_str()); + if(!fileHandle) { + cout << "Error: Could not open " << fileName << endl; + return false; + }else { + //check for blank file + gobble(fileHandle); + if (fileHandle.eof()) { fileHandle.close(); return true; } + } + return false; +} +/***********************************************************************/ +inline string getFullPathName(string fileName){ + try{ + + string path = hasPath(fileName); + string newFileName; + int pos; + + if (path == "") { return fileName; } //its a simple name + else { //we need to complete the pathname + // ex. ../../../filename + // cwd = /user/work/desktop + + string cwd; + //get current working directory + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + + if (path.find("~") != -1) { //go to home directory + string homeDir = getenv ("HOME"); + newFileName = homeDir + fileName.substr(fileName.find("~")+1); + return newFileName; + }else { //find path + if (path.rfind("./") == -1) { return fileName; } //already complete name + else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name + + char* cwdpath = new char[1024]; + + size_t size; + cwdpath=getcwd(cwdpath,size); + + cwd = cwdpath; + + //rip off first '/' + string simpleCWD; + if (cwd.length() > 0) { simpleCWD = cwd.substr(1); } + + //break apart the current working directory + vector dirs; + while (simpleCWD.find_first_of('/') != -1) { + string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/')); + simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length()); + dirs.push_back(dir); + } + //get last one // ex. ../../../filename = /user/work/desktop/filename + dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + + + int index = dirs.size()-1; + + while((pos = path.rfind("./")) != -1) { //while you don't have a complete path + if (pos == 0) { break; //you are at the end + }else if (path[(pos-1)] == '.') { //you want your parent directory ../ + path = path.substr(0, pos-1); + index--; + if (index == 0) { break; } + }else if (path[(pos-1)] == '/') { //you want the current working dir ./ + path = path.substr(0, pos); + }else if (pos == 1) { break; //you are at the end + }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + } + + for (int i = index; i >= 0; i--) { + newFileName = dirs[i] + "/" + newFileName; + } + + newFileName = "/" + newFileName; + return newFileName; + } + #else + if (path.find("~") != -1) { //go to home directory + string homeDir = getenv ("HOMEPATH"); + newFileName = homeDir + fileName.substr(fileName.find("~")+1); + return newFileName; + }else { //find path + if (path.rfind(".\\") == -1) { return fileName; } //already complete name + else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name + + char *cwdpath = NULL; + cwdpath = getcwd(NULL, 0); // or _getcwd + if ( cwdpath != NULL) { cwd = cwdpath; } + else { cwd = ""; } + + //break apart the current working directory + vector dirs; + while (cwd.find_first_of('\\') != -1) { + string dir = cwd.substr(0,cwd.find_first_of('\\')); + cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length()); + dirs.push_back(dir); + + } + //get last one + dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + + int index = dirs.size()-1; + + while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path + if (pos == 0) { break; //you are at the end + }else if (path[(pos-1)] == '.') { //you want your parent directory ../ + path = path.substr(0, pos-1); + index--; + if (index == 0) { break; } + }else if (path[(pos-1)] == '\\') { //you want the current working dir ./ + path = path.substr(0, pos); + }else if (pos == 1) { break; //you are at the end + }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + } + + for (int i = index; i >= 0; i--) { + newFileName = dirs[i] + "\\" + newFileName; + } + + return newFileName; + } + + #endif + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function getFullPathName. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} +/***********************************************************************/ + +inline int openInputFile(string fileName, ifstream& fileHandle, string m){ + + //get full path name + string completeFileName = getFullPathName(fileName); + + fileHandle.open(completeFileName.c_str()); + if(!fileHandle) { + return 1; + }else { + //check for blank file + gobble(fileHandle); + return 0; + } +} /***********************************************************************/ inline int openInputFile(string fileName, ifstream& fileHandle){ + + //get full path name + string completeFileName = getFullPathName(fileName); - fileHandle.open(fileName.c_str()); + fileHandle.open(completeFileName.c_str()); if(!fileHandle) { - mothurOut("Error: Could not open " + fileName); mothurOutEndLine(); + cout << "Error: Could not open " << completeFileName << endl; return 1; } else { //check for blank file gobble(fileHandle); - if (fileHandle.eof()) { mothurOut(fileName + " is blank. Please correct."); mothurOutEndLine(); return 1; } + if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; return 1; } return 0; } } +/***********************************************************************/ + +inline int renameFile(string oldName, string newName){ + + ifstream inTest; + int exist = openInputFile(newName, inTest, ""); + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if (exist == 0) { //you could open it so you want to delete it + inTest.close(); + string command = "rm " + newName; + system(command.c_str()); + } + + string command = "mv " + oldName + " " + newName; + system(command.c_str()); +#else + remove(newName.c_str()); + int renameOk = rename(oldName.c_str(), newName.c_str()); +#endif + return 0; +} /***********************************************************************/ inline int openOutputFile(string fileName, ofstream& fileHandle){ - fileHandle.open(fileName.c_str(), ios::trunc); + string completeFileName = getFullPathName(fileName); + + fileHandle.open(completeFileName.c_str(), ios::trunc); if(!fileHandle) { - mothurOut("Error: Could not open " + fileName); mothurOutEndLine(); + cout << "Error: Could not open " << completeFileName << endl; return 1; } else { @@ -466,9 +632,31 @@ inline int getNumSeqs(ifstream& file){ return numSeqs; } +/***********************************************************************/ +inline void getNumSeqs(ifstream& file, int& numSeqs){ + + string input; + numSeqs = 0; + while(!file.eof()){ + input = getline(file); + if (input.length() != 0) { + if(input[0] == '>'){ numSeqs++; } + } + } +} /***********************************************************************/ +inline bool inVector(string member, vector group){ + + for (int i = 0; i < group.size(); i++) { + if (group[i] == member) { return true; } + } + + return false; +} +/***********************************************************************/ + //This function parses the estimator options and puts them in a vector inline void splitAtDash(string& estim, vector& container) { try { @@ -485,9 +673,9 @@ inline void splitAtDash(string& estim, vector& container) { container.push_back(estim); } catch(exception& e) { - errorOut(e, "mothur", "splitAtDash"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } + } } /***********************************************************************/ @@ -507,9 +695,9 @@ inline void splitAtDash(string& estim, set& container) { container.insert(estim); } catch(exception& e) { - errorOut(e, "mothur", "splitAtDash"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } + } } /***********************************************************************/ //This function parses the line options and puts them in a set @@ -531,9 +719,9 @@ inline void splitAtDash(string& estim, set& container) { container.insert(lineNum); } catch(exception& e) { - errorOut(e, "mothur", "splitAtDash"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } + } } /***********************************************************************/ //This function parses the a string and puts peices in a vector @@ -552,9 +740,9 @@ inline void splitAtComma(string& estim, vector& container) { container.push_back(estim); } catch(exception& e) { - errorOut(e, "mothur", "splitAtComma"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } + } } /***********************************************************************/ @@ -571,9 +759,9 @@ inline void splitAtComma(string& prefix, string& suffix){ } catch(exception& e) { - errorOut(e, "mothur", "splitAtComma"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } + } } /***********************************************************************/ @@ -591,9 +779,9 @@ inline void splitAtEquals(string& key, string& value){ } } catch(exception& e) { - errorOut(e, "mothur", "splitAtEquals"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtEquals. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } + } } /**************************************************************************************************/ @@ -605,72 +793,32 @@ inline bool inUsersGroups(string groupname, vector Groups) { return false; } catch(exception& e) { - errorOut(e, "mothur", "inUsersGroups"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } -} - -/**************************************************************************************************/ - -inline void mothurOutJustToLog(string message) { - try { - ofstream out; - string logFileName = "mothur.logFile"; - openOutputFileAppend(logFileName, out); - - out << message; - - out.close(); - } - catch(exception& e) { - errorOut(e, "mothur", "mothurOutJustToLog"); - exit(1); - } + } } - - /**************************************************************************************************/ - -inline void mothurOut(float num) { +//returns true if any of the strings in first vector are in second vector +inline bool inUsersGroups(vector groupnames, vector Groups) { try { - ofstream out; - string logFileName = "mothur.logFile"; - openOutputFileAppend(logFileName, out); - - cout << num; - out << num; - out.close(); + for (int i = 0; i < groupnames.size(); i++) { + if (inUsersGroups(groupnames[i], Groups)) { return true; } + } + return false; } catch(exception& e) { - cout << "Error in mothur class mothurOut float" << endl; + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); - } + } } -/***********************************************************************/ -inline void mothurOut(double value) { - try { - ofstream out; - string logFileName = "mothur.logFile"; - openOutputFileAppend(logFileName, out); - - cout << value; - out << value; - - out.close(); - } - catch(exception& e) { - cout << "Error in mothur class mothurOut double" << endl; - exit(1); - } -} - /***********************************************************************/ //this function determines if the user has given us labels that are smaller than the given label. //if so then it returns true so that the calling function can run the previous valid distance. //it's a "smart" distance function. It also checks for invalid labels. inline bool anyLabelsToProcess(string label, set& userLabels, string errorOff) { try { + set::iterator it; vector orderFloat; map userMap; //the conversion process removes trailing 0's which we need to put back @@ -694,7 +842,7 @@ inline bool anyLabelsToProcess(string label, set& userLabels, string err orderFloat.push_back(-1.0); userMap["unique"] = -1.0; }else { - if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); } + if (errorOff == "") { cout << *it << " is not a valid label." << endl; } userLabels.erase(*it); it--; } @@ -712,11 +860,11 @@ inline bool anyLabelsToProcess(string label, set& userLabels, string err if (orderFloat[i] < labelFloat) { smaller = true; if (orderFloat[i] == -1) { - if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); } + if (errorOff == "") { cout << "Your file does not include the label unique." << endl; } userLabels.erase("unique"); } else { - if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); } + if (errorOff == "") { cout << "Your file does not include the label " << endl; } string s = ""; for (it2 = userMap.begin(); it2!= userMap.end(); it2++) { if (it2->second == orderFloat[i]) { @@ -726,7 +874,7 @@ inline bool anyLabelsToProcess(string label, set& userLabels, string err break; } } - if (errorOff == "") { mothurOut(s + ". I will use the next smallest distance. "); mothurOutEndLine(); } + if (errorOff == "") {cout << s << ". I will use the next smallest distance. " << endl; } } //since they are sorted once you find a bigger one stop looking }else { break; } @@ -736,9 +884,184 @@ inline bool anyLabelsToProcess(string label, set& userLabels, string err } catch(exception& e) { - errorOut(e, "mothur", "anyLabelsToProcess"); + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function anyLabelsToProcess. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} + +/**************************************************************************************************/ +inline void appendFiles(string temp, string filename) { + try{ + ofstream output; + ifstream input; + + //open output file in append mode + openOutputFileAppend(filename, output); + int ableToOpen = openInputFile(temp, input, "no error"); + + if (ableToOpen == 0) { //you opened it + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } + } + input.close(); + } + + output.close(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function appendFiles. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; exit(1); + } +} + +/**************************************************************************************************/ +inline string sortFile(string distFile, string outputDir){ + try { + + //if (outputDir == "") { outputDir += hasPath(distFile); } + string outfile = getRootName(distFile) + "sorted.dist"; + + + //if you can, use the unix sort since its been optimized for years + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + string command = "sort -n -k +3 " + distFile + " -o " + outfile; + system(command.c_str()); + #else //you are stuck with my best attempt... + //windows sort does not have a way to specify a column, only a character in the line + //since we cannot assume that the distance will always be at the the same character location on each line + //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back. + + //read in file line by file and put distance first + string tempDistFile = distFile + ".temp"; + ifstream input; + ofstream output; + openInputFile(distFile, input); + openOutputFile(tempDistFile, output); + + string firstName, secondName; + float dist; + while (input) { + input >> firstName >> secondName >> dist; + output << dist << '\t' << firstName << '\t' << secondName << endl; + gobble(input); + } + input.close(); + output.close(); + + + //sort using windows sort + string tempOutfile = outfile + ".temp"; + string command = "sort " + tempDistFile + " /O " + tempOutfile; + system(command.c_str()); + + //read in sorted file and put distance at end again + ifstream input2; + openInputFile(tempOutfile, input2); + openOutputFile(outfile, output); + + while (input2) { + input2 >> dist >> firstName >> secondName; + output << firstName << '\t' << secondName << '\t' << dist << endl; + gobble(input2); + } + input2.close(); + output.close(); + + //remove temp files + remove(tempDistFile.c_str()); + remove(tempOutfile.c_str()); + #endif + + return outfile; } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function sortfile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} +/**************************************************************************************************/ +inline vector setFilePosFasta(string filename, int& num) { + + vector positions; + ifstream inFASTA; + openInputFile(filename, inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); gobble(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + num = positions.size(); + + /*FILE * pFile; + long size; + + //get num bytes in file + pFile = fopen (filename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + }*/ + + long size = positions[(positions.size()-1)]; + ifstream in; + openInputFile(filename, in); + + in.seekg(size); + + while(char c = in.get()){ + if(in.eof()) { break; } + else { size++; } + } + in.close(); + + positions.push_back(size); + + return positions; +} +/**************************************************************************************************/ +inline vector setFilePosEachLine(string filename, int& num) { + + vector positions; + ifstream in; + openInputFile(filename, in); + + string input; + while(!in.eof()){ + long lastpos = in.tellg(); + input = getline(in); gobble(in); + if (input.length() != 0) { + long pos = in.tellg(); + if (pos != -1) { positions.push_back(pos - input.length() - 1); } + else { positions.push_back(lastpos); } + } + } + in.close(); + + num = positions.size(); + + FILE * pFile; + long size; + + //get num bytes in file + pFile = fopen (filename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + } + + positions.push_back(size); + + return positions; } /**************************************************************************************************/