10 * Created by Sarah Westcott on 2/19/09.
11 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
15 /* This file contains all the standard includes we use in the project as well as some common utilities. */
50 /***********************************************************************/
52 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
57 #include <readline/readline.h>
58 #include <readline/history.h>
61 //#include <readline/readline.h>
62 //#include <readline/history.h>
64 #include <conio.h> //allows unbuffered screen capture from stdin
65 #include <direct.h> //get cwd
70 #define exp(x) (exp((double) x))
71 #define sqrt(x) (sqrt((double) x))
72 #define log10(x) (log10((double) x))
73 #define log2(x) (log10(x)/log10(2))
74 #define isnan(x) ((x) != (x))
75 #define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
77 typedef unsigned long ull;
87 IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {};
97 /************************************************************/
101 int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
102 clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
104 /************************************************************/
110 seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
113 //********************************************************************************************************************
114 //sorts lowest to highest
115 inline bool compareSequenceDistance(seqDist left, seqDist right){
116 return (left.dist < right.dist);
118 /***********************************************************************/
120 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
121 // works for now, but there should be a way to do it without killing the whole program
123 class BadConversion : public runtime_error {
125 BadConversion(const string& s) : runtime_error(s){ }
128 //**********************************************************************************************************************
131 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
134 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
135 throw BadConversion(s);
138 //**********************************************************************************************************************
141 inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
144 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
151 //**********************************************************************************************************************
154 inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
157 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
159 cout << "unable to be converted into an integer.\n" << endl;
165 //**********************************************************************************************************************
168 string toString(const T&x){
174 //**********************************************************************************************************************
177 string toHex(const T&x){
184 //**********************************************************************************************************************
187 string toString(const T&x, int i){
191 output << fixed << x;
195 /***********************************************************************/
197 inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
199 fileHandle.open(fileName.c_str(), ios::app);
201 cout << "Error: Could not open " << fileName << endl;
209 /***********************************************************************/
211 inline void gobble(istream& f){
214 while(isspace(d=f.get())) {;}
218 /***********************************************************************/
220 inline string getline(ifstream& fileHandle) {
225 while (!fileHandle.eof()) {
227 char c = fileHandle.get();
229 //are you at the end of the line
230 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
237 catch(exception& e) {
238 cout << "Error in mothur function getline" << endl;
243 /**************************************************************************************************/
245 inline void mothurOut(string message) {
248 string logFileName = "mothur.logFile";
249 openOutputFileAppend(logFileName, out);
256 catch(exception& e) {
257 cout << "Error in mothur class mothurOut" << endl;
261 /**************************************************************************************************/
263 inline void mothurOut(string message, string precision) {
266 string logFileName = "mothur.logFile";
267 openOutputFileAppend(logFileName, out);
269 cout << precision << message;
270 out << precision << message;
274 catch(exception& e) {
275 cout << "Error in mothur class mothurOut" << endl;
280 /**************************************************************************************************/
282 inline void mothurOutEndLine() {
285 string logFileName = "mothur.logFile";
286 openOutputFileAppend(logFileName, out);
293 catch(exception& e) {
294 cout << "error in mothur mothurOutEndLine" << endl;
300 /**************************************************************************************************/
302 inline void errorOut(exception& e, string object, string function) {
304 mothurOut("Error: ");
305 mothurOut(toString(e.what()));
306 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
311 /***********************************************************************/
313 inline bool isTrue(string f){
315 if ((f == "TRUE") || (f == "T") || (f == "true") || (f == "t")) { return true; }
316 else { return false; }
319 /***********************************************************************/
321 inline float roundDist(float dist, int precision){
323 return int(dist * precision + 0.5)/float(precision);
327 /***********************************************************************/
329 inline int getNumNames(string names){
335 for(int i=0;i<names.size();i++){
346 /**************************************************************************************************/
348 inline vector<vector<double> > binomial(int maxOrder){
350 vector<vector<double> > binomial(maxOrder+1);
352 for(int i=0;i<=maxOrder;i++){
353 binomial[i].resize(maxOrder+1);
362 for(int i=2;i<=maxOrder;i++){
366 for(int i=2;i<=maxOrder;i++){
367 for(int j=1;j<=maxOrder;j++){
368 if(i==j){ binomial[i][j]=1; }
369 if(j>i) { binomial[i][j]=0; }
370 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
377 /***********************************************************************/
379 inline string getRootName(string longName){
381 string rootName = longName;
383 if(longName.find_last_of(".") != longName.npos){
384 int pos = longName.find_last_of('.')+1;
385 rootName = longName.substr(0, pos);
390 /***********************************************************************/
392 inline string getSimpleName(string longName){
394 string simpleName = longName;
397 found=longName.find_last_of("/\\");
399 if(found != longName.npos){
400 simpleName = longName.substr(found+1);
403 //if(longName.find_last_of("/") != longName.npos){
404 // int pos = longName.find_last_of('/')+1;
405 // simpleName = longName.substr(pos, longName.length());
411 /***********************************************************************/
413 inline int factorial(int num){
416 for (int i = 1; i <= num; i++) {
422 /**************************************************************************************************
424 double min(double x, double y)
430 /***********************************************************************/
432 inline string getPathName(string longName){
434 string rootPathName = longName;
436 if(longName.find_last_of('/') != longName.npos){
437 int pos = longName.find_last_of('/')+1;
438 rootPathName = longName.substr(0, pos);
443 /***********************************************************************/
445 inline string hasPath(string longName){
450 found=longName.find_last_of("/\\");
452 if(found != longName.npos){
453 path = longName.substr(0, found+1);
459 /***********************************************************************/
461 inline string getExtension(string longName){
463 string extension = longName;
465 if(longName.find_last_of('.') != longName.npos){
466 int pos = longName.find_last_of('.');
467 extension = longName.substr(pos, longName.length());
472 /***********************************************************************/
473 inline bool isBlank(string fileName){
476 fileHandle.open(fileName.c_str());
478 mothurOut("Error: Could not open " + fileName); mothurOutEndLine();
481 //check for blank file
483 if (fileHandle.eof()) { fileHandle.close(); return true; }
487 /***********************************************************************/
489 inline string getFullPathName(string fileName){
491 string path = hasPath(fileName);
495 if (path == "") { return fileName; } //its a simple name
496 else { //we need to complete the pathname
497 // ex. ../../../filename
498 // cwd = /user/work/desktop
501 //get current working directory
502 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
503 if (path.rfind("./") == -1) { return fileName; } //already complete name
504 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
508 cwdpath=getcwd(cwdpath,size);
513 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
515 //break apart the current working directory
517 while (simpleCWD.find_first_of('/') != -1) {
518 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
519 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
522 //get last one // ex. ../../../filename = /user/work/desktop/filename
523 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
525 int index = dirs.size()-1;
527 while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
528 if (path[(pos-1)] == '.') { //you want your parent directory ../
529 path = path.substr(0, pos-1);
531 if (index == 0) { break; }
532 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
533 path = path.substr(0, pos);
534 }else if (pos == 1) { break;
535 }else { mothurOut("cannot resolve path for " + fileName); mothurOutEndLine(); return fileName; }
538 for (int i = index; i >= 0; i--) {
539 newFileName = dirs[i] + "/" + newFileName;
542 newFileName = "/" + newFileName;
546 if (path.rfind(".\\") == -1) { return fileName; } //already complete name
547 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
549 char *cwdpath = NULL;
550 cwdpath = getcwd(NULL, 0); // or _getcwd
551 if ( cwdpath != NULL) { cwd = cwdpath; }
554 //break apart the current working directory
556 while (cwd.find_first_of('\\') != -1) {
557 string dir = cwd.substr(0,cwd.find_first_of('\\'));
558 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
563 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
565 int index = dirs.size()-1;
567 while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
568 if (path[(pos-1)] == '.') { //you want your parent directory ../
569 path = path.substr(0, pos-1);
571 if (index == 0) { break; }
572 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
573 path = path.substr(0, pos);
574 }else if (pos == 1) { break;
575 }else { mothurOut("cannot resolve path for " + fileName); mothurOutEndLine(); return fileName; }
578 for (int i = index; i >= 0; i--) {
579 newFileName = dirs[i] + "\\" + newFileName;
588 /***********************************************************************/
590 inline int openInputFile(string fileName, ifstream& fileHandle, string m){
593 string completeFileName = getFullPathName(fileName);
594 //string completeFileName = fileName;
596 fileHandle.open(completeFileName.c_str());
598 mothurOut("Error: Could not open " + completeFileName); mothurOutEndLine();
602 //check for blank file
608 /***********************************************************************/
610 inline int openInputFile(string fileName, ifstream& fileHandle){
612 string completeFileName = getFullPathName(fileName);
614 fileHandle.open(completeFileName.c_str());
616 mothurOut("Error: Could not open " + completeFileName); mothurOutEndLine();
620 //check for blank file
622 if (fileHandle.eof()) { mothurOut(completeFileName + " is blank. Please correct."); mothurOutEndLine(); return 1; }
629 /***********************************************************************/
631 inline int openOutputFile(string fileName, ofstream& fileHandle){
633 string completeFileName = getFullPathName(fileName);
635 fileHandle.open(completeFileName.c_str(), ios::trunc);
637 mothurOut("Error: Could not open " + completeFileName); mothurOutEndLine();
646 /***********************************************************************/
648 inline int getNumSeqs(ifstream& file){
650 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
655 /***********************************************************************/
657 inline bool inVector(string member, vector<string> group){
659 for (int i = 0; i < group.size(); i++) {
660 if (group[i] == member) { return true; }
665 /***********************************************************************/
667 //This function parses the estimator options and puts them in a vector
668 inline void splitAtDash(string& estim, vector<string>& container) {
672 while (estim.find_first_of('-') != -1) {
673 individual = estim.substr(0,estim.find_first_of('-'));
674 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
675 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
676 container.push_back(individual);
680 container.push_back(estim);
682 catch(exception& e) {
683 errorOut(e, "mothur", "splitAtDash");
688 /***********************************************************************/
689 //This function parses the label options and puts them in a set
690 inline void splitAtDash(string& estim, set<string>& container) {
694 while (estim.find_first_of('-') != -1) {
695 individual = estim.substr(0,estim.find_first_of('-'));
696 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
697 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
698 container.insert(individual);
702 container.insert(estim);
704 catch(exception& e) {
705 errorOut(e, "mothur", "splitAtDash");
709 /***********************************************************************/
710 //This function parses the line options and puts them in a set
711 inline void splitAtDash(string& estim, set<int>& container) {
716 while (estim.find_first_of('-') != -1) {
717 individual = estim.substr(0,estim.find_first_of('-'));
718 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
719 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
720 convert(individual, lineNum); //convert the string to int
721 container.insert(lineNum);
725 convert(estim, lineNum); //convert the string to int
726 container.insert(lineNum);
728 catch(exception& e) {
729 errorOut(e, "mothur", "splitAtDash");
733 /***********************************************************************/
734 //This function parses a string and puts the pieces in a vector
735 inline void splitAtComma(string& estim, vector<string>& container) {
739 while (estim.find_first_of(',') != -1) {
740 individual = estim.substr(0,estim.find_first_of(','));
741 if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
742 estim = estim.substr(estim.find_first_of(',')+1, estim.length());
743 container.push_back(individual);
747 container.push_back(estim);
749 catch(exception& e) {
750 errorOut(e, "mothur", "splitAtComma");
754 /***********************************************************************/
756 //This function splits up the various option parameters
757 inline void splitAtComma(string& prefix, string& suffix){
759 prefix = suffix.substr(0,suffix.find_first_of(','));
760 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
761 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
763 while(suffix.at(0) == ' ')
764 suffix = suffix.substr(1, suffix.length());
768 catch(exception& e) {
769 errorOut(e, "mothur", "splitAtComma");
773 /***********************************************************************/
775 //This function separates the key value from the option value i.e. dist=96_...
776 inline void splitAtEquals(string& key, string& value){
778 if(value.find_first_of('=') != -1){
779 key = value.substr(0,value.find_first_of('='));
780 if ((value.find_first_of('=')+1) <= value.length()) {
781 value = value.substr(value.find_first_of('=')+1, value.length());
788 catch(exception& e) {
789 errorOut(e, "mothur", "splitAtEquals");
793 /**************************************************************************************************/
795 inline bool inUsersGroups(string groupname, vector<string> Groups) {
797 for (int i = 0; i < Groups.size(); i++) {
798 if (groupname == Groups[i]) { return true; }
802 catch(exception& e) {
803 errorOut(e, "mothur", "inUsersGroups");
808 /**************************************************************************************************/
810 inline void mothurOutJustToLog(string message) {
813 string logFileName = "mothur.logFile";
814 openOutputFileAppend(logFileName, out);
820 catch(exception& e) {
821 errorOut(e, "mothur", "mothurOutJustToLog");
827 /**************************************************************************************************/
829 inline void mothurOut(float num) {
832 string logFileName = "mothur.logFile";
833 openOutputFileAppend(logFileName, out);
840 catch(exception& e) {
841 cout << "Error in mothur class mothurOut float" << endl;
845 /***********************************************************************/
846 inline void mothurOut(double value) {
849 string logFileName = "mothur.logFile";
850 openOutputFileAppend(logFileName, out);
857 catch(exception& e) {
858 cout << "Error in mothur class mothurOut double" << endl;
863 /***********************************************************************/
864 //this function determines if the user has given us labels that are smaller than the given label.
865 //if so then it returns true so that the calling function can run the previous valid distance.
866 //it's a "smart" distance function. It also checks for invalid labels.
867 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
869 set<string>::iterator it;
870 vector<float> orderFloat;
871 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
872 map<string, float>::iterator it2;
874 bool smaller = false;
876 //unique is the smallest line
877 if (label == "unique") { return false; }
878 else { convert(label, labelFloat); }
880 //go through users set and make them floats
881 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
884 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
886 orderFloat.push_back(temp);
888 }else if (*it == "unique") {
889 orderFloat.push_back(-1.0);
890 userMap["unique"] = -1.0;
892 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
893 userLabels.erase(*it);
899 sort(orderFloat.begin(), orderFloat.end());
901 /*************************************************/
902 //is this label bigger than any of the users labels
903 /*************************************************/
905 //loop through order until you find a label greater than label
906 for (int i = 0; i < orderFloat.size(); i++) {
907 if (orderFloat[i] < labelFloat) {
909 if (orderFloat[i] == -1) {
910 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
911 userLabels.erase("unique");
914 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
916 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
917 if (it2->second == orderFloat[i]) {
919 //remove small labels
924 if (errorOff == "") { mothurOut(s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
926 //since they are sorted once you find a bigger one stop looking
933 catch(exception& e) {
934 errorOut(e, "mothur", "anyLabelsToProcess");
939 /**************************************************************************************************/
940 inline void appendFiles(string temp, string filename) {
945 //open output file in append mode
946 openOutputFileAppend(filename, output);
947 openInputFile(temp, input);
949 while(char c = input.get()){
950 if(input.eof()) { break; }
951 else { output << c; }
957 catch(exception& e) {
958 errorOut(e, "mothur", "appendFiles");
963 /**************************************************************************************************/
964 inline string sortFile(string distFile){
966 string outfile = getRootName(distFile) + "sorted.dist";
968 //if you can, use the unix sort since it's been optimized for years
969 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
970 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
971 system(command.c_str());
972 #else //you are stuck with my best attempt...
973 //windows sort does not have a way to specify a column, only a character in the line
974 //since we cannot assume that the distance will always be at the same character location on each line
975 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
977 //read in file line by line and put distance first
978 string tempDistFile = distFile + ".temp";
981 openInputFile(distFile, input);
982 openOutputFile(tempDistFile, output);
984 string firstName, secondName;
987 input >> firstName >> secondName >> dist;
988 output << dist << '\t' << firstName << '\t' << secondName << endl;
995 //sort using windows sort
996 string tempOutfile = outfile + ".temp";
997 string command = "sort " + tempDistFile + " /O " + tempOutfile;
998 system(command.c_str());
1000 //read in sorted file and put distance at end again
1002 openInputFile(tempOutfile, input2);
1003 openOutputFile(outfile, output);
1006 input2 >> dist >> firstName >> secondName;
1007 output << firstName << '\t' << secondName << '\t' << dist << endl;
1014 remove(tempDistFile.c_str());
1015 remove(tempOutfile.c_str());
1020 catch(exception& e) {
1021 errorOut(e, "mothur", "sortFile");
1025 /**************************************************************************************************/