10 * Created by Sarah Westcott on 2/19/09.
11 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
15 /* This file contains all the standard includes we use in the project as well as some common utilities. */
50 /***********************************************************************/
52 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
57 #include <readline/readline.h>
58 #include <readline/history.h>
62 #include <conio.h> //allows unbuffered screen capture from stdin
63 #include <direct.h> //get cwd
// Math helper macros that force double-precision evaluation of their argument.
// Every parameter is parenthesized so the cast applies to the whole argument:
// with the previous `(double) x` form, exp(a / b) expanded to exp((double) a / b),
// which silently turned an integer division into a floating-point one.
// A function-like macro is not re-expanded inside its own body, so the inner
// exp(...) / sqrt(...) / log10(...) calls still reach the real library functions.
#define exp(x) (exp((double) (x)))
#define sqrt(x) (sqrt((double) (x)))
#define log10(x) (log10((double) (x)))
// Base-2 logarithm via change of base; log10 here is the macro defined above.
#define log2(x) (log10(x)/log10(2))
// NaN is the only value that compares unequal to itself.
#define isnan(x) ((x) != (x))
// True for both +infinity and -infinity.
#define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
// Shorthand for an unsigned integer type.
// NOTE(review): the alias is spelled "ull" but names `unsigned long`, not
// `unsigned long long`, so its width follows the platform's `long` - confirm this is intended.
75 typedef unsigned long ull;
// Constructor: stores lv and rv in lvalue and rvalue, and the node pointers l and r in left and right.
85 IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {};
95 /************************************************************/
99 int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
// Constructor: initializes numSeq, parent and smallChild from num, par and kid respectively.
100 clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
102 /************************************************************/
// Constructor: records the two values s1 and s2 in seq1 and seq2 and the distance d in dist.
108 seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
111 //********************************************************************************************************************
112 //sorts lowest to highest
// Ordering predicate for seqDist records: true when left.dist is strictly smaller than right.dist.
// Both arguments are taken by value, so each call copies the two records.
113 inline bool compareSequenceDistance(seqDist left, seqDist right){
114 return (left.dist < right.dist);
116 /***********************************************************************/
118 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
119 // works for now, but there should be a way to do it without killing the whole program
// Exception type derived from runtime_error; convert() throws it with the offending string as message.
121 class BadConversion : public runtime_error {
// Forwards the message s to the runtime_error base class.
123 BadConversion(const string& s) : runtime_error(s){ }
126 //**********************************************************************************************************************
// Parses s into x by extracting from the stream `i`.
// NOTE(review): the construction of `i` is not shown here - presumably an istringstream over s.
// Throws BadConversion(s) when the extraction fails, or when failIfLeftoverChars is true and
// at least one more character can still be read after the extracted value.
129 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
132 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
133 throw BadConversion(s);
136 //**********************************************************************************************************************
// Non-throwing counterpart of convert(): performs the same extraction test and reports the outcome
// through its bool result (anyLabelsToProcess treats `true` as a successful conversion).
// The branch below is the failure case: extraction failed, or leftover characters were found
// while failIfLeftoverChars is true.
139 inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
142 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
149 //**********************************************************************************************************************
// Same extraction test as convertTestFloat; this variant also writes a diagnostic to cout.
// NOTE(review): the message speaks of "an integer" although T is a template parameter, and the
// text printed before it (presumably the offending string) is not visible here - confirm.
152 inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
155 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
157 cout << "unable to be converted into an integer.\n" << endl;
163 //**********************************************************************************************************************
166 string toString(const T&x){
172 //**********************************************************************************************************************
175 string toHex(const T&x){
182 //**********************************************************************************************************************
// Overload of toString taking an extra int argument i.
// x is written to `output` in fixed (non-scientific) notation.
// NOTE(review): how i is applied is not visible here - presumably it sets the precision; confirm.
185 string toString(const T&x, int i){
189 output << fixed << x;
193 /***********************************************************************/
// Opens fileName on fileHandle in append mode (ios::app), so existing content is kept.
// The name is used as given; unlike openOutputFile it is not passed through getFullPathName.
// The error line below is written to cout (presumably when the open failed; the test is not shown).
194 inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
196 fileHandle.open(fileName.c_str(), ios::app);
198 cout << "Error: Could not open " << fileName << endl;
206 /***********************************************************************/
// Skips leading whitespace on f: characters are read one at a time while isspace() accepts them.
// The first character that is not whitespace has already been extracted into d when the loop ends.
// NOTE(review): whether that character is put back on the stream is not visible here - confirm.
208 inline void gobble(istream& f){
211 while(isspace(d=f.get())) {;}
215 /***********************************************************************/
// Reads characters from fileHandle one at a time until the stream reports eof or a line
// terminator ('\n', '\r' or '\f') has been read; the terminator itself is consumed.
// NOTE(review): a "\r\n" pair stops at '\r' and leaves '\n' in the stream unless code not shown
// here deals with it; also, get() at end of input yields EOF before eof() is tested again.
217 inline string getline(ifstream& fileHandle) {
222 while (!fileHandle.eof()) {
224 char c = fileHandle.get();
226 //are you at the end of the line
227 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
// Any std::exception raised while reading is reported on cout.
234 catch(exception& e) {
235 cout << "Error in mothur function getline" << endl;
240 /***********************************************************************/
// Returns true only when f is exactly "TRUE", "T", "true" or "t"; every other string
// (including mixed-case spellings such as "True") yields false.
242 inline bool isTrue(string f){
244 if ((f == "TRUE") || (f == "T") || (f == "true") || (f == "t")) { return true; }
245 else { return false; }
248 /***********************************************************************/
// Rounds dist to the nearest multiple of 1/precision. precision is a scale factor
// (e.g. 100 for two decimals), not a digit count: dist is scaled, 0.5 is added,
// the sum is truncated by the int conversion and then divided back.
// NOTE(review): the int conversion truncates toward zero, so negative values are not rounded
// to nearest (e.g. -0.26 with precision 10 gives -0.2); dist * precision must also fit in an int.
250 inline float roundDist(float dist, int precision){
252 return int(dist * precision + 0.5)/float(precision);
256 /***********************************************************************/
258 inline int getNumNames(string names){
264 for(int i=0;i<names.size();i++){
275 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of doubles. For rows i >= 2 the entries are filled
// with Pascal's rule binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j], and 0 for j > i.
// The initialization of rows 0 and 1 and of column 0 is not visible in this excerpt.
277 inline vector<vector<double> > binomial(int maxOrder){
279 vector<vector<double> > binomial(maxOrder+1);
281 for(int i=0;i<=maxOrder;i++){
282 binomial[i].resize(maxOrder+1);
291 for(int i=2;i<=maxOrder;i++){
295 for(int i=2;i<=maxOrder;i++){
296 for(int j=1;j<=maxOrder;j++){
297 if(i==j){ binomial[i][j]=1; }
// NOTE(review): this `if` is not chained to the previous one with `else`, so for i == j the
// else branch below also runs and overwrites the 1 just stored with
// binomial[i-1][j-1]+binomial[i-1][j] - confirm whether `else if` was intended.
298 if(j>i) { binomial[i][j]=0; }
299 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
306 /***********************************************************************/
// Computes the root of a file name: rootName starts as longName and, when longName contains
// a '.', is cut down to everything up to and including the last '.' (e.g. "a/b.c.txt" -> "a/b.c.").
308 inline string getRootName(string longName){
310 string rootName = longName;
312 if(longName.find_last_of(".") != longName.npos){
// +1 keeps the dot itself in the result.
313 int pos = longName.find_last_of('.')+1;
314 rootName = longName.substr(0, pos);
319 /***********************************************************************/
// Strips the directory part of a path: simpleName starts as longName and, when longName contains
// a '/' or '\', becomes the text after the last such separator.
321 inline string getSimpleName(string longName){
323 string simpleName = longName;
// find_last_of with "/\\" matches either separator, so both Unix and Windows style paths are handled.
326 found=longName.find_last_of("/\\");
328 if(found != longName.npos){
329 simpleName = longName.substr(found+1);
// Earlier, '/'-only implementation kept for reference (disabled).
332 //if(longName.find_last_of("/") != longName.npos){
333 // int pos = longName.find_last_of('/')+1;
334 // simpleName = longName.substr(pos, longName.length());
340 /***********************************************************************/
// Iterates i from 1 through num inclusive (the loop body is not visible here - presumably it
// accumulates the product). For num < 1 the loop does not execute.
// NOTE(review): the result type is int; with a 32-bit int, 13! and above do not fit.
342 inline int factorial(int num){
345 for (int i = 1; i <= num; i++) {
351 /**************************************************************************************************
353 double min(double x, double y)
359 /***********************************************************************/
// Extracts the directory part of a path: rootPathName starts as longName and, when longName
// contains a '/' or '\', is cut down to everything up to and including the last separator.
361 inline string getPathName(string longName){
363 string rootPathName = longName;
365 if(longName.find_last_of("/\\") != longName.npos){
// +1 keeps the trailing separator in the result.
366 int pos = longName.find_last_of("/\\")+1;
367 rootPathName = longName.substr(0, pos);
372 /***********************************************************************/
// Extracts the leading directory part of longName into `path`: everything up to and including
// the last '/' or '\'. getFullPathName treats an empty result as "no directory part".
// NOTE(review): the initial value of `path` is not visible here - presumably the empty string.
374 inline string hasPath(string longName){
379 found=longName.find_last_of("/\\");
381 if(found != longName.npos){
382 path = longName.substr(0, found+1);
388 /***********************************************************************/
// Extracts the extension of a file name: when longName contains a '.', `extension` becomes the
// text from the last '.' to the end, dot included (e.g. "a.b.txt" -> ".txt").
// `extension` is initialized to the whole of longName; what is done when there is no '.' beyond
// that initialization is not visible in this excerpt.
390 inline string getExtension(string longName){
392 string extension = longName;
394 if(longName.find_last_of('.') != longName.npos){
395 int pos = longName.find_last_of('.');
// The count passed to substr exceeds what remains; substr clamps it to the end of the string.
396 extension = longName.substr(pos, longName.length());
401 /***********************************************************************/
// Opens fileName and returns true (after closing the stream) when the stream is at eof at the
// blank-file check. The name is used as given, without getFullPathName.
// The error line is written to cout (presumably when the open failed; the test is not shown).
402 inline bool isBlank(string fileName){
405 fileHandle.open(fileName.c_str());
407 cout << "Error: Could not open " << fileName << endl;
410 //check for blank file
// NOTE(review): eof() only becomes true after a read attempt; the read that precedes this test
// is not visible here - confirm one is performed.
412 if (fileHandle.eof()) { fileHandle.close(); return true; }
416 /***********************************************************************/
// Expands a file name that contains relative "./" or "../" components (".\" / "..\" in the
// second, backslash-based branch) into an absolute name built from the current working directory.
// Names without a directory part, and names whose path contains no "./", are returned unchanged.
// If the path cannot be resolved a message is printed and fileName is returned as given.
418 inline string getFullPathName(string fileName){
421 string path = hasPath(fileName);
425 if (path == "") { return fileName; } //its a simple name
426 else { //we need to complete the pathname
427 // ex. ../../../filename
428 // cwd = /user/work/desktop
431 //get current working directory
432 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// rfind returns string::npos when nothing is found; the comparison with -1 relies on -1
// converting to that same value.
433 if (path.rfind("./") == -1) { return fileName; } //already complete name
434 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
// NOTE(review): the buffer is allocated with new[] and the pointer is then overwritten with
// getcwd's return value (NULL on failure); no matching delete[] is visible in this excerpt -
// confirm the buffer is released and that a NULL result is handled.
436 char* cwdpath = new char[1024];
439 cwdpath=getcwd(cwdpath,size);
// Drop the leading '/' so that splitting on '/' does not produce an empty first component.
445 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
447 //break apart the current working directory
449 while (simpleCWD.find_first_of('/') != -1) {
450 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
451 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
454 //get last one // ex. ../../../filename = /user/work/desktop/filename
455 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index is the last entry of dirs that is still part of the result.
458 int index = dirs.size()-1;
// Consume the relative components from the right-hand end of path.
// NOTE(review): path[(pos-1)] is evaluated before pos is checked; when the match is at
// index 0 (path starts with "./") pos-1 is not a valid index - confirm this case is safe.
460 while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
461 if (path[(pos-1)] == '.') { //you want your parent directory ../
462 path = path.substr(0, pos-1);
464 if (index == 0) { break; }
465 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
466 path = path.substr(0, pos);
467 }else if (pos == 1) { break;
468 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
// Prepend the remaining working-directory components, innermost first.
471 for (int i = index; i >= 0; i--) {
472 newFileName = dirs[i] + "/" + newFileName;
475 newFileName = "/" + newFileName;
// Backslash variant of the same algorithm (the #else that separates the two branches is not
// visible in this excerpt).
479 if (path.rfind(".\\") == -1) { return fileName; } //already complete name
480 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
// NOTE(review): getcwd(NULL, 0) hands back a buffer allocated by the library; no free() is
// visible in this excerpt - confirm it is released.
482 char *cwdpath = NULL;
483 cwdpath = getcwd(NULL, 0); // or _getcwd
484 if ( cwdpath != NULL) { cwd = cwdpath; }
487 //break apart the current working directory
489 while (cwd.find_first_of('\\') != -1) {
490 string dir = cwd.substr(0,cwd.find_first_of('\\'));
491 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
496 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
498 int index = dirs.size()-1;
500 while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
501 if (path[(pos-1)] == '.') { //you want your parent directory ../
502 path = path.substr(0, pos-1);
504 if (index == 0) { break; }
505 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
506 path = path.substr(0, pos);
507 }else if (pos == 1) { break;
508 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
511 for (int i = index; i >= 0; i--) {
512 newFileName = dirs[i] + "\\" + newFileName;
// Any std::exception is reported on cout together with the function name.
520 catch(exception& e) {
521 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function getFullPathName. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
525 /***********************************************************************/
// Overload of openInputFile with an extra string argument m: resolves fileName through
// getFullPathName and opens it on fileHandle. No error message is visible in this overload;
// renameFile calls it with m == "" and treats a result of 0 as "the file could be opened".
// NOTE(review): how m is used is not visible in this excerpt.
527 inline int openInputFile(string fileName, ifstream& fileHandle, string m){
530 string completeFileName = getFullPathName(fileName);
532 fileHandle.open(completeFileName.c_str());
536 //check for blank file
541 /***********************************************************************/
// Resolves fileName through getFullPathName and opens it for reading on fileHandle.
// Problems are reported on cout; a blank file makes the function return 1.
// The "Could not open" line is presumably printed when the open failed (the test is not shown).
543 inline int openInputFile(string fileName, ifstream& fileHandle){
546 string completeFileName = getFullPathName(fileName);
548 fileHandle.open(completeFileName.c_str());
550 cout << "Error: Could not open " << completeFileName << endl;
554 //check for blank file
// NOTE(review): eof() only becomes true after a read attempt; the read that precedes this test
// is not visible here - confirm one is performed.
556 if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; return 1; }
562 /***********************************************************************/
// Renames oldName to newName, removing an already existing newName first.
// The first branch shells out to `rm` / `mv`; the other one uses remove() and rename()
// (the #else separating the two branches is not visible in this excerpt).
564 inline int renameFile(string oldName, string newName){
// Probe for an existing target: 0 means newName could be opened.
567 int exist = openInputFile(newName, inTest, "");
569 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
570 if (exist == 0) { //you could open it so you want to delete it
// NOTE(review): the file names are concatenated unquoted into shell commands, so names with
// spaces or shell metacharacters are split or interpreted by the shell, and the result of
// system() is not checked in the visible lines - consider remove()/rename() here as well.
572 string command = "rm " + newName;
573 system(command.c_str());
576 string command = "mv " + oldName + " " + newName;
577 system(command.c_str());
// The result of remove() is ignored; it fails harmlessly when newName does not exist.
579 remove(newName.c_str());
580 int renameOk = rename(oldName.c_str(), newName.c_str());
585 /***********************************************************************/
// Resolves fileName through getFullPathName and opens it for writing on fileHandle with
// ios::trunc, discarding any existing content.
// The error line is written to cout (presumably when the open failed; the test is not shown).
587 inline int openOutputFile(string fileName, ofstream& fileHandle){
589 string completeFileName = getFullPathName(fileName);
591 fileHandle.open(completeFileName.c_str(), ios::trunc);
593 cout << "Error: Could not open " << completeFileName << endl;
602 /***********************************************************************/
// Counts the '>' characters between the current read position of file and its end.
// Every '>' is counted wherever it occurs, and the scan consumes the stream up to its end.
604 inline int getNumSeqs(ifstream& file){
606 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
611 /***********************************************************************/
// Linear search: returns true as soon as an element of group equals member.
// group is passed by value, so the vector is copied on every call.
613 inline bool inVector(string member, vector<string> group){
615 for (int i = 0; i < group.size(); i++) {
616 if (group[i] == member) { return true; }
621 /***********************************************************************/
623 //This function parses the estimator options and puts them in a vector
// Splits estim at every '-' and appends the pieces to container in order.
// estim is taken by reference and is consumed: each pass removes the leading piece from it.
624 inline void splitAtDash(string& estim, vector<string>& container) {
628 while (estim.find_first_of('-') != -1) {
629 individual = estim.substr(0,estim.find_first_of('-'));
// NOTE(review): inside the loop a '-' was found, so its index + 1 can never exceed length();
// this test is always true and does not detect a trailing dash.
630 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
631 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
632 container.push_back(individual);
// Store what is left once no '-' remains.
636 container.push_back(estim);
638 catch(exception& e) {
639 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
644 /***********************************************************************/
645 //This function parses the label options and puts them in a set
// Splits estim at every '-' and inserts the pieces into container; being a set, duplicates
// collapse and the original order is not kept.
// estim is taken by reference and is consumed: each pass removes the leading piece from it.
646 inline void splitAtDash(string& estim, set<string>& container) {
650 while (estim.find_first_of('-') != -1) {
651 individual = estim.substr(0,estim.find_first_of('-'));
// NOTE(review): inside the loop a '-' was found, so its index + 1 can never exceed length();
// this test is always true and does not detect a trailing dash.
652 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
653 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
654 container.insert(individual);
// Store what is left once no '-' remains.
658 container.insert(estim);
660 catch(exception& e) {
661 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
665 /***********************************************************************/
666 //This function parses the line options and puts them in a set
// Splits estim at every '-' and inserts each piece, converted with convert(), into container.
// estim is taken by reference and is consumed: each pass removes the leading piece from it.
// convert() throws BadConversion for a piece that is not a number; being derived from
// runtime_error it is matched by the catch(exception&) handler below.
667 inline void splitAtDash(string& estim, set<int>& container) {
672 while (estim.find_first_of('-') != -1) {
673 individual = estim.substr(0,estim.find_first_of('-'));
// NOTE(review): inside the loop a '-' was found, so its index + 1 can never exceed length();
// this test is always true and does not detect a trailing dash.
674 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
675 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
676 convert(individual, lineNum); //convert the string to int
677 container.insert(lineNum);
// Convert and store what is left once no '-' remains.
681 convert(estim, lineNum); //convert the string to int
682 container.insert(lineNum);
684 catch(exception& e) {
685 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
689 /***********************************************************************/
690 //This function parses a string and puts the pieces in a vector
// Splits estim at every ',' and appends the pieces to container in order.
// estim is taken by reference and is consumed: each pass removes the leading piece from it.
691 inline void splitAtComma(string& estim, vector<string>& container) {
695 while (estim.find_first_of(',') != -1) {
696 individual = estim.substr(0,estim.find_first_of(','));
// NOTE(review): inside the loop a ',' was found, so its index + 1 can never exceed length();
// this test is always true and does not detect a trailing comma.
697 if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
698 estim = estim.substr(estim.find_first_of(',')+1, estim.length());
699 container.push_back(individual);
// Store what is left once no ',' remains.
703 container.push_back(estim);
705 catch(exception& e) {
706 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
710 /***********************************************************************/
712 //This function splits up the various option parameters
// Peels the first comma-separated item off suffix: prefix receives the text before the first
// ',' and suffix is advanced past that comma, with leading spaces removed.
// Both arguments are references; the previous content of prefix is overwritten.
713 inline void splitAtComma(string& prefix, string& suffix){
715 prefix = suffix.substr(0,suffix.find_first_of(','));
// +2 requires at least one character after the comma.
// NOTE(review): when suffix holds no ',' at all, find_first_of returns npos and npos+2 wraps
// around to a small value, so this test can still pass - confirm the no-comma case.
716 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
717 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
// Strip leading spaces. NOTE(review): at(0) throws std::out_of_range once suffix becomes
// empty (remainder made of spaces only); that ends up in the catch block below.
719 while(suffix.at(0) == ' ')
720 suffix = suffix.substr(1, suffix.length());
724 catch(exception& e) {
725 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
729 /***********************************************************************/
731 //This function separates the key value from the option value i.e. dist=96_...
// Splits a "key=value" string held in `value` at its first '=': key receives the text before
// the '=' and value is replaced by the text after it. Both arguments are references.
// What happens when value contains no '=' is not visible in this excerpt.
732 inline void splitAtEquals(string& key, string& value){
734 if(value.find_first_of('=') != -1){
735 key = value.substr(0,value.find_first_of('='));
// An '=' was found above, so this test is always true here.
736 if ((value.find_first_of('=')+1) <= value.length()) {
737 value = value.substr(value.find_first_of('=')+1, value.length());
744 catch(exception& e) {
745 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtEquals. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
749 /**************************************************************************************************/
// Linear search: returns true as soon as an element of Groups equals groupname.
// Groups is passed by value, so the vector is copied on every call.
751 inline bool inUsersGroups(string groupname, vector<string> Groups) {
753 for (int i = 0; i < Groups.size(); i++) {
754 if (groupname == Groups[i]) { return true; }
758 catch(exception& e) {
759 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
763 /**************************************************************************************************/
764 //returns true if any of the strings in first vector are in second vector
// Checks each entry of groupnames with the single-name overload and returns true on the first
// one that occurs in Groups. Both vectors are passed by value, and Groups is copied again for
// every inner call.
765 inline bool inUsersGroups(vector<string> groupnames, vector<string> Groups) {
768 for (int i = 0; i < groupnames.size(); i++) {
769 if (inUsersGroups(groupnames[i], Groups)) { return true; }
773 catch(exception& e) {
774 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
778 /***********************************************************************/
779 //this function determines if the user has given us labels that are smaller than the given label.
780 //if so then it returns true so that the calling function can run the previous valid distance.
781 //it's a "smart" distance function. It also checks for invalid labels.
// label      - label currently being processed; "unique" makes the function return false at once,
//              any other value is converted to a float with convert().
// userLabels - labels requested by the user; entries that are not valid labels, and "unique"
//              when it is smaller than label, are erased from this set.
// errorOff   - diagnostics are written to cout only when this is the empty string.
// The result is presumably the `smaller` flag; the statements that set and return it are not
// visible in this excerpt.
782 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
785 set<string>::iterator it;
786 vector<float> orderFloat;
787 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
788 map<string, float>::iterator it2;
790 bool smaller = false;
792 //unique is the smallest line
793 if (label == "unique") { return false; }
794 else { convert(label, labelFloat); }
796 //go through users set and make them floats
797 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
800 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
802 orderFloat.push_back(temp);
804 }else if (*it == "unique") {
// "unique" is represented by the sentinel -1 so that it sorts before every numeric label.
805 orderFloat.push_back(-1.0);
806 userMap["unique"] = -1.0;
808 if (errorOff == "") { cout << *it << " is not a valid label." << endl; }
// NOTE(review): this erases the element `it` refers to while `it` is still the loop iterator;
// set::erase invalidates iterators to the erased element, so the following ++it is undefined
// behavior - confirm, and consider advancing the iterator before erasing.
809 userLabels.erase(*it);
815 sort(orderFloat.begin(), orderFloat.end());
817 /*************************************************/
818 //is this label bigger than any of the users labels
819 /*************************************************/
821 //loop through order until you find a label greater than label
822 for (int i = 0; i < orderFloat.size(); i++) {
823 if (orderFloat[i] < labelFloat) {
// -1 is the sentinel stored for "unique" above.
825 if (orderFloat[i] == -1) {
826 if (errorOff == "") { cout << "Your file does not include the label unique." << endl; }
827 userLabels.erase("unique");
830 if (errorOff == "") { cout << "Your file does not include the label " << endl; }
// Look up the user's original spelling of this float value in userMap.
832 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
833 if (it2->second == orderFloat[i]) {
835 //remove small labels
840 if (errorOff == "") {cout << s << ". I will use the next smallest distance. " << endl; }
842 //since they are sorted once you find a bigger one stop looking
849 catch(exception& e) {
850 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function anyLabelsToProcess. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
855 /**************************************************************************************************/
// Appends the content of the file named temp to the end of the file named filename,
// copying one character at a time.
// The results of the two open calls are not checked in the visible lines.
856 inline void appendFiles(string temp, string filename) {
861 //open output file in append mode
862 openOutputFileAppend(filename, output);
863 openInputFile(temp, input);
// NOTE(review): the loop condition is the character itself, so besides end of file the copy
// also stops at the first NUL byte (c == 0) in the input - confirm inputs are always text.
865 while(char c = input.get()){
866 if(input.eof()) { break; }
867 else { output << c; }
873 catch(exception& e) {
874 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function appendFiles. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
879 /**************************************************************************************************/
// Sorts a three-column distance file (name, name, distance). The output file name is
// getRootName(distFile) + "sorted.dist". The first branch delegates to the system `sort`
// command; the fallback moves the distance into the first column, sorts with the Windows
// `sort` command and then restores the original column order.
880 inline string sortFile(string distFile){
882 string outfile = getRootName(distFile) + "sorted.dist";
884 //if you can, use the unix sort since its been optimized for years
885 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// NOTE(review): distFile and outfile are concatenated unquoted into a shell command, so names
// with spaces or shell metacharacters are split or interpreted by the shell; the result of
// system() is not checked in the visible lines.
886 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
887 system(command.c_str());
888 #else //you are stuck with my best attempt...
889 //windows sort does not have a way to specify a column, only a character in the line
890 //since we cannot assume that the distance will always be at the same character location on each line
891 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
893 //read in file line by line and put distance first
894 string tempDistFile = distFile + ".temp";
897 openInputFile(distFile, input);
898 openOutputFile(tempDistFile, output);
900 string firstName, secondName;
903 input >> firstName >> secondName >> dist;
904 output << dist << '\t' << firstName << '\t' << secondName << endl;
911 //sort using windows sort
// NOTE(review): same unquoted-name concern as in the branch above.
912 string tempOutfile = outfile + ".temp";
913 string command = "sort " + tempDistFile + " /O " + tempOutfile;
914 system(command.c_str());
916 //read in sorted file and put distance at end again
918 openInputFile(tempOutfile, input2);
919 openOutputFile(outfile, output);
922 input2 >> dist >> firstName >> secondName;
923 output << firstName << '\t' << secondName << '\t' << dist << endl;
// Delete the two intermediate files.
930 remove(tempDistFile.c_str());
931 remove(tempOutfile.c_str());
936 catch(exception& e) {
937 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function sortfile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
941 /**************************************************************************************************/