10 * Created by Sarah Westcott on 2/19/09.
11 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
15 /* This file contains all the standard includes we use in the project as well as some common utilities. */
53 /***********************************************************************/
55 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
58 #include <sys/resource.h>
62 #include <readline/readline.h>
63 #include <readline/history.h>
67 #include <conio.h> //allows unbuffered screen capture from stdin
68 #include <direct.h> //get cwd
// Function-like math macros that shadow the library functions of the same name.
// NOTE(review): the argument is pasted as `(double) x` without its own parentheses, so for an
// argument such as a/b the cast binds to a only.
76 #define exp(x) (exp((double) x))
77 #define sqrt(x) (sqrt((double) x))
78 #define log10(x) (log10((double) x))
// log2 is computed as a ratio of base-10 logarithms.
79 #define log2(x) (log10(x)/log10(2))
// A value compares unequal to itself only when it is NaN. NOTE(review): x is expanded twice.
80 #define isnan(x) ((x) != (x))
// True when the magnitude of x equals the double infinity value.
81 #define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
// NOTE(review): despite the name, ull aliases unsigned long, not unsigned long long.
83 typedef unsigned long ull;
// Initializes lvalue and rvalue from lv and rv, and the left/right IntNode pointers from l and r.
93 IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {};
103 /************************************************************/
107 int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
// Initializes numSeq, parent and smallChild from num, par and kid respectively.
108 clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
110 /************************************************************/
// Initializes seq1 and seq2 from s1 and s2, and dist from d.
116 seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
119 //********************************************************************************************************************
120 //sorts lowest to highest
// Strict less-than comparison of two seqDist records by their dist member.
// Returns true when left.dist < right.dist. Both records are taken by value.
121 inline bool compareSequenceDistance(seqDist left, seqDist right){
122 return (left.dist < right.dist);
124 /***********************************************************************/
126 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
127 // works for now, but there should be a way to do it without killing the whole program
// Exception type thrown by convert() when a string cannot be parsed.
// Derives from runtime_error, so the message passed in is available through what().
129 class BadConversion : public runtime_error {
// Forwards the message s to the runtime_error base.
131 BadConversion(const string& s) : runtime_error(s){ }
134 //**********************************************************************************************************************
// Parses s into x by stream extraction (i >> x).
// Throws BadConversion(s) when extraction fails, or, when failIfLeftoverChars is true,
// when a further character can still be read from the stream after the parsed value.
137 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
140 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
141 throw BadConversion(s);
144 //**********************************************************************************************************************
// Non-throwing counterpart of convert(): applies the same extraction / leftover-character test
// and reports the outcome as a bool.
// NOTE(review): anyLabelsToProcess treats a true return as "s is convertible" — confirm against the body.
147 inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
150 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
157 //**********************************************************************************************************************
// Applies the same extraction / leftover-character test as convert() and reports the outcome as a bool.
// The failure path prints a message to stdout saying the value could not be converted into an integer.
160 inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
163 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
165 cout << "unable to be converted into an integer.\n" << endl;
171 //**********************************************************************************************************************
174 string toString(const T&x){
180 //**********************************************************************************************************************
183 string toHex(const T&x){
190 //**********************************************************************************************************************
// Formats x into a string using fixed (non-scientific) notation.
// NOTE(review): i is presumably the number of digits after the decimal point — confirm against the body.
193 string toString(const T&x, int i){
197 output << fixed << x;
201 /***********************************************************************/
// Opens fileName on fileHandle in append mode (ios::app).
// The failure path prints "Error: Could not open <fileName>" to stdout.
// NOTE(review): return-code convention presumably matches openInputFile (0 on success) — confirm.
202 inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
204 fileHandle.open(fileName.c_str(), ios::app);
206 cout << "Error: Could not open " << fileName << endl;
214 /***********************************************************************/
// Consumes characters from f for as long as they are whitespace according to isspace().
216 inline void gobble(istream& f){
// The loop body is intentionally empty: the read happens inside the condition.
219 while(isspace(d=f.get())) {;}
223 /***********************************************************************/
// Reads fileHandle one character at a time until end of file or a line terminator is reached.
// '\n', '\r' and '\f' are all treated as line terminators.
// Any std::exception raised while reading is caught and reported on stdout.
225 inline string getline(ifstream& fileHandle) {
230 while (!fileHandle.eof()) {
232 char c = fileHandle.get();
234 //are you at the end of the line
235 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
242 catch(exception& e) {
243 cout << "Error in mothur function getline" << endl;
248 /***********************************************************************/
// Interprets a user-supplied flag string.
// Returns true only for exactly "TRUE", "T", "true" or "t"; every other string
// (including mixed case such as "True") returns false.
250 inline bool isTrue(string f){
252 if ((f == "TRUE") || (f == "T") || (f == "true") || (f == "t")) { return true; }
253 else { return false; }
256 /***********************************************************************/
// Rounds dist to the nearest multiple of 1/precision.
// The value is scaled by precision, 0.5 is added, the result is truncated to int and scaled back.
// NOTE(review): int truncation rounds toward zero, so negative inputs are not rounded half-up.
258 inline float roundDist(float dist, int precision){
260 return int(dist * precision + 0.5)/float(precision);
263 /***********************************************************************/
// Rounds dist up to the next multiple of 1/precision: scales by precision, applies ceil, scales back.
265 inline float ceilDist(float dist, int precision){
267 return int(ceil(dist * precision))/float(precision);
271 /***********************************************************************/
// Derives a count from the string names by scanning it character by character.
// NOTE(review): presumably counts comma-separated entries — confirm against the loop body.
273 inline int getNumNames(string names){
// The index is a signed int compared against the unsigned names.size().
279 for(int i=0;i<names.size();i++){
290 /**************************************************************************************************/
// Builds and returns a (maxOrder+1) x (maxOrder+1) table of doubles.
// For rows i >= 2 the entries are filled with the recurrence
// binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j]; entries with j > i are set to 0.
292 inline vector<vector<double> > binomial(int maxOrder){
294 vector<vector<double> > binomial(maxOrder+1);
// Give every row maxOrder+1 columns so the table is square.
296 for(int i=0;i<=maxOrder;i++){
297 binomial[i].resize(maxOrder+1);
306 for(int i=2;i<=maxOrder;i++){
310 for(int i=2;i<=maxOrder;i++){
311 for(int j=1;j<=maxOrder;j++){
// NOTE(review): this `if` is not chained to the if/else below, so when i == j the else branch
// also runs and overwrites the 1 with binomial[i-1][j-1]+binomial[i-1][j].
312 if(i==j){ binomial[i][j]=1; }
313 if(j>i) { binomial[i][j]=0; }
314 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
321 /***********************************************************************/
// Returns longName up to and including its last '.', e.g. "abc.def.txt" -> "abc.def.".
// When longName contains no '.', it is returned unchanged.
// The position is kept as size_type (not narrowed to int) and the string is searched only once.
inline std::string getRootName(const std::string& longName){
	const std::string::size_type lastDot = longName.find_last_of('.');

	if (lastDot == std::string::npos) { return longName; }

	return longName.substr(0, lastDot + 1);
}
334 /***********************************************************************/
// Returns the part of longName that follows its last path separator ('/' or '\\'),
// e.g. "/usr/local/file.txt" -> "file.txt".
// When longName contains no separator, it is returned unchanged.
inline std::string getSimpleName(const std::string& longName){
	const std::string::size_type lastSep = longName.find_last_of("/\\");

	if (lastSep == std::string::npos) { return longName; }

	return longName.substr(lastSep + 1);
}
355 /***********************************************************************/
// Iterates i from 1 through num to build the result.
// NOTE(review): the result is returned as int, so with a 32-bit int it cannot represent
// factorials above 12! — confirm the expected input range.
357 inline int factorial(int num){
360 for (int i = 1; i <= num; i++) {
366 /**************************************************************************************************
368 double min(double x, double y)
374 /***********************************************************************/
// Returns longName up to and including its last path separator ('/' or '\\'),
// e.g. "/a/b/c.txt" -> "/a/b/".
// When longName contains no separator, it is returned unchanged.
// The position is kept as size_type (not narrowed to int) and the string is searched only once.
inline std::string getPathName(const std::string& longName){
	const std::string::size_type lastSep = longName.find_last_of("/\\");

	if (lastSep == std::string::npos) { return longName; }

	return longName.substr(0, lastSep + 1);
}
387 /***********************************************************************/
// Extracts the leading path portion of longName.
// The path is everything up to and including the last '~', '/' or '\\' character.
// NOTE(review): getFullPathName treats an empty result as "no path" — confirm the default value.
389 inline string hasPath(string longName){
394 found=longName.find_last_of("~/\\");
396 if(found != longName.npos){
397 path = longName.substr(0, found+1);
403 /***********************************************************************/
// Returns the extension of longName including the leading '.', e.g. "a.b.txt" -> ".txt".
// When longName contains no '.', the whole of longName is returned unchanged.
// The position is kept as size_type (not narrowed to int) and the string is searched only once.
inline std::string getExtension(const std::string& longName){
	const std::string::size_type lastDot = longName.find_last_of('.');

	if (lastDot == std::string::npos) { return longName; }

	return longName.substr(lastDot);
}
416 /***********************************************************************/
// Opens fileName and reports whether it is blank.
// The failure path prints "Error: Could not open <fileName>" to stdout.
// Returns true (after closing the file) when the stream is already at end of file.
417 inline bool isBlank(string fileName){
420 fileHandle.open(fileName.c_str());
422 cout << "Error: Could not open " << fileName << endl;
425 //check for blank file
427 if (fileHandle.eof()) { fileHandle.close(); return true; }
431 /***********************************************************************/
// Expands a possibly relative file name into a complete path.
// - A name with no path component (hasPath returns "") is returned unchanged.
// - A path containing '~' is rebuilt from the home directory environment variable.
// - A path containing "./" or "../" (".\\" / "..\\" on Windows) is resolved against the current
//   working directory, stepping up one directory per "../".
// - A path with none of these is returned unchanged as already complete.
// Any std::exception is caught and reported on stdout.
433 inline string getFullPathName(string fileName){
436 string path = hasPath(fileName);
440 if (path == "") { return fileName; } //its a simple name
441 else { //we need to complete the pathname
442 // ex. ../../../filename
443 // cwd = /user/work/desktop
446 //get current working directory
447 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
449 if (path.find("~") != -1) { //go to home directory
// NOTE(review): getenv returns NULL when HOME is unset; constructing a string from NULL is undefined.
450 string homeDir = getenv ("HOME");
451 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
454 if (path.rfind("./") == -1) { return fileName; } //already complete name
455 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
// NOTE(review): no matching delete[] for this buffer is apparent — confirm it is released.
457 char* cwdpath = new char[1024];
460 cwdpath=getcwd(cwdpath,size);
// Drop the leading '/' so the split below does not produce an empty first entry.
466 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
468 //break apart the current working directory
470 while (simpleCWD.find_first_of('/') != -1) {
471 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
472 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
475 //get last one // ex. ../../../filename = /user/work/desktop/filename
476 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index tracks the deepest directory of the working directory that is still part of the result.
479 int index = dirs.size()-1;
481 while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
482 if (pos == 0) { break; //you are at the end
483 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
484 path = path.substr(0, pos-1);
486 if (index == 0) { break; }
487 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
488 path = path.substr(0, pos);
489 }else if (pos == 1) { break; //you are at the end
490 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
// Prepend the remaining working-directory components, deepest first.
493 for (int i = index; i >= 0; i--) {
494 newFileName = dirs[i] + "/" + newFileName;
497 newFileName = "/" + newFileName;
501 if (path.find("~") != -1) { //go to home directory
// NOTE(review): getenv returns NULL when HOMEPATH is unset; constructing a string from NULL is undefined.
502 string homeDir = getenv ("HOMEPATH");
503 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
506 if (path.rfind(".\\") == -1) { return fileName; } //already complete name
507 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
509 char *cwdpath = NULL;
510 cwdpath = getcwd(NULL, 0); // or _getcwd
511 if ( cwdpath != NULL) { cwd = cwdpath; }
514 //break apart the current working directory
516 while (cwd.find_first_of('\\') != -1) {
517 string dir = cwd.substr(0,cwd.find_first_of('\\'));
518 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
523 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
525 int index = dirs.size()-1;
527 while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
528 if (pos == 0) { break; //you are at the end
529 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
530 path = path.substr(0, pos-1);
532 if (index == 0) { break; }
533 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
534 path = path.substr(0, pos);
535 }else if (pos == 1) { break; //you are at the end
536 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
539 for (int i = index; i >= 0; i--) {
540 newFileName = dirs[i] + "\\" + newFileName;
549 catch(exception& e) {
550 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function getFullPathName. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
554 /***********************************************************************/
// Opens fileName for reading on fileHandle after expanding it with getFullPathName.
// NOTE(review): m is presumably a marker that suppresses the error message (callers pass "" or
// "no error"); renameFile and appendFiles treat a return of 0 as "opened" — confirm both.
556 inline int openInputFile(string fileName, ifstream& fileHandle, string m){
559 string completeFileName = getFullPathName(fileName);
561 fileHandle.open(completeFileName.c_str());
565 //check for blank file
570 /***********************************************************************/
// Opens fileName for reading on fileHandle after expanding it with getFullPathName.
// The failure path prints "Error: Could not open <path>" to stdout.
// A file that is already at end of file after opening is reported as blank on stdout.
572 inline int openInputFile(string fileName, ifstream& fileHandle){
575 string completeFileName = getFullPathName(fileName);
577 fileHandle.open(completeFileName.c_str());
579 cout << "Error: Could not open " << completeFileName << endl;
583 //check for blank file
585 if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; }
591 /***********************************************************************/
// Renames oldName to newName, replacing any existing newName.
// On Apple/Linux this is done with the shell commands "rm" and "mv"; otherwise with remove() and rename().
// NOTE(review): the shell commands are built by plain concatenation, so names containing spaces
// or shell metacharacters are not handled safely.
593 inline int renameFile(string oldName, string newName){
// Probe whether newName already exists by trying to open it.
596 int exist = openInputFile(newName, inTest, "");
598 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
599 if (exist == 0) { //you could open it so you want to delete it
601 string command = "rm " + newName;
602 system(command.c_str());
605 string command = "mv " + oldName + " " + newName;
606 system(command.c_str());
608 remove(newName.c_str());
609 int renameOk = rename(oldName.c_str(), newName.c_str());
614 /***********************************************************************/
// Opens fileName for writing on fileHandle after expanding it with getFullPathName.
// ios::trunc discards any existing contents.
// The failure path prints "Error: Could not open <path>" to stdout.
616 inline int openOutputFile(string fileName, ofstream& fileHandle){
618 string completeFileName = getFullPathName(fileName);
620 fileHandle.open(completeFileName.c_str(), ios::trunc);
622 cout << "Error: Could not open " << completeFileName << endl;
631 /***********************************************************************/
// Counts the '>' characters from the current position of file to its end.
// Every '>' is counted, not only those at the start of a line, and the stream is consumed.
633 inline int getNumSeqs(ifstream& file){
635 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
640 /***********************************************************************/
// Reads file line by line and increments numSeqs for each non-empty line starting with '>'.
641 inline void getNumSeqs(ifstream& file, int& numSeqs){
646 input = getline(file);
647 if (input.length() != 0) {
648 if(input[0] == '>'){ numSeqs++; }
653 /***********************************************************************/
// Reports whether member is equal to any element of group.
// Returns true on the first match and false when group is empty or holds no match.
// Both arguments are taken by const reference so the vector is not copied on every call.
inline bool inVector(const std::string& member, const std::vector<std::string>& group){
	return std::find(group.begin(), group.end(), member) != group.end();
}
663 /***********************************************************************/
665 //This function parses the estimator options and puts them in a vector
// Splits estim at each '-' and appends the pieces, in order, to container.
// estim is consumed in place: after the call it holds only the final piece.
// Any std::exception is caught and reported on stdout.
666 inline void splitAtDash(string& estim, vector<string>& container) {
670 while (estim.find_first_of('-') != -1) {
671 individual = estim.substr(0,estim.find_first_of('-'));
// NOTE(review): a found position is always < length(), so position+1 <= length() always holds
// and this guard never rejects a trailing dash.
672 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
673 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
674 container.push_back(individual);
// Whatever remains after the last dash is the final piece.
678 container.push_back(estim);
680 catch(exception& e) {
681 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
686 /***********************************************************************/
687 //This function parses the label options and puts them in a set
// Splits estim at each '-' and inserts the pieces into the set container (duplicates collapse).
// estim is consumed in place: after the call it holds only the final piece.
// Any std::exception is caught and reported on stdout.
688 inline void splitAtDash(string& estim, set<string>& container) {
692 while (estim.find_first_of('-') != -1) {
693 individual = estim.substr(0,estim.find_first_of('-'));
// NOTE(review): a found position is always < length(), so this guard is always true.
694 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
695 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
696 container.insert(individual);
// Whatever remains after the last dash is the final piece.
700 container.insert(estim);
702 catch(exception& e) {
703 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
707 /***********************************************************************/
708 //This function parses the line options and puts them in a set
// Splits estim at each '-', converts every piece to a number with convert(), and inserts it into container.
// estim is consumed in place: after the call it holds only the final piece.
// A piece that cannot be converted makes convert() throw BadConversion, which the handler below catches.
709 inline void splitAtDash(string& estim, set<int>& container) {
714 while (estim.find_first_of('-') != -1) {
715 individual = estim.substr(0,estim.find_first_of('-'));
// NOTE(review): a found position is always < length(), so this guard is always true.
716 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
717 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
718 convert(individual, lineNum); //convert the string to int
719 container.insert(lineNum);
// Whatever remains after the last dash is the final piece.
723 convert(estim, lineNum); //convert the string to int
724 container.insert(lineNum);
726 catch(exception& e) {
727 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
731 /***********************************************************************/
732 //This function parses a string and puts the pieces in a vector
// Splits estim at each ',' and appends the pieces, in order, to container.
// estim is consumed in place: after the call it holds only the final piece.
// Any std::exception is caught and reported on stdout.
733 inline void splitAtComma(string& estim, vector<string>& container) {
737 while (estim.find_first_of(',') != -1) {
738 individual = estim.substr(0,estim.find_first_of(','));
// NOTE(review): a found position is always < length(), so this guard is always true.
739 if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
740 estim = estim.substr(estim.find_first_of(',')+1, estim.length());
741 container.push_back(individual);
// Whatever remains after the last comma is the final piece.
745 container.push_back(estim);
747 catch(exception& e) {
748 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
752 /***********************************************************************/
754 //This function splits up the various option parameters
// Peels the first comma-separated item off suffix.
// prefix receives the text before the first ','; suffix is advanced past the comma and any spaces after it.
// Any std::exception is caught and reported on stdout.
755 inline void splitAtComma(string& prefix, string& suffix){
757 prefix = suffix.substr(0,suffix.find_first_of(','));
// NOTE(review): when suffix has no comma, find_first_of returns npos and npos+2 wraps to 1, so this
// guard passes for any non-empty suffix and the substr below leaves suffix unchanged.
758 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
759 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
// at(0) throws out_of_range if stripping spaces empties suffix; the handler below catches it.
761 while(suffix.at(0) == ' ')
762 suffix = suffix.substr(1, suffix.length());
766 catch(exception& e) {
767 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
771 /***********************************************************************/
773 //This function separates the key value from the option value i.e. dist=96_...
// Splits a "key=value" string held in value.
// When value contains '=', key receives the text before the first '=' and value is replaced by the text after it.
// Any std::exception is caught and reported on stdout.
774 inline void splitAtEquals(string& key, string& value){
776 if(value.find_first_of('=') != -1){
777 key = value.substr(0,value.find_first_of('='));
// NOTE(review): a found position is always < length(), so this guard is always true.
778 if ((value.find_first_of('=')+1) <= value.length()) {
779 value = value.substr(value.find_first_of('=')+1, value.length());
786 catch(exception& e) {
787 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtEquals. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
791 /**************************************************************************************************/
// Returns true when groupname equals any entry of Groups.
// Groups is taken by value, so the vector is copied on every call.
// Any std::exception is caught and reported on stdout.
793 inline bool inUsersGroups(string groupname, vector<string> Groups) {
795 for (int i = 0; i < Groups.size(); i++) {
796 if (groupname == Groups[i]) { return true; }
800 catch(exception& e) {
801 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
805 /**************************************************************************************************/
806 //returns true if any of the strings in first vector are in second vector
// Returns true as soon as any entry of groupnames is found in Groups (via the single-name overload).
// Both vectors are taken by value, and Groups is copied again for each inner call.
// Any std::exception is caught and reported on stdout.
807 inline bool inUsersGroups(vector<string> groupnames, vector<string> Groups) {
810 for (int i = 0; i < groupnames.size(); i++) {
811 if (inUsersGroups(groupnames[i], Groups)) { return true; }
815 catch(exception& e) {
816 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
820 /***********************************************************************/
821 //this function determines if the user has given us labels that are smaller than the given label.
822 //if so then it returns true so that the calling function can run the previous valid distance.
823 //it's a "smart" distance function. It also checks for invalid labels.
// Compares label against the labels the user asked for (userLabels), treating labels as floats
// and "unique" as -1.0 so that it sorts below every numeric label.
// Invalid user labels, and user labels smaller than label, are removed from userLabels.
// Messages go to stdout only when errorOff is the empty string.
// Returns false immediately when label is "unique".
824 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
827 set<string>::iterator it;
828 vector<float> orderFloat;
829 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
830 map<string, float>::iterator it2;
832 bool smaller = false;
834 //unique is the smallest line
835 if (label == "unique") { return false; }
// Test first so that convert() is only called on input known to parse.
837 if (convertTestFloat(label, labelFloat)) {
838 convert(label, labelFloat);
839 }else { //cant convert
844 //go through users set and make them floats
845 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
848 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
850 orderFloat.push_back(temp);
852 }else if (*it == "unique") {
853 orderFloat.push_back(-1.0);
854 userMap["unique"] = -1.0;
856 if (errorOff == "") { cout << *it << " is not a valid label." << endl; }
// NOTE(review): erasing the element `it` refers to invalidates `it` while the loop is still using it.
857 userLabels.erase(*it);
863 sort(orderFloat.begin(), orderFloat.end());
865 /*************************************************/
866 //is this label bigger than any of the users labels
867 /*************************************************/
869 //loop through order until you find a label greater than label
870 for (int i = 0; i < orderFloat.size(); i++) {
871 if (orderFloat[i] < labelFloat) {
// -1 is the stand-in value stored for "unique" above.
873 if (orderFloat[i] == -1) {
874 if (errorOff == "") { cout << "Your file does not include the label unique." << endl; }
875 userLabels.erase("unique");
878 if (errorOff == "") { cout << "Your file does not include the label " << endl; }
// Look up the user's original spelling of this float value in userMap.
880 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
881 if (it2->second == orderFloat[i]) {
883 //remove small labels
888 if (errorOff == "") {cout << s << ". I will use the next smallest distance. " << endl; }
890 //since they are sorted once you find a bigger one stop looking
897 catch(exception& e) {
898 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function anyLabelsToProcess. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
903 /**************************************************************************************************/
// Appends the contents of the file temp to the end of the file filename, one character at a time.
// Nothing is copied when temp cannot be opened.
// Any std::exception is caught and reported on stdout.
904 inline void appendFiles(string temp, string filename) {
909 //open output file in append mode
910 openOutputFileAppend(filename, output);
// "no error" is passed as the third argument; presumably it silences the open-failure message.
911 int ableToOpen = openInputFile(temp, input, "no error");
913 if (ableToOpen == 0) { //you opened it
// NOTE(review): the loop condition is the character itself, so copying also stops at a zero byte.
914 while(char c = input.get()){
915 if(input.eof()) { break; }
916 else { output << c; }
923 catch(exception& e) {
924 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function appendFiles. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
929 /**************************************************************************************************/
// Sorts the distance file distFile by its distance column.
// The output name is getRootName(distFile) + "sorted.dist".
// On Apple/Linux the system sort command is run on the third column; otherwise the distance is
// moved to the front of each line, the lines are sorted, and the distance is moved back.
// NOTE(review): file names are concatenated into shell commands without quoting.
// Any std::exception is caught and reported on stdout.
930 inline string sortFile(string distFile, string outputDir){
933 //if (outputDir == "") { outputDir += hasPath(distFile); }
934 string outfile = getRootName(distFile) + "sorted.dist";
937 //if you can, use the unix sort since its been optimized for years
938 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
939 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
940 system(command.c_str());
941 #else //you are stuck with my best attempt...
942 //windows sort does not have a way to specify a column, only a character in the line
943 //since we cannot assume that the distance will always be at the same character location on each line
944 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
946 //read in file line by file and put distance first
947 string tempDistFile = distFile + ".temp";
950 openInputFile(distFile, input);
951 openOutputFile(tempDistFile, output);
953 string firstName, secondName;
// Rewrite each "name name dist" record as "dist name name".
956 input >> firstName >> secondName >> dist;
957 output << dist << '\t' << firstName << '\t' << secondName << endl;
964 //sort using windows sort
965 string tempOutfile = outfile + ".temp";
966 string command = "sort " + tempDistFile + " /O " + tempOutfile;
967 system(command.c_str());
969 //read in sorted file and put distance at end again
971 openInputFile(tempOutfile, input2);
972 openOutputFile(outfile, output);
// Restore the original "name name dist" column order.
975 input2 >> dist >> firstName >> secondName;
976 output << firstName << '\t' << secondName << '\t' << dist << endl;
// Remove both intermediate files.
983 remove(tempDistFile.c_str());
984 remove(tempOutfile.c_str());
989 catch(exception& e) {
990 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function sortfile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
994 /**************************************************************************************************/
// Records a file offset for every line of filename that starts with '>'.
// num is set to the number of such lines; one further entry (size) is appended after them.
995 inline vector<unsigned long int> setFilePosFasta(string filename, int& num) {
997 vector<unsigned long int> positions;
999 openInputFile(filename, inFASTA);
1002 while(!inFASTA.eof()){
1003 input = getline(inFASTA); gobble(inFASTA);
1004 if (input.length() != 0) {
// The offset is derived from the position after the read minus the line length and one terminator.
// NOTE(review): gobble may already have consumed additional whitespace by then — confirm the arithmetic.
1005 if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
1010 num = positions.size();
1015 //get num bytes in file
1016 pFile = fopen (filename.c_str(),"rb");
1017 if (pFile==NULL) perror ("Error opening file");
1019 fseek (pFile, 0, SEEK_END);
// Starts from the last recorded offset.
1024 unsigned long int size = positions[(positions.size()-1)];
1026 openInputFile(filename, in);
1030 while(char c = in.get()){
1031 if(in.eof()) { break; }
1036 positions.push_back(size);
1040 /**************************************************************************************************/
// Records a file offset for every non-empty line of filename.
// num is set to the number of such lines; one further entry (size) is appended after them.
1041 inline vector<unsigned long int> setFilePosEachLine(string filename, int& num) {
1043 vector<unsigned long int> positions;
1045 openInputFile(filename, in);
// Remember where this line started in case tellg() fails after reading it.
1049 unsigned long int lastpos = in.tellg();
1050 input = getline(in); gobble(in);
1051 if (input.length() != 0) {
1052 unsigned long int pos = in.tellg();
// pos is unsigned, so a failed tellg() (-1) is detected through the converted comparison.
1053 if (pos != -1) { positions.push_back(pos - input.length() - 1); }
1054 else { positions.push_back(lastpos); }
1059 num = positions.size();
1062 unsigned long int size;
1064 //get num bytes in file
1065 pFile = fopen (filename.c_str(),"rb");
1066 if (pFile==NULL) perror ("Error opening file");
1068 fseek (pFile, 0, SEEK_END);
1073 positions.push_back(size);
1078 /**************************************************************************************************/