10 * Created by Sarah Westcott on 2/19/09.
11 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
15 /* This file contains all the standard includes we use in the project as well as some common utilities. */
53 /***********************************************************************/
55 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
60 #include <readline/readline.h>
61 #include <readline/history.h>
65 #include <conio.h> //allows unbuffered screen capture from stdin
66 #include <direct.h> //get cwd
74 #define exp(x) (exp((double) x))
75 #define sqrt(x) (sqrt((double) x))
76 #define log10(x) (log10((double) x))
77 #define log2(x) (log10(x)/log10(2))
78 #define isnan(x) ((x) != (x))
79 #define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
81 typedef unsigned long ull;
91 IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {};
101 /************************************************************/
105 int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
106 clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
108 /************************************************************/
114 seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
117 //********************************************************************************************************************
118 //sorts lowest to highest
119 inline bool compareSequenceDistance(seqDist left, seqDist right){
120 return (left.dist < right.dist);
122 /***********************************************************************/
124 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
125 // works for now, but there should be a way to do it without killing the whole program
127 class BadConversion : public runtime_error {
129 BadConversion(const string& s) : runtime_error(s){ }
132 //**********************************************************************************************************************
135 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
138 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
139 throw BadConversion(s);
142 //**********************************************************************************************************************
145 inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
148 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
155 //**********************************************************************************************************************
158 inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
161 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
163 cout << "unable to be converted into an integer.\n" << endl;
169 //**********************************************************************************************************************
172 string toString(const T&x){
178 //**********************************************************************************************************************
181 string toHex(const T&x){
188 //**********************************************************************************************************************
191 string toString(const T&x, int i){
195 output << fixed << x;
199 /***********************************************************************/
200 inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
202 fileHandle.open(fileName.c_str(), ios::app);
204 cout << "Error: Could not open " << fileName << endl;
212 /***********************************************************************/
214 inline void gobble(istream& f){
217 while(isspace(d=f.get())) {;}
221 /***********************************************************************/
223 inline string getline(ifstream& fileHandle) {
228 while (!fileHandle.eof()) {
230 char c = fileHandle.get();
232 //are you at the end of the line
233 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
240 catch(exception& e) {
241 cout << "Error in mothur function getline" << endl;
246 /***********************************************************************/
248 inline bool isTrue(string f){
250 if ((f == "TRUE") || (f == "T") || (f == "true") || (f == "t")) { return true; }
251 else { return false; }
254 /***********************************************************************/
256 inline float roundDist(float dist, int precision){
258 return int(dist * precision + 0.5)/float(precision);
261 /***********************************************************************/
263 inline float ceilDist(float dist, int precision){
265 return int(ceil(dist * precision))/float(precision);
269 /***********************************************************************/
271 inline int getNumNames(string names){
277 for(int i=0;i<names.size();i++){
288 /**************************************************************************************************/
290 inline vector<vector<double> > binomial(int maxOrder){
292 vector<vector<double> > binomial(maxOrder+1);
294 for(int i=0;i<=maxOrder;i++){
295 binomial[i].resize(maxOrder+1);
304 for(int i=2;i<=maxOrder;i++){
308 for(int i=2;i<=maxOrder;i++){
309 for(int j=1;j<=maxOrder;j++){
310 if(i==j){ binomial[i][j]=1; }
311 if(j>i) { binomial[i][j]=0; }
312 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
319 /***********************************************************************/
321 inline string getRootName(string longName){
323 string rootName = longName;
325 if(longName.find_last_of(".") != longName.npos){
326 int pos = longName.find_last_of('.')+1;
327 rootName = longName.substr(0, pos);
332 /***********************************************************************/
334 inline string getSimpleName(string longName){
336 string simpleName = longName;
339 found=longName.find_last_of("/\\");
341 if(found != longName.npos){
342 simpleName = longName.substr(found+1);
345 //if(longName.find_last_of("/") != longName.npos){
346 // int pos = longName.find_last_of('/')+1;
347 // simpleName = longName.substr(pos, longName.length());
353 /***********************************************************************/
355 inline int factorial(int num){
358 for (int i = 1; i <= num; i++) {
364 /**************************************************************************************************
366 double min(double x, double y)
372 /***********************************************************************/
374 inline string getPathName(string longName){
376 string rootPathName = longName;
378 if(longName.find_last_of("/\\") != longName.npos){
379 int pos = longName.find_last_of("/\\")+1;
380 rootPathName = longName.substr(0, pos);
385 /***********************************************************************/
387 inline string hasPath(string longName){
392 found=longName.find_last_of("~/\\");
394 if(found != longName.npos){
395 path = longName.substr(0, found+1);
401 /***********************************************************************/
403 inline string getExtension(string longName){
405 string extension = longName;
407 if(longName.find_last_of('.') != longName.npos){
408 int pos = longName.find_last_of('.');
409 extension = longName.substr(pos, longName.length());
414 /***********************************************************************/
415 inline bool isBlank(string fileName){
418 fileHandle.open(fileName.c_str());
420 cout << "Error: Could not open " << fileName << endl;
423 //check for blank file
425 if (fileHandle.eof()) { fileHandle.close(); return true; }
429 /***********************************************************************/
431 inline string getFullPathName(string fileName){
434 string path = hasPath(fileName);
438 if (path == "") { return fileName; } //its a simple name
439 else { //we need to complete the pathname
440 // ex. ../../../filename
441 // cwd = /user/work/desktop
444 //get current working directory
445 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
447 if (path.find("~") != -1) { //go to home directory
448 string homeDir = getenv ("HOME");
449 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
452 if (path.rfind("./") == -1) { return fileName; } //already complete name
453 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
455 char* cwdpath = new char[1024];
458 cwdpath=getcwd(cwdpath,size);
464 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
466 //break apart the current working directory
468 while (simpleCWD.find_first_of('/') != -1) {
469 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
470 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
473 //get last one // ex. ../../../filename = /user/work/desktop/filename
474 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
477 int index = dirs.size()-1;
479 while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
480 if (pos == 0) { break; //you are at the end
481 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
482 path = path.substr(0, pos-1);
484 if (index == 0) { break; }
485 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
486 path = path.substr(0, pos);
487 }else if (pos == 1) { break; //you are at the end
488 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
491 for (int i = index; i >= 0; i--) {
492 newFileName = dirs[i] + "/" + newFileName;
495 newFileName = "/" + newFileName;
499 if (path.find("~") != -1) { //go to home directory
500 string homeDir = getenv ("HOMEPATH");
501 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
504 if (path.rfind(".\\") == -1) { return fileName; } //already complete name
505 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
507 char *cwdpath = NULL;
508 cwdpath = getcwd(NULL, 0); // or _getcwd
509 if ( cwdpath != NULL) { cwd = cwdpath; }
512 //break apart the current working directory
514 while (cwd.find_first_of('\\') != -1) {
515 string dir = cwd.substr(0,cwd.find_first_of('\\'));
516 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
521 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
523 int index = dirs.size()-1;
525 while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
526 if (pos == 0) { break; //you are at the end
527 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
528 path = path.substr(0, pos-1);
530 if (index == 0) { break; }
531 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
532 path = path.substr(0, pos);
533 }else if (pos == 1) { break; //you are at the end
534 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
537 for (int i = index; i >= 0; i--) {
538 newFileName = dirs[i] + "\\" + newFileName;
547 catch(exception& e) {
548 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function getFullPathName. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
552 /***********************************************************************/
554 inline int openInputFile(string fileName, ifstream& fileHandle, string m){
557 string completeFileName = getFullPathName(fileName);
559 fileHandle.open(completeFileName.c_str());
563 //check for blank file
568 /***********************************************************************/
570 inline int openInputFile(string fileName, ifstream& fileHandle){
573 string completeFileName = getFullPathName(fileName);
575 fileHandle.open(completeFileName.c_str());
577 cout << "Error: Could not open " << completeFileName << endl;
581 //check for blank file
583 if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; }
589 /***********************************************************************/
591 inline int renameFile(string oldName, string newName){
594 int exist = openInputFile(newName, inTest, "");
596 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
597 if (exist == 0) { //you could open it so you want to delete it
599 string command = "rm " + newName;
600 system(command.c_str());
603 string command = "mv " + oldName + " " + newName;
604 system(command.c_str());
606 remove(newName.c_str());
607 int renameOk = rename(oldName.c_str(), newName.c_str());
612 /***********************************************************************/
614 inline int openOutputFile(string fileName, ofstream& fileHandle){
616 string completeFileName = getFullPathName(fileName);
618 fileHandle.open(completeFileName.c_str(), ios::trunc);
620 cout << "Error: Could not open " << completeFileName << endl;
629 /***********************************************************************/
631 inline int getNumSeqs(ifstream& file){
633 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
638 /***********************************************************************/
639 inline void getNumSeqs(ifstream& file, int& numSeqs){
644 input = getline(file);
645 if (input.length() != 0) {
646 if(input[0] == '>'){ numSeqs++; }
651 /***********************************************************************/
653 inline bool inVector(string member, vector<string> group){
655 for (int i = 0; i < group.size(); i++) {
656 if (group[i] == member) { return true; }
661 /***********************************************************************/
663 //This function parses the estimator options and puts them in a vector
664 inline void splitAtDash(string& estim, vector<string>& container) {
668 while (estim.find_first_of('-') != -1) {
669 individual = estim.substr(0,estim.find_first_of('-'));
670 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
671 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
672 container.push_back(individual);
676 container.push_back(estim);
678 catch(exception& e) {
679 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
684 /***********************************************************************/
685 //This function parses the label options and puts them in a set
686 inline void splitAtDash(string& estim, set<string>& container) {
690 while (estim.find_first_of('-') != -1) {
691 individual = estim.substr(0,estim.find_first_of('-'));
692 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
693 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
694 container.insert(individual);
698 container.insert(estim);
700 catch(exception& e) {
701 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
705 /***********************************************************************/
706 //This function parses the line options and puts them in a set
707 inline void splitAtDash(string& estim, set<int>& container) {
712 while (estim.find_first_of('-') != -1) {
713 individual = estim.substr(0,estim.find_first_of('-'));
714 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
715 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
716 convert(individual, lineNum); //convert the string to int
717 container.insert(lineNum);
721 convert(estim, lineNum); //convert the string to int
722 container.insert(lineNum);
724 catch(exception& e) {
725 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
729 /***********************************************************************/
730 //This function parses a string and puts the pieces in a vector
731 inline void splitAtComma(string& estim, vector<string>& container) {
735 while (estim.find_first_of(',') != -1) {
736 individual = estim.substr(0,estim.find_first_of(','));
737 if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
738 estim = estim.substr(estim.find_first_of(',')+1, estim.length());
739 container.push_back(individual);
743 container.push_back(estim);
745 catch(exception& e) {
746 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
750 /***********************************************************************/
752 //This function splits up the various option parameters
753 inline void splitAtComma(string& prefix, string& suffix){
755 prefix = suffix.substr(0,suffix.find_first_of(','));
756 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
757 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
759 while(suffix.at(0) == ' ')
760 suffix = suffix.substr(1, suffix.length());
764 catch(exception& e) {
765 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
769 /***********************************************************************/
771 //This function separates the key value from the option value i.e. dist=96_...
772 inline void splitAtEquals(string& key, string& value){
774 if(value.find_first_of('=') != -1){
775 key = value.substr(0,value.find_first_of('='));
776 if ((value.find_first_of('=')+1) <= value.length()) {
777 value = value.substr(value.find_first_of('=')+1, value.length());
784 catch(exception& e) {
785 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtEquals. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
789 /**************************************************************************************************/
791 inline bool inUsersGroups(string groupname, vector<string> Groups) {
793 for (int i = 0; i < Groups.size(); i++) {
794 if (groupname == Groups[i]) { return true; }
798 catch(exception& e) {
799 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
803 /**************************************************************************************************/
804 //returns true if any of the strings in first vector are in second vector
805 inline bool inUsersGroups(vector<string> groupnames, vector<string> Groups) {
808 for (int i = 0; i < groupnames.size(); i++) {
809 if (inUsersGroups(groupnames[i], Groups)) { return true; }
813 catch(exception& e) {
814 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
818 /***********************************************************************/
819 //this function determines if the user has given us labels that are smaller than the given label.
820 //if so then it returns true so that the calling function can run the previous valid distance.
821 //it's a "smart" distance function. It also checks for invalid labels.
822 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
825 set<string>::iterator it;
826 vector<float> orderFloat;
827 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
828 map<string, float>::iterator it2;
830 bool smaller = false;
832 //unique is the smallest line
833 if (label == "unique") { return false; }
835 if (convertTestFloat(label, labelFloat)) {
836 convert(label, labelFloat);
837 }else { //cant convert
842 //go through users set and make them floats
843 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
846 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
848 orderFloat.push_back(temp);
850 }else if (*it == "unique") {
851 orderFloat.push_back(-1.0);
852 userMap["unique"] = -1.0;
854 if (errorOff == "") { cout << *it << " is not a valid label." << endl; }
855 userLabels.erase(*it);
861 sort(orderFloat.begin(), orderFloat.end());
863 /*************************************************/
864 //is this label bigger than any of the users labels
865 /*************************************************/
867 //loop through order until you find a label greater than label
868 for (int i = 0; i < orderFloat.size(); i++) {
869 if (orderFloat[i] < labelFloat) {
871 if (orderFloat[i] == -1) {
872 if (errorOff == "") { cout << "Your file does not include the label unique." << endl; }
873 userLabels.erase("unique");
876 if (errorOff == "") { cout << "Your file does not include the label " << endl; }
878 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
879 if (it2->second == orderFloat[i]) {
881 //remove small labels
886 if (errorOff == "") {cout << s << ". I will use the next smallest distance. " << endl; }
888 //since they are sorted once you find a bigger one stop looking
895 catch(exception& e) {
896 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function anyLabelsToProcess. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
901 /**************************************************************************************************/
902 inline void appendFiles(string temp, string filename) {
907 //open output file in append mode
908 openOutputFileAppend(filename, output);
909 int ableToOpen = openInputFile(temp, input, "no error");
911 if (ableToOpen == 0) { //you opened it
912 while(char c = input.get()){
913 if(input.eof()) { break; }
914 else { output << c; }
921 catch(exception& e) {
922 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function appendFiles. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
927 /**************************************************************************************************/
928 inline string sortFile(string distFile, string outputDir){
931 //if (outputDir == "") { outputDir += hasPath(distFile); }
932 string outfile = getRootName(distFile) + "sorted.dist";
935 //if you can, use the unix sort since it's been optimized for years
936 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
937 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
938 system(command.c_str());
939 #else //you are stuck with my best attempt...
940 //windows sort does not have a way to specify a column, only a character in the line
941 //since we cannot assume that the distance will always be at the same character location on each line
942 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
944 //read in file line by line and put distance first
945 string tempDistFile = distFile + ".temp";
948 openInputFile(distFile, input);
949 openOutputFile(tempDistFile, output);
951 string firstName, secondName;
954 input >> firstName >> secondName >> dist;
955 output << dist << '\t' << firstName << '\t' << secondName << endl;
962 //sort using windows sort
963 string tempOutfile = outfile + ".temp";
964 string command = "sort " + tempDistFile + " /O " + tempOutfile;
965 system(command.c_str());
967 //read in sorted file and put distance at end again
969 openInputFile(tempOutfile, input2);
970 openOutputFile(outfile, output);
973 input2 >> dist >> firstName >> secondName;
974 output << firstName << '\t' << secondName << '\t' << dist << endl;
981 remove(tempDistFile.c_str());
982 remove(tempOutfile.c_str());
987 catch(exception& e) {
988 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function sortfile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
992 /**************************************************************************************************/
993 inline vector<long> setFilePosFasta(string filename, int& num) {
995 vector<long> positions;
997 openInputFile(filename, inFASTA);
1000 while(!inFASTA.eof()){
1001 input = getline(inFASTA); gobble(inFASTA);
1002 if (input.length() != 0) {
1003 if(input[0] == '>'){ long pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
1008 num = positions.size();
1013 //get num bytes in file
1014 pFile = fopen (filename.c_str(),"rb");
1015 if (pFile==NULL) perror ("Error opening file");
1017 fseek (pFile, 0, SEEK_END);
1022 long size = positions[(positions.size()-1)];
1024 openInputFile(filename, in);
1028 while(char c = in.get()){
1029 if(in.eof()) { break; }
1034 positions.push_back(size);
1038 /**************************************************************************************************/
1039 inline vector<long> setFilePosEachLine(string filename, int& num) {
1041 vector<long> positions;
1043 openInputFile(filename, in);
1047 long lastpos = in.tellg();
1048 input = getline(in); gobble(in);
1049 if (input.length() != 0) {
1050 long pos = in.tellg();
1051 if (pos != -1) { positions.push_back(pos - input.length() - 1); }
1052 else { positions.push_back(lastpos); }
1057 num = positions.size();
1062 //get num bytes in file
1063 pFile = fopen (filename.c_str(),"rb");
1064 if (pFile==NULL) perror ("Error opening file");
1066 fseek (pFile, 0, SEEK_END);
1071 positions.push_back(size);
1076 /**************************************************************************************************/