10 * Created by Sarah Westcott on 2/19/09.
11 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
15 /* This file contains all the standard includes we use in the project as well as some common utilities. */
53 /***********************************************************************/
55 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
60 #include <readline/readline.h>
61 #include <readline/history.h>
65 #include <conio.h> //allows unbuffered screen capture from stdin
66 #include <direct.h> //get cwd
// Math shims for this platform branch. Every macro argument is wrapped in
// parentheses before it is cast, so that an expression argument such as
// `a + b` or `n / 2` is evaluated first and then converted as a whole
// (previously only its first operand was cast to double).
#define exp(x) (exp((double) (x)))
#define sqrt(x) (sqrt((double) (x)))
#define log10(x) (log10((double) (x)))
// base-2 logarithm expressed through the base-10 shim above
#define log2(x) (log10(x)/log10(2))
// NOTE: evaluates its argument twice - do not pass expressions with side effects
#define isnan(x) ((x) != (x))
#define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
78 typedef unsigned long ull;
88 IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {};
98 /************************************************************/
102 int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
103 clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
105 /************************************************************/
111 seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
114 //********************************************************************************************************************
115 //sorts lowest to highest
// Ordering predicate for seqDist records: true when left.dist is strictly smaller than right.dist.
// Both records are taken by value.
116 inline bool compareSequenceDistance(seqDist left, seqDist right){
117 return (left.dist < right.dist);
119 /***********************************************************************/
121 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
122 // works for now, but there should be a way to do it without killing the whole program
124 class BadConversion : public runtime_error {
126 BadConversion(const string& s) : runtime_error(s){ }
129 //**********************************************************************************************************************
// Parses the text in s into x by stream extraction and throws BadConversion(s) on failure.
//   s                    - text to parse
//   x                    - receives the parsed value
//   failIfLeftoverChars  - when true (the default), any character that can still be read after
//                          the value also counts as a failure
// NOTE(review): the stream `i` and the char `c` are declared on lines not visible here -
// presumably an istringstream built from s; confirm.
132 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
// fails when nothing could be extracted into x, or (optionally) when trailing input remains
135 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
136 throw BadConversion(s);
139 //**********************************************************************************************************************
142 inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
145 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
152 //**********************************************************************************************************************
155 inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
158 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
160 cout << "unable to be converted into an integer.\n" << endl;
166 //**********************************************************************************************************************
169 string toString(const T&x){
175 //**********************************************************************************************************************
178 string toHex(const T&x){
185 //**********************************************************************************************************************
188 string toString(const T&x, int i){
192 output << fixed << x;
196 /***********************************************************************/
197 inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
199 fileHandle.open(fileName.c_str(), ios::app);
201 cout << "Error: Could not open " << fileName << endl;
209 /***********************************************************************/
211 inline void gobble(istream& f){
214 while(isspace(d=f.get())) {;}
218 /***********************************************************************/
220 inline string getline(ifstream& fileHandle) {
225 while (!fileHandle.eof()) {
227 char c = fileHandle.get();
229 //are you at the end of the line
230 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
237 catch(exception& e) {
238 cout << "Error in mothur function getline" << endl;
243 /***********************************************************************/
// Interprets a textual option value as a boolean.
// Returns true for "t" or "true" written in any letter case ("T", "TRUE", "True", ...);
// every other string, including the empty string, yields false.
inline bool isTrue(std::string f){
	// Fold the local copy to lower case so that mixed-case spellings such as "True",
	// which used to be silently treated as false, are recognised as well.
	for (std::string::size_type i = 0; i < f.length(); i++) {
		// tolower needs a value representable as unsigned char
		f[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(f[i])));
	}

	return (f == "true") || (f == "t");
}
251 /***********************************************************************/
// Rounds dist to the nearest multiple of 1/precision, e.g. precision = 100 keeps two decimals.
//   dist      - value to round
//   precision - scale factor (number of steps per unit)
// Returns the rounded value.
inline float roundDist(float dist, int precision){
	// floor(x + 0.5) rounds to nearest for negative values too. The previous int() cast
	// truncated toward zero (so -0.26 at precision 10 became -0.2) and overflowed once the
	// scaled value no longer fit in an int. Results for non-negative inputs are unchanged.
	return static_cast<float>(std::floor(dist * precision + 0.5)) / static_cast<float>(precision);
}
259 /***********************************************************************/
261 inline int getNumNames(string names){
267 for(int i=0;i<names.size();i++){
278 /**************************************************************************************************/
280 inline vector<vector<double> > binomial(int maxOrder){
282 vector<vector<double> > binomial(maxOrder+1);
284 for(int i=0;i<=maxOrder;i++){
285 binomial[i].resize(maxOrder+1);
294 for(int i=2;i<=maxOrder;i++){
298 for(int i=2;i<=maxOrder;i++){
299 for(int j=1;j<=maxOrder;j++){
300 if(i==j){ binomial[i][j]=1; }
301 if(j>i) { binomial[i][j]=0; }
302 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
309 /***********************************************************************/
// Returns longName up to and including the '.' that starts the file's extension,
// e.g. "data/final.dist" -> "data/final.". A name whose last path component has no '.'
// is returned unchanged.
inline std::string getRootName(std::string longName){
	const std::string::size_type dot = longName.find_last_of('.');
	const std::string::size_type separator = longName.find_last_of("/\\");

	// no '.' anywhere: nothing to strip
	if (dot == std::string::npos) { return longName; }

	// A '.' that sits before the last path separator belongs to a directory
	// ("./", "../", "run.1/"), not to the file name, so it is not an extension marker.
	// Previously "../data/file" was cut down to "..".
	if ((separator != std::string::npos) && (dot < separator)) { return longName; }

	return longName.substr(0, dot + 1);
}
322 /***********************************************************************/
// Strips any leading directory part from longName: when a '/' or '\' is present, simpleName
// becomes the text after the last one; otherwise it stays equal to longName.
// NOTE(review): the declaration of `found` and the return statement are on lines not visible
// here - presumably simpleName is returned; confirm.
324 inline string getSimpleName(string longName){
326 string simpleName = longName;
// position of the last path separator of either flavour
329 found=longName.find_last_of("/\\");
331 if(found != longName.npos){
332 simpleName = longName.substr(found+1);
// older variant that only recognised '/', kept commented out
335 //if(longName.find_last_of("/") != longName.npos){
336 // int pos = longName.find_last_of('/')+1;
337 // simpleName = longName.substr(pos, longName.length());
343 /***********************************************************************/
345 inline int factorial(int num){
348 for (int i = 1; i <= num; i++) {
354 /**************************************************************************************************
356 double min(double x, double y)
362 /***********************************************************************/
// Extracts the directory part of longName: when a '/' or '\' is present, rootPathName becomes
// everything up to and including the last one. Without any separator rootPathName keeps its
// initial value, the whole of longName.
// NOTE(review): the return statement is on a line not visible here - presumably rootPathName
// is returned; confirm.
364 inline string getPathName(string longName){
366 string rootPathName = longName;
368 if(longName.find_last_of("/\\") != longName.npos){
// +1 so that the separator itself is kept at the end of the path
369 int pos = longName.find_last_of("/\\")+1;
370 rootPathName = longName.substr(0, pos);
375 /***********************************************************************/
// Looks for a directory part in longName: when a '/' or '\' is present, path is set to
// everything up to and including the last one.
// NOTE(review): the declarations of `path` and `found` and the return statement are on lines
// not visible here - getFullPathName compares the result with "" to detect a bare file name,
// so path presumably starts out empty and is returned; confirm.
377 inline string hasPath(string longName){
382 found=longName.find_last_of("/\\");
384 if(found != longName.npos){
// found+1 keeps the trailing separator
385 path = longName.substr(0, found+1);
391 /***********************************************************************/
// Returns the extension of longName including its leading '.',
// e.g. "data/final.an.list" -> ".list".
// When the last path component has no '.', the whole of longName is returned; this matches
// what the function has always done for names without any '.'.
inline std::string getExtension(std::string longName){
	const std::string::size_type dot = longName.find_last_of('.');
	const std::string::size_type separator = longName.find_last_of("/\\");

	// no '.' anywhere: keep the historical "return the input" behaviour
	if (dot == std::string::npos) { return longName; }

	// A '.' before the last path separator is part of a directory name ("run.1/file"),
	// not an extension; previously this produced results such as ".1/file".
	if ((separator != std::string::npos) && (dot < separator)) { return longName; }

	return longName.substr(dot);
}
404 /***********************************************************************/
405 inline bool isBlank(string fileName){
408 fileHandle.open(fileName.c_str());
410 cout << "Error: Could not open " << fileName << endl;
413 //check for blank file
415 if (fileHandle.eof()) { fileHandle.close(); return true; }
419 /***********************************************************************/
421 inline string getFullPathName(string fileName){
424 string path = hasPath(fileName);
428 if (path == "") { return fileName; } //its a simple name
429 else { //we need to complete the pathname
430 // ex. ../../../filename
431 // cwd = /user/work/desktop
434 //get current working directory
435 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
436 if (path.rfind("./") == -1) { return fileName; } //already complete name
437 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
439 char* cwdpath = new char[1024];
442 cwdpath=getcwd(cwdpath,size);
448 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
450 //break apart the current working directory
452 while (simpleCWD.find_first_of('/') != -1) {
453 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
454 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
457 //get last one // ex. ../../../filename = /user/work/desktop/filename
458 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
461 int index = dirs.size()-1;
463 while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
464 if (pos == 0) { break; //you are at the end
465 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
466 path = path.substr(0, pos-1);
468 if (index == 0) { break; }
469 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
470 path = path.substr(0, pos);
471 }else if (pos == 1) { break; //you are at the end
472 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
475 for (int i = index; i >= 0; i--) {
476 newFileName = dirs[i] + "/" + newFileName;
479 newFileName = "/" + newFileName;
483 if (path.rfind(".\\") == -1) { return fileName; } //already complete name
484 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
486 char *cwdpath = NULL;
487 cwdpath = getcwd(NULL, 0); // or _getcwd
488 if ( cwdpath != NULL) { cwd = cwdpath; }
491 //break apart the current working directory
493 while (cwd.find_first_of('\\') != -1) {
494 string dir = cwd.substr(0,cwd.find_first_of('\\'));
495 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
500 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
502 int index = dirs.size()-1;
504 while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
505 if (pos == 0) { break; //you are at the end
506 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
507 path = path.substr(0, pos-1);
509 if (index == 0) { break; }
510 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
511 path = path.substr(0, pos);
512 }else if (pos == 1) { break; //you are at the end
513 }else { cout << "cannot resolve path for " << fileName << endl; return fileName; }
516 for (int i = index; i >= 0; i--) {
517 newFileName = dirs[i] + "\\" + newFileName;
525 catch(exception& e) {
526 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function getFullPathName. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
530 /***********************************************************************/
532 inline int openInputFile(string fileName, ifstream& fileHandle, string m){
535 string completeFileName = getFullPathName(fileName);
537 fileHandle.open(completeFileName.c_str());
541 //check for blank file
546 /***********************************************************************/
548 inline int openInputFile(string fileName, ifstream& fileHandle){
551 string completeFileName = getFullPathName(fileName);
553 fileHandle.open(completeFileName.c_str());
555 cout << "Error: Could not open " << completeFileName << endl;
559 //check for blank file
561 if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; return 1; }
567 /***********************************************************************/
// Moves oldName to newName, getting rid of an already existing newName first.
// NOTE(review): several lines of this function (declaration of inTest, the #else/#endif of the
// platform switch, the return) are not visible here; the comments below describe only what is shown.
569 inline int renameFile(string oldName, string newName){
// probe whether newName can be opened; 0 is treated as "it exists" below
572 int exist = openInputFile(newName, inTest, "");
// Apple / Mach / Linux builds shell out to rm and mv.
// NOTE(review): oldName and newName are pasted into the shell command unquoted, so a name
// containing spaces or shell metacharacters breaks the command or is interpreted by the shell.
// The remove()/rename() calls further down take the names as plain strings and avoid this;
// presumably they form the other branch of this #if - confirm whether they can be used on
// every platform.
574 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
575 if (exist == 0) { //you could open it so you want to delete it
577 string command = "rm " + newName;
578 system(command.c_str());
581 string command = "mv " + oldName + " " + newName;
582 system(command.c_str());
// C library variant: delete the target, then rename
584 remove(newName.c_str());
585 int renameOk = rename(oldName.c_str(), newName.c_str());
590 /***********************************************************************/
592 inline int openOutputFile(string fileName, ofstream& fileHandle){
594 string completeFileName = getFullPathName(fileName);
596 fileHandle.open(completeFileName.c_str(), ios::trunc);
598 cout << "Error: Could not open " << completeFileName << endl;
607 /***********************************************************************/
609 inline int getNumSeqs(ifstream& file){
611 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
616 /***********************************************************************/
// Returns true when member is an element of group (exact string comparison), false otherwise.
// Both arguments are taken by const reference: the previous by-value signature copied the whole
// vector and every string in it on each call. Call sites are unaffected.
inline bool inVector(const std::string& member, const std::vector<std::string>& group){
	// std::find also removes the signed/unsigned comparison of the old index loop
	return std::find(group.begin(), group.end(), member) != group.end();
}
626 /***********************************************************************/
628 //This function parses the estimator options and puts them in a vector
629 inline void splitAtDash(string& estim, vector<string>& container) {
633 while (estim.find_first_of('-') != -1) {
634 individual = estim.substr(0,estim.find_first_of('-'));
635 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
636 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
637 container.push_back(individual);
641 container.push_back(estim);
643 catch(exception& e) {
644 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
649 /***********************************************************************/
650 //This function parses the label options and puts them in a set
651 inline void splitAtDash(string& estim, set<string>& container) {
655 while (estim.find_first_of('-') != -1) {
656 individual = estim.substr(0,estim.find_first_of('-'));
657 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
658 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
659 container.insert(individual);
663 container.insert(estim);
665 catch(exception& e) {
666 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
670 /***********************************************************************/
671 //This function parses the line options and puts them in a set
672 inline void splitAtDash(string& estim, set<int>& container) {
677 while (estim.find_first_of('-') != -1) {
678 individual = estim.substr(0,estim.find_first_of('-'));
679 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
680 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
681 convert(individual, lineNum); //convert the string to int
682 container.insert(lineNum);
686 convert(estim, lineNum); //convert the string to int
687 container.insert(lineNum);
689 catch(exception& e) {
690 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
694 /***********************************************************************/
695 //This function parses a string and puts the pieces in a vector
696 inline void splitAtComma(string& estim, vector<string>& container) {
700 while (estim.find_first_of(',') != -1) {
701 individual = estim.substr(0,estim.find_first_of(','));
702 if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
703 estim = estim.substr(estim.find_first_of(',')+1, estim.length());
704 container.push_back(individual);
708 container.push_back(estim);
710 catch(exception& e) {
711 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
715 /***********************************************************************/
717 //This function splits up the various option parameters
718 inline void splitAtComma(string& prefix, string& suffix){
720 prefix = suffix.substr(0,suffix.find_first_of(','));
721 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
722 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
724 while(suffix.at(0) == ' ')
725 suffix = suffix.substr(1, suffix.length());
729 catch(exception& e) {
730 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
734 /***********************************************************************/
736 //This function separates the key value from the option value i.e. dist=96_...
737 inline void splitAtEquals(string& key, string& value){
739 if(value.find_first_of('=') != -1){
740 key = value.substr(0,value.find_first_of('='));
741 if ((value.find_first_of('=')+1) <= value.length()) {
742 value = value.substr(value.find_first_of('=')+1, value.length());
749 catch(exception& e) {
750 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtEquals. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
754 /**************************************************************************************************/
756 inline bool inUsersGroups(string groupname, vector<string> Groups) {
758 for (int i = 0; i < Groups.size(); i++) {
759 if (groupname == Groups[i]) { return true; }
763 catch(exception& e) {
764 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
768 /**************************************************************************************************/
769 //returns true if any of the strings in first vector are in second vector
770 inline bool inUsersGroups(vector<string> groupnames, vector<string> Groups) {
773 for (int i = 0; i < groupnames.size(); i++) {
774 if (inUsersGroups(groupnames[i], Groups)) { return true; }
778 catch(exception& e) {
779 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
783 /***********************************************************************/
784 //this function determines if the user has given us labels that are smaller than the given label.
785 //if so then it returns true so that the calling function can run the previous valid distance.
786 //it's a "smart" distance function. It also checks for invalid labels.
787 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
790 set<string>::iterator it;
791 vector<float> orderFloat;
792 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
793 map<string, float>::iterator it2;
795 bool smaller = false;
797 //unique is the smallest line
798 if (label == "unique") { return false; }
799 else { convert(label, labelFloat); }
801 //go through users set and make them floats
802 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
805 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
807 orderFloat.push_back(temp);
809 }else if (*it == "unique") {
810 orderFloat.push_back(-1.0);
811 userMap["unique"] = -1.0;
813 if (errorOff == "") { cout << *it << " is not a valid label." << endl; }
814 userLabels.erase(*it);
820 sort(orderFloat.begin(), orderFloat.end());
822 /*************************************************/
823 //is this label bigger than any of the users labels
824 /*************************************************/
826 //loop through order until you find a label greater than label
827 for (int i = 0; i < orderFloat.size(); i++) {
828 if (orderFloat[i] < labelFloat) {
830 if (orderFloat[i] == -1) {
831 if (errorOff == "") { cout << "Your file does not include the label unique." << endl; }
832 userLabels.erase("unique");
835 if (errorOff == "") { cout << "Your file does not include the label " << endl; }
837 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
838 if (it2->second == orderFloat[i]) {
840 //remove small labels
845 if (errorOff == "") {cout << s << ". I will use the next smallest distance. " << endl; }
847 //since they are sorted once you find a bigger one stop looking
854 catch(exception& e) {
855 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function anyLabelsToProcess. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
860 /**************************************************************************************************/
861 inline void appendFiles(string temp, string filename) {
866 //open output file in append mode
867 openOutputFileAppend(filename, output);
868 openInputFile(temp, input);
870 while(char c = input.get()){
871 if(input.eof()) { break; }
872 else { output << c; }
878 catch(exception& e) {
879 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function appendFiles. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
884 /**************************************************************************************************/
885 inline string sortFile(string distFile, string outputDir){
888 //if (outputDir == "") { outputDir += hasPath(distFile); }
889 string outfile = getRootName(distFile) + "sorted.dist";
892 //if you can, use the unix sort since its been optimized for years
893 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
894 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
895 system(command.c_str());
896 #else //you are stuck with my best attempt...
897 //windows sort does not have a way to specify a column, only a character in the line
898 //since we cannot assume that the distance will always be at the same character location on each line
899 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
901 //read in file line by file and put distance first
902 string tempDistFile = distFile + ".temp";
905 openInputFile(distFile, input);
906 openOutputFile(tempDistFile, output);
908 string firstName, secondName;
911 input >> firstName >> secondName >> dist;
912 output << dist << '\t' << firstName << '\t' << secondName << endl;
919 //sort using windows sort
920 string tempOutfile = outfile + ".temp";
921 string command = "sort " + tempDistFile + " /O " + tempOutfile;
922 system(command.c_str());
924 //read in sorted file and put distance at end again
926 openInputFile(tempOutfile, input2);
927 openOutputFile(outfile, output);
930 input2 >> dist >> firstName >> secondName;
931 output << firstName << '\t' << secondName << '\t' << dist << endl;
938 remove(tempDistFile.c_str());
939 remove(tempOutfile.c_str());
944 catch(exception& e) {
945 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function sortfile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
949 /**************************************************************************************************/
950 inline vector<long> setFilePosFasta(string filename, int& num) {
952 vector<long> positions;
954 openInputFile(filename, inFASTA);
957 while(!inFASTA.eof()){
958 input = getline(inFASTA); gobble(inFASTA);
959 if (input.length() != 0) {
960 if(input[0] == '>'){ long pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
965 num = positions.size();
970 //get num bytes in file
971 pFile = fopen (filename.c_str(),"rb");
972 if (pFile==NULL) perror ("Error opening file");
974 fseek (pFile, 0, SEEK_END);
979 positions.push_back(size);
983 /**************************************************************************************************/
984 inline vector<long> setFilePosEachLine(string filename, int& num) {
986 vector<long> positions;
988 openInputFile(filename, in);
992 long lastpos = in.tellg();
993 input = getline(in); gobble(in);
994 if (input.length() != 0) {
995 long pos = in.tellg();
996 if (pos != -1) { positions.push_back(pos - input.length() - 1); }
997 else { positions.push_back(lastpos); }
1002 num = positions.size();
1007 //get num bytes in file
1008 pFile = fopen (filename.c_str(),"rb");
1009 if (pFile==NULL) perror ("Error opening file");
1011 fseek (pFile, 0, SEEK_END);
1016 positions.push_back(size);
1021 /**************************************************************************************************/