10 * Created by Sarah Westcott on 2/19/09.
11 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
15 /* This file contains all the standard includes we use in the project as well as some common utilities. */
49 /***********************************************************************/
51 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
56 #include <readline/readline.h>
57 #include <readline/history.h>
60 //#include <readline/readline.h>
61 //#include <readline/history.h>
63 #include <conio.h> //allows unbuffered screen capture from stdin
68 #define exp(x) (exp((double) x))
69 #define sqrt(x) (sqrt((double) x))
70 #define log10(x) (log10((double) x))
71 #define log2(x) (log10(x)/log10(2))
72 #define isnan(x) ((x) != (x))
73 #define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
75 typedef unsigned long ull;
85 IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {};
95 /************************************************************/
99 int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
100 clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
102 /************************************************************/
108 seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
111 //********************************************************************************************************************
112 //sorts lowest to highest
113 inline bool compareSequenceDistance(seqDist left, seqDist right){
114 return (left.dist < right.dist);
116 /***********************************************************************/
118 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
119 // works for now, but there should be a way to do it without killing the whole program
121 class BadConversion : public runtime_error {
123 BadConversion(const string& s) : runtime_error(s){ }
126 //**********************************************************************************************************************
129 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
132 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
133 throw BadConversion(s);
136 //**********************************************************************************************************************
139 inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
142 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
149 //**********************************************************************************************************************
152 inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
155 if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
157 cout << "unable to be converted into an integer.\n" << endl;
163 //**********************************************************************************************************************
166 string toString(const T&x){
172 //**********************************************************************************************************************
175 string toHex(const T&x){
182 //**********************************************************************************************************************
185 string toString(const T&x, int i){
189 output << fixed << x;
193 /***********************************************************************/
195 inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
197 fileHandle.open(fileName.c_str(), ios::app);
199 cout << "Error: Could not open " << fileName << endl;
207 /***********************************************************************/
209 inline void gobble(istream& f){
212 while(isspace(d=f.get())) {;}
216 /***********************************************************************/
218 inline string getline(ifstream& fileHandle) {
223 while (!fileHandle.eof()) {
225 char c = fileHandle.get();
227 //are you at the end of the line
228 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
235 catch(exception& e) {
236 cout << "Error in mothur function getline" << endl;
241 /**************************************************************************************************/
243 inline void mothurOut(string message) {
246 string logFileName = "mothur.logFile";
247 openOutputFileAppend(logFileName, out);
254 catch(exception& e) {
255 cout << "Error in mothur class mothurOut" << endl;
259 /**************************************************************************************************/
261 inline void mothurOut(string message, string precision) {
264 string logFileName = "mothur.logFile";
265 openOutputFileAppend(logFileName, out);
267 cout << precision << message;
268 out << precision << message;
272 catch(exception& e) {
273 cout << "Error in mothur class mothurOut" << endl;
278 /**************************************************************************************************/
280 inline void mothurOutEndLine() {
283 string logFileName = "mothur.logFile";
284 openOutputFileAppend(logFileName, out);
291 catch(exception& e) {
292 cout << "error in mothur mothurOutEndLine" << endl;
298 /**************************************************************************************************/
300 inline void errorOut(exception& e, string object, string function) {
302 mothurOut("Error: ");
303 mothurOut(toString(e.what()));
304 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
309 /***********************************************************************/
311 inline bool isTrue(string f){
313 if ((f == "TRUE") || (f == "T") || (f == "true") || (f == "t")) { return true; }
314 else { return false; }
317 /***********************************************************************/
319 inline float roundDist(float dist, int precision){
321 return int(dist * precision + 0.5)/float(precision);
325 /***********************************************************************/
327 inline int getNumNames(string names){
333 for(int i=0;i<names.size();i++){
344 /**************************************************************************************************/
346 inline vector<vector<double> > binomial(int maxOrder){
348 vector<vector<double> > binomial(maxOrder+1);
350 for(int i=0;i<=maxOrder;i++){
351 binomial[i].resize(maxOrder+1);
360 for(int i=2;i<=maxOrder;i++){
364 for(int i=2;i<=maxOrder;i++){
365 for(int j=1;j<=maxOrder;j++){
366 if(i==j){ binomial[i][j]=1; }
367 if(j>i) { binomial[i][j]=0; }
368 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
375 /***********************************************************************/
377 inline string getRootName(string longName){
379 string rootName = longName;
381 if(longName.find_last_of(".") != longName.npos){
382 int pos = longName.find_last_of('.')+1;
383 rootName = longName.substr(0, pos);
388 /***********************************************************************/
390 inline string getSimpleName(string longName){
392 string simpleName = longName;
394 if(longName.find_last_of("/") != longName.npos){
395 int pos = longName.find_last_of('/')+1;
396 simpleName = longName.substr(pos, longName.length());
402 /***********************************************************************/
404 inline int factorial(int num){
407 for (int i = 1; i <= num; i++) {
413 /**************************************************************************************************
415 double min(double x, double y)
421 /***********************************************************************/
423 inline string getPathName(string longName){
425 string rootPathName = longName;
427 if(longName.find_last_of('/') != longName.npos){
428 int pos = longName.find_last_of('/')+1;
429 rootPathName = longName.substr(0, pos);
435 /***********************************************************************/
437 inline string getExtension(string longName){
439 string extension = longName;
441 if(longName.find_last_of('.') != longName.npos){
442 int pos = longName.find_last_of('.');
443 extension = longName.substr(pos, longName.length());
448 /***********************************************************************/
449 inline bool isBlank(string fileName){
452 fileHandle.open(fileName.c_str());
454 mothurOut("Error: Could not open " + fileName); mothurOutEndLine();
457 //check for blank file
459 if (fileHandle.eof()) { fileHandle.close(); return true; }
463 /***********************************************************************/
465 inline int openInputFile(string fileName, ifstream& fileHandle, string m){
467 fileHandle.open(fileName.c_str());
469 mothurOut("Error: Could not open " + fileName); mothurOutEndLine();
473 //check for blank file
479 /***********************************************************************/
481 inline int openInputFile(string fileName, ifstream& fileHandle){
483 fileHandle.open(fileName.c_str());
485 mothurOut("Error: Could not open " + fileName); mothurOutEndLine();
489 //check for blank file
491 if (fileHandle.eof()) { mothurOut(fileName + " is blank. Please correct."); mothurOutEndLine(); return 1; }
498 /***********************************************************************/
500 inline int openOutputFile(string fileName, ofstream& fileHandle){
502 fileHandle.open(fileName.c_str(), ios::trunc);
504 mothurOut("Error: Could not open " + fileName); mothurOutEndLine();
513 /***********************************************************************/
515 inline int getNumSeqs(ifstream& file){
517 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
522 /***********************************************************************/
524 inline bool inVector(string member, vector<string> group){
526 for (int i = 0; i < group.size(); i++) {
527 if (group[i] == member) { return true; }
532 /***********************************************************************/
534 //This function parses the estimator options and puts them in a vector
535 inline void splitAtDash(string& estim, vector<string>& container) {
539 while (estim.find_first_of('-') != -1) {
540 individual = estim.substr(0,estim.find_first_of('-'));
541 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
542 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
543 container.push_back(individual);
547 container.push_back(estim);
549 catch(exception& e) {
550 errorOut(e, "mothur", "splitAtDash");
555 /***********************************************************************/
556 //This function parses the label options and puts them in a set
557 inline void splitAtDash(string& estim, set<string>& container) {
561 while (estim.find_first_of('-') != -1) {
562 individual = estim.substr(0,estim.find_first_of('-'));
563 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
564 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
565 container.insert(individual);
569 container.insert(estim);
571 catch(exception& e) {
572 errorOut(e, "mothur", "splitAtDash");
576 /***********************************************************************/
577 //This function parses the line options and puts them in a set
578 inline void splitAtDash(string& estim, set<int>& container) {
583 while (estim.find_first_of('-') != -1) {
584 individual = estim.substr(0,estim.find_first_of('-'));
585 if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
586 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
587 convert(individual, lineNum); //convert the string to int
588 container.insert(lineNum);
592 convert(estim, lineNum); //convert the string to int
593 container.insert(lineNum);
595 catch(exception& e) {
596 errorOut(e, "mothur", "splitAtDash");
600 /***********************************************************************/
601 //This function parses a string and puts the pieces in a vector
602 inline void splitAtComma(string& estim, vector<string>& container) {
606 while (estim.find_first_of(',') != -1) {
607 individual = estim.substr(0,estim.find_first_of(','));
608 if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
609 estim = estim.substr(estim.find_first_of(',')+1, estim.length());
610 container.push_back(individual);
614 container.push_back(estim);
616 catch(exception& e) {
617 errorOut(e, "mothur", "splitAtComma");
621 /***********************************************************************/
623 //This function splits up the various option parameters
624 inline void splitAtComma(string& prefix, string& suffix){
626 prefix = suffix.substr(0,suffix.find_first_of(','));
627 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
628 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
630 while(suffix.at(0) == ' ')
631 suffix = suffix.substr(1, suffix.length());
635 catch(exception& e) {
636 errorOut(e, "mothur", "splitAtComma");
640 /***********************************************************************/
642 //This function separates the key value from the option value i.e. dist=96_...
643 inline void splitAtEquals(string& key, string& value){
645 if(value.find_first_of('=') != -1){
646 key = value.substr(0,value.find_first_of('='));
647 if ((value.find_first_of('=')+1) <= value.length()) {
648 value = value.substr(value.find_first_of('=')+1, value.length());
655 catch(exception& e) {
656 errorOut(e, "mothur", "splitAtEquals");
660 /**************************************************************************************************/
662 inline bool inUsersGroups(string groupname, vector<string> Groups) {
664 for (int i = 0; i < Groups.size(); i++) {
665 if (groupname == Groups[i]) { return true; }
669 catch(exception& e) {
670 errorOut(e, "mothur", "inUsersGroups");
675 /**************************************************************************************************/
677 inline void mothurOutJustToLog(string message) {
680 string logFileName = "mothur.logFile";
681 openOutputFileAppend(logFileName, out);
687 catch(exception& e) {
688 errorOut(e, "mothur", "mothurOutJustToLog");
694 /**************************************************************************************************/
696 inline void mothurOut(float num) {
699 string logFileName = "mothur.logFile";
700 openOutputFileAppend(logFileName, out);
707 catch(exception& e) {
708 cout << "Error in mothur class mothurOut float" << endl;
712 /***********************************************************************/
713 inline void mothurOut(double value) {
716 string logFileName = "mothur.logFile";
717 openOutputFileAppend(logFileName, out);
724 catch(exception& e) {
725 cout << "Error in mothur class mothurOut double" << endl;
730 /***********************************************************************/
731 //this function determines if the user has given us labels that are smaller than the given label.
732 //if so then it returns true so that the calling function can run the previous valid distance.
733 //it's a "smart" distance function. It also checks for invalid labels.
734 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
736 set<string>::iterator it;
737 vector<float> orderFloat;
738 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
739 map<string, float>::iterator it2;
741 bool smaller = false;
743 //unique is the smallest line
744 if (label == "unique") { return false; }
745 else { convert(label, labelFloat); }
747 //go through users set and make them floats
748 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
751 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
753 orderFloat.push_back(temp);
755 }else if (*it == "unique") {
756 orderFloat.push_back(-1.0);
757 userMap["unique"] = -1.0;
759 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
760 userLabels.erase(*it);
766 sort(orderFloat.begin(), orderFloat.end());
768 /*************************************************/
769 //is this label bigger than any of the users labels
770 /*************************************************/
772 //loop through order until you find a label greater than label
773 for (int i = 0; i < orderFloat.size(); i++) {
774 if (orderFloat[i] < labelFloat) {
776 if (orderFloat[i] == -1) {
777 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
778 userLabels.erase("unique");
781 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
783 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
784 if (it2->second == orderFloat[i]) {
786 //remove small labels
791 if (errorOff == "") { mothurOut(s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
793 //since they are sorted once you find a bigger one stop looking
800 catch(exception& e) {
801 errorOut(e, "mothur", "anyLabelsToProcess");
806 /**************************************************************************************************/
807 inline void appendFiles(string temp, string filename) {
812 //open output file in append mode
813 openOutputFileAppend(filename, output);
814 openInputFile(temp, input);
816 while(char c = input.get()){
817 if(input.eof()) { break; }
818 else { output << c; }
824 catch(exception& e) {
825 errorOut(e, "mothur", "appendFiles");
830 /**************************************************************************************************/
831 inline string sortFile(string distFile){
833 string outfile = getRootName(distFile) + "sorted.dist";
835 //if you can, use the unix sort since it's been optimized for years
836 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
837 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
838 system(command.c_str());
839 #else //you are stuck with my best attempt...
840 //windows sort does not have a way to specify a column, only a character in the line
841 //since we cannot assume that the distance will always be at the same character location on each line
842 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
844 //read in file line by line and put distance first
845 string tempDistFile = distFile + ".temp";
848 openInputFile(distFile, input);
849 openOutputFile(tempDistFile, output);
851 string firstName, secondName;
854 input >> firstName >> secondName >> dist;
855 output << dist << '\t' << firstName << '\t' << secondName << endl;
862 //sort using windows sort
863 string tempOutfile = outfile + ".temp";
864 string command = "sort " + tempDistFile + " /O " + tempOutfile;
865 system(command.c_str());
867 //read in sorted file and put distance at end again
869 openInputFile(tempOutfile, input2);
870 openOutputFile(outfile, output);
873 input2 >> dist >> firstName >> secondName;
874 output << firstName << '\t' << secondName << '\t' << dist << endl;
881 remove(tempDistFile.c_str());
882 remove(tempOutfile.c_str());
887 catch(exception& e) {
888 errorOut(e, "mothur", "sortFile");
892 /**************************************************************************************************/