]> git.donarmstrong.com Git - mothur.git/blob - mothur.h
modified sequence class to read fasta files with comments. this required modification...
[mothur.git] / mothur.h
1 #ifndef MOTHUR_H
2 #define MOTHUR_H
3
4
5
6 /*
7  *  mothur.h
8  *  Mothur
9  *
10  *  Created by Sarah Westcott on 2/19/09.
11  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12  *
13  */
14
15 /* This file contains all the standard incudes we use in the project as well as some common utilities. */
16
17 //#include <cstddef>
18
19 //io libraries
20 #include <iostream>
21 #include <iomanip>
22 #include <fstream>
23 #include <sstream>
24 #include <signal.h>
25
26 //exception
27 #include <stdexcept>
28 #include <exception>
29 #include <cstdlib> 
30
31
32 //containers
33 #include <vector>
34 #include <set>
35 #include <map>
36 #include <string>
37 #include <list>
38
39 //math
40 #include <cmath>
41 #include <math.h>
42 #include <algorithm>
43
44 //misc
45 #include <cerrno>
46 #include <ctime>
47 #include <limits>
48
49
50 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
51         #include <sys/wait.h>
52         #include <unistd.h>
53 #endif
54
55 using namespace std;
56
57 #define exp(x) (exp((double) x))
58 #define sqrt(x) (sqrt((double) x))
59 #define log10(x) (log10((double) x))
60 #define log2(x) (log10(x)/log10(2))
61 #define isnan(x) ((x) != (x))
62 #define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
63
64
65 typedef unsigned long ull;
66
67 struct IntNode {
68         int lvalue;
69         int rvalue;
70         int lcoef;
71         int rcoef;
72         IntNode* left;
73         IntNode* right;
74         
75         IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {};
76         IntNode() {};
77 };
78
79 struct ThreadNode {
80         int* pid;
81         IntNode* left;
82         IntNode* right;
83 };
84
85 /************************************************************/
86 struct clusterNode {
87         int numSeq;
88         int parent;
89         int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
90         clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
91 };
92
93 /***********************************************************************/
94
95 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
96 // works for now, but there should be a way to do it without killing the whole program
97
98 class BadConversion : public runtime_error {
99 public:
100         BadConversion(const string& s) : runtime_error(s){ }
101 };
102
103 //**********************************************************************************************************************
104
105 template<typename T>
106 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
107         istringstream i(s);
108         char c;
109         if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
110                 throw BadConversion(s);
111 }
112
113 //**********************************************************************************************************************
114
115 template<typename T>
116 inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
117         istringstream i(s);
118         char c;
119         if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
120         {
121                 return false;
122         } 
123         return true;
124 }
125
126 //**********************************************************************************************************************
127
128 template<typename T>
129 inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
130         istringstream i(s);
131         char c;
132         if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
133         {
134                 cout << "unable to be converted into an integer.\n" << endl;
135                 return false;
136         } 
137         return true;
138 }
139
140 //**********************************************************************************************************************
141
142 template<typename T>
143 string toString(const T&x){
144     stringstream output;
145     output << x;
146     return output.str();
147 }
148
149 //**********************************************************************************************************************
150
151 template<typename T>
152 string toHex(const T&x){
153         stringstream output;
154         
155         output << hex << x;
156
157     return output.str();
158 }
159 //**********************************************************************************************************************
160
161 template<typename T>
162 string toString(const T&x, int i){
163         stringstream output;
164         
165         output.precision(i);
166     output << fixed << x;
167         
168     return output.str();
169 }
170 /***********************************************************************/
171
172 inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
173         
174         fileHandle.open(fileName.c_str(), ios::app);
175         if(!fileHandle) {
176                 cout << "Error: Could not open " <<  fileName << endl; 
177                 return 1;
178         }
179         else {
180                 return 0;
181         }
182
183 }
184 /***********************************************************************/
185
186 inline void gobble(istream& f){
187         
188         char d;
189     while(isspace(d=f.get()))           {;}
190         f.putback(d);
191         
192 }
193
194 /***********************************************************************/
195
196 inline string getline(ifstream& fileHandle) {
197         try {
198         
199                 string line = "";
200                 
201                 while (!fileHandle.eof())       {
202                         //get next character
203                         char c = fileHandle.get(); 
204                         
205                         //are you at the end of the line
206                         if ((c == '\n') || (c == '\r') || (c == '\f')){  break; }       
207                         else {          line += c;              }
208                 }
209                 
210                 return line;
211                 
212         }
213         catch(exception& e) {
214                 cout << "Error in mothur function getline" << endl;
215                 exit(1);
216         }
217 }
218
219 /**************************************************************************************************/
220
221 inline void mothurOut(string message) {
222         try{
223                 ofstream out;
224                 string logFileName = "mothur.logFile";
225                 openOutputFileAppend(logFileName, out);
226                 
227                 cout << message;
228                 out << message;
229                 
230                 out.close();
231         }
232         catch(exception& e) {
233                 cout << "Error in mothur class mothurOut" << endl;
234                 exit(1);
235         }
236 }
237 /**************************************************************************************************/
238
239 inline void mothurOut(string message, string precision) {
240         try{
241                 ofstream out;
242                 string logFileName = "mothur.logFile";
243                 openOutputFileAppend(logFileName, out);
244                 
245                 cout << precision << message;
246                 out << precision << message;
247                 
248                 out.close();
249         }
250         catch(exception& e) {
251                 cout << "Error in mothur class mothurOut" << endl;
252                 exit(1);
253         }
254 }
255
256 /**************************************************************************************************/
257
258 inline void mothurOutEndLine() {
259         try {
260                 ofstream out;
261                 string logFileName = "mothur.logFile";
262                 openOutputFileAppend(logFileName, out);
263                 
264                 cout << endl;  
265                 out << endl;
266                 
267                 out.close();
268         }
269         catch(exception& e) {
270                 cout << "error in mothur mothurOutEndLine" << endl;
271                 exit(1);
272         }
273 }
274
275
276 /**************************************************************************************************/
277
278 inline void errorOut(exception& e, string object, string function) {
279         
280                 mothurOut("Error: ");
281                 mothurOut(toString(e.what()));
282                 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
283                 mothurOutEndLine();
284         
285 }
286
287 /***********************************************************************/
288
289 inline bool isTrue(string f){
290         
291         if ((f == "TRUE") || (f == "T") || (f == "true") || (f == "t")) {       return true;    }
292         else {  return false;  }
293 }
294
295 /***********************************************************************/
296
297 inline float roundDist(float dist, int precision){
298         
299         return int(dist * precision + 0.5)/float(precision);
300         
301 }
302
303 /***********************************************************************/
304
305 inline int getNumNames(string names){
306         
307         int count = 0;
308         
309         if(names != ""){
310                 count = 1;
311                 for(int i=0;i<names.size();i++){
312                         if(names[i] == ','){
313                                 count++;
314                         }
315                 }
316         }
317         
318         return count;
319         
320 }
321
322 /**************************************************************************************************/
323
324 inline vector<vector<double> > binomial(int maxOrder){
325         
326         vector<vector<double> > binomial(maxOrder+1);
327         
328     for(int i=0;i<=maxOrder;i++){
329                 binomial[i].resize(maxOrder+1);
330                 binomial[i][0]=1;
331                 binomial[0][i]=0;
332     }
333     binomial[0][0]=1;
334         
335     binomial[1][0]=1;
336     binomial[1][1]=1;
337         
338     for(int i=2;i<=maxOrder;i++){
339                 binomial[1][i]=0;
340     }
341         
342     for(int i=2;i<=maxOrder;i++){
343                 for(int j=1;j<=maxOrder;j++){
344                         if(i==j){       binomial[i][j]=1;                                                                       }
345                         if(j>i) {       binomial[i][j]=0;                                                                       }
346                         else    {       binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j];     }
347                 }
348     }
349         
350         return binomial;
351 }
352
353 /***********************************************************************/
354
355 inline string getRootName(string longName){
356  
357         string rootName = longName;
358         
359         if(longName.find_last_of(".") != longName.npos){
360                 int pos = longName.find_last_of('.')+1;
361                 rootName = longName.substr(0, pos);
362         }
363
364         return rootName;
365 }
366 /***********************************************************************/
367
368 inline string getSimpleName(string longName){
369  
370         string simpleName = longName;
371         
372         if(longName.find_last_of("/") != longName.npos){
373                 int pos = longName.find_last_of('/')+1;
374                 simpleName = longName.substr(pos, longName.length());
375         }
376
377         return simpleName;
378 }
379
380 /***********************************************************************/
381
382 inline int factorial(int num){
383         int total = 1;
384         
385         for (int i = 1; i <= num; i++) {
386                 total *= i;
387         }
388         
389         return total;
390 }
391 /**************************************************************************************************
392
393 double min(double x, double y)
394 {
395     if(x<y){    return x;    }
396     else   {    return y;    }
397 }
398
399 /***********************************************************************/
400
401 inline string getPathName(string longName){
402  
403         string rootPathName = longName;
404         
405         if(longName.find_last_of('/') != longName.npos){
406                 int pos = longName.find_last_of('/')+1;
407                 rootPathName = longName.substr(0, pos);
408         }
409
410         return rootPathName;
411 }
412
413 /***********************************************************************/
414
415 inline string getExtension(string longName){
416         
417         string extension = longName;
418         
419         if(longName.find_last_of('.') != longName.npos){
420                 int pos = longName.find_last_of('.');
421                 extension = longName.substr(pos, longName.length());
422         }
423         
424         return extension;
425 }
426 /***********************************************************************/
427 inline bool isBlank(string fileName){
428         
429         ifstream fileHandle;
430         fileHandle.open(fileName.c_str());
431         if(!fileHandle) {
432                 mothurOut("Error: Could not open " + fileName);  mothurOutEndLine();
433                 return false;
434         }else {
435                 //check for blank file
436                 gobble(fileHandle);
437                 if (fileHandle.eof()) { fileHandle.close(); return true;  }
438         }
439         return false;
440 }
441 /***********************************************************************/
442
443 inline int openInputFile(string fileName, ifstream& fileHandle){
444
445         fileHandle.open(fileName.c_str());
446         if(!fileHandle) {
447                 mothurOut("Error: Could not open " + fileName);  mothurOutEndLine();
448                 return 1;
449         }
450         else {
451                 //check for blank file
452                 gobble(fileHandle);
453                 if (fileHandle.eof()) { mothurOut(fileName + " is blank. Please correct."); mothurOutEndLine();  return 1;  }
454                 
455                 return 0;
456         }
457         
458 }
459
460 /***********************************************************************/
461
462 inline int openOutputFile(string fileName, ofstream& fileHandle){
463         
464         fileHandle.open(fileName.c_str(), ios::trunc);
465         if(!fileHandle) {
466                 mothurOut("Error: Could not open " + fileName);  mothurOutEndLine();
467                 return 1;
468         }
469         else {
470                 return 0;
471         }
472
473 }
474
475 /***********************************************************************/
476
477 inline int getNumSeqs(ifstream& file){
478         
479         int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
480         file.seekg(0);
481         return numSeqs;
482
483 }
484
485 /***********************************************************************/
486
487 //This function parses the estimator options and puts them in a vector
488 inline void splitAtDash(string& estim, vector<string>& container) {
489         try {
490                 string individual;
491                 
492                 while (estim.find_first_of('-') != -1) {
493                         individual = estim.substr(0,estim.find_first_of('-'));
494                         if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
495                                 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
496                                 container.push_back(individual);
497                         }
498                 }
499                 //get last one
500                 container.push_back(estim);
501         }
502         catch(exception& e) {
503                 errorOut(e, "mothur", "splitAtDash");
504                 exit(1);
505         }
506 }
507
508 /***********************************************************************/
509 //This function parses the label options and puts them in a set
510 inline void splitAtDash(string& estim, set<string>& container) {
511         try {
512                 string individual;
513                 
514                 while (estim.find_first_of('-') != -1) {
515                         individual = estim.substr(0,estim.find_first_of('-'));
516                         if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
517                                 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
518                                 container.insert(individual);
519                         }
520                 }
521                 //get last one
522                 container.insert(estim);
523         }
524         catch(exception& e) {
525                 errorOut(e, "mothur", "splitAtDash");
526                 exit(1);
527         }
528 }
529 /***********************************************************************/
530 //This function parses the line options and puts them in a set
531 inline void splitAtDash(string& estim, set<int>& container) {
532         try {
533                 string individual;
534                 int lineNum;
535                 
536                 while (estim.find_first_of('-') != -1) {
537                         individual = estim.substr(0,estim.find_first_of('-'));
538                         if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
539                                 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
540                                 convert(individual, lineNum); //convert the string to int
541                                 container.insert(lineNum);
542                         }
543                 }
544                 //get last one
545                 convert(estim, lineNum); //convert the string to int
546                 container.insert(lineNum);
547         }
548         catch(exception& e) {
549                 errorOut(e, "mothur", "splitAtDash");
550                 exit(1);
551         }
552 }
553 /***********************************************************************/
554 //This function parses the a string and puts peices in a vector
555 inline void splitAtComma(string& estim, vector<string>& container) {
556         try {
557                 string individual;
558                 
559                 while (estim.find_first_of(',') != -1) {
560                         individual = estim.substr(0,estim.find_first_of(','));
561                         if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
562                                 estim = estim.substr(estim.find_first_of(',')+1, estim.length());
563                                 container.push_back(individual);
564                         }
565                 }
566                 //get last one
567                 container.push_back(estim);
568         }
569         catch(exception& e) {
570                 errorOut(e, "mothur", "splitAtComma");
571                 exit(1);
572         }
573 }
574 /***********************************************************************/
575
576 //This function splits up the various option parameters
577 inline void splitAtComma(string& prefix, string& suffix){
578         try {
579                 prefix = suffix.substr(0,suffix.find_first_of(','));
580                 if ((suffix.find_first_of(',')+2) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
581                         suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
582                         string space = " ";
583                         while(suffix.at(0) == ' ')
584                                 suffix = suffix.substr(1, suffix.length());
585                 }
586
587         }
588         catch(exception& e) {
589                 errorOut(e, "mothur", "splitAtComma");
590                 exit(1);
591         }
592 }
593 /***********************************************************************/
594
595 //This function separates the key value from the option value i.e. dist=96_...
596 inline void splitAtEquals(string& key, string& value){          
597         try {
598                 if(value.find_first_of('=') != -1){
599                         key = value.substr(0,value.find_first_of('='));
600                         if ((value.find_first_of('=')+1) <= value.length()) {
601                                 value = value.substr(value.find_first_of('=')+1, value.length());
602                         }
603                 }else{
604                         key = value;
605                         value = 1;
606                 }
607         }
608         catch(exception& e) {
609                 errorOut(e, "mothur", "splitAtEquals");
610                 exit(1);
611         }
612 }
613 /**************************************************************************************************/
614
615 inline bool inUsersGroups(string groupname, vector<string> Groups) {
616         try {
617                 for (int i = 0; i < Groups.size(); i++) {
618                         if (groupname == Groups[i]) { return true; }
619                 }
620                 return false;
621         }
622         catch(exception& e) {
623                 errorOut(e, "mothur", "inUsersGroups");
624                 exit(1);
625         }
626 }
627
628 /**************************************************************************************************/
629
630 inline void mothurOutJustToLog(string message) {
631         try {
632                 ofstream out;
633                 string logFileName = "mothur.logFile";
634                 openOutputFileAppend(logFileName, out);
635                 
636                 out << message;
637                 
638                 out.close();
639         }
640         catch(exception& e) {
641                 errorOut(e, "mothur", "mothurOutJustToLog");
642                 exit(1);
643         }
644 }
645
646
647 /**************************************************************************************************/
648
649 inline void mothurOut(float num) {
650         try {
651                 ofstream out;
652                 string logFileName = "mothur.logFile";
653                 openOutputFileAppend(logFileName, out);
654                 
655                 cout << num;  
656                 out << num;
657                 
658                 out.close();
659         }
660         catch(exception& e) {
661                 cout << "Error in mothur class mothurOut float" << endl;
662                 exit(1);
663         }
664 }
665 /***********************************************************************/
666 inline void mothurOut(double value) {
667         try {
668                 ofstream out;
669                 string logFileName = "mothur.logFile";
670                 openOutputFileAppend(logFileName, out);
671                 
672                 cout << value;  
673                 out << value;
674                 
675                 out.close();
676         }
677         catch(exception& e) {
678                 cout << "Error in mothur class mothurOut double" << endl;
679                 exit(1);
680         }
681 }
682
683 /***********************************************************************/
684 //this function determines if the user has given us labels that are smaller than the given label.
685 //if so then it returns true so that the calling function can run the previous valid distance.
686 //it's a "smart" distance function.  It also checks for invalid labels.
687 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
688         try {
689                 set<string>::iterator it;
690                 vector<float> orderFloat;
691                 map<string, float> userMap;  //the conversion process removes trailing 0's which we need to put back
692                 map<string, float>::iterator it2;
693                 float labelFloat;
694                 bool smaller = false;
695                 
696                 //unique is the smallest line
697                 if (label == "unique") {  return false;  }
698                 else { convert(label, labelFloat); }
699                 
700                 //go through users set and make them floats
701                 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
702                         
703                         float temp;
704                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
705                                 convert(*it, temp);
706                                 orderFloat.push_back(temp);
707                                 userMap[*it] = temp;
708                         }else if (*it == "unique") { 
709                                 orderFloat.push_back(-1.0);
710                                 userMap["unique"] = -1.0;
711                         }else {
712                                 if (errorOff == "") {  mothurOut(*it + " is not a valid label."); mothurOutEndLine();  }
713                                 userLabels.erase(*it); 
714                                 it--;
715                         }
716                 }
717                 
718                 //sort order
719                 sort(orderFloat.begin(), orderFloat.end());
720                 
721                 /*************************************************/
722                 //is this label bigger than any of the users labels
723                 /*************************************************/
724                                 
725                 //loop through order until you find a label greater than label
726                 for (int i = 0; i < orderFloat.size(); i++) {
727                         if (orderFloat[i] < labelFloat) {
728                                 smaller = true;
729                                 if (orderFloat[i] == -1) { 
730                                         if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
731                                         userLabels.erase("unique");
732                                 }
733                                 else {  
734                                         if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
735                                         string s = "";
736                                         for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {  
737                                                 if (it2->second == orderFloat[i]) {  
738                                                         s = it2->first;  
739                                                         //remove small labels
740                                                         userLabels.erase(s);
741                                                         break;
742                                                 }
743                                         }
744                                         if (errorOff == "") { mothurOut(s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
745                                 }
746                         //since they are sorted once you find a bigger one stop looking
747                         }else { break; }
748                 }
749                 
750                 return smaller;
751                                                 
752         }
753         catch(exception& e) {
754                 errorOut(e, "mothur", "anyLabelsToProcess");
755                 exit(1);
756         }
757 }
758
759 /**************************************************************************************************/
760 #endif
761