]> git.donarmstrong.com Git - mothur.git/blob - mothur.h
dist.seqs can now use n processors, and only outputs the phylip formatted distance...
[mothur.git] / mothur.h
1 #ifndef MOTHUR_H
2 #define MOTHUR_H
3
4 using namespace std;
5
6
7 /*
8  *  mothur.h
9  *  Mothur
10  *
11  *  Created by Sarah Westcott on 2/19/09.
12  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13  *
14  */
15
16 /* This file contains all the standard includes we use in the project as well as some common utilities. */
17
18 //#include <cstddef>
19
20 //io libraries
21 #include <iostream>
22 #include <iomanip>
23 #include <fstream>
24 #include <sstream>
25
26 //exception
27 #include <stdexcept>
28 #include <exception>
29 #include <cstdlib> 
30
31
32 //containers
33 #include <vector>
34 #include <set>
35 #include <map>
36 #include <string>
37 #include <list>
38
39 //math
40 #include <cmath>
41 #include <math.h>
42 #include <algorithm>
43 #include <ctime>
44
#ifdef _WIN32
        // Windows-only wrappers that cast the argument to double before calling
        // the math routine (presumably to sidestep overload ambiguity for
        // integer arguments on older MSVC, and to supply a missing log2 --
        // TODO confirm against the supported toolchains).
        //
        // The argument is parenthesized so that an expression argument is cast
        // as a whole: without the parentheses exp(a/b) expanded to
        // exp((double) a/b), silently turning integer division into
        // floating-point division.
        #define exp(x) (exp((double) (x)))
        #define sqrt(x) (sqrt((double) (x)))
        #define log10(x) (log10((double) (x)))
        #define log2(x) (log10(x)/log10(2))
#endif
51
52
// Shorthand for the widest standard unsigned integer type.
typedef unsigned long long int ull;
54
// Plain aggregate node holding four integer fields and links to two other
// IntNodes. The exact meaning of the value/coefficient fields is defined by
// the code that fills them in (not visible in this header).
struct IntNode {
    int lvalue;       // integer value associated with the left side
    int rvalue;       // integer value associated with the right side
    int lcoef;        // integer coefficient associated with the left side
    int rcoef;        // integer coefficient associated with the right side
    IntNode* left;    // link to the left node (ownership not expressed here)
    IntNode* right;   // link to the right node (ownership not expressed here)
};
63
64 struct ThreadNode {
65         int* pid;
66         IntNode* left;
67         IntNode* right;
68 };
69
70 /***********************************************************************/
71
72 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
73 // works for now, but there should be a way to do it without killing the whole program
74
// Exception thrown when a string cannot be converted to the requested type.
// what() returns the message handed to the constructor.
class BadConversion : public std::runtime_error {
public:
    BadConversion(const std::string& s)
        : std::runtime_error(s)
    {
    }
};
79
80 //**********************************************************************************************************************
81
82 template<typename T>
83 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
84         istringstream i(s);
85         char c;
86         if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
87                 throw BadConversion(s);
88 }
89 //**********************************************************************************************************************
90
// Silent variant of the string-to-value conversion: parses s into x with
// stream extraction and reports success instead of throwing.
//
//   s                    text to parse
//   x                    receives the parsed value
//   failIfLeftoverChars  when true, any character left after the parsed value
//                        (trailing whitespace included) counts as failure
//
// Returns true when the conversion succeeded, false otherwise.
template<typename T>
inline bool convertTestFloat(const std::string& s, T& x, bool failIfLeftoverChars = true){
    std::istringstream input(s);
    char leftover;

    // Success means: extraction worked AND (strict mode found nothing left).
    // The leftover probe is only evaluated after a successful extraction.
    return (input >> x) && !(failIfLeftoverChars && input.get(leftover));
}
101
102 //**********************************************************************************************************************
103
// Reporting variant of the string-to-value conversion: parses s into x with
// stream extraction; on failure prints a diagnostic to standard output and
// returns false instead of throwing.
//
//   s                    text to parse
//   x                    receives the parsed value
//   failIfLeftoverChars  when true, any character left after the parsed value
//                        (trailing whitespace included) counts as failure
//
// Note: the diagnostic always says "integer", whatever T is.
template<typename T>
inline bool convertTest(const std::string& s, T& x, bool failIfLeftoverChars = true){
    std::istringstream input(s);
    char leftover;

    const bool converted = (input >> x) && !(failIfLeftoverChars && input.get(leftover));
    if (!converted) {
        std::cout << "'" << s << "' is unable to be converted into an integer.\n";
    }
    return converted;
}
115
116 //**********************************************************************************************************************
117
// Formats x with operator<< using default stream settings and returns the
// resulting text.
template<typename T>
std::string toString(const T&x){
    std::ostringstream formatted;
    formatted << x;
    return formatted.str();
}
124
125 //**********************************************************************************************************************
126
// Formats x with operator<< on a stream switched to hexadecimal base and
// returns the resulting text (lower-case digits, no "0x" prefix).
template<typename T>
std::string toHex(const T&x){
    std::ostringstream formatted;
    formatted.setf(std::ios::hex, std::ios::basefield);
    formatted << x;
    return formatted.str();
}
135 //**********************************************************************************************************************
136
// Formats x with operator<< in fixed-point notation using i digits after the
// decimal point and returns the resulting text.
template<typename T>
std::string toString(const T&x, int i){
    std::ostringstream formatted;
    formatted.setf(std::ios::fixed, std::ios::floatfield);
    formatted.precision(i);
    formatted << x;
    return formatted.str();
}
146
147
148 /***********************************************************************/
149
// Consumes leading whitespace from f so that the next read starts at the
// first non-whitespace character.
//
// If the stream runs out while skipping, it is left in the end-of-file state
// produced by the failed read (eofbit and failbit set), so callers can test
// f.eof() afterwards.
inline void gobble(std::istream& f){

    // Keep the result of get() as an int: it is either the next character as
    // an unsigned char value or the EOF sentinel. Narrowing it to char first
    // would hand isspace() negative values for bytes >= 0x80.
    const std::istream::int_type endOfInput = std::istream::traits_type::eof();

    std::istream::int_type next = f.get();
    while (next != endOfInput && isspace(next)) {
        next = f.get();
    }

    // Only a real character goes back. Putting back the EOF sentinel is
    // meaningless, and since C++11 putback() clears eofbit, which hid the
    // end-of-file condition from callers polling f.eof().
    if (next != endOfInput) {
        f.putback(std::istream::traits_type::to_char_type(next));
    }

}
157 /***********************************************************************/
158
// Returns true when f is exactly one of "TRUE", "T", "true" or "t".
// Any other spelling (including mixed case such as "True") yields false.
inline bool isTrue(std::string f){
    return f == "TRUE" || f == "T" || f == "true" || f == "t";
}
164
165 /***********************************************************************/
166
// Rounds dist to the nearest multiple of 1/precision.
//
//   dist       value to round
//   precision  scaling factor, not a digit count: 100 keeps two decimals,
//              1000 keeps three, and so on
//
// The rounding adds 0.5 and truncates toward zero, so it rounds half-up for
// non-negative inputs only.
inline float roundDist(float dist, int precision){
    const float scaled = dist * precision;
    const int nearest = int(scaled + 0.5);
    return nearest / float(precision);
}
172
173 /***********************************************************************/
174
// Counts the entries in a comma-separated list: 0 for an empty string,
// otherwise the number of commas plus one.
inline int getNumNames(std::string names){
    if (names.empty()) {
        return 0;
    }
    return 1 + static_cast<int>(std::count(names.begin(), names.end(), ','));
}
191
192 /**************************************************************************************************/
193
// Builds the table of binomial coefficients up to maxOrder.
//
// Returns a (maxOrder+1) x (maxOrder+1) table t where t[n][k] is "n choose k"
// for k <= n and 0 for k > n. A negative maxOrder yields an empty table.
//
// The previous implementation wrote rows 0 and 1 unconditionally, which ran
// past the end of the table for maxOrder == 0 (and for negative orders).
inline std::vector<std::vector<double> > binomial(int maxOrder){

    if (maxOrder < 0) {
        return std::vector<std::vector<double> >();
    }

    const std::vector<double>::size_type dimension = maxOrder + 1;
    std::vector<std::vector<double> > table(dimension, std::vector<double>(dimension, 0.0));

    for (int n = 0; n <= maxOrder; n++) {
        table[n][0] = 1;
        // Pascal's rule; entries with k > n keep their initial 0.
        for (int k = 1; k <= n; k++) {
            table[n][k] = table[n-1][k-1] + table[n-1][k];
        }
    }

    return table;
}
222
223 /***********************************************************************/
224
// Returns longName up to and including its last '.', e.g. "a/b.fasta" gives
// "a/b.". A name without any '.' is returned unchanged.
inline std::string getRootName(std::string longName){

    const std::string::size_type lastDot = longName.find_last_of('.');
    if (lastDot == std::string::npos) {
        return longName;
    }

    return longName.substr(0, lastDot + 1);
}
236 /***********************************************************************/
237
// Returns the part of longName that follows its last '/', e.g. "/x/y/z.txt"
// gives "z.txt". A name without any '/' is returned unchanged.
inline std::string getSimpleName(std::string longName){

    const std::string::size_type lastSlash = longName.find_last_of('/');
    if (lastSlash == std::string::npos) {
        return longName;
    }

    return longName.substr(lastSlash + 1);
}
249
250 /***********************************************************************/
251
// Returns num! computed in int arithmetic; any num below 2 (zero and negative
// values included) yields 1.
// NOTE(review): the product is accumulated in an int, so with a 32-bit int
// the result is only representable up to num == 12.
inline int factorial(int num){
    int product = 1;

    for (int factor = 2; factor <= num; ++factor) {
        product *= factor;
    }

    return product;
}
261 /**************************************************************************************************
262
263 double min(double x, double y)
264 {
265     if(x<y){    return x;    }
266     else   {    return y;    }
267 }
268
269 /***********************************************************************/
270
// Returns longName up to and including its last '/', e.g. "/a/b/c.txt" gives
// "/a/b/". A name without any '/' is returned unchanged.
inline std::string getPathName(std::string longName){

    const std::string::size_type lastSlash = longName.find_last_of('/');
    if (lastSlash == std::string::npos) {
        return longName;
    }

    return longName.substr(0, lastSlash + 1);
}
282
283 /***********************************************************************/
284
// Returns the part of longName starting at its last '.', dot included, e.g.
// "a.b.fasta" gives ".fasta". A name without any '.' is returned unchanged.
inline std::string getExtension(std::string longName){

    const std::string::size_type lastDot = longName.find_last_of('.');
    if (lastDot == std::string::npos) {
        return longName;
    }

    return longName.substr(lastDot);
}
296
297 /***********************************************************************/
298
// Opens fileName for reading on fileHandle.
// Returns 0 on success; on failure writes an error line to standard error and
// returns 1.
inline int openInputFile(std::string fileName, std::ifstream& fileHandle){

    fileHandle.open(fileName.c_str());

    if (fileHandle) {
        return 0;
    }

    std::cerr << "Error: Could not open " << fileName << std::endl;
    return 1;

}
311
312 /***********************************************************************/
313
// Opens fileName for writing on fileHandle, discarding any existing content.
// Returns 0 on success; on failure writes an error line to standard error and
// returns 1.
inline int openOutputFile(std::string fileName, std::ofstream& fileHandle){

    fileHandle.open(fileName.c_str(), std::ios::trunc);

    if (fileHandle) {
        return 0;
    }

    std::cerr << "Error: Could not open " << fileName << std::endl;
    return 1;

}
326 /***********************************************************************/
327
// Opens fileName for writing on fileHandle in append mode, so existing
// content is kept and new output goes to the end.
// Returns 0 on success; on failure writes an error line to standard error and
// returns 1.
inline int openOutputFileAppend(std::string fileName, std::ofstream& fileHandle){

    fileHandle.open(fileName.c_str(), std::ios::app);

    if (fileHandle) {
        return 0;
    }

    std::cerr << "Error: Could not open " << fileName << std::endl;
    return 1;

}
340
341
342 /***********************************************************************/
343
// Counts the '>' characters from the current read position of file to its
// end, then seeks the stream back to offset 0.
// NOTE(review): presumably each '>' starts one FASTA record, so the count is
// the number of sequences -- confirm against the callers.
inline int getNumSeqs(std::ifstream& file){

    std::istreambuf_iterator<char> cursor(file);
    std::istreambuf_iterator<char> endOfStream;

    const int numSeqs = std::count(cursor, endOfStream, '>');

    // Rewind so the caller can read the file from the start.
    file.seekg(0);

    return numSeqs;

}
351
352 /***********************************************************************/
353
// Splits estim at every '-' and appends the pieces, in order, to container.
//
// estim is consumed: on return it holds only the last piece (the text after
// the final dash, or the whole input when there is no dash). A trailing dash
// or an empty input therefore appends an empty string as the last piece.
//
// Any exception is reported on standard output and terminates the program
// with exit status 1.
inline void splitAtDash(std::string& estim, std::vector<std::string>& container) {
    try {
        std::string::size_type dash = estim.find('-');

        while (dash != std::string::npos) {
            container.push_back(estim.substr(0, dash));
            estim.erase(0, dash + 1);   // drop the piece and its dash
            dash = estim.find('-');
        }

        //whatever is left is the last piece
        container.push_back(estim);
    }
    catch(std::exception& e) {
        std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
    catch(...) {
        std::cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }

}
379
380 /***********************************************************************/
// Splits estim at every '-' and inserts the pieces into container.
//
// estim is consumed: on return it holds only the last piece (the text after
// the final dash, or the whole input when there is no dash). A trailing dash
// or an empty input therefore inserts an empty string.
//
// Any exception is reported on standard output and terminates the program
// with exit status 1.
inline void splitAtDash(std::string& estim, std::set<std::string>& container) {
    try {
        std::string::size_type dash = estim.find('-');

        while (dash != std::string::npos) {
            container.insert(estim.substr(0, dash));
            estim.erase(0, dash + 1);   // drop the piece and its dash
            dash = estim.find('-');
        }

        //whatever is left is the last piece
        container.insert(estim);
    }
    catch(std::exception& e) {
        std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
    catch(...) {
        std::cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }

}
406 /***********************************************************************/
407 //This function parses the line options and puts them in a set
408 inline void splitAtDash(string& estim, set<int>& container) {
409         try {
410                 string individual;
411                 int lineNum;
412                 
413                 while (estim.find_first_of('-') != -1) {
414                         individual = estim.substr(0,estim.find_first_of('-'));
415                         if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
416                                 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
417                                 convert(individual, lineNum); //convert the string to int
418                                 container.insert(lineNum);
419                         }
420                 }
421                 //get last one
422                 convert(estim, lineNum); //convert the string to int
423                 container.insert(lineNum);
424         }
425         catch(exception& e) {
426                 cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
427                 exit(1);
428         }
429         catch(...) {
430                 cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
431                 exit(1);
432         }
433
434 }
435 /***********************************************************************/
// Splits estim at every ',' and appends the pieces, in order, to container.
//
// estim is consumed: on return it holds only the last piece (the text after
// the final comma, or the whole input when there is no comma). A trailing
// comma or an empty input therefore appends an empty string as the last piece.
//
// Any exception is reported on standard output and terminates the program
// with exit status 1.
inline void splitAtComma(std::string& estim, std::vector<std::string>& container) {
    try {
        std::string::size_type comma = estim.find(',');

        while (comma != std::string::npos) {
            container.push_back(estim.substr(0, comma));
            estim.erase(0, comma + 1);   // drop the piece and its comma
            comma = estim.find(',');
        }

        //whatever is left is the last piece
        container.push_back(estim);
    }
    catch(std::exception& e) {
        std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
    catch(...) {
        std::cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
}
460 /***********************************************************************/
461
// Peels the first comma-separated option off suffix.
//
//   prefix  receives the text before the first ',' (all of suffix when it
//           contains no comma)
//   suffix  in: the option text; out: the text after the first ',' with
//           leading spaces removed
//
// Special cases kept from the original behaviour:
//   - a comma that is the last character leaves suffix untouched;
//   - with no comma, suffix only loses its leading spaces.
//
// A remainder made up solely of spaces (e.g. "a=1,  ") now yields an empty
// suffix. Previously the space-stripping loop called at(0) on the emptied
// string, and the resulting exception terminated the program.
//
// Any exception is reported on standard output and terminates the program
// with exit status 1.
inline void splitAtComma(std::string& prefix, std::string& suffix){
    try {
        const std::string::size_type comma = suffix.find(',');
        prefix = suffix.substr(0, comma);

        // Where the remainder starts: just past the comma, or at the very
        // beginning when there is no comma.
        const std::string::size_type restStart = (comma == std::string::npos) ? 0 : comma + 1;

        if (restStart < suffix.length()) {
            // Drops everything up to the first non-space character of the
            // remainder; erases the whole string when only spaces remain.
            suffix.erase(0, suffix.find_first_not_of(' ', restStart));
        }

    }
    catch(std::exception& e) {
        std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
    catch(...) {
        std::cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }

}
484 /***********************************************************************/
485
// Separates an option of the form key=value, e.g. "dist=96_...".
//
//   key    receives the text before the first '='
//   value  in: the whole option; out: the text after the first '='
//
// When the option contains no '=', key receives the whole text and value is
// set to the string "1". (The original assigned the integer 1, which
// std::string interprets as the single control character '\x01'.)
//
// Any exception is reported on standard output and terminates the program
// with exit status 1.
inline void splitAtEquals(std::string& key, std::string& value){
    try {
        const std::string::size_type equals = value.find('=');

        if (equals != std::string::npos) {
            key = value.substr(0, equals);
            value.erase(0, equals + 1);   // keep only what follows the '='
        }else{
            key = value;
            value = "1";
        }
    }
    catch(std::exception& e) {
        std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
    catch(...) {
        std::cout << "An unknown error has occurred in the mothur class function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }

}
509 /**************************************************************************************************/
510
// Returns true when groupname is equal to one of the entries of Groups.
//
// Any exception is reported on standard output and terminates the program
// with exit status 1.
inline bool inUsersGroups(std::string groupname, std::vector<std::string> Groups) {
    try {
        return std::find(Groups.begin(), Groups.end(), groupname) != Groups.end();
    }
    catch(std::exception& e) {
        std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
    catch(...) {
        std::cout << "An unknown error has occurred in the mothur class function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
        std::exit(1);
    }
}
527
528 /***********************************************************************/
529 //this function determines if the user has given us labels that are smaller than the given label.
530 //if so then it returns true so that the calling function can run the previous valid distance.
531 //it's a "smart" distance function.  It also checks for invalid labels.
532 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
533         try {
534                 set<string>::iterator it;
535                 vector<float> orderFloat;
536                 map<string, float> userMap;  //the conversion process removes trailing 0's which we need to put back
537                 map<string, float>::iterator it2;
538                 float labelFloat;
539                 bool smaller = false;
540                 
541                 //unique is the smallest line
542                 if (label == "unique") {  return false;  }
543                 else { convert(label, labelFloat); }
544                 
545                 //go through users set and make them floats
546                 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
547                         
548                         float temp;
549                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
550                                 convert(*it, temp);
551                                 orderFloat.push_back(temp);
552                                 userMap[*it] = temp;
553                         }else if (*it == "unique") { 
554                                 orderFloat.push_back(-1.0);
555                                 userMap["unique"] = -1.0;
556                         }else {
557                                 if (errorOff == "") {  cout << *it << " is not a valid label." << endl;  }
558                                 userLabels.erase(*it); 
559                                 it--;
560                         }
561                 }
562                 
563                 //sort order
564                 sort(orderFloat.begin(), orderFloat.end());
565                 
566                 /*************************************************/
567                 //is this label bigger than any of the users labels
568                 /*************************************************/
569                                 
570                 //loop through order until you find a label greater than label
571                 for (int i = 0; i < orderFloat.size(); i++) {
572                         if (orderFloat[i] < labelFloat) {
573                                 smaller = true;
574                                 if (orderFloat[i] == -1) { 
575                                         if (errorOff == "") { cout << "Your file does not include the label unique." <<  endl; }
576                                         userLabels.erase("unique");
577                                 }
578                                 else {  
579                                         if (errorOff == "") { cout << "Your file does not include the label "; }
580                                         string s = "";
581                                         for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {  
582                                                 if (it2->second == orderFloat[i]) {  
583                                                         s = it2->first;  
584                                                         //remove small labels
585                                                         userLabels.erase(s);
586                                                         break;
587                                                 }
588                                         }
589                                         if (errorOff == "") { cout << s << ". I will use the next smallest distance. "  <<  endl; }
590                                 }
591                         //since they are sorted once you find a bigger one stop looking
592                         }else { break; }
593                 }
594                 
595                 return smaller;
596                                                 
597         }
598         catch(exception& e) {
599                 cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
600                 exit(1);
601         }
602         catch(...) {
603                 cout << "An unknown error has occurred in the mothur class function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
604                 exit(1);
605         }
606
607 }
608
609 /**************************************************************************************************/
610 #endif
611