]> git.donarmstrong.com Git - mothur.git/blob - mothur.h
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / mothur.h
1 #ifndef MOTHUR_H
2 #define MOTHUR_H
3
4 using namespace std;
5
6
7 /*
8  *  mothur.h
9  *  Mothur
10  *
11  *  Created by Sarah Westcott on 2/19/09.
12  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13  *
14  */
15
/* This file contains all the standard includes we use in the project as well as some common utilities. */
17
18 //#include <cstddef>
19
20 //io libraries
21 #include <iostream>
22 #include <iomanip>
23 #include <fstream>
24 #include <sstream>
25
26 //exception
27 #include <stdexcept>
28 #include <exception>
29 #include <cstdlib> 
30
31
32 //containers
33 #include <vector>
34 #include <set>
35 #include <map>
36 #include <string>
37 #include <list>
38
39 //math
40 #include <cmath>
41 #include <math.h>
42 #include <algorithm>
43
// Shorthand alias for `unsigned long long` (at least 64 bits wide).
typedef unsigned long long int ull;
45
// Plain binary-tree style node: a pair of integer values, a pair of integer
// coefficients, and pointers to a left and a right IntNode.
// The struct does not initialise or own anything; whoever creates a node is
// responsible for filling in every field (meaning of the values/coefficients
// is defined by the code that uses this struct -- not visible in this header).
struct IntNode {
	int lvalue, rvalue;		// value attached to the left / right side
	int lcoef, rcoef;		// coefficient attached to the left / right side
	IntNode *left, *right;	// child links (not owned by this struct)
};
54         
55 /***********************************************************************/
56
57 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
58 // works for now, but there should be a way to do it without killing the whole program
59
// Exception thrown by convert() when a string cannot be parsed into the
// requested type. what() returns the message handed to the constructor
// (convert() passes the offending input string).
class BadConversion : public std::runtime_error {
public:
	BadConversion(const std::string& message)
		: std::runtime_error(message)
	{}
};
64
65 //**********************************************************************************************************************
66
67 template<typename T>
68 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
69         istringstream i(s);
70         char c;
71         if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
72                 throw BadConversion(s);
73 }
74 //**********************************************************************************************************************
75
// Silent, non-throwing variant of convert(): tries to parse `s` into `x`.
//   s                    text to parse
//   x                    receives the parsed value on success
//   failIfLeftoverChars  when true (default), characters remaining after the
//                        value make the conversion fail
// Returns true on success, false otherwise. Nothing is printed.
template<typename T>
inline bool convertTestFloat(const std::string& s, T& x, bool failIfLeftoverChars = true){
	std::istringstream parser(s);

	if (!(parser >> x)) {
		return false;
	}
	if (!failIfLeftoverChars) {
		return true;
	}

	// success only if there is nothing left to read
	char extra;
	return parser.get(extra).fail();
}
86
87 //**********************************************************************************************************************
88
// Non-throwing variant of convert() that reports failures to the user.
//   s                    text to parse
//   x                    receives the parsed value on success
//   failIfLeftoverChars  when true (default), characters remaining after the
//                        value make the conversion fail
// Returns true on success. On failure it prints a message to cout (the text
// always says "integer", whatever T is) and returns false.
template<typename T>
inline bool convertTest(const std::string& s, T& x, bool failIfLeftoverChars = true){
	std::istringstream parser(s);

	bool ok = !(parser >> x).fail();
	if (ok && failIfLeftoverChars) {
		char extra;
		ok = parser.get(extra).fail();
	}

	if (!ok) {
		std::cout << "'" << s << "' is unable to be converted into an integer.\n";
	}
	return ok;
}
100
101 //**********************************************************************************************************************
102
// Returns the text produced by streaming `x` with operator<< using the
// stream's default formatting.
template<typename T>
std::string toString(const T& x){
	std::ostringstream text;
	text << x;
	return text.str();
}
109
110 //**********************************************************************************************************************
111
// Returns the text produced by streaming `x` with the `hex` manipulator
// active (integers come out in lower-case hexadecimal, without a prefix).
template<typename T>
std::string toHex(const T& x){
	std::ostringstream text;
	text << std::hex << x;
	return text.str();
}
120 //**********************************************************************************************************************
121
// Returns `x` formatted in fixed-point notation.
//   x  value to format
//   i  number of digits written after the decimal point
// Types that ignore floating-point formatting (e.g. integers) are written
// as usual.
template<typename T>
std::string toString(const T& x, int i){
	std::ostringstream text;
	text << std::fixed << std::setprecision(i) << x;
	return text.str();
}
131
132
133 /***********************************************************************/
134
// Skips any run of whitespace at the current position of `f`, leaving the
// stream positioned on the first non-whitespace character.
//
// Fixes over the previous version:
//  * the character is kept as the int returned by get(), so bytes >= 0x80
//    are never passed to isspace() as negative values (undefined behaviour);
//  * nothing is put back once the end of input has been reached. An
//    unconditional putback() at EOF cannot succeed and, since C++11, clears
//    eofbit first -- which breaks callers looping on `while(!f.eof())`.
// After hitting the end of input the stream is left with eofbit (and
// failbit) set, exactly as get() leaves it.
inline void gobble(std::istream& f){

	const std::istream::int_type endOfInput = std::istream::traits_type::eof();

	std::istream::int_type c = f.get();
	while (c != endOfInput && isspace(c)) {
		c = f.get();
	}

	if (c != endOfInput) {
		// we read one character too many: hand it back to the stream
		f.putback(std::istream::traits_type::to_char_type(c));
	}

}
142
143 /***********************************************************************/
144
// Rounds `dist` to the nearest multiple of 1/precision.
//   dist       value to round
//   precision  scale factor, e.g. 100 keeps two decimal places
// The scaled value is truncated after adding 0.5, so rounding is only
// "to nearest" for non-negative input.
inline float roundDist(float dist, int precision){

	const int scaled = int(dist * precision + 0.5);
	return scaled / float(precision);

}
150
151 /***********************************************************************/
152
// Counts the entries of a comma-separated list.
// Returns 0 for the empty string, otherwise one more than the number of
// commas in `names` (empty entries between commas are counted too).
inline int getNumNames(std::string names){

	if (names.empty()) {
		return 0;
	}

	return 1 + static_cast<int>(std::count(names.begin(), names.end(), ','));

}
169
170 /**************************************************************************************************/
171
// Builds a table of binomial coefficients.
//   maxOrder  largest n (and k) to tabulate
// Returns a (maxOrder+1) x (maxOrder+1) table where table[n][k] is
// "n choose k"; entries with k > n are 0. A negative maxOrder yields an
// empty table.
//
// Fixes over the previous version: maxOrder == 0 no longer writes to row 1
// of a one-row table, negative orders no longer index an empty vector, and
// the table is filled with the plain Pascal's-triangle recurrence instead of
// an if / if / else chain that only produced the diagonal by accident.
inline std::vector<std::vector<double> > binomial(int maxOrder){

	if (maxOrder < 0) {
		return std::vector<std::vector<double> >();
	}

	const int order = maxOrder + 1;
	std::vector<std::vector<double> > table(order, std::vector<double>(order, 0.0));

	for (int n = 0; n <= maxOrder; n++) {
		table[n][0] = 1;
		// C(n,k) = C(n-1,k-1) + C(n-1,k); columns beyond n stay at 0
		for (int k = 1; k <= n; k++) {
			table[n][k] = table[n-1][k-1] + table[n-1][k];
		}
	}

	return table;
}
200
201 /***********************************************************************/
202
// Returns `longName` with everything after its last '.' removed; the dot
// itself is kept (e.g. "abc.fasta" -> "abc."). A name without any '.' is
// returned unchanged.
inline std::string getRootName(std::string longName){

	const std::string::size_type lastDot = longName.rfind('.');
	if (lastDot == std::string::npos) {
		return longName;
	}

	return longName.substr(0, lastDot + 1);
}
214 /***********************************************************************/
215
// Returns the part of `longName` that follows its last '/' (the file name
// without its directory). A name without any '/' is returned unchanged; a
// name ending in '/' yields the empty string.
inline std::string getSimpleName(std::string longName){

	const std::string::size_type lastSlash = longName.rfind('/');
	if (lastSlash == std::string::npos) {
		return longName;
	}

	return longName.substr(lastSlash + 1);
}
227 /***********************************************************************/
228
// Returns num! as an int; any num <= 1 (including negatives) gives 1.
// NOTE: the result is a plain int, so with a 32-bit int only num <= 12 is
// representable -- larger arguments overflow.
inline int factorial(int num){
	int product = 1;

	while (num > 1) {
		product *= num;
		--num;
	}

	return product;
}
/**************************************************************************************************/

// Disabled helper, kept for reference only (it is not compiled):
//
// double min(double x, double y)
// {
//     if(x<y){    return x;    }
//     else   {    return y;    }
// }

/***********************************************************************/
247
// Returns the directory part of `longName`: everything up to and including
// its last '/'. A name without any '/' is returned unchanged.
inline std::string getPathName(std::string longName){

	const std::string::size_type lastSlash = longName.rfind('/');
	if (lastSlash == std::string::npos) {
		return longName;
	}

	return longName.substr(0, lastSlash + 1);
}
259
260 /***********************************************************************/
261
// Opens `fileName` for reading on `fileHandle`.
// Returns 0 on success; on failure writes an error line to cerr and
// returns 1.
inline int openInputFile(std::string fileName, std::ifstream& fileHandle){

	fileHandle.open(fileName.c_str());

	if (fileHandle) {
		return 0;
	}

	std::cerr << "Error: Could not open " << fileName << std::endl;
	return 1;

}
274
275 /***********************************************************************/
276
// Opens `fileName` for writing on `fileHandle`, discarding any existing
// contents (ios::trunc).
// Returns 0 on success; on failure writes an error line to cerr and
// returns 1.
inline int openOutputFile(std::string fileName, std::ofstream& fileHandle){

	fileHandle.open(fileName.c_str(), std::ios::trunc);

	if (fileHandle) {
		return 0;
	}

	std::cerr << "Error: Could not open " << fileName << std::endl;
	return 1;

}
289 /***********************************************************************/
290
// Opens `fileName` for writing on `fileHandle` in append mode (ios::app), so
// existing contents are kept and new output goes to the end.
// Returns 0 on success; on failure writes an error line to cerr and
// returns 1.
inline int openOutputFileAppend(std::string fileName, std::ofstream& fileHandle){

	fileHandle.open(fileName.c_str(), std::ios::app);

	if (fileHandle) {
		return 0;
	}

	std::cerr << "Error: Could not open " << fileName << std::endl;
	return 1;

}
303
304
305 /***********************************************************************/
306
307 //This function parses the estimator options and puts them in a vector
// Splits `estim` at every '-' and appends the pieces, in order, to
// `container`.
//   estim      text to split; it is consumed -- on return it holds only the
//              last piece
//   container  receives one entry per piece (empty pieces included, so a
//              trailing dash adds a final empty string)
// Any exception is reported on cout and terminates the program via exit(1).
inline void splitAtDash(std::string& estim, std::vector<std::string>& container) {
	try {
		std::string::size_type dash = estim.find('-');

		while (dash != std::string::npos) {
			const std::string piece = estim.substr(0, dash);
			estim.erase(0, dash + 1);		//drop the piece and its dash
			container.push_back(piece);
			dash = estim.find('-');
		}

		//whatever is left is the last piece
		container.push_back(estim);
	}
	catch(std::exception& e) {
		std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
	catch(...) {
		std::cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}

}
332
333 /***********************************************************************/
334 //This function parses the label options and puts them in a set
// Splits `estim` at every '-' and inserts the pieces into `container`.
//   estim      text to split; it is consumed -- on return it holds only the
//              last piece
//   container  receives every piece (empty pieces included); being a set,
//              duplicates collapse
// Any exception is reported on cout and terminates the program via exit(1).
inline void splitAtDash(std::string& estim, std::set<std::string>& container) {
	try {
		std::string::size_type dash = estim.find('-');

		while (dash != std::string::npos) {
			const std::string piece = estim.substr(0, dash);
			estim.erase(0, dash + 1);		//drop the piece and its dash
			container.insert(piece);
			dash = estim.find('-');
		}

		//whatever is left is the last piece
		container.insert(estim);
	}
	catch(std::exception& e) {
		std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
	catch(...) {
		std::cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}

}
359 /***********************************************************************/
360 //This function parses the line options and puts them in a set
361 inline void splitAtDash(string& estim, set<int>& container) {
362         try {
363                 string individual;
364                 int lineNum;
365                 
366                 while (estim.find_first_of('-') != -1) {
367                         individual = estim.substr(0,estim.find_first_of('-'));
368                         if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
369                                 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
370                                 convert(individual, lineNum); //convert the string to int
371                                 container.insert(lineNum);
372                         }
373                 }
374                 //get last one
375                 convert(estim, lineNum); //convert the string to int
376                 container.insert(lineNum);
377         }
378         catch(exception& e) {
379                 cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
380                 exit(1);
381         }
382         catch(...) {
383                 cout << "An unknown error has occurred in the mothur class function splitAtDash. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
384                 exit(1);
385         }
386
387 }
388 /***********************************************************************/
//This function parses a string and puts the pieces in a vector
// Splits `estim` at every ',' and appends the pieces, in order, to
// `container`.
//   estim      text to split; it is consumed -- on return it holds only the
//              last piece
//   container  receives one entry per piece (empty pieces included, so a
//              trailing comma adds a final empty string)
// Any exception is reported on cout and terminates the program via exit(1).
inline void splitAtComma(std::string& estim, std::vector<std::string>& container) {
	try {
		std::string::size_type comma = estim.find(',');

		while (comma != std::string::npos) {
			const std::string piece = estim.substr(0, comma);
			estim.erase(0, comma + 1);		//drop the piece and its comma
			container.push_back(piece);
			comma = estim.find(',');
		}

		//whatever is left is the last piece
		container.push_back(estim);
	}
	catch(std::exception& e) {
		std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
	catch(...) {
		std::cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
}
413 /***********************************************************************/
414
415 //This function splits up the various option parameters
// Peels the first comma-separated item off `suffix`.
//   prefix  receives the text before the first ',' (all of `suffix` when it
//           contains no comma)
//   suffix  on input the text to split; on output the remainder after the
//           comma with leading spaces removed
// Special cases, kept from the original behaviour:
//   * no comma at all      -> `suffix` keeps its text (leading spaces removed)
//   * comma is the last char -> `suffix` is left untouched
// Fix over the previous version: when only spaces follow the comma
// (e.g. "a,  ") the space-stripping loop used to run off the end of the
// string, throw out_of_range and terminate the program; `suffix` now simply
// becomes empty. The unused local `space` is gone as well.
// Any exception is reported on cout and terminates the program via exit(1).
inline void splitAtComma(std::string& prefix, std::string& suffix){
	try {
		const std::string::size_type comma = suffix.find(',');
		prefix = suffix.substr(0, comma);

		//index where the remainder starts (0 when there is no comma)
		const std::string::size_type restStart = (comma == std::string::npos) ? 0 : comma + 1;

		if (restStart < suffix.length()) {  //there is something after the comma
			suffix.erase(0, restStart);
			//strip leading spaces; erases everything if only spaces remain
			suffix.erase(0, suffix.find_first_not_of(' '));
		}

	}
	catch(std::exception& e) {
		std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
	catch(...) {
		std::cout << "An unknown error has occurred in the mothur class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}

}
437 /***********************************************************************/
438
439 //This function separates the key value from the option value i.e. dist=96_...
// Separates an option of the form "key=value".
//   key    receives the text before the first '='
//   value  on input the whole option; on output the text after the first '='
// When the option contains no '=', `key` receives the whole text and `value`
// becomes the string "1".
// Fix over the previous version: the no-'=' branch used `value = 1;`, which
// selects string::operator=(char) and stored the control character '\x01'
// rather than the text "1".
// Any exception is reported on cout and terminates the program via exit(1).
inline void splitAtEquals(std::string& key, std::string& value){
	try {
		const std::string::size_type equals = value.find('=');

		if (equals != std::string::npos) {
			key = value.substr(0, equals);
			value.erase(0, equals + 1);		//keep only what follows the '='
		}
		else {
			key = value;
			value = "1";
		}
	}
	catch(std::exception& e) {
		std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
	catch(...) {
		std::cout << "An unknown error has occurred in the mothur class function splitAtEquals. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}

}
462 /**************************************************************************************************/
463
// Returns true when `groupname` is equal to one of the entries of `Groups`,
// false otherwise (exact, case-sensitive string comparison).
// Any exception is reported on cout and terminates the program via exit(1).
inline bool inUsersGroups(std::string groupname, std::vector<std::string> Groups) {
	try {
		return std::find(Groups.begin(), Groups.end(), groupname) != Groups.end();
	}
	catch(std::exception& e) {
		std::cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
	catch(...) {
		std::cout << "An unknown error has occurred in the mothur class function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
		exit(1);
	}
}
480
481 /***********************************************************************/
482 //this function determines if the user has given us labels that are smaller than the given label.
483 //if so then it returns true so that the calling function can run the previous valid distance.
484 //it's a "smart" distance function.  It also checks for invalid labels.
485 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
486         try {
487                 set<string>::iterator it;
488                 vector<float> orderFloat;
489                 map<string, float> userMap;  //the conversion process removes trailing 0's which we need to put back
490                 map<string, float>::iterator it2;
491                 float labelFloat;
492                 bool smaller = false;
493                 
494                 //unique is the smallest line
495                 if (label == "unique") {  return false;  }
496                 else { convert(label, labelFloat); }
497                 
498                 //go through users set and make them floats
499                 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
500                         
501                         float temp;
502                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
503                                 convert(*it, temp);
504                                 orderFloat.push_back(temp);
505                                 userMap[*it] = temp;
506                         }else if (*it == "unique") { 
507                                 orderFloat.push_back(-1.0);
508                                 userMap["unique"] = -1.0;
509                         }else {
510                                 if (errorOff == "") {  cout << *it << " is not a valid label." << endl;  }
511                                 userLabels.erase(*it); 
512                                 it--;
513                         }
514                 }
515                 
516                 //sort order
517                 sort(orderFloat.begin(), orderFloat.end());
518                 
519                 /*************************************************/
520                 //is this label bigger than any of the users labels
521                 /*************************************************/
522                                 
523                 //loop through order until you find a label greater than label
524                 for (int i = 0; i < orderFloat.size(); i++) {
525                         if (orderFloat[i] < labelFloat) {
526                                 smaller = true;
527                                 if (orderFloat[i] == -1) { 
528                                         if (errorOff == "") { cout << "Your file does not include the label unique." <<  endl; }
529                                         userLabels.erase("unique");
530                                 }
531                                 else {  
532                                         if (errorOff == "") { cout << "Your file does not include the label "; }
533                                         string s = "";
534                                         for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {  
535                                                 if (it2->second == orderFloat[i]) {  
536                                                         s = it2->first;  
537                                                         //remove small labels
538                                                         userLabels.erase(s);
539                                                         break;
540                                                 }
541                                         }
542                                         if (errorOff == "") { cout << s << ". I will use the next smallest distance. "  <<  endl; }
543                                 }
544                         //since they are sorted once you find a bigger one stop looking
545                         }else { break; }
546                 }
547                 
548                 return smaller;
549                                                 
550         }
551         catch(exception& e) {
552                 cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
553                 exit(1);
554         }
555         catch(...) {
556                 cout << "An unknown error has occurred in the mothur class function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
557                 exit(1);
558         }
559
560 }
561
562 /**************************************************************************************************/
563 #endif
564