]> git.donarmstrong.com Git - mothur.git/blob - mothur.h
filter.seqs bug change
[mothur.git] / mothur.h
1 #ifndef MOTHUR_H
2 #define MOTHUR_H
3
4
5
6 /*
7  *  mothur.h
8  *  Mothur
9  *
10  *  Created by Sarah Westcott on 2/19/09.
11  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12  *
13  */
14
15 /* This file contains all the standard includes we use in the project as well as some common utilities. */
16
17 //#include <cstddef>
18
19 //io libraries
20 #include <iostream>
21 #include <iomanip>
22 #include <fstream>
23 #include <sstream>
24 #include <signal.h>
25
26
27 //exception
28 #include <stdexcept>
29 #include <exception>
30 #include <cstdlib> 
31
32
33 //containers
34 #include <vector>
35 #include <set>
36 #include <map>
37 #include <string>
38 #include <list>
39
40 //math
41 #include <cmath>
42 #include <math.h>
43 #include <algorithm>
44
45 //misc
46 #include <cerrno>
47 #include <ctime>
48 #include <limits>
49
50 #ifdef USE_MPI
51         #include "mpi.h"
52 #endif
53 /***********************************************************************/
54
55 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
56         #include <sys/wait.h>
57         #include <sys/time.h>
58         #include <sys/resource.h>
59         #include <unistd.h>
60         
61         #ifdef USE_READLINE
62                 #include <readline/readline.h>
63                 #include <readline/history.h>
64         #endif
65
66 #else
67         #include <conio.h> //allows unbuffered screen capture from stdin
68         #include <direct.h> //get cwd
69         #include <windows.h>
70         #include <psapi.h>
71
72 #endif
73
74 using namespace std;
75
// Function-like macros that replace the <cmath> names below for every translation unit including this header.
// exp, sqrt and log10 cast their argument to double before calling the real function.
// NOTE(review): the argument is cast without parentheses, so for a compound argument the cast binds only to the
// first operand (sqrt(a/b) expands to sqrt((double) a/b)) - confirm call sites before parenthesising.
76 #define exp(x) (exp((double) x))
77 #define sqrt(x) (sqrt((double) x))
78 #define log10(x) (log10((double) x))
// Base-2 logarithm computed as a ratio of two log10 calls (which are themselves the macro above).
79 #define log2(x) (log10(x)/log10(2))
// A value that compares unequal to itself is treated as NaN.
80 #define isnan(x) ((x) != (x))
// Compares the magnitude against double infinity, so both signs of infinity match.
81 #define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
82
// NOTE: despite its name, ull is an alias for unsigned long, not unsigned long long.
83 typedef unsigned long ull;
84
/* Node holding two integer values, two integer coefficients and links to a left and a right IntNode.
 * Every constructor now gives every member a defined value; previously the default constructor left all
 * members indeterminate and the four-argument constructor left lcoef/rcoef indeterminate. */
struct IntNode {
    int lvalue;
    int rvalue;
    int lcoef;
    int rcoef;
    IntNode* left;
    IntNode* right;

    // lv/rv initialise the values, l/r the links; the coefficients are not supplied here and start at 0.
    IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), lcoef(0), rcoef(0), left(l), right(r) {}

    // Empty node: all integers 0, both links NULL.
    IntNode() : lvalue(0), rvalue(0), lcoef(0), rcoef(0), left(NULL), right(NULL) {}
};
96
97 struct ThreadNode {
98         int* pid;
99         IntNode* left;
100         IntNode* right;
101 };
102
103 /************************************************************/
/* Record describing one cluster: its sequence count, its parent and its smallChild value. */
struct clusterNode {
    int numSeq;
    int parent;
    int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node

    // Stores the three arguments unchanged in numSeq, parent and smallChild respectively.
    clusterNode(int num, int par, int kid)
        : numSeq(num)
        , parent(par)
        , smallChild(kid)
    {}
};
110 /************************************************************/
/* A distance record: two integer ids (seq1, seq2) and the float distance stored with them. */
struct seqDist {
    int seq1;
    int seq2;
    float dist;
    // Default record is zeroed so that it never carries indeterminate values.
    seqDist() : seq1(0), seq2(0), dist(0.0f) {}
    seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
    ~seqDist() {}
};
//********************************************************************************************************************
// Strict "less than" on the dist field; used as a sort predicate it orders records from lowest to highest distance.
// The closing brace of this function was missing, which made the rest of the header part of its body.
inline bool compareSequenceDistance(seqDist left, seqDist right){
    return (left.dist < right.dist);
}

124 /***********************************************************************/
125
126 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
127 // works for now, but there should be a way to do it without killing the whole program
128
/* Exception type thrown by convert(); it is a runtime_error whose what() text is the string passed in. */
class BadConversion : public runtime_error {
public:
    BadConversion(const string& offendingText)
        : runtime_error(offendingText)
    {}
};
133
134 //**********************************************************************************************************************
135
136 template<typename T>
137 inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){
138         istringstream i(s);
139         char c;
140         if (!(i >> x) || (failIfLeftoverChars && i.get(c)))
141                 throw BadConversion(s);
142 }
143
144 //**********************************************************************************************************************
145
/* Silent variant of convert(): returns true when s can be extracted into x, false otherwise.
 * With failIfLeftoverChars set, any character remaining after the value also yields false. */
template<typename T>
inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){
    istringstream parser(s);

    if (!(parser >> x)) { return false; }

    if (failIfLeftoverChars) {
        char leftover;
        if (parser.get(leftover)) { return false; }
    }

    return true;
}
156
157 //**********************************************************************************************************************
158
/* Like convertTestFloat(), but writes a message to cout before returning false.
 * Returns true when s is extracted into x (and, if failIfLeftoverChars is set, nothing follows the value). */
template<typename T>
inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){
    istringstream parser(s);

    bool converted = false;
    if (parser >> x) {
        converted = true;
        if (failIfLeftoverChars) {
            char leftover;
            if (parser.get(leftover)) { converted = false; }
        }
    }

    if (!converted) {
        cout << "unable to be converted into an integer.\n" << endl;
    }
    return converted;
}
170
171 //**********************************************************************************************************************
172
/* Returns the text produced by streaming x with operator<< using default formatting. */
template<typename T>
string toString(const T&x){
    ostringstream buffer;
    buffer << x;
    return buffer.str();
}
179
180 //**********************************************************************************************************************
181
/* Returns the text produced by streaming x with the stream's integer base set to hexadecimal. */
template<typename T>
string toHex(const T&x){
    ostringstream buffer;
    buffer.setf(ios::hex, ios::basefield);   // same effect as inserting the hex manipulator
    buffer << x;
    return buffer.str();
}
190 //**********************************************************************************************************************
191
/* Returns x streamed in fixed floating-point notation with precision i. */
template<typename T>
string toString(const T&x, int i){
    ostringstream buffer;
    buffer.setf(ios::fixed, ios::floatfield);   // same effect as inserting the fixed manipulator
    buffer.precision(i);
    buffer << x;
    return buffer.str();
}
201 /***********************************************************************/
/* Opens fileName on fileHandle in append mode.
 * Returns 0 on success; on failure prints a message to cout and returns 1. */
inline int openOutputFileAppend(string fileName, ofstream& fileHandle){
    fileHandle.open(fileName.c_str(), ios::app);

    if (fileHandle) { return 0; }

    cout << "Error: Could not open " << fileName << endl;
    return 1;
}
214 /***********************************************************************/
215
/* Consumes leading whitespace from f, leaving the first non-whitespace character unread.
 * When only whitespace remains, the stream ends in the end-of-file state (eof() is true).
 *
 * The value returned by get() is kept in the stream's int_type so that end-of-input can be told apart from a
 * real character, and nothing is put back once the end has been reached. The old code called putback() after
 * EOF; from C++11 on putback() clears eofbit, so callers that test eof() after gobble() never saw it. */
inline void gobble(istream& f){
    const istream::int_type endOfInput = istream::traits_type::eof();

    istream::int_type next = f.get();
    while (next != endOfInput && isspace(next)) {
        next = f.get();
    }

    // only a real character can be returned to the stream
    if (next != endOfInput) {
        f.putback(istream::traits_type::to_char_type(next));
    }
}
223 /***********************************************************************/
224
/* String-stream overload: consumes leading whitespace from f, leaving the first non-whitespace character unread.
 * When only whitespace remains, the stream ends in the end-of-file state (eof() is true).
 *
 * The value returned by get() is kept in the stream's int_type so that end-of-input can be told apart from a
 * real character, and nothing is put back once the end has been reached. The old code called putback() after
 * EOF; from C++11 on putback() clears eofbit, so callers that test eof() after gobble() never saw it. */
inline void gobble(istringstream& f){
    const istringstream::int_type endOfInput = istringstream::traits_type::eof();

    istringstream::int_type next = f.get();
    while (next != endOfInput && isspace(next)) {
        next = f.get();
    }

    // only a real character can be returned to the stream
    if (next != endOfInput) {
        f.putback(istringstream::traits_type::to_char_type(next));
    }
}
232
233 /***********************************************************************/
234
/* Reads characters from fileHandle up to, and consuming, the first '\n', '\r' or '\f', and returns the text
 * before that terminator. Only one terminator character is consumed, so for "\r\n" the '\n' stays in the stream.
 *
 * Reading also stops when get() reports end-of-input. The old loop stored that sentinel in a char and appended
 * it to the line (a stray 0xFF after a last line without terminator) and never ended on a failed stream.
 * Any std::exception is reported on cout and ends the program with exit(1). */
inline string getline(istringstream& fileHandle) {
    try {
        const istringstream::int_type endOfInput = istringstream::traits_type::eof();
        string line = "";

        while (!fileHandle.eof()) {
            //get next character
            const istringstream::int_type next = fileHandle.get();
            if (next == endOfInput) { break; }   // nothing was read, so nothing may be appended

            //are you at the end of the line
            const char c = istringstream::traits_type::to_char_type(next);
            if ((c == '\n') || (c == '\r') || (c == '\f')) { break; }
            line += c;
        }

        return line;
    }
    catch(exception&) {
        cout << "Error in mothur function getline" << endl;
        exit(1);
    }
}
257 /***********************************************************************/
258
/* Reads characters from fileHandle up to, and consuming, the first '\n', '\r' or '\f', and returns the text
 * before that terminator. Only one terminator character is consumed, so for "\r\n" the '\n' stays in the stream.
 *
 * Reading also stops when get() reports end-of-input. The old loop stored that sentinel in a char and appended
 * it to the line (a stray 0xFF after a last line without terminator) and never ended on a stream that failed
 * to open. Any std::exception is reported on cout and ends the program with exit(1). */
inline string getline(ifstream& fileHandle) {
    try {
        const ifstream::int_type endOfInput = ifstream::traits_type::eof();
        string line = "";

        while (!fileHandle.eof()) {
            //get next character
            const ifstream::int_type next = fileHandle.get();
            if (next == endOfInput) { break; }   // nothing was read, so nothing may be appended

            //are you at the end of the line
            const char c = ifstream::traits_type::to_char_type(next);
            if ((c == '\n') || (c == '\r') || (c == '\f')) { break; }
            line += c;
        }

        return line;
    }
    catch(exception&) {
        cout << "Error in mothur function getline" << endl;
        exit(1);
    }
}
281 /***********************************************************************/
282
/* Returns true only for the exact spellings "TRUE", "T", "true" and "t"; every other string is false. */
inline bool isTrue(string f){
    return (f == "TRUE") || (f == "T") || (f == "true") || (f == "t");
}
288
289 /***********************************************************************/
290
/* Scales dist by precision, adds one half, truncates to int and divides by precision again
 * (e.g. precision 100 keeps two decimals). */
inline float roundDist(float dist, int precision){
    const int scaled = int(dist * precision + 0.5);
    return scaled / float(precision);
}
296 /***********************************************************************/
297
/* Scales dist by precision, rounds the product up with ceil, converts it to int and divides by precision again. */
inline float ceilDist(float dist, int precision){
    const int scaledUp = int(ceil(dist * precision));
    return scaledUp / float(precision);
}
303
304 /***********************************************************************/
305
/* Counts the entries of a comma-separated list: 0 for an empty string, otherwise one more than the number of commas. */
inline int getNumNames(string names){
    if (names.empty()) { return 0; }

    return 1 + static_cast<int>(std::count(names.begin(), names.end(), ','));
}
322
323 /**************************************************************************************************/
324
/* Builds a (maxOrder+1) x (maxOrder+1) table in which entry [n][k] is the binomial coefficient "n choose k"
 * (0 where k > n), filled row by row with Pascal's rule.
 *
 * maxOrder == 0 yields the 1x1 table {{1}} and a negative maxOrder yields an empty table; the previous code
 * wrote to rows that did not exist for those inputs. For maxOrder >= 1 the table is unchanged. */
inline vector<vector<double> > binomial(int maxOrder){

    if (maxOrder < 0) { return vector<vector<double> >(); }

    const size_t dim = static_cast<size_t>(maxOrder) + 1;
    vector<vector<double> > table(dim, vector<double>(dim, 0.0));

    for (size_t n = 0; n < dim; n++) {
        table[n][0] = 1;
        // entries right of the diagonal keep their initial 0, which also supplies the "above" term for k == n
        for (size_t k = 1; k <= n; k++) {
            table[n][k] = table[n-1][k-1] + table[n-1][k];
        }
    }

    return table;
}
353
354 /***********************************************************************/
355
/* Returns longName cut after its last '.', keeping the dot ("abc.fasta" -> "abc.").
 * A name without any '.' is returned unchanged. */
inline string getRootName(string longName){
    const string::size_type lastDot = longName.rfind('.');

    if (lastDot == string::npos) { return longName; }

    return longName.substr(0, lastDot + 1);
}
367 /***********************************************************************/
368
/* Returns the part of longName after its last '/' or '\'; a name without either separator is returned unchanged. */
inline string getSimpleName(string longName){
    const string::size_type lastSeparator = longName.find_last_of("/\\");

    if (lastSeparator == string::npos) { return longName; }

    return longName.substr(lastSeparator + 1);
}
387
388 /***********************************************************************/
389
/* Multiplies 1 * 2 * ... * num in int arithmetic; returns 1 when num is below 1. */
inline int factorial(int num){
    int product = 1;
    int factor = 1;

    while (factor <= num) {
        product *= factor;
        factor++;
    }

    return product;
}
399 /**************************************************************************************************
400
401 double min(double x, double y)
402 {
403     if(x<y){    return x;    }
404     else   {    return y;    }
405 }
406
407 /***********************************************************************/
408
/* Returns longName up to and including its last '/' or '\'.
 * A name without either separator is returned unchanged (not as an empty string). */
inline string getPathName(string longName){
    const string::size_type lastSeparator = longName.find_last_of("/\\");

    if (lastSeparator == string::npos) { return longName; }

    return longName.substr(0, lastSeparator + 1);
}
420 /***********************************************************************/
421
/* Returns longName up to and including its last '~', '/' or '\', or an empty string when none of them occurs. */
inline string hasPath(string longName){
    const string::size_type lastMarker = longName.find_last_of("~/\\");

    if (lastMarker == string::npos) { return ""; }

    return longName.substr(0, lastMarker + 1);
}
435
436 /***********************************************************************/
437
/* Returns the tail of longName starting at its last '.', dot included ("a.txt" -> ".txt").
 * A name without any '.' is returned unchanged. */
inline string getExtension(string longName){
    const string::size_type lastDot = longName.rfind('.');

    if (lastDot == string::npos) { return longName; }

    return longName.substr(lastDot);
}
449 /***********************************************************************/
450 inline bool isBlank(string fileName){
451         
452         ifstream fileHandle;
453         fileHandle.open(fileName.c_str());
454         if(!fileHandle) {
455                 cout << "Error: Could not open " << fileName << endl;
456                 return false;
457         }else {
458                 //check for blank file
459                 gobble(fileHandle);
460                 if (fileHandle.eof()) { fileHandle.close(); return true;  }
461         }
462         return false;
463 }
464 /***********************************************************************/
465
466 inline string getFullPathName(string fileName){
467         try{
468         
469         string path = hasPath(fileName);
470         string newFileName;
471         int pos;
472         
473         if (path == "") { return fileName; } //its a simple name
474         else { //we need to complete the pathname
475                 // ex. ../../../filename 
476                 // cwd = /user/work/desktop
477                                 
478                 string cwd;
479                 //get current working directory 
480                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)   
481                         
482                         if (path.find("~") != -1) { //go to home directory
483                                 string homeDir = getenv ("HOME");
484                                 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
485                                 return newFileName;
486                         }else { //find path
487                                 if (path.rfind("./") == -1) { return fileName; } //already complete name
488                                 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
489                                 
490                                 char* cwdpath = new char[1024];
491
492                                 size_t size;
493                                 cwdpath=getcwd(cwdpath,size);
494                         
495                                 cwd = cwdpath;
496                                 
497                                 //rip off first '/'
498                                 string simpleCWD;
499                                 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
500                                 
501                                 //break apart the current working directory
502                                 vector<string> dirs;
503                                 while (simpleCWD.find_first_of('/') != -1) {
504                                         string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
505                                         simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
506                                         dirs.push_back(dir);
507                                 }
508                                 //get last one              // ex. ../../../filename = /user/work/desktop/filename
509                                 dirs.push_back(simpleCWD);  //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
510                                 
511                         
512                                 int index = dirs.size()-1;
513                 
514                                 while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
515                                         if (pos == 0) { break;  //you are at the end
516                                         }else if (path[(pos-1)] == '.') { //you want your parent directory ../
517                                                 path = path.substr(0, pos-1);
518                                                 index--;
519                                                 if (index == 0) {  break; }
520                                         }else if (path[(pos-1)] == '/') { //you want the current working dir ./
521                                                 path = path.substr(0, pos);
522                                         }else if (pos == 1) { break;  //you are at the end
523                                         }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
524                                 }
525                         
526                                 for (int i = index; i >= 0; i--) {
527                                         newFileName = dirs[i] +  "/" + newFileName;             
528                                 }
529                                 
530                                 newFileName =  "/" +  newFileName;
531                                 return newFileName;
532                         }       
533                 #else
534                         if (path.find("~") != -1) { //go to home directory
535                                 string homeDir = getenv ("HOMEPATH");
536                                 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
537                                 return newFileName;
538                         }else { //find path
539                                 if (path.rfind(".\\") == -1) { return fileName; } //already complete name
540                                 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
541                                                         
542                                 char *cwdpath = NULL;
543                                 cwdpath = getcwd(NULL, 0); // or _getcwd
544                                 if ( cwdpath != NULL) { cwd = cwdpath; }
545                                 else { cwd = "";  }
546                                 
547                                 //break apart the current working directory
548                                 vector<string> dirs;
549                                 while (cwd.find_first_of('\\') != -1) {
550                                         string dir = cwd.substr(0,cwd.find_first_of('\\'));
551                                         cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
552                                         dirs.push_back(dir);
553                 
554                                 }
555                                 //get last one
556                                 dirs.push_back(cwd);  //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
557                                         
558                                 int index = dirs.size()-1;
559                                         
560                                 while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
561                                         if (pos == 0) { break;  //you are at the end
562                                         }else if (path[(pos-1)] == '.') { //you want your parent directory ../
563                                                 path = path.substr(0, pos-1);
564                                                 index--;
565                                                 if (index == 0) {  break; }
566                                         }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
567                                                 path = path.substr(0, pos);
568                                         }else if (pos == 1) { break;  //you are at the end
569                                         }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
570                                 }
571                         
572                                 for (int i = index; i >= 0; i--) {
573                                         newFileName = dirs[i] +  "\\" + newFileName;            
574                                 }
575                                 
576                                 return newFileName;
577                         }
578                         
579                 #endif
580         }
581         }
582         catch(exception& e) {
583                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function getFullPathName. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
584                 exit(1);
585         }       
586 }
587 /***********************************************************************/
588
589 inline int openInputFile(string fileName, ifstream& fileHandle, string m){
590         
591         //get full path name
592         string completeFileName = getFullPathName(fileName);
593
594         fileHandle.open(completeFileName.c_str());
595         if(!fileHandle) {
596                 return 1;
597         }else {
598                 //check for blank file
599                 gobble(fileHandle);
600                 return 0;
601         }       
602 }
603 /***********************************************************************/
604
605 inline int openInputFile(string fileName, ifstream& fileHandle){
606         
607         //get full path name
608         string completeFileName = getFullPathName(fileName);
609
610         fileHandle.open(completeFileName.c_str());
611         if(!fileHandle) {
612                 cout << "Error: Could not open " << completeFileName << endl;
613                 return 1;
614         }
615         else {
616                 //check for blank file
617                 gobble(fileHandle);
618                 if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl;   }
619                 
620                 return 0;
621         }
622         
623 }
624 /***********************************************************************/
625
626 inline int renameFile(string oldName, string newName){
627         
628         ifstream inTest;
629         int exist = openInputFile(newName, inTest, "");
630         
631 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
632         if (exist == 0) { //you could open it so you want to delete it
633                 inTest.close();
634                 string command = "rm " + newName;
635                 system(command.c_str());
636         }
637                         
638         string command = "mv " + oldName + " " + newName;
639         system(command.c_str());
640 #else
641         remove(newName.c_str());
642         int renameOk = rename(oldName.c_str(), newName.c_str());
643 #endif
644         return 0;
645 }
646
647 /***********************************************************************/
648
649 inline int openOutputFile(string fileName, ofstream& fileHandle){
650         
651         string completeFileName = getFullPathName(fileName);
652         
653         fileHandle.open(completeFileName.c_str(), ios::trunc);
654         if(!fileHandle) {
655                 cout << "Error: Could not open " << completeFileName << endl;
656                 return 1;
657         }
658         else {
659                 return 0;
660         }
661
662 }
663
664 /***********************************************************************/
665
/* Counts every '>' character from the current read position to the end of file, then seeks back to offset 0
 * and returns the count. */
inline int getNumSeqs(ifstream& file){

    int numSeqs = 0;

    const istreambuf_iterator<char> endOfFile;
    for (istreambuf_iterator<char> cursor(file); cursor != endOfFile; ++cursor) {
        if (*cursor == '>') { numSeqs++; }
    }

    file.seekg(0);
    return numSeqs;
}
673 /***********************************************************************/
674 inline void getNumSeqs(ifstream& file, int& numSeqs){
675         
676         string input;
677         numSeqs = 0;
678         while(!file.eof()){
679                 input = getline(file);
680                 if (input.length() != 0) {
681                         if(input[0] == '>'){ numSeqs++; }
682                 }
683         }
684 }
685
686 /***********************************************************************/
687
/* Returns true when some element of group compares equal to member. */
inline bool inVector(string member, vector<string> group){
    return std::find(group.begin(), group.end(), member) != group.end();
}
696 /***********************************************************************/
697
/* Splits estim at every occurrence of symbol and appends the pieces, in order, to container.
 * estim is modified: on return it holds the last piece, which is also the last element appended.
 * Empty pieces are kept, so a leading or trailing symbol produces an empty string.
 * Any std::exception is reported on cout and ends the program with exit(1); the report now names
 * splitAtChar instead of splitAtDash. */
inline void splitAtChar(string& estim, vector<string>& container, char symbol) {
    try {
        string::size_type cut = estim.find(symbol);

        while (cut != string::npos) {
            container.push_back(estim.substr(0, cut));
            estim = estim.substr(cut + 1);   // cut + 1 never exceeds the length, so this cannot throw
            cut = estim.find(symbol);
        }

        //get last one
        container.push_back(estim);
    }
    catch(exception& e) {
        cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtChar. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
        exit(1);
    }
}
718
719 /***********************************************************************/
720
/* Splits estim at every '-' and appends the pieces, in order, to container.
 * estim is modified: on return it holds the last piece, which is also the last element appended.
 * Any std::exception is reported on cout and ends the program with exit(1). */
inline void splitAtDash(string& estim, vector<string>& container) {
    try {
        for (string::size_type dash = estim.find('-'); dash != string::npos; dash = estim.find('-')) {
            const string piece = estim.substr(0, dash);
            estim = estim.substr(dash + 1);
            container.push_back(piece);
        }

        //get last one
        container.push_back(estim);
    }
    catch(exception& e) {
        cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
        exit(1);
    }
}
741
742 /***********************************************************************/
/* Splits estim at every '-' and inserts the pieces into container (a set, so duplicates collapse).
 * estim is modified: on return it holds the last piece.
 * Any std::exception is reported on cout and ends the program with exit(1). */
inline void splitAtDash(string& estim, set<string>& container) {
    try {
        for (string::size_type dash = estim.find('-'); dash != string::npos; dash = estim.find('-')) {
            const string piece = estim.substr(0, dash);
            estim = estim.substr(dash + 1);
            container.insert(piece);
        }

        //get last one
        container.insert(estim);
    }
    catch(exception& e) {
        cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
        exit(1);
    }
}
763 /***********************************************************************/
764 //This function parses the line options and puts them in a set
765 inline void splitAtDash(string& estim, set<int>& container) {
766         try {
767                 string individual;
768                 int lineNum;
769                 
770                 while (estim.find_first_of('-') != -1) {
771                         individual = estim.substr(0,estim.find_first_of('-'));
772                         if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
773                                 estim = estim.substr(estim.find_first_of('-')+1, estim.length());
774                                 convert(individual, lineNum); //convert the string to int
775                                 container.insert(lineNum);
776                         }
777                 }
778                 //get last one
779                 convert(estim, lineNum); //convert the string to int
780                 container.insert(lineNum);
781         }
782         catch(exception& e) {
783                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
784                 exit(1);
785         }       
786 }
787 /***********************************************************************/
/* Splits estim at every ',' and appends the pieces, in order, to container.
 * estim is modified: on return it holds the last piece, which is also the last element appended.
 * Any std::exception is reported on cout and ends the program with exit(1). */
inline void splitAtComma(string& estim, vector<string>& container) {
    try {
        for (string::size_type comma = estim.find(','); comma != string::npos; comma = estim.find(',')) {
            const string piece = estim.substr(0, comma);
            estim = estim.substr(comma + 1);
            container.push_back(piece);
        }

        //get last one
        container.push_back(estim);
    }
    catch(exception& e) {
        cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
        exit(1);
    }
}
808 /***********************************************************************/
809
//Splits off the first comma-separated option from suffix.
//  prefix - receives the text before the first comma (all of suffix when there is no comma).
//  suffix - in: the option text; out: the text after the first comma with leading spaces removed.
//When suffix contains no comma it is left in place (only leading spaces are stripped), so callers
//can still read the last option from it.  A trailing comma, or a comma followed only by spaces,
//leaves suffix empty; previously the former left suffix unchanged (so a caller looping while a
//comma is present never finished) and the latter threw out_of_range from at(0).
inline void splitAtComma(string& prefix, string& suffix){
	try {
		const string::size_type commaPos = suffix.find(',');
		prefix = suffix.substr(0, commaPos);

		//drop the consumed option together with its comma
		if (commaPos != string::npos) { suffix.erase(0, commaPos + 1); }

		//strip leading spaces; find_first_not_of returns npos for an empty or all-space string,
		//in which case erase clears whatever is left
		suffix.erase(0, suffix.find_first_not_of(' '));
	}
	catch(exception& e) {
		cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtComma. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
		exit(1);
	}
}
827 /***********************************************************************/
828
//Separates an option of the form key=value, e.g. "dist=96_...".
//  value - in: the whole option text; out: the text after the first '='.
//  key   - receives the text before the first '='.
//When there is no '=', key receives the whole text and value becomes a one-character string
//holding the character with code 1.
//NOTE(review): that is what the original "value = 1" does (string assignment from a char);
//it looks like "1" may have been intended - confirm against callers before changing.
inline void splitAtEquals(string& key, string& value){
	try {
		const string::size_type equalsPos = value.find('=');

		if (equalsPos == string::npos) {
			key = value;
			value = static_cast<char>(1);
		}else{
			key = value.substr(0, equalsPos);
			value.erase(0, equalsPos + 1);
		}
	}
	catch(exception& e) {
		cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtEquals. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
		exit(1);
	}
}
847
848 /**************************************************************************************************/
849
//returns true if groupname is one of the strings in Groups (exact, case-sensitive match)
inline bool inUsersGroups(string groupname, vector<string> Groups) {
	try {
		return std::find(Groups.begin(), Groups.end(), groupname) != Groups.end();
	}
	catch(exception& e) {
		cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
		exit(1);
	}
}
862 /**************************************************************************************************/
863 //returns true if any of the strings in first vector are in second vector
864 inline bool inUsersGroups(vector<string> groupnames, vector<string> Groups) {
865         try {
866                 
867                 for (int i = 0; i < groupnames.size(); i++) {
868                         if (inUsersGroups(groupnames[i], Groups)) { return true; }
869                 }
870                 return false;
871         }
872         catch(exception& e) {
873                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function inUsersGroups. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
874                 exit(1);
875         }       
876 }
877 /***********************************************************************/
878 //this function determines if the user has given us labels that are smaller than the given label.
879 //if so then it returns true so that the calling function can run the previous valid distance.
880 //it's a "smart" distance function.  It also checks for invalid labels.
881 inline bool anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
882         try {
883                 
884                 set<string>::iterator it;
885                 vector<float> orderFloat;
886                 map<string, float> userMap;  //the conversion process removes trailing 0's which we need to put back
887                 map<string, float>::iterator it2;
888                 float labelFloat;
889                 bool smaller = false;
890                 
891                 //unique is the smallest line
892                 if (label == "unique") {  return false;  }
893                 else { 
894                         if (convertTestFloat(label, labelFloat)) {
895                                 convert(label, labelFloat); 
896                         }else { //cant convert 
897                                 return false;
898                         }
899                 }
900                 
901                 //go through users set and make them floats
902                 for(it = userLabels.begin(); it != userLabels.end(); ++it) {
903                         
904                         float temp;
905                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
906                                 convert(*it, temp);
907                                 orderFloat.push_back(temp);
908                                 userMap[*it] = temp;
909                         }else if (*it == "unique") { 
910                                 orderFloat.push_back(-1.0);
911                                 userMap["unique"] = -1.0;
912                         }else {
913                                 if (errorOff == "") {  cout << *it << " is not a valid label." << endl;  }
914                                 userLabels.erase(*it); 
915                                 it--;
916                         }
917                 }
918                 
919                 //sort order
920                 sort(orderFloat.begin(), orderFloat.end());
921                 
922                 /*************************************************/
923                 //is this label bigger than any of the users labels
924                 /*************************************************/
925                                 
926                 //loop through order until you find a label greater than label
927                 for (int i = 0; i < orderFloat.size(); i++) {
928                         if (orderFloat[i] < labelFloat) {
929                                 smaller = true;
930                                 if (orderFloat[i] == -1) { 
931                                         if (errorOff == "") { cout << "Your file does not include the label unique." << endl; }
932                                         userLabels.erase("unique");
933                                 }
934                                 else {  
935                                         if (errorOff == "") { cout << "Your file does not include the label " << endl; }
936                                         string s = "";
937                                         for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {  
938                                                 if (it2->second == orderFloat[i]) {  
939                                                         s = it2->first;  
940                                                         //remove small labels
941                                                         userLabels.erase(s);
942                                                         break;
943                                                 }
944                                         }
945                                         if (errorOff == "") {cout << s <<  ". I will use the next smallest distance. " << endl; }
946                                 }
947                         //since they are sorted once you find a bigger one stop looking
948                         }else { break; }
949                 }
950                 
951                 return smaller;
952                                                 
953         }
954         catch(exception& e) {
955                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function anyLabelsToProcess. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
956                 exit(1);
957         }       
958 }
959
960 /**************************************************************************************************/
961 inline void appendFiles(string temp, string filename) {
962         try{
963                 ofstream output;
964                 ifstream input;
965         
966                 //open output file in append mode
967                 openOutputFileAppend(filename, output);
968                 int ableToOpen = openInputFile(temp, input, "no error");
969                 
970                 if (ableToOpen == 0) { //you opened it
971                         while(char c = input.get()){
972                                 if(input.eof())         {       break;                  }
973                                 else                            {       output << c;    }
974                         }
975                         input.close();
976                 }
977                 
978                 output.close();
979         }
980         catch(exception& e) {
981                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function appendFiles. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
982                 exit(1);
983         }       
984 }
985
986 /**************************************************************************************************/
987 inline string sortFile(string distFile, string outputDir){
988         try {   
989         
990                 //if (outputDir == "") {  outputDir += hasPath(distFile);  }
991                 string outfile = getRootName(distFile) + "sorted.dist";
992
993                 
994                 //if you can, use the unix sort since its been optimized for years
995                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
996                         string command = "sort -n -k +3 " + distFile + " -o " + outfile;
997                         system(command.c_str());
998                 #else //you are stuck with my best attempt...
999                         //windows sort does not have a way to specify a column, only a character in the line
1000                         //since we cannot assume that the distance will always be at the the same character location on each line
1001                         //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1002                 
1003                         //read in file line by file and put distance first
1004                         string tempDistFile = distFile + ".temp";
1005                         ifstream input;
1006                         ofstream output;
1007                         openInputFile(distFile, input);
1008                         openOutputFile(tempDistFile, output);
1009
1010                         string firstName, secondName;
1011                         float dist;
1012                         while (input) {
1013                                 input >> firstName >> secondName >> dist;
1014                                 output << dist << '\t' << firstName << '\t' << secondName << endl;
1015                                 gobble(input);
1016                         }
1017                         input.close();
1018                         output.close();
1019                 
1020         
1021                         //sort using windows sort
1022                         string tempOutfile = outfile + ".temp";
1023                         string command = "sort " + tempDistFile + " /O " + tempOutfile;
1024                         system(command.c_str());
1025                 
1026                         //read in sorted file and put distance at end again
1027                         ifstream input2;
1028                         openInputFile(tempOutfile, input2);
1029                         openOutputFile(outfile, output);
1030                 
1031                         while (input2) {
1032                                 input2 >> dist >> firstName >> secondName;
1033                                 output << firstName << '\t' << secondName << '\t' << dist << endl;
1034                                 gobble(input2);
1035                         }
1036                         input2.close();
1037                         output.close();
1038                 
1039                         //remove temp files
1040                         remove(tempDistFile.c_str());
1041                         remove(tempOutfile.c_str());
1042                 #endif
1043                 
1044                 return outfile;
1045         }
1046         catch(exception& e) {
1047                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function sortfile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
1048                 exit(1);
1049         }       
1050 }
1051 /**************************************************************************************************/
1052 inline vector<unsigned long int> setFilePosFasta(string filename, int& num) {
1053
1054                         vector<unsigned long int> positions;
1055                         ifstream inFASTA;
1056                         openInputFile(filename, inFASTA);
1057                                                 
1058                         string input;
1059                         while(!inFASTA.eof()){
1060                                 input = getline(inFASTA); 
1061                                 if (input.length() != 0) {
1062                                         if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
1063                                 }
1064                                 gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1065                         }
1066                         inFASTA.close();
1067                 
1068                         num = positions.size();
1069                 
1070                         /*FILE * pFile;
1071                         long size;
1072                 
1073                         //get num bytes in file
1074                         pFile = fopen (filename.c_str(),"rb");
1075                         if (pFile==NULL) perror ("Error opening file");
1076                         else{
1077                                 fseek (pFile, 0, SEEK_END);
1078                                 size=ftell (pFile);
1079                                 fclose (pFile);
1080                         }*/
1081                         
1082                         unsigned long int size = positions[(positions.size()-1)];
1083                         ifstream in;
1084                         openInputFile(filename, in);
1085                         
1086                         in.seekg(size);
1087                 
1088                         while(char c = in.get()){
1089                                 if(in.eof())            {       break;  }
1090                                 else                            {       size++; }
1091                         }
1092                         in.close();
1093                 
1094                         positions.push_back(size);
1095                 
1096                         return positions;
1097 }
1098 /**************************************************************************************************/
1099 inline vector<unsigned long int> setFilePosEachLine(string filename, int& num) {
1100
1101                         vector<unsigned long int> positions;
1102                         ifstream in;
1103                         openInputFile(filename, in);
1104                                 
1105                         string input;
1106                         while(!in.eof()){
1107                                 unsigned long int lastpos = in.tellg();
1108                                 input = getline(in); 
1109                                 if (input.length() != 0) {
1110                                         unsigned long int pos = in.tellg(); 
1111                                         if (pos != -1) { positions.push_back(pos - input.length() - 1); }
1112                                         else {  positions.push_back(lastpos);  }
1113                                 }
1114                                 gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions
1115                         }
1116                         in.close();
1117                 
1118                         num = positions.size();
1119                 
1120                         FILE * pFile;
1121                         unsigned long int size;
1122                 
1123                         //get num bytes in file
1124                         pFile = fopen (filename.c_str(),"rb");
1125                         if (pFile==NULL) perror ("Error opening file");
1126                         else{
1127                                 fseek (pFile, 0, SEEK_END);
1128                                 size=ftell (pFile);
1129                                 fclose (pFile);
1130                         }
1131                 
1132                         positions.push_back(size);
1133                 
1134                         return positions;
1135 }
1136 /**************************************************************************************************/
1137
1138 inline vector<unsigned long int> divideFile(string filename, int& proc) {
1139         try{
1140         
1141                 vector<unsigned long int> filePos;
1142                 filePos.push_back(0);
1143                 
1144                 FILE * pFile;
1145                 unsigned long int size;
1146                 
1147                 //get num bytes in file
1148                 pFile = fopen (filename.c_str(),"rb");
1149                 if (pFile==NULL) perror ("Error opening file");
1150                 else{
1151                         fseek (pFile, 0, SEEK_END);
1152                         size=ftell (pFile);
1153                         fclose (pFile);
1154                 }
1155         
1156                 //estimate file breaks
1157                 unsigned long int chunkSize = 0;
1158                 chunkSize = size / proc;
1159                 
1160                 //file to small to divide by processors
1161                 if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
1162         
1163                 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1164                 for (int i = 0; i < proc; i++) {
1165                         unsigned long int spot = (i+1) * chunkSize;
1166                                         
1167                         ifstream in;
1168                         openInputFile(filename, in);
1169                         in.seekg(spot);
1170                         
1171                         //look for next '>'
1172                         unsigned long int newSpot = spot;
1173                         while (!in.eof()) {
1174                            char c = in.get();
1175                            if (c == '>') {   in.putback(c); newSpot = in.tellg(); break;  }
1176                         }
1177                                 
1178                         //there was not another sequence before the end of the file
1179                         unsigned long int sanityPos = in.tellg();
1180                         if (sanityPos == -1) {  break;  }
1181                         else {   filePos.push_back(newSpot);  }
1182                         
1183                         in.close();
1184                 }
1185                 
1186                 //save end pos
1187                 filePos.push_back(size);
1188                 
1189                 //sanity check filePos
1190                 for (int i = 0; i < (filePos.size()-1); i++) {
1191                         if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
1192                 }
1193
1194                 proc = (filePos.size() - 1);
1195                 
1196                 return filePos;
1197         }
1198         catch(exception& e) {
1199                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function divideFile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
1200                 exit(1);
1201         }
1202 }
1203 /**************************************************************************************************/
1204 inline bool checkReleaseVersion(ifstream& file, string version) {
1205         try {
1206                 
1207                 bool good = true;
1208                 
1209                 string line = getline(file);  
1210
1211                 //before we added this check
1212                 if (line[0] != '#') {  good = false;  }
1213                 else {
1214                         //rip off #
1215                         line = line.substr(1);
1216                         
1217                         vector<string> versionVector;
1218                         splitAtChar(version, versionVector, '.');
1219                         
1220                         //check file version
1221                         vector<string> linesVector;
1222                         splitAtChar(line, linesVector, '.');
1223                         
1224                         if (versionVector.size() != linesVector.size()) { good = false; }
1225                         else {
1226                                 for (int j = 0; j < versionVector.size(); j++) {
1227                                         int num1, num2;
1228                                         convert(versionVector[j], num1);
1229                                         convert(linesVector[j], num2);
1230                                         
1231                                         //if mothurs version is newer than this files version, then we want to remake it
1232                                         if (num1 > num2) {  good = false; break;  }
1233                                 }
1234                         }
1235                         
1236                 }
1237                 
1238                 if (!good) {  file.close();  }
1239                 else { file.seekg(0);  }
1240                 
1241                 return good;
1242         }
1243         catch(exception& e) {
1244                 cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function checkReleaseVersion. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
1245                 exit(1);
1246         }
1247 }
1248 /**************************************************************************************************/
1249 #endif
1250