]> git.donarmstrong.com Git - mothur.git/blobdiff - mothur.h
filter.seqs bug change
[mothur.git] / mothur.h
index 16a9844b70799c1334e95aabc20c6557688bdaaf..d5086b3909ece580bbb552454ced3a742b790dcd 100644 (file)
--- a/mothur.h
+++ b/mothur.h
 #include <ctime>
 #include <limits>
 
+#ifdef USE_MPI
+       #include "mpi.h"
+#endif
 /***********************************************************************/
 
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
        #include <sys/wait.h>
+       #include <sys/time.h>
+       #include <sys/resource.h>
        #include <unistd.h>
        
        #ifdef USE_READLINE
@@ -61,6 +66,9 @@
 #else
        #include <conio.h> //allows unbuffered screen capture from stdin
        #include <direct.h> //get cwd
+       #include <windows.h>
+       #include <psapi.h>
+
 #endif
 
 using namespace std;
@@ -214,7 +222,17 @@ inline void gobble(istream& f){
 }
 /***********************************************************************/
 
-inline string getline(ifstream& fileHandle) {
+inline void gobble(istringstream& f){ // Skips leading whitespace in f so that the next read starts at the first non-whitespace character.
+       // d receives each character taken from the stream
+       char d;
+    while(isspace(d=f.get()))          {;} // keep consuming while the character just read is whitespace
+       f.putback(d); // hand the first non-whitespace character back to the stream
+       // NOTE(review): at end of stream get() returns EOF, so d holds that value narrowed to char and putback runs on a stream that has already failed -- confirm callers check or clear the stream state.
+}
+
+/***********************************************************************/
+
+inline string getline(istringstream& fileHandle) {
        try {
        
                string line = "";
@@ -236,7 +254,30 @@ inline string getline(ifstream& fileHandle) {
                exit(1);
        }
 }
+/***********************************************************************/
 
+inline string getline(ifstream& fileHandle) { // Reads characters from fileHandle up to the next '\n', '\r' or '\f' and returns them without the terminator.
+       try {
+       // line accumulates the characters read so far
+               string line = "";
+               
+               while (!fileHandle.eof())       {
+                       //read the next character from the file
+                       char c = fileHandle.get(); 
+                       
+                       //stop at a line terminator; it is consumed but not stored in line
+                       if ((c == '\n') || (c == '\r') || (c == '\f')){  break; }       // only this one character is consumed, so for "\r\n" the '\n' stays in the stream
+                       else {          line += c;              } // NOTE(review): when get() hits end-of-file, c holds EOF narrowed to char and is appended here before eof() is re-tested -- confirm the extra trailing character is acceptable.
+               }
+               
+               return line;
+               
+       }
+       catch(exception& e) { // on any std::exception: report on stdout and terminate the program
+               cout << "Error in mothur function getline" << endl;
+               exit(1);
+       }
+}
 /***********************************************************************/
 
 inline bool isTrue(string f){
@@ -251,6 +292,13 @@ inline float roundDist(float dist, int precision){
        
        return int(dist * precision + 0.5)/float(precision);
        
+}
+/***********************************************************************/
+
+inline float ceilDist(float dist, int precision){ // Rounds dist up to the next multiple of 1/precision (e.g. precision 100 keeps two decimals).
+       // scale up, take the ceiling, truncate to int, then scale back down as a float
+       return int(ceil(dist * precision))/float(precision);
+       // NOTE(review): the int() cast assumes ceil(dist * precision) fits in an int -- TODO confirm the expected range of dist.
 }
 
 /***********************************************************************/
@@ -376,7 +424,7 @@ inline string hasPath(string longName){
        string path = "";
        
        size_t found;
-       found=longName.find_last_of("/\\");
+       found=longName.find_last_of("~/\\");
 
        if(found != longName.npos){
                path = longName.substr(0, found+1);
@@ -430,89 +478,103 @@ inline string getFullPathName(string fileName){
                string cwd;
                //get current working directory 
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)   
-                       if (path.rfind("./") == -1) { return fileName; } //already complete name
-                       else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
                        
-                       char* cwdpath = new char[1024];
+                       if (path.find("~") != -1) { //go to home directory
+                               string homeDir = getenv ("HOME");
+                               newFileName = homeDir + fileName.substr(fileName.find("~")+1);
+                               return newFileName;
+                       }else { //find path
+                               if (path.rfind("./") == -1) { return fileName; } //already complete name
+                               else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
+                               
+                               char* cwdpath = new char[1024];
 
-                       size_t size;
-                       cwdpath=getcwd(cwdpath,size);
-               
-                       cwd = cwdpath;
-                       
-                       //rip off first '/'
-                       string simpleCWD;
-                       if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
+                               size_t size;
+                               cwdpath=getcwd(cwdpath,size);
                        
-                       //break apart the current working directory
-                       vector<string> dirs;
-                       while (simpleCWD.find_first_of('/') != -1) {
-                               string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
-                               simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
-                               dirs.push_back(dir);
-                       }
-                       //get last one              // ex. ../../../filename = /user/work/desktop/filename
-                       dirs.push_back(simpleCWD);  //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
+                               cwd = cwdpath;
+                               
+                               //rip off first '/'
+                               string simpleCWD;
+                               if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
+                               
+                               //break apart the current working directory
+                               vector<string> dirs;
+                               while (simpleCWD.find_first_of('/') != -1) {
+                                       string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
+                                       simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
+                                       dirs.push_back(dir);
+                               }
+                               //get last one              // ex. ../../../filename = /user/work/desktop/filename
+                               dirs.push_back(simpleCWD);  //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
+                               
                        
+                               int index = dirs.size()-1;
                
-                       int index = dirs.size()-1;
-               
-                       while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
-                               if (path[(pos-1)] == '.') { //you want your parent directory ../
-                                       path = path.substr(0, pos-1);
-                                       index--;
-                                       if (index == 0) {  break; }
-                               }else if (path[(pos-1)] == '/') { //you want the current working dir ./
-                                       path = path.substr(0, pos);
-                               }else if (pos == 1) { break; 
-                               }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
-                       }
-               
-                       for (int i = index; i >= 0; i--) {
-                               newFileName = dirs[i] +  "/" + newFileName;             
-                       }
+                               while((pos = path.rfind("./")) != -1) { //while you don't have a complete path
+                                       if (pos == 0) { break;  //you are at the end
+                                       }else if (path[(pos-1)] == '.') { //you want your parent directory ../
+                                               path = path.substr(0, pos-1);
+                                               index--;
+                                               if (index == 0) {  break; }
+                                       }else if (path[(pos-1)] == '/') { //you want the current working dir ./
+                                               path = path.substr(0, pos);
+                                       }else if (pos == 1) { break;  //you are at the end
+                                       }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
+                               }
                        
-                       newFileName =  "/" +  newFileName;
-                       return newFileName;
+                               for (int i = index; i >= 0; i--) {
+                                       newFileName = dirs[i] +  "/" + newFileName;             
+                               }
                                
+                               newFileName =  "/" +  newFileName;
+                               return newFileName;
+                       }       
                #else
-                       if (path.rfind(".\\") == -1) { return fileName; } //already complete name
-                       else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
-                                               
-                       char *cwdpath = NULL;
-                       cwdpath = getcwd(NULL, 0); // or _getcwd
-                       if ( cwdpath != NULL) { cwd = cwdpath; }
-                       else { cwd = "";  }
-                       
-                       //break apart the current working directory
-                       vector<string> dirs;
-                       while (cwd.find_first_of('\\') != -1) {
-                               string dir = cwd.substr(0,cwd.find_first_of('\\'));
-                               cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
-                               dirs.push_back(dir);
-       
-                       }
-                       //get last one
-                       dirs.push_back(cwd);  //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
+                       if (path.find("~") != -1) { //go to home directory
+                               string homeDir = getenv ("HOMEPATH");
+                               newFileName = homeDir + fileName.substr(fileName.find("~")+1);
+                               return newFileName;
+                       }else { //find path
+                               if (path.rfind(".\\") == -1) { return fileName; } //already complete name
+                               else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
+                                                       
+                               char *cwdpath = NULL;
+                               cwdpath = getcwd(NULL, 0); // or _getcwd
+                               if ( cwdpath != NULL) { cwd = cwdpath; }
+                               else { cwd = "";  }
                                
-                       int index = dirs.size()-1;
-                               
-                       while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
-                               if (path[(pos-1)] == '.') { //you want your parent directory ../
-                                       path = path.substr(0, pos-1);
-                                       index--;
-                                       if (index == 0) {  break; }
-                               }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
-                                       path = path.substr(0, pos);
-                               }else if (pos == 1) { break; 
-                               }else {  cout << "cannot resolve path for " <<  fileName << endl;  return fileName; }
-                       }
+                               //break apart the current working directory
+                               vector<string> dirs;
+                               while (cwd.find_first_of('\\') != -1) {
+                                       string dir = cwd.substr(0,cwd.find_first_of('\\'));
+                                       cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
+                                       dirs.push_back(dir);
                
-                       for (int i = index; i >= 0; i--) {
-                               newFileName = dirs[i] +  "\\" + newFileName;            
-                       }
+                               }
+                               //get last one
+                               dirs.push_back(cwd);  //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
+                                       
+                               int index = dirs.size()-1;
+                                       
+                               while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path
+                                       if (pos == 0) { break;  //you are at the end
+                                       }else if (path[(pos-1)] == '.') { //you want your parent directory ../
+                                               path = path.substr(0, pos-1);
+                                               index--;
+                                               if (index == 0) {  break; }
+                                       }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
+                                               path = path.substr(0, pos);
+                                       }else if (pos == 1) { break;  //you are at the end
+                                       }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
+                               }
                        
-                       return newFileName;
+                               for (int i = index; i >= 0; i--) {
+                                       newFileName = dirs[i] +  "\\" + newFileName;            
+                               }
+                               
+                               return newFileName;
+                       }
                        
                #endif
        }
@@ -553,7 +615,7 @@ inline int openInputFile(string fileName, ifstream& fileHandle){
        else {
                //check for blank file
                gobble(fileHandle);
-               if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl;  return 1;  }
+               if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl;   }
                
                return 0;
        }
@@ -608,6 +670,19 @@ inline int getNumSeqs(ifstream& file){
        return numSeqs;
 
 }
+/***********************************************************************/
+inline void getNumSeqs(ifstream& file, int& numSeqs){ // Counts the lines in file whose first character is '>' and stores the total in numSeqs.
+       // input holds the line currently being examined
+       string input;
+       numSeqs = 0; // any value the caller passed in is discarded
+       while(!file.eof()){ // reads from the current stream position until end-of-file
+               input = getline(file);
+               if (input.length() != 0) { // guard so input[0] is only read on non-empty lines
+                       if(input[0] == '>'){ numSeqs++; }
+               }
+       }
+}
+
 /***********************************************************************/
 
 inline bool inVector(string member, vector<string> group){
@@ -620,6 +695,29 @@ inline bool inVector(string member, vector<string> group){
 }
 /***********************************************************************/
 
+//Splits estim at every occurrence of symbol and appends the pieces to container. estim is modified: on return it holds only the last piece.
+inline void splitAtChar(string& estim, vector<string>& container, char symbol) {
+       try {
+               string individual;
+               
+               while (estim.find_first_of(symbol) != -1) { // -1 compared against size_t is string::npos, i.e. "symbol not found"
+                       individual = estim.substr(0,estim.find_first_of(symbol));
+                       if ((estim.find_first_of(symbol)+1) <= estim.length()) { //meant to guard against the symbol being the last character; a found index is always below length(), so this test always holds
+                               estim = estim.substr(estim.find_first_of(symbol)+1, estim.length());
+                               container.push_back(individual);
+                       }
+               }
+               //whatever remains after the last symbol (possibly an empty string) is the final piece
+               container.push_back(estim);
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; // NOTE(review): the message names splitAtDash although this function is splitAtChar
+               exit(1);
+       }       
+}
+
+/***********************************************************************/
+
 //This function parses the estimator options and puts them in a vector
 inline void splitAtDash(string& estim, vector<string>& container) {
        try {
@@ -746,6 +844,7 @@ inline void splitAtEquals(string& key, string& value){
                exit(1);
        }       
 }
+
 /**************************************************************************************************/
 
 inline bool inUsersGroups(string groupname, vector<string> Groups) {
@@ -791,7 +890,13 @@ inline bool anyLabelsToProcess(string label, set<string>& userLabels, string err
                
                //unique is the smallest line
                if (label == "unique") {  return false;  }
-               else { convert(label, labelFloat); }
+               else { 
+                       if (convertTestFloat(label, labelFloat)) {
+                               convert(label, labelFloat); 
+                       }else { //cant convert 
+                               return false;
+                       }
+               }
                
                //go through users set and make them floats
                for(it = userLabels.begin(); it != userLabels.end(); ++it) {
@@ -860,14 +965,16 @@ inline void appendFiles(string temp, string filename) {
        
                //open output file in append mode
                openOutputFileAppend(filename, output);
-               openInputFile(temp, input);
+               int ableToOpen = openInputFile(temp, input, "no error");
                
-               while(char c = input.get()){
-                       if(input.eof())         {       break;                  }
-                       else                            {       output << c;    }
+               if (ableToOpen == 0) { //you opened it
+                       while(char c = input.get()){
+                               if(input.eof())         {       break;                  }
+                               else                            {       output << c;    }
+                       }
+                       input.close();
                }
                
-               input.close();
                output.close();
        }
        catch(exception& e) {
@@ -877,9 +984,12 @@ inline void appendFiles(string temp, string filename) {
 }
 
 /**************************************************************************************************/
-inline string sortFile(string distFile){
+inline string sortFile(string distFile, string outputDir){
        try {   
+       
+               //if (outputDir == "") {  outputDir += hasPath(distFile);  }
                string outfile = getRootName(distFile) + "sorted.dist";
+
                
                //if you can, use the unix sort since its been optimized for years
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
@@ -939,5 +1049,202 @@ inline string sortFile(string distFile){
        }       
 }
 /**************************************************************************************************/
+inline vector<unsigned long int> setFilePosFasta(string filename, int& num) { // Collects the file offset of every line beginning with '>' in filename; num receives how many were found and one extra end offset is appended to the result.
+
+                       vector<unsigned long int> positions;
+                       ifstream inFASTA;
+                       openInputFile(filename, inFASTA); // return value is ignored here
+                                               
+                       string input;
+                       while(!inFASTA.eof()){
+                               input = getline(inFASTA); 
+                               if (input.length() != 0) {
+                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } // tellg() is just past the single terminator getline consumed, so subtracting the line length and 1 gives the start of the line
+                               } // NOTE(review): tellg() is not checked for failure here (setFilePosEachLine tests for -1) -- confirm a '>' line that ends at end-of-file cannot occur.
+                               gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
+                       }
+                       inFASTA.close();
+               
+                       num = positions.size(); // counts only the '>' lines; the end offset appended below is not included
+               
+                       /*FILE * pFile;
+                       long size;
+               
+                       //get num bytes in file
+                       pFile = fopen (filename.c_str(),"rb");
+                       if (pFile==NULL) perror ("Error opening file");
+                       else{
+                               fseek (pFile, 0, SEEK_END);
+                               size=ftell (pFile);
+                               fclose (pFile);
+                       }*/
+                       
+                       unsigned long int size = positions[(positions.size()-1)]; // NOTE(review): positions is indexed without an emptiness check; a file with no '>' line makes this read out of range.
+                       ifstream in;
+                       openInputFile(filename, in);
+                       
+                       in.seekg(size); // restart at the last '>' line
+               
+                       while(char c = in.get()){ // advance size one character at a time; because c is the loop condition a NUL character also ends the loop
+                               if(in.eof())            {       break;  }
+                               else                            {       size++; }
+                       }
+                       in.close();
+               
+                       positions.push_back(size); // final entry: offset reached by counting characters from the last '>' line until eof
+               
+                       return positions;
+}
+/**************************************************************************************************/
+inline vector<unsigned long int> setFilePosEachLine(string filename, int& num) { // Collects the file offset of each non-empty line in filename; num receives how many were found and the byte size of the file is appended as the final entry.
+
+                       vector<unsigned long int> positions;
+                       ifstream in;
+                       openInputFile(filename, in); // return value is ignored here
+                               
+                       string input;
+                       while(!in.eof()){
+                               unsigned long int lastpos = in.tellg(); // offset before the line is read; used as a fallback below
+                               input = getline(in); 
+                               if (input.length() != 0) {
+                                       unsigned long int pos = in.tellg(); 
+                                       if (pos != -1) { positions.push_back(pos - input.length() - 1); } // pos is just past the single terminator getline consumed, so this is the start of the line
+                                       else {  positions.push_back(lastpos);  } // tellg() reported failure (-1) after the read, so fall back to the offset taken before it
+                               }
+                               gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions
+                       }
+                       in.close();
+               
+                       num = positions.size(); // counts only the lines; the file size appended below is not included
+               
+                       FILE * pFile;
+                       unsigned long int size; // NOTE(review): stays uninitialized when fopen fails below, yet is still pushed onto positions.
+               
+                       //get the number of bytes in the file by seeking to its end
+                       pFile = fopen (filename.c_str(),"rb");
+                       if (pFile==NULL) perror ("Error opening file"); // reports the failure but execution continues
+                       else{
+                               fseek (pFile, 0, SEEK_END);
+                               size=ftell (pFile);
+                               fclose (pFile);
+                       }
+               
+                       positions.push_back(size);
+               
+                       return positions;
+}
+/**************************************************************************************************/
+
+inline vector<unsigned long int> divideFile(string filename, int& proc) { // Splits filename into at most proc byte ranges whose boundaries fall on '>' characters; returns the boundary offsets (first 0, last the file size) and sets proc to the number of ranges.
+       try{
+       
+               vector<unsigned long int> filePos;
+               filePos.push_back(0); // the first range always starts at the beginning of the file
+               
+               FILE * pFile;
+               unsigned long int size; // NOTE(review): stays uninitialized when fopen fails below, yet is used to compute chunkSize.
+               
+               //get the number of bytes in the file by seeking to its end
+               pFile = fopen (filename.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file"); // reports the failure but execution continues
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+       
+               //estimate file breaks
+               unsigned long int chunkSize = 0;
+               chunkSize = size / proc; // NOTE(review): divides by proc without checking for 0 -- TODO confirm callers always pass proc >= 1.
+               
+               //file too small to divide by processors: return the single range [0, size]
+               if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
+       
+               //for each process seekg to closest file break and search for next '>' char. make that the filebreak
+               for (int i = 0; i < proc; i++) {
+                       unsigned long int spot = (i+1) * chunkSize; // estimated end of range i
+                                       
+                       ifstream in;
+                       openInputFile(filename, in); // the file is reopened on every iteration; return value is ignored
+                       in.seekg(spot);
+                       
+                       //look for next '>'
+                       unsigned long int newSpot = spot;
+                       while (!in.eof()) {
+                          char c = in.get();
+                          if (c == '>') {   in.putback(c); newSpot = in.tellg(); break;  } // step back onto the '>' and record its offset as the boundary
+                       }
+                               
+                       //there was not another sequence before the end of the file
+                       unsigned long int sanityPos = in.tellg();
+                       if (sanityPos == -1) {  break;  } // tellg() reported failure (-1); stop adding boundaries. in.close() below is skipped on this path.
+                       else {   filePos.push_back(newSpot);  }
+                       
+                       in.close();
+               }
+               
+               //save end pos
+               filePos.push_back(size);
+               
+               //sanity check filePos: drop any entry that is not strictly greater than the one before it
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; } // i-- cancels the loop increment so the same index is compared against its new neighbour
+               }
+
+               proc = (filePos.size() - 1); // number of ranges described by the remaining boundaries
+               
+               return filePos;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function divideFile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+inline bool checkReleaseVersion(ifstream& file, string version) { // Compares the "#<version>" first line of file with version; returns true and rewinds file when it passes, returns false and closes file otherwise.
+       try {
+               
+               bool good = true;
+               
+               string line = getline(file);  
+
+               //a first line without a leading '#' is treated as a file written before this check existed
+               if (line[0] != '#') {  good = false;  }
+               else {
+                       //rip off #
+                       line = line.substr(1);
+                       
+                       vector<string> versionVector;
+                       splitAtChar(version, versionVector, '.'); // splitAtChar modifies its first argument; version is a by-value parameter so the caller's string is untouched
+                       
+                       //check file version
+                       vector<string> linesVector;
+                       splitAtChar(line, linesVector, '.');
+                       
+                       if (versionVector.size() != linesVector.size()) { good = false; } // a different number of dot-separated components counts as a mismatch
+                       else {
+                               for (int j = 0; j < versionVector.size(); j++) {
+                                       int num1, num2;
+                                       convert(versionVector[j], num1); // convert is not visible in this chunk -- presumably parses the string into the int; verify
+                                       convert(linesVector[j], num2);
+                                       
+                                       //if mothurs version is newer than this files version, then we want to remake it
+                                       if (num1 > num2) {  good = false; break;  }
+                               } // NOTE(review): the loop keeps going when an earlier component is smaller, so version 1.9 against a file marked 2.0 ends with good == false because 9 > 0 -- confirm this is intended.
+                       }
+                       
+               }
+               
+               if (!good) {  file.close();  } // the caller receives a closed stream on failure
+               else { file.seekg(0);  } // rewind so the caller reads the file from the beginning
+               
+               return good;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function checkReleaseVersion. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 #endif