git.donarmstrong.com Git - mothur.git/blobdiff - mothur.h
changed how we break up the files on parallelized commands to avoid scanning file.
[mothur.git] / mothur.h
index f101d1f808fdb22d0fac7795dcc6388be6dacaf2..2622e2515e87ed9914f4491b716dc03c501d5411 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -54,6 +54,8 @@
 
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
        #include <sys/wait.h>
+       #include <sys/time.h>
+       #include <sys/resource.h>
        #include <unistd.h>
        
        #ifdef USE_READLINE
@@ -64,6 +66,9 @@
 #else
        #include <conio.h> //allows unbuffered screen capture from stdin
        #include <direct.h> //get cwd
+       #include <windows.h>
+       #include <psapi.h>
+
 #endif
 
 using namespace std;
@@ -217,7 +222,17 @@ inline void gobble(istream& f){
 }
 /***********************************************************************/
 
-inline string getline(ifstream& fileHandle) {
+inline void gobble(istringstream& f){
+       
+       char d;
+    while(isspace(d=f.get()))          {;}
+       f.putback(d);
+       
+}
+
+/***********************************************************************/
+
+inline string getline(istringstream& fileHandle) {
        try {
        
                string line = "";
@@ -239,7 +254,30 @@ inline string getline(ifstream& fileHandle) {
                exit(1);
        }
 }
+/***********************************************************************/
 
+inline string getline(ifstream& fileHandle) {
+       try {
+       
+               string line = "";
+               
+               while (!fileHandle.eof())       {
+                       //get next character
+                       char c = fileHandle.get(); 
+                       
+                       //are you at the end of the line
+                       if ((c == '\n') || (c == '\r') || (c == '\f')){  break; }       
+                       else {          line += c;              }
+               }
+               
+               return line;
+               
+       }
+       catch(exception& e) {
+               cout << "Error in mothur function getline" << endl;
+               exit(1);
+       }
+}
 /***********************************************************************/
 
 inline bool isTrue(string f){
@@ -254,6 +292,13 @@ inline float roundDist(float dist, int precision){
        
        return int(dist * precision + 0.5)/float(precision);
        
+}
+/***********************************************************************/
+
+inline float ceilDist(float dist, int precision){
+       
+       return int(ceil(dist * precision))/float(precision);
+       
 }
 
 /***********************************************************************/
@@ -487,7 +532,7 @@ inline string getFullPathName(string fileName){
                        }       
                #else
                        if (path.find("~") != -1) { //go to home directory
-                               string homeDir = getenv ("HOME");
+                               string homeDir = getenv ("HOMEPATH");
                                newFileName = homeDir + fileName.substr(fileName.find("~")+1);
                                return newFileName;
                        }else { //find path
@@ -570,7 +615,7 @@ inline int openInputFile(string fileName, ifstream& fileHandle){
        else {
                //check for blank file
                gobble(fileHandle);
-               if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl;  return 1;  }
+               if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl;   }
                
                return 0;
        }
@@ -625,6 +670,19 @@ inline int getNumSeqs(ifstream& file){
        return numSeqs;
 
 }
+/***********************************************************************/
+inline void getNumSeqs(ifstream& file, int& numSeqs){
+       
+       string input;
+       numSeqs = 0;
+       while(!file.eof()){
+               input = getline(file);
+               if (input.length() != 0) {
+                       if(input[0] == '>'){ numSeqs++; }
+               }
+       }
+}
+
 /***********************************************************************/
 
 inline bool inVector(string member, vector<string> group){
@@ -637,6 +695,29 @@ inline bool inVector(string member, vector<string> group){
 }
 /***********************************************************************/
 
+//This function splits estim at each occurrence of symbol and puts the pieces in a vector (estim is consumed in the process)
+inline void splitAtChar(string& estim, vector<string>& container, char symbol) {
+       try {
+               string individual;
+               
+               while (estim.find_first_of(symbol) != -1) {
+                       individual = estim.substr(0,estim.find_first_of(symbol));
+                       if ((estim.find_first_of(symbol)+1) <= estim.length()) { //checks to make sure you don't have the symbol at end of string
+                               estim = estim.substr(estim.find_first_of(symbol)+1, estim.length());
+                               container.push_back(individual);
+                       }
+               }
+               //get last one
+               container.push_back(estim);
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+               exit(1);
+       }       
+}
+
+/***********************************************************************/
+
 //This function parses the estimator options and puts them in a vector
 inline void splitAtDash(string& estim, vector<string>& container) {
        try {
@@ -763,6 +844,7 @@ inline void splitAtEquals(string& key, string& value){
                exit(1);
        }       
 }
+
 /**************************************************************************************************/
 
 inline bool inUsersGroups(string groupname, vector<string> Groups) {
@@ -808,7 +890,13 @@ inline bool anyLabelsToProcess(string label, set<string>& userLabels, string err
                
                //unique is the smallest line
                if (label == "unique") {  return false;  }
-               else { convert(label, labelFloat); }
+               else { 
+                       if (convertTestFloat(label, labelFloat)) {
+                               convert(label, labelFloat); 
+                       }else { //cant convert 
+                               return false;
+                       }
+               }
                
                //go through users set and make them floats
                for(it = userLabels.begin(); it != userLabels.end(); ++it) {
@@ -961,18 +1049,19 @@ inline string sortFile(string distFile, string outputDir){
        }       
 }
 /**************************************************************************************************/
-inline vector<long> setFilePosFasta(string filename, int& num) {
+inline vector<unsigned long int> setFilePosFasta(string filename, int& num) {
 
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        ifstream inFASTA;
                        openInputFile(filename, inFASTA);
-                               
+                                               
                        string input;
                        while(!inFASTA.eof()){
-                               input = getline(inFASTA); gobble(inFASTA);
+                               input = getline(inFASTA); 
                                if (input.length() != 0) {
-                                       if(input[0] == '>'){    long pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);      }
+                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                }
+                               gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
                        }
                        inFASTA.close();
                
@@ -990,7 +1079,7 @@ inline vector<long> setFilePosFasta(string filename, int& num) {
                                fclose (pFile);
                        }*/
                        
-                       long size = positions[(positions.size()-1)];
+                       unsigned long int size = positions[(positions.size()-1)];
                        ifstream in;
                        openInputFile(filename, in);
                        
@@ -1007,28 +1096,29 @@ inline vector<long> setFilePosFasta(string filename, int& num) {
                        return positions;
 }
 /**************************************************************************************************/
-inline vector<long> setFilePosEachLine(string filename, int& num) {
+inline vector<unsigned long int> setFilePosEachLine(string filename, int& num) {
 
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        ifstream in;
                        openInputFile(filename, in);
                                
                        string input;
                        while(!in.eof()){
-                               long lastpos = in.tellg();
-                               input = getline(in); gobble(in);
+                               unsigned long int lastpos = in.tellg();
+                               input = getline(in); 
                                if (input.length() != 0) {
-                                       long pos = in.tellg(); 
+                                       unsigned long int pos = in.tellg(); 
                                        if (pos != -1) { positions.push_back(pos - input.length() - 1); }
                                        else {  positions.push_back(lastpos);  }
                                }
+                               gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions
                        }
                        in.close();
                
                        num = positions.size();
                
                        FILE * pFile;
-                       long size;
+                       unsigned long int size;
                
                        //get num bytes in file
                        pFile = fopen (filename.c_str(),"rb");
@@ -1043,7 +1133,117 @@ inline vector<long> setFilePosEachLine(string filename, int& num) {
                
                        return positions;
 }
+/**************************************************************************************************/
 
+inline vector<unsigned long int> divideFile(string filename, int& proc) {
+       try{
+       
+               vector<unsigned long int> filePos;
+               filePos.push_back(0);
+               
+               FILE * pFile;
+               unsigned long int size;
+               
+               //get num bytes in file
+               pFile = fopen (filename.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+       
+               //estimate file breaks
+               unsigned long int chunkSize = 0;
+               chunkSize = size / proc;
+               
+               //file too small to divide by processors
+               if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
+       
+               //for each process, seekg to the estimated file break and search for the next '>' char; make that the file break
+               for (int i = 0; i < proc; i++) {
+                       unsigned long int spot = (i+1) * chunkSize;
+                       
+                       ifstream in;
+                       openInputFile(filename, in);
+                       in.seekg(spot);
+                       
+                       //look for next '>'
+                       unsigned long int newSpot = spot;
+                       while (!in.eof()) {
+                          char c = in.get();
+                          if (c == '>') {   in.putback(c); newSpot = in.tellg(); break;  }
+                       }
+                       
+                       //there was not another sequence before the end of the file
+                       if (newSpot == spot) {  break;  }
+                       else {   filePos.push_back(newSpot);  }
+                       
+                       in.close();
+               }
+               
+               //save end pos
+               filePos.push_back(size);
+               
+               //sanity check filePos
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
+               }
+
+               proc = (filePos.size() - 1);
+               
+               return filePos;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function divideFile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+inline bool checkReleaseVersion(ifstream& file, string version) {
+       try {
+               
+               bool good = true;
+               
+               string line = getline(file);  
+
+               //before we added this check
+               if (line[0] != '#') {  good = false;  }
+               else {
+                       //rip off #
+                       line = line.substr(1);
+                       
+                       vector<string> versionVector;
+                       splitAtChar(version, versionVector, '.');
+                       
+                       //check file version
+                       vector<string> linesVector;
+                       splitAtChar(line, linesVector, '.');
+                       
+                       if (versionVector.size() != linesVector.size()) { good = false; }
+                       else {
+                               for (int j = 0; j < versionVector.size(); j++) {
+                                       int num1, num2;
+                                       convert(versionVector[j], num1);
+                                       convert(linesVector[j], num2);
+                                       
+                                       //if mothur's version is newer than this file's version, then we want to remake it
+                                       if (num1 > num2) {  good = false; break;  }
+                               }
+                       }
+                       
+               }
+               
+               if (!good) {  file.close();  }
+               else { file.seekg(0);  }
+               
+               return good;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function checkReleaseVersion. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 #endif