X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothur.h;h=d5086b3909ece580bbb552454ced3a742b790dcd;hb=d84bb41d7dadcfa2c67ce5edb9b94060e5659fa7;hp=062c4814dfa4addeeb923f586c601717daca7c66;hpb=e189982e0a9b7352ad57cc38ccee675f128be22e;p=mothur.git diff --git a/mothur.h b/mothur.h index 062c481..d5086b3 100644 --- a/mothur.h +++ b/mothur.h @@ -54,6 +54,8 @@ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) #include + #include + #include #include #ifdef USE_READLINE @@ -64,6 +66,9 @@ #else #include //allows unbuffered screen capture from stdin #include //get cwd + #include + #include + #endif using namespace std; @@ -217,7 +222,17 @@ inline void gobble(istream& f){ } /***********************************************************************/ -inline string getline(ifstream& fileHandle) { +inline void gobble(istringstream& f){ + + char d; + while(isspace(d=f.get())) {;} + f.putback(d); + +} + +/***********************************************************************/ + +inline string getline(istringstream& fileHandle) { try { string line = ""; @@ -239,7 +254,30 @@ inline string getline(ifstream& fileHandle) { exit(1); } } +/***********************************************************************/ +inline string getline(ifstream& fileHandle) { + try { + + string line = ""; + + while (!fileHandle.eof()) { + //get next character + char c = fileHandle.get(); + + //are you at the end of the line + if ((c == '\n') || (c == '\r') || (c == '\f')){ break; } + else { line += c; } + } + + return line; + + } + catch(exception& e) { + cout << "Error in mothur function getline" << endl; + exit(1); + } +} /***********************************************************************/ inline bool isTrue(string f){ @@ -254,6 +292,13 @@ inline float roundDist(float dist, int precision){ return int(dist * precision + 0.5)/float(precision); +} +/***********************************************************************/ + +inline float ceilDist(float dist, int precision){ + + return int(ceil(dist * precision))/float(precision); + } /***********************************************************************/ @@ -379,7 +424,7 @@ inline string hasPath(string longName){ string path = ""; size_t found; - found=longName.find_last_of("/\\"); + found=longName.find_last_of("~/\\"); if(found != longName.npos){ path = longName.substr(0, found+1); @@ -433,91 +478,103 @@ inline string getFullPathName(string fileName){ string cwd; //get current working directory #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if (path.rfind("./") == -1) { return fileName; } //already complete name - else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name - char* cwdpath = new char[1024]; + if (path.find("~") != -1) { //go to home directory + string homeDir = getenv ("HOME"); + newFileName = homeDir + fileName.substr(fileName.find("~")+1); + return newFileName; + }else { //find path + if (path.rfind("./") == -1) { return fileName; } //already complete name + else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name + + char* cwdpath = new char[1024]; - size_t size; - cwdpath=getcwd(cwdpath,size); - - cwd = cwdpath; - - //rip off first '/' - string simpleCWD; - if (cwd.length() > 0) { simpleCWD = cwd.substr(1); } + size_t size; + cwdpath=getcwd(cwdpath,size); - //break apart the current working directory - vector dirs; - while (simpleCWD.find_first_of('/') != -1) { - string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/')); - simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length()); - dirs.push_back(dir); - } - //get last one // ex. ../../../filename = /user/work/desktop/filename - dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + cwd = cwdpath; + + //rip off first '/' + string simpleCWD; + if (cwd.length() > 0) { simpleCWD = cwd.substr(1); } + + //break apart the current working directory + vector dirs; + while (simpleCWD.find_first_of('/') != -1) { + string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/')); + simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length()); + dirs.push_back(dir); + } + //get last one // ex. ../../../filename = /user/work/desktop/filename + dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + + int index = dirs.size()-1; - int index = dirs.size()-1; - - while((pos = path.rfind("./")) != -1) { //while you don't have a complete path - if (pos == 0) { break; //you are at the end - }else if (path[(pos-1)] == '.') { //you want your parent directory ../ - path = path.substr(0, pos-1); - index--; - if (index == 0) { break; } - }else if (path[(pos-1)] == '/') { //you want the current working dir ./ - path = path.substr(0, pos); - }else if (pos == 1) { break; //you are at the end - }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } - } - - for (int i = index; i >= 0; i--) { - newFileName = dirs[i] + "/" + newFileName; - } + while((pos = path.rfind("./")) != -1) { //while you don't have a complete path + if (pos == 0) { break; //you are at the end + }else if (path[(pos-1)] == '.') { //you want your parent directory ../ + path = path.substr(0, pos-1); + index--; + if (index == 0) { break; } + }else if (path[(pos-1)] == '/') { //you want the current working dir ./ + path = path.substr(0, pos); + }else if (pos == 1) { break; //you are at the end + }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + } - newFileName = "/" + newFileName; - return newFileName; + for (int i = index; i >= 0; i--) { + newFileName = dirs[i] + "/" + newFileName; + } + newFileName = "/" + newFileName; + return newFileName; + } #else - if (path.rfind(".\\") == -1) { return fileName; } //already complete name - else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name - - char *cwdpath = NULL; - cwdpath = getcwd(NULL, 0); // or _getcwd - if ( cwdpath != NULL) { cwd = cwdpath; } - else { cwd = ""; } - - //break apart the current working directory - vector dirs; - while (cwd.find_first_of('\\') != -1) { - string dir = cwd.substr(0,cwd.find_first_of('\\')); - cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length()); - dirs.push_back(dir); - - } - //get last one - dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + if (path.find("~") != -1) { //go to home directory + string homeDir = getenv ("HOMEPATH"); + newFileName = homeDir + fileName.substr(fileName.find("~")+1); + return newFileName; + }else { //find path + if (path.rfind(".\\") == -1) { return fileName; } //already complete name + else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name + + char *cwdpath = NULL; + cwdpath = getcwd(NULL, 0); // or _getcwd + if ( cwdpath != NULL) { cwd = cwdpath; } + else { cwd = ""; } - int index = dirs.size()-1; - - while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path - if (pos == 0) { break; //you are at the end - }else if (path[(pos-1)] == '.') { //you want your parent directory ../ - path = path.substr(0, pos-1); - index--; - if (index == 0) { break; } - }else if (path[(pos-1)] == '\\') { //you want the current working dir ./ - path = path.substr(0, pos); - }else if (pos == 1) { break; //you are at the end - }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } - } + //break apart the current working directory + vector dirs; + while (cwd.find_first_of('\\') != -1) { + string dir = cwd.substr(0,cwd.find_first_of('\\')); + cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length()); + dirs.push_back(dir); - for (int i = index; i >= 0; i--) { - newFileName = dirs[i] + "\\" + newFileName; - } + } + //get last one + dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop + + int index = dirs.size()-1; + + while((pos = path.rfind(".\\")) != -1) { //while you don't have a complete path + if (pos == 0) { break; //you are at the end + }else if (path[(pos-1)] == '.') { //you want your parent directory ../ + path = path.substr(0, pos-1); + index--; + if (index == 0) { break; } + }else if (path[(pos-1)] == '\\') { //you want the current working dir ./ + path = path.substr(0, pos); + }else if (pos == 1) { break; //you are at the end + }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + } - return newFileName; + for (int i = index; i >= 0; i--) { + newFileName = dirs[i] + "\\" + newFileName; + } + + return newFileName; + } #endif } @@ -558,7 +615,7 @@ inline int openInputFile(string fileName, ifstream& fileHandle){ else { //check for blank file gobble(fileHandle); - if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; return 1; } + if (fileHandle.eof()) { cout << completeFileName << " is blank. Please correct." << endl; } return 0; } @@ -613,6 +670,19 @@ inline int getNumSeqs(ifstream& file){ return numSeqs; } +/***********************************************************************/ +inline void getNumSeqs(ifstream& file, int& numSeqs){ + + string input; + numSeqs = 0; + while(!file.eof()){ + input = getline(file); + if (input.length() != 0) { + if(input[0] == '>'){ numSeqs++; } + } + } +} + /***********************************************************************/ inline bool inVector(string member, vector group){ @@ -625,6 +695,29 @@ inline bool inVector(string member, vector group){ } /***********************************************************************/ +//This function parses the estimator options and puts them in a vector +inline void splitAtChar(string& estim, vector& container, char symbol) { + try { + string individual; + + while (estim.find_first_of(symbol) != -1) { + individual = estim.substr(0,estim.find_first_of(symbol)); + if ((estim.find_first_of(symbol)+1) <= estim.length()) { //checks to make sure you don't have dash at end of string + estim = estim.substr(estim.find_first_of(symbol)+1, estim.length()); + container.push_back(individual); + } + } + //get last one + container.push_back(estim); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function splitAtDash. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + //This function parses the estimator options and puts them in a vector inline void splitAtDash(string& estim, vector& container) { try { @@ -751,6 +844,7 @@ inline void splitAtEquals(string& key, string& value){ exit(1); } } + /**************************************************************************************************/ inline bool inUsersGroups(string groupname, vector Groups) { @@ -796,7 +890,13 @@ inline bool anyLabelsToProcess(string label, set& userLabels, string err //unique is the smallest line if (label == "unique") { return false; } - else { convert(label, labelFloat); } + else { + if (convertTestFloat(label, labelFloat)) { + convert(label, labelFloat); + }else { //cant convert + return false; + } + } //go through users set and make them floats for(it = userLabels.begin(); it != userLabels.end(); ++it) { @@ -865,14 +965,16 @@ inline void appendFiles(string temp, string filename) { //open output file in append mode openOutputFileAppend(filename, output); - openInputFile(temp, input); + int ableToOpen = openInputFile(temp, input, "no error"); - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } + if (ableToOpen == 0) { //you opened it + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } + } + input.close(); } - input.close(); output.close(); } catch(exception& e) { @@ -947,24 +1049,25 @@ inline string sortFile(string distFile, string outputDir){ } } /**************************************************************************************************/ -inline vector setFilePosFasta(string filename, int& num) { +inline vector setFilePosFasta(string filename, int& num) { - vector positions; + vector positions; ifstream inFASTA; openInputFile(filename, inFASTA); - + string input; while(!inFASTA.eof()){ - input = getline(inFASTA); gobble(inFASTA); + input = getline(inFASTA); if (input.length() != 0) { - if(input[0] == '>'){ long pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } } + gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions } inFASTA.close(); num = positions.size(); - FILE * pFile; + /*FILE * pFile; long size; //get num bytes in file @@ -974,35 +1077,48 @@ inline vector setFilePosFasta(string filename, int& num) { fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); + }*/ + + unsigned long int size = positions[(positions.size()-1)]; + ifstream in; + openInputFile(filename, in); + + in.seekg(size); + + while(char c = in.get()){ + if(in.eof()) { break; } + else { size++; } } + in.close(); positions.push_back(size); return positions; } /**************************************************************************************************/ -inline vector setFilePosEachLine(string filename, int& num) { +inline vector setFilePosEachLine(string filename, int& num) { - vector positions; + vector positions; ifstream in; openInputFile(filename, in); string input; while(!in.eof()){ - long lastpos = in.tellg(); - input = getline(in); gobble(in); + unsigned long int lastpos = in.tellg(); + input = getline(in); if (input.length() != 0) { - long pos = in.tellg(); + unsigned long int pos = in.tellg(); if (pos != -1) { positions.push_back(pos - input.length() - 1); } else { positions.push_back(lastpos); } } + gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions } in.close(); num = positions.size(); FILE * pFile; - long size; + unsigned long int size; //get num bytes in file pFile = fopen (filename.c_str(),"rb"); @@ -1017,7 +1133,118 @@ inline vector setFilePosEachLine(string filename, int& num) { return positions; } +/**************************************************************************************************/ +inline vector divideFile(string filename, int& proc) { + try{ + + vector filePos; + filePos.push_back(0); + + FILE * pFile; + unsigned long int size; + + //get num bytes in file + pFile = fopen (filename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + } + + //estimate file breaks + unsigned long int chunkSize = 0; + chunkSize = size / proc; + + //file to small to divide by processors + if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; } + + //for each process seekg to closest file break and search for next '>' char. make that the filebreak + for (int i = 0; i < proc; i++) { + unsigned long int spot = (i+1) * chunkSize; + + ifstream in; + openInputFile(filename, in); + in.seekg(spot); + + //look for next '>' + unsigned long int newSpot = spot; + while (!in.eof()) { + char c = in.get(); + if (c == '>') { in.putback(c); newSpot = in.tellg(); break; } + } + + //there was not another sequence before the end of the file + unsigned long int sanityPos = in.tellg(); + if (sanityPos == -1) { break; } + else { filePos.push_back(newSpot); } + + in.close(); + } + + //save end pos + filePos.push_back(size); + + //sanity check filePos + for (int i = 0; i < (filePos.size()-1); i++) { + if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } + } + + proc = (filePos.size() - 1); + + return filePos; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function divideFile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} +/**************************************************************************************************/ +inline bool checkReleaseVersion(ifstream& file, string version) { + try { + + bool good = true; + + string line = getline(file); + + //before we added this check + if (line[0] != '#') { good = false; } + else { + //rip off # + line = line.substr(1); + + vector versionVector; + splitAtChar(version, versionVector, '.'); + + //check file version + vector linesVector; + splitAtChar(line, linesVector, '.'); + + if (versionVector.size() != linesVector.size()) { good = false; } + else { + for (int j = 0; j < versionVector.size(); j++) { + int num1, num2; + convert(versionVector[j], num1); + convert(linesVector[j], num2); + + //if mothurs version is newer than this files version, then we want to remake it + if (num1 > num2) { good = false; break; } + } + } + + } + + if (!good) { file.close(); } + else { file.seekg(0); } + + return good; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function checkReleaseVersion. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} /**************************************************************************************************/ #endif