X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothurout.cpp;h=468c063cb5c7e74a8d5901a1a38203f47409e2d5;hb=16f9c4ab6f39769856b13e048eae2c8eaa413c02;hp=2b06799f3c91f58c3c72ed63c2a6098d0473675e;hpb=86c838c428a9e7d26f902f5492738241fa72c4e7;p=mothur.git diff --git a/mothurout.cpp b/mothurout.cpp index 2b06799..468c063 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -18,8 +18,46 @@ MothurOut* MothurOut::getInstance() { return _uniqueInstance; } /*********************************************************************************************/ +set MothurOut::getCurrentTypes() { + try { + + set types; + types.insert("fasta"); + types.insert("accnos"); + types.insert("column"); + types.insert("design"); + types.insert("group"); + types.insert("list"); + types.insert("name"); + types.insert("oligos"); + types.insert("order"); + types.insert("ordergroup"); + types.insert("phylip"); + types.insert("qfile"); + types.insert("relabund"); + types.insert("sabund"); + types.insert("rabund"); + types.insert("sff"); + types.insert("shared"); + types.insert("taxonomy"); + types.insert("tree"); + types.insert("flow"); + types.insert("biom"); + types.insert("count"); + types.insert("processors"); + + return types; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getCurrentTypes"); + exit(1); + } +} +/*********************************************************************************************/ void MothurOut::printCurrentFiles() { try { + + if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); } if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); } if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); } @@ -40,6 +78,8 @@ void MothurOut::printCurrentFiles() { if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); } if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); } if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); } + if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); } + if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); } if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); } } @@ -73,6 +113,8 @@ bool MothurOut::hasCurrentFiles() { if (taxonomyfile != "") { return true; } if (treefile != "") { return true; } if (flowfile != "") { return true; } + if (biomfile != "") { return true; } + if (counttablefile != "") { return true; } if (processors != "1") { return true; } return hasCurrent; @@ -107,6 +149,8 @@ void MothurOut::clearCurrentFiles() { accnosfile = ""; taxonomyfile = ""; flowfile = ""; + biomfile = ""; + counttablefile = ""; processors = "1"; } catch(exception& e) { @@ -114,6 +158,81 @@ void MothurOut::clearCurrentFiles() { exit(1); } } +/***********************************************************************/ +string MothurOut::findProgramPath(string programName){ + try { + + string envPath = getenv("PATH"); + string pPath = ""; + + //delimiting path char + char delim; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + delim = ':'; +#else + delim = ';'; +#endif + + //break apart path variable by ':' + vector dirs; + splitAtChar(envPath, dirs, delim); + + if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); } + + //get path related to mothur + for (int i = 0; i < dirs.size(); i++) { + + if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); } + + //to lower so we can find it + string tempLower = ""; + for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); } + + //is this mothurs path? + if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; } + } + + if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); } + + if (pPath != "") { + //add programName so it looks like what argv would look like +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + pPath += "/" + programName; +#else + pPath += "\\" + programName; +#endif + }else { + //okay programName is not in the path, so the folder programName is in must be in the path + //lets find out which one + + //get path related to the program + for (int i = 0; i < dirs.size(); i++) { + + if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); } + + //is this the programs path? + ifstream in; + string tempIn = dirs[i]; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + tempIn += "/" + programName; +#else + tempIn += "\\" + programName; +#endif + openInputFile(tempIn, in, ""); + + //if this file exists + if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; } + } + } + + return pPath; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "findProgramPath"); + exit(1); + } +} /*********************************************************************************************/ void MothurOut::setFileName(string filename) { try { @@ -143,7 +262,7 @@ void MothurOut::setDefaultPath(string pathname) { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if (lastChar != "/") { pathname += "/"; } #else if (lastChar != "\\") { pathname += "\\"; } @@ -212,8 +331,8 @@ void MothurOut::mothurOut(string output) { if (pid == 0) { //only one process should output to screen #endif - cout << output; out << output; + logger() << output; #ifdef USE_MPI } @@ -234,8 +353,8 @@ void MothurOut::mothurOutEndLine() { if (pid == 0) { //only one process should output to screen #endif - cout << endl; out << endl; + logger() << endl; #ifdef USE_MPI } @@ -247,6 +366,55 @@ void MothurOut::mothurOutEndLine() { } } /*********************************************************************************************/ +void MothurOut::mothurOut(string output, ofstream& outputFile) { + try { + +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen +#endif + + + out << output; + outputFile << output; + logger() << output; + +#ifdef USE_MPI + } +#endif + + } + catch(exception& e) { + errorOut(e, "MothurOut", "MothurOut"); + exit(1); + } +} +/*********************************************************************************************/ +void MothurOut::mothurOutEndLine(ofstream& outputFile) { + try { +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen +#endif + + out << endl; + outputFile << endl; + logger() << endl; + +#ifdef USE_MPI + } +#endif + } + catch(exception& e) { + errorOut(e, "MothurOut", "MothurOutEndLine"); + exit(1); + } +} +/*********************************************************************************************/ void MothurOut::mothurOutJustToLog(string output) { try { #ifdef USE_MPI @@ -285,7 +453,7 @@ void MothurOut::errorOut(exception& e, string object, string function) { // // On failure, returns 0.0, 0.0 int MothurOut::mem_usage(double& vm_usage, double& resident_set) { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) vm_usage = 0.0; resident_set = 0.0; @@ -387,7 +555,7 @@ void MothurOut::gobble(istream& f){ char d; while(isspace(d=f.get())) { ;} - f.putback(d); + if(!f.eof()) { f.putback(d); } } catch(exception& e) { errorOut(e, "MothurOut", "gobble"); @@ -399,7 +567,7 @@ void MothurOut::gobble(istringstream& f){ try { char d; while(isspace(d=f.get())) {;} - f.putback(d); + if(!f.eof()) { f.putback(d); } } catch(exception& e) { errorOut(e, "MothurOut", "gobble"); @@ -438,12 +606,12 @@ string MothurOut::getline(ifstream& fileHandle) { string line = ""; - while (!fileHandle.eof()) { + while (fileHandle) { //get next character char c = fileHandle.get(); //are you at the end of the line - if ((c == '\n') || (c == '\r') || (c == '\f')){ break; } + if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; } else { line += c; } } @@ -457,7 +625,7 @@ string MothurOut::getline(ifstream& fileHandle) { } /***********************************************************************/ -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION inline bool endsWith(string s, const char * suffix){ size_t suffixLength = strlen(suffix); @@ -471,7 +639,7 @@ string MothurOut::getRootName(string longName){ string rootName = longName; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) { int pos = rootName.find_last_of('.'); @@ -515,6 +683,21 @@ string MothurOut::getSimpleName(string longName){ /***********************************************************************/ +int MothurOut::getRandomIndex(int highest){ + try { + + int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0)); + + return random; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getRandomIndex"); + exit(1); + } + +} +/**********************************************************************/ + string MothurOut::getPathName(string longName){ try { string rootPathName = longName; @@ -534,6 +717,48 @@ string MothurOut::getPathName(string longName){ } /***********************************************************************/ +bool MothurOut::dirCheck(string& dirName){ + try { + + string tag = ""; + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + tag = toString(pid); + #endif + + //add / to name if needed + string lastChar = dirName.substr(dirName.length()-1); + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + if (lastChar != "/") { dirName += "/"; } + #else + if (lastChar != "\\") { dirName += "\\"; } + #endif + + //test to make sure directory exists + dirName = getFullPathName(dirName); + string outTemp = dirName + tag + "temp"; + ofstream out; + out.open(outTemp.c_str(), ios::trunc); + if(!out) { + mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine(); + }else{ + out.close(); + mothurRemove(outTemp); + return true; + } + + return false; + } + catch(exception& e) { + errorOut(e, "MothurOut", "dirCheck"); + exit(1); + } + +} +/***********************************************************************/ + string MothurOut::hasPath(string longName){ try { string path = ""; @@ -557,7 +782,7 @@ string MothurOut::hasPath(string longName){ string MothurOut::getExtension(string longName){ try { - string extension = longName; + string extension = ""; if(longName.find_last_of('.') != longName.npos){ int pos = longName.find_last_of('.'); @@ -611,7 +836,7 @@ string MothurOut::getFullPathName(string fileName){ string cwd; //get current working directory - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if (path.find("~") != -1) { //go to home directory string homeDir; @@ -664,7 +889,7 @@ string MothurOut::getFullPathName(string fileName){ }else if (path[(pos-1)] == '/') { //you want the current working dir ./ path = path.substr(0, pos); }else if (pos == 1) { break; //you are at the end - }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; } } for (int i = index; i >= 0; i--) { @@ -710,11 +935,11 @@ string MothurOut::getFullPathName(string fileName){ }else if (path[(pos-1)] == '\\') { //you want the current working dir ./ path = path.substr(0, pos); }else if (pos == 1) { break; //you are at the end - }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; } } for (int i = index; i >= 0; i--) { - newFileName = dirs[i] + "\\" + newFileName; + newFileName = dirs[i] + "\\\\" + newFileName; } return newFileName; @@ -734,7 +959,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){ try { //get full path name string completeFileName = getFullPathName(fileName); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION // check for gzipped or bzipped file if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { @@ -749,7 +974,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; system(command.c_str()); cerr << "Done decompressing " << completeFileName << "\n"; - remove(tempName.c_str()); + mothurRemove(tempName); exit(EXIT_SUCCESS); } else { cerr << "waiting on child process " << fork_result << "\n"; @@ -780,7 +1005,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ //get full path name string completeFileName = getFullPathName(fileName); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION // check for gzipped or bzipped file if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { @@ -795,7 +1020,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; system(command.c_str()); cerr << "Done decompressing " << completeFileName << "\n"; - remove(tempName.c_str()); + mothurRemove(tempName); exit(EXIT_SUCCESS); } else { cerr << "waiting on child process " << fork_result << "\n"; @@ -827,12 +1052,15 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ int MothurOut::renameFile(string oldName, string newName){ try { + + if (oldName == newName) { return 0; } + ifstream inTest; int exist = openInputFile(newName, inTest, ""); + inTest.close(); - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if (exist == 0) { //you could open it so you want to delete it - inTest.close(); string command = "rm " + newName; system(command.c_str()); } @@ -840,7 +1068,7 @@ int MothurOut::renameFile(string oldName, string newName){ string command = "mv " + oldName + " " + newName; system(command.c_str()); #else - remove(newName.c_str()); + mothurRemove(newName); int renameOk = rename(oldName.c_str(), newName.c_str()); #endif return 0; @@ -858,7 +1086,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){ try { string completeFileName = getFullPathName(fileName); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION // check for gzipped file if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { @@ -896,7 +1124,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){ } /**************************************************************************************************/ -void MothurOut::appendFiles(string temp, string filename) { +int MothurOut::appendFiles(string temp, string filename) { try{ ofstream output; ifstream input; @@ -906,15 +1134,22 @@ void MothurOut::appendFiles(string temp, string filename) { int ableToOpen = openInputFile(temp, input, "no error"); //int ableToOpen = openInputFile(temp, input); + int numLines = 0; if (ableToOpen == 0) { //you opened it - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } - } + + char buffer[4096]; + while (!input.eof()) { + input.read(buffer, 4096); + output.write(buffer, input.gcount()); + //count number of lines + for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} } + } input.close(); } output.close(); + + return numLines; } catch(exception& e) { errorOut(e, "MothurOut", "appendFiles"); @@ -931,7 +1166,7 @@ string MothurOut::sortFile(string distFile, string outputDir){ //if you can, use the unix sort since its been optimized for years - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n -k +3 " + distFile + " -o " + outfile; system(command.c_str()); #else //you are stuck with my best attempt... @@ -948,7 +1183,7 @@ string MothurOut::sortFile(string distFile, string outputDir){ string firstName, secondName; float dist; - while (input) { + while (!input.eof()) { input >> firstName >> secondName >> dist; output << dist << '\t' << firstName << '\t' << secondName << endl; gobble(input); @@ -964,20 +1199,21 @@ string MothurOut::sortFile(string distFile, string outputDir){ //read in sorted file and put distance at end again ifstream input2; + ofstream output2; openInputFile(tempOutfile, input2); - openOutputFile(outfile, output); + openOutputFile(outfile, output2); - while (input2) { + while (!input2.eof()) { input2 >> dist >> firstName >> secondName; - output << firstName << '\t' << secondName << '\t' << dist << endl; + output2 << firstName << '\t' << secondName << '\t' << dist << endl; gobble(input2); } input2.close(); - output.close(); + output2.close(); //remove temp files - remove(tempDistFile.c_str()); - remove(tempOutfile.c_str()); + mothurRemove(tempDistFile); + mothurRemove(tempOutfile); #endif return outfile; @@ -988,19 +1224,27 @@ string MothurOut::sortFile(string distFile, string outputDir){ } } /**************************************************************************************************/ -vector MothurOut::setFilePosFasta(string filename, int& num) { +vector MothurOut::setFilePosFasta(string filename, int& num) { try { - vector positions; + vector positions; ifstream inFASTA; - openInputFile(filename, inFASTA); + //openInputFile(filename, inFASTA); + inFASTA.open(filename.c_str(), ios::binary); string input; + unsigned long long count = 0; while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + //input = getline(inFASTA); + //cout << input << '\t' << inFASTA.tellg() << endl; + //if (input.length() != 0) { + // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; } + //} + //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions + char c = inFASTA.get(); count++; + if (c == '>') { + positions.push_back(count-1); + //cout << count << endl; } - gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions } inFASTA.close(); @@ -1018,7 +1262,7 @@ vector MothurOut::setFilePosFasta(string filename, int& num) fclose (pFile); }*/ - unsigned long int size = positions[(positions.size()-1)]; + unsigned long long size = positions[(positions.size()-1)]; ifstream in; openInputFile(filename, in); @@ -1031,6 +1275,7 @@ vector MothurOut::setFilePosFasta(string filename, int& num) in.close(); positions.push_back(size); + positions[0] = 0; return positions; } @@ -1040,31 +1285,41 @@ vector MothurOut::setFilePosFasta(string filename, int& num) } } /**************************************************************************************************/ -vector MothurOut::setFilePosEachLine(string filename, int& num) { +vector MothurOut::setFilePosEachLine(string filename, int& num) { try { filename = getFullPathName(filename); - vector positions; + vector positions; ifstream in; - openInputFile(filename, in); - + //openInputFile(filename, in); + in.open(filename.c_str(), ios::binary); + string input; + unsigned long long count = 0; + positions.push_back(0); + while(!in.eof()){ - unsigned long int lastpos = in.tellg(); - input = getline(in); - if (input.length() != 0) { - unsigned long int pos = in.tellg(); - if (pos != -1) { positions.push_back(pos - input.length() - 1); } - else { positions.push_back(lastpos); } + //getline counting reads + char d = in.get(); count++; + while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) { + //get next character + d = in.get(); + count++; } - gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions + + if (!in.eof()) { + d=in.get(); count++; + while(isspace(d) && (d != in.eof())) { d=in.get(); count++;} + } + positions.push_back(count-1); + //cout << count-1 << endl; } in.close(); - num = positions.size(); + num = positions.size()-1; FILE * pFile; - unsigned long int size; + unsigned long long size; //get num bytes in file pFile = fopen (filename.c_str(),"rb"); @@ -1075,7 +1330,7 @@ vector MothurOut::setFilePosEachLine(string filename, int& nu fclose (pFile); } - positions.push_back(size); + positions[(positions.size()-1)] = size; return positions; } @@ -1086,17 +1341,16 @@ vector MothurOut::setFilePosEachLine(string filename, int& nu } /**************************************************************************************************/ -vector MothurOut::divideFile(string filename, int& proc) { +vector MothurOut::divideFile(string filename, int& proc) { try{ - - vector filePos; + vector filePos; filePos.push_back(0); FILE * pFile; - unsigned long int size; + unsigned long long size; filename = getFullPathName(filename); - + //get num bytes in file pFile = fopen (filename.c_str(),"rb"); if (pFile==NULL) perror ("Error opening file"); @@ -1105,9 +1359,11 @@ vector MothurOut::divideFile(string filename, int& proc) { size=ftell (pFile); fclose (pFile); } - + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //estimate file breaks - unsigned long int chunkSize = 0; + unsigned long long chunkSize = 0; chunkSize = size / proc; //file to small to divide by processors @@ -1115,21 +1371,24 @@ vector MothurOut::divideFile(string filename, int& proc) { //for each process seekg to closest file break and search for next '>' char. make that the filebreak for (int i = 0; i < proc; i++) { - unsigned long int spot = (i+1) * chunkSize; + unsigned long long spot = (i+1) * chunkSize; ifstream in; openInputFile(filename, in); in.seekg(spot); //look for next '>' - unsigned long int newSpot = spot; + unsigned long long newSpot = spot; while (!in.eof()) { char c = in.get(); + if (c == '>') { in.putback(c); newSpot = in.tellg(); break; } + else if (int(c) == -1) { break; } + } //there was not another sequence before the end of the file - unsigned long int sanityPos = in.tellg(); + unsigned long long sanityPos = in.tellg(); if (sanityPos == -1) { break; } else { filePos.push_back(newSpot); } @@ -1139,14 +1398,18 @@ vector MothurOut::divideFile(string filename, int& proc) { //save end pos filePos.push_back(size); - + //sanity check filePos for (int i = 0; i < (filePos.size()-1); i++) { if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } } proc = (filePos.size() - 1); - +#else + mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine(); + proc=1; + filePos.push_back(size); +#endif return filePos; } catch(exception& e) { @@ -1154,7 +1417,43 @@ vector MothurOut::divideFile(string filename, int& proc) { exit(1); } } - +/**************************************************************************************************/ +int MothurOut::divideFile(string filename, int& proc, vector& files) { + try{ + + vector filePos = divideFile(filename, proc); + + for (int i = 0; i < (filePos.size()-1); i++) { + + //read file chunk + ifstream in; + openInputFile(filename, in); + in.seekg(filePos[i]); + unsigned long long size = filePos[(i+1)] - filePos[i]; + char* chunk = new char[size]; + in.read(chunk, size); + in.close(); + + //open new file + string fileChunkName = filename + "." + toString(i) + ".tmp"; + ofstream out; + openOutputFile(fileChunkName, out); + + out << chunk << endl; + out.close(); + delete[] chunk; + + //save name + files.push_back(fileChunkName); + } + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "divideFile"); + exit(1); + } +} /***********************************************************************/ bool MothurOut::isTrue(string f){ @@ -1193,65 +1492,226 @@ float MothurOut::ceilDist(float dist, int precision){ exit(1); } } -/**********************************************************************************************************************/ -map MothurOut::readNames(string namefile) { +/***********************************************************************/ + +vector MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){ try { - - map nameMap; - - //open input file + vector pieces; + + for (int i = 0; i < size; i++) { + if (!isspace(buffer[i])) { rest += buffer[i]; } + else { + if (rest != "") { pieces.push_back(rest); rest = ""; } + while (i < size) { //gobble white space + if (isspace(buffer[i])) { i++; } + else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitWhiteSpace"); + exit(1); + } +} +/***********************************************************************/ +vector MothurOut::splitWhiteSpace(string input){ + try { + vector pieces; + string rest = ""; + + for (int i = 0; i < input.length(); i++) { + if (!isspace(input[i])) { rest += input[i]; } + else { + if (rest != "") { pieces.push_back(rest); rest = ""; } + while (i < input.length()) { //gobble white space + if (isspace(input[i])) { i++; } + else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + if (rest != "") { pieces.push_back(rest); } + + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitWhiteSpace"); + exit(1); + } +} +/***********************************************************************/ +vector MothurOut::splitWhiteSpaceWithQuotes(string input){ + try { + vector pieces; + string rest = ""; + + int pos = input.find('\''); + int pos2 = input.find('\"'); + + if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about + else { + for (int i = 0; i < input.length(); i++) { + if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or " + rest += input[i]; + for (int j = i+1; j < input.length(); j++) { + if ((input[j] == '\'') || (input[j] == '\"')) { //then quit + rest += input[j]; + i = j+1; + j+=input.length(); + }else { rest += input[j]; } + } + }else if (!isspace(input[i])) { rest += input[i]; } + else { + if (rest != "") { pieces.push_back(rest); rest = ""; } + while (i < input.length()) { //gobble white space + if (isspace(input[i])) { i++; } + else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + if (rest != "") { pieces.push_back(rest); } + } + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitWhiteSpace"); + exit(1); + } +} +//********************************************************************************************************************** +int MothurOut::readTax(string namefile, map& taxMap) { + try { + //open input file ifstream in; openInputFile(namefile, in); - + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); - - int num = getNumNames(secondCol); - - nameMap[firstCol] = num; + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //are there confidence scores, if so remove them + if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true; + } + pairDone = false; + } + } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //are there confidence scores, if so remove them + if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true; + } + + pairDone = false; + } + } + } - return nameMap; - + return taxMap.size(); + } catch(exception& e) { - errorOut(e, "MothurOut", "readNames"); + errorOut(e, "MothurOut", "readTax"); exit(1); } } /**********************************************************************************************************************/ -int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { +int MothurOut::readNames(string namefile, map& nameMap, bool redund) { try { - int error = 0; - //open input file ifstream in; openInputFile(namefile, in); - + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); - - int num = getNumNames(secondCol); - - map::iterator it = fastamap.find(firstCol); - if (it == fastamap.end()) { - error = 1; - mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); - }else { - seqPriorityNode temp(num, it->second, firstCol); - nameVector.push_back(temp); - } + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + pairDone = false; + } + } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + pairDone = false; + } + } + } - return error; + return nameMap.size(); } catch(exception& e) { @@ -1259,77 +1719,652 @@ int MothurOut::readNames(string namefile, vector& nameVector, m exit(1); } } - -/***********************************************************************/ - -int MothurOut::getNumNames(string names){ +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map& nameMap, int flip) { try { - int count = 0; - - if(names != ""){ - count = 1; - for(int i=0;i pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + nameMap[secondCol] = firstCol; + pairDone = false; + } + } } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + nameMap[secondCol] = firstCol; + pairDone = false; + } + } + } + + return nameMap.size(); - return count; } catch(exception& e) { - errorOut(e, "MothurOut", "getNumNames"); + errorOut(e, "MothurOut", "readNames"); exit(1); } } - -/**************************************************************************************************/ - -vector > MothurOut::binomial(int maxOrder){ +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map& nameMap, map& nameCount) { try { - vector > binomial(maxOrder+1); - - for(int i=0;i<=maxOrder;i++){ - binomial[i].resize(maxOrder+1); - binomial[i][0]=1; - binomial[0][i]=0; - } - binomial[0][0]=1; - - binomial[1][0]=1; - binomial[1][1]=1; - - for(int i=2;i<=maxOrder;i++){ - binomial[1][i]=0; - } - - for(int i=2;i<=maxOrder;i++){ - for(int j=1;j<=maxOrder;j++){ - if(i==j){ binomial[i][j]=1; } - if(j>i) { binomial[i][j]=0; } - else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; } + nameMap.clear(); nameCount.clear(); + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + nameCount[firstCol] = theseNames.size(); + pairDone = false; + } + } } - } - - return binomial; - + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + nameCount[firstCol] = theseNames.size(); + pairDone = false; + } + } + + } + return nameMap.size(); + } catch(exception& e) { - errorOut(e, "MothurOut", "binomial"); + errorOut(e, "MothurOut", "readNames"); exit(1); } } -/**************************************************************************************************/ -unsigned int MothurOut::fromBase36(string base36){ +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map& nameMap) { try { - unsigned int num = 0; - - map converts; - converts['A'] = 0; - converts['a'] = 0; - converts['B'] = 1; - converts['b'] = 1; - converts['C'] = 2; + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + } + } + + return nameMap.size(); + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map >& nameMap) { + try { + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + vector temp; + splitAtComma(secondCol, temp); + nameMap[firstCol] = temp; + pairDone = false; + } + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + vector temp; + splitAtComma(secondCol, temp); + nameMap[firstCol] = temp; + pairDone = false; + } + } + } + + return nameMap.size(); + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +map MothurOut::readNames(string namefile) { + try { + map nameMap; + + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } + + return nameMap; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { + try { + int error = 0; + + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } + } + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +//********************************************************************************************************************** +set MothurOut::readAccnos(string accnosfile){ + try { + set names; + ifstream in; + openInputFile(accnosfile, in); + string name; + + string rest = ""; + char buffer[4096]; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } + } + return names; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readAccnos"); + exit(1); + } +} +//********************************************************************************************************************** +int MothurOut::readAccnos(string accnosfile, vector& names){ + try { + names.clear(); + ifstream in; + openInputFile(accnosfile, in); + string name; + + string rest = ""; + char buffer[4096]; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + } + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readAccnos"); + exit(1); + } +} +/***********************************************************************/ + +int MothurOut::getNumNames(string names){ + try { + int count = 0; + + if(names != ""){ + count = 1; + for(int i=0;i bigset, vector subset) { + try { + + + if (subset.size() > bigset.size()) { return false; } + + //check if each guy in suset is also in bigset + for (int i = 0; i < subset.size(); i++) { + bool match = false; + for (int j = 0; j < bigset.size(); j++) { + if (subset[i] == bigset[j]) { match = true; break; } + } + + //you have a guy in subset that had no match in bigset + if (match == false) { return false; } + } + + return true; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "isSubset"); + exit(1); + } +} +/***********************************************************************/ +int MothurOut::mothurRemove(string filename){ + try { + filename = getFullPathName(filename); + int error = remove(filename.c_str()); + //if (error != 0) { + // if (errno != ENOENT) { //ENOENT == file does not exist + // string message = "Error deleting file " + filename; + // perror(message.c_str()); + // } + //} + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurRemove"); + exit(1); + } +} +/***********************************************************************/ +bool MothurOut::mothurConvert(string item, int& num){ + try { + bool error = false; + + if (isNumeric1(item)) { + convert(item, num); + }else { + num = 0; + error = true; + mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine(); + commandInputsConvertError = true; + } + + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurConvert"); + exit(1); + } +} +/***********************************************************************/ +bool MothurOut::mothurConvert(string item, intDist& num){ + try { + bool error = false; + + if (isNumeric1(item)) { + convert(item, num); + }else { + num = 0; + error = true; + mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine(); + commandInputsConvertError = true; + } + + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurConvert"); + exit(1); + } +} + +/***********************************************************************/ +bool MothurOut::isNumeric1(string stringToCheck){ + try { + bool numeric = false; + + if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; } + + return numeric; + } + catch(exception& e) { + errorOut(e, "MothurOut", "isNumeric1"); + exit(1); + } + +} +/***********************************************************************/ +bool MothurOut::mothurConvert(string item, float& num){ + try { + bool error = false; + + if (isNumeric1(item)) { + convert(item, num); + }else { + num = 0; + error = true; + mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine(); + commandInputsConvertError = true; + } + + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurConvert"); + exit(1); + } +} +/***********************************************************************/ +bool MothurOut::mothurConvert(string item, double& num){ + try { + bool error = false; + + if (isNumeric1(item)) { + convert(item, num); + }else { + num = 0; + error = true; + mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine(); + commandInputsConvertError = true; + } + + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurConvert"); + exit(1); + } +} +/**************************************************************************************************/ + +vector > MothurOut::binomial(int maxOrder){ + try { + vector > binomial(maxOrder+1); + + for(int i=0;i<=maxOrder;i++){ + binomial[i].resize(maxOrder+1); + binomial[i][0]=1; + binomial[0][i]=0; + } + binomial[0][0]=1; + + binomial[1][0]=1; + binomial[1][1]=1; + + for(int i=2;i<=maxOrder;i++){ + binomial[1][i]=0; + } + + for(int i=2;i<=maxOrder;i++){ + for(int j=1;j<=maxOrder;j++){ + if(i==j){ binomial[i][j]=1; } + if(j>i) { binomial[i][j]=0; } + else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; } + } + } + + return binomial; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "binomial"); + exit(1); + } +} +/**************************************************************************************************/ +unsigned int MothurOut::fromBase36(string base36){ + try { + unsigned int num = 0; + + map converts; + converts['A'] = 0; + converts['a'] = 0; + converts['B'] = 1; + converts['b'] = 1; + converts['C'] = 2; converts['c'] = 2; converts['D'] = 3; converts['d'] = 3; @@ -1455,6 +2490,9 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){ //This function parses the estimator options and puts them in a vector void MothurOut::splitAtChar(string& estim, vector& container, char symbol) { try { + + if (symbol == '-') { splitAtDash(estim, container); return; } + string individual = ""; int estimLength = estim.size(); for(int i=0;i& container) { try { string individual = ""; int estimLength = estim.size(); + bool prevEscape = false; for(int i=0;i& container) { try { string individual = ""; int estimLength = estim.size(); + bool prevEscape = false; for(int i=0;i& container) { //This function parses the line options and puts them in a set void MothurOut::splitAtDash(string& estim, set& container) { try { - string individual; + string individual = ""; int lineNum; - - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); - convert(individual, lineNum); //convert the string to int - container.insert(lineNum); + int estimLength = estim.size(); + bool prevEscape = false; + for(int i=0;i& container) { exit(1); } } +/***********************************************************************/ +string MothurOut::makeList(vector& names) { + try { + string list = ""; + + if (names.size() == 0) { return list; } + + for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; } + + //get last name + list += names[names.size()-1]; + + return list; + } + catch(exception& e) { + errorOut(e, "MothurOut", "makeList"); + exit(1); + } +} + /***********************************************************************/ //This function parses the a string and puts peices in a vector void MothurOut::splitAtComma(string& estim, vector& container) { @@ -1608,6 +2678,25 @@ void MothurOut::splitAtComma(string& estim, vector& container) { exit(1); } } +/***********************************************************************/ +//This function splits up the various option parameters +void MothurOut::splitAtChar(string& prefix, string& suffix, char c){ + try { + prefix = suffix.substr(0,suffix.find_first_of(c)); + if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string + suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length()); + string space = " "; + while(suffix.at(0) == ' ') + suffix = suffix.substr(1, suffix.length()); + } + + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitAtComma"); + exit(1); + } +} + /***********************************************************************/ //This function splits up the various option parameters @@ -1662,6 +2751,35 @@ bool MothurOut::inUsersGroups(string groupname, vector Groups) { exit(1); } } +/**************************************************************************************************/ + +bool MothurOut::inUsersGroups(vector set, vector< vector > sets) { + try { + for (int i = 0; i < sets.size(); i++) { + if (set == sets[i]) { return true; } + } + return false; + } + catch(exception& e) { + errorOut(e, "MothurOut", "inUsersGroups"); + exit(1); + } +} +/**************************************************************************************************/ + +bool MothurOut::inUsersGroups(int groupname, vector Groups) { + try { + for (int i = 0; i < Groups.size(); i++) { + if (groupname == Groups[i]) { return true; } + } + return false; + } + catch(exception& e) { + errorOut(e, "MothurOut", "inUsersGroups"); + exit(1); + } +} + /**************************************************************************************************/ //returns true if any of the strings in first vector are in second vector bool MothurOut::inUsersGroups(vector groupnames, vector Groups) { @@ -1824,8 +2942,93 @@ bool MothurOut::isContainingOnlyDigits(string input) { } } /**************************************************************************************************/ - - +int MothurOut::removeConfidences(string& tax) { + try { + + string taxon; + string newTax = ""; + + while (tax.find_first_of(';') != -1) { + + if (control_pressed) { return 0; } + + //get taxon + taxon = tax.substr(0,tax.find_first_of(';')); + + int pos = taxon.find_last_of('('); + if (pos != -1) { + //is it a number? + int pos2 = taxon.find_last_of(')'); + if (pos2 != -1) { + string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1))); + if (isNumeric1(confidenceScore)) { + taxon = taxon.substr(0, pos); //rip off confidence + } + } + } + taxon += ";"; + + tax = tax.substr(tax.find_first_of(';')+1, tax.length()); + newTax += taxon; + } + + tax = newTax; + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "removeConfidences"); + exit(1); + } +} +/**************************************************************************************************/ +string MothurOut::removeQuotes(string tax) { + try { + + string taxon; + string newTax = ""; + + for (int i = 0; i < tax.length(); i++) { + + if (control_pressed) { return newTax; } + + if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; } + + } + + return newTax; + } + catch(exception& e) { + errorOut(e, "MothurOut", "removeQuotes"); + exit(1); + } +} +/**************************************************************************************************/ +// function for calculating standard deviation +double MothurOut::getStandardDeviation(vector& featureVector){ + try { + //finds sum + double average = 0; + for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; } + average /= (double) featureVector.size(); + + //find standard deviation + double stdDev = 0; + for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each + stdDev += ((featureVector[i] - average) * (featureVector[i] - average)); + } + + stdDev /= (double) featureVector.size(); + stdDev = sqrt(stdDev); + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getStandardDeviation"); + exit(1); + } +} +/**************************************************************************************************/