X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothurout.cpp;h=4df5f96eb6086e8d2e8e96e3f448948540a1c8b4;hb=8da8321bc4d705f6c156248d6229c60a0204f750;hp=7309c174c83027437e67026869f53161101e11bc;hpb=10c8178dc5e3f96ef8e92a986f1eddd13e622173;p=mothur.git diff --git a/mothurout.cpp b/mothurout.cpp index 7309c17..4df5f96 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -39,6 +39,8 @@ void MothurOut::printCurrentFiles() { if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); } if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); } if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); } + if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); } + if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); } } catch(exception& e) { @@ -46,6 +48,42 @@ void MothurOut::printCurrentFiles() { exit(1); } } +/*********************************************************************************************/ +bool MothurOut::hasCurrentFiles() { + try { + bool hasCurrent = false; + + if (accnosfile != "") { return true; } + if (columnfile != "") { return true; } + if (designfile != "") { return true; } + if (fastafile != "") { return true; } + if (groupfile != "") { return true; } + if (listfile != "") { return true; } + if (namefile != "") { return true; } + if (oligosfile != "") { return true; } + if (orderfile != "") { return true; } + if (ordergroupfile != "") { return true; } + if (phylipfile != "") { return true; } + if (qualfile != "") { return true; } + if (rabundfile != "") { return true; } + if (relabundfile != "") { return true; } + if (sabundfile != "") { return true; } + if (sfffile != "") { return true; } + if (sharedfile != "") { return true; } + if (taxonomyfile != "") { return true; } + if (treefile != "") { return true; } + if (flowfile != "") { return true; } + if (processors != "1") { return true; } + + return hasCurrent; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "hasCurrentFiles"); + exit(1); + } +} + /*********************************************************************************************/ void MothurOut::clearCurrentFiles() { try { @@ -67,7 +105,9 @@ void MothurOut::clearCurrentFiles() { sfffile = ""; oligosfile = ""; accnosfile = ""; - taxonomyfile = ""; + taxonomyfile = ""; + flowfile = ""; + processors = "1"; } catch(exception& e) { errorOut(e, "MothurOut", "clearCurrentFiles"); @@ -103,7 +143,7 @@ void MothurOut::setDefaultPath(string pathname) { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if (lastChar != "/") { pathname += "/"; } #else if (lastChar != "\\") { pathname += "\\"; } @@ -172,8 +212,8 @@ void MothurOut::mothurOut(string output) { if (pid == 0) { //only one process should output to screen #endif - cout << output; out << output; + logger() << output; #ifdef USE_MPI } @@ -194,8 +234,8 @@ void MothurOut::mothurOutEndLine() { if (pid == 0) { //only one process should output to screen #endif - cout << endl; out << endl; + logger() << endl; #ifdef USE_MPI } @@ -207,6 +247,55 @@ void MothurOut::mothurOutEndLine() { } } /*********************************************************************************************/ +void MothurOut::mothurOut(string output, ofstream& outputFile) { + try { + +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen +#endif + + + out << output; + outputFile << output; + logger() << output; + +#ifdef USE_MPI + } +#endif + + } + catch(exception& e) { + errorOut(e, "MothurOut", "MothurOut"); + exit(1); + } +} +/*********************************************************************************************/ +void MothurOut::mothurOutEndLine(ofstream& outputFile) { + try { +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen +#endif + + out << endl; + outputFile << endl; + logger() << endl; + +#ifdef USE_MPI + } +#endif + } + catch(exception& e) { + errorOut(e, "MothurOut", "MothurOutEndLine"); + exit(1); + } +} +/*********************************************************************************************/ void MothurOut::mothurOutJustToLog(string output) { try { #ifdef USE_MPI @@ -245,7 +334,7 @@ void MothurOut::errorOut(exception& e, string object, string function) { // // On failure, returns 0.0, 0.0 int MothurOut::mem_usage(double& vm_usage, double& resident_set) { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) vm_usage = 0.0; resident_set = 0.0; @@ -347,7 +436,7 @@ void MothurOut::gobble(istream& f){ char d; while(isspace(d=f.get())) { ;} - f.putback(d); + if(!f.eof()) { f.putback(d); } } catch(exception& e) { errorOut(e, "MothurOut", "gobble"); @@ -359,7 +448,7 @@ void MothurOut::gobble(istringstream& f){ try { char d; while(isspace(d=f.get())) {;} - f.putback(d); + if(!f.eof()) { f.putback(d); } } catch(exception& e) { errorOut(e, "MothurOut", "gobble"); @@ -398,12 +487,12 @@ string MothurOut::getline(ifstream& fileHandle) { string line = ""; - while (!fileHandle.eof()) { + while (fileHandle) { //get next character char c = fileHandle.get(); //are you at the end of the line - if ((c == '\n') || (c == '\r') || (c == '\f')){ break; } + if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; } else { line += c; } } @@ -417,7 +506,7 @@ string MothurOut::getline(ifstream& fileHandle) { } /***********************************************************************/ -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION inline bool endsWith(string s, const char * suffix){ size_t suffixLength = strlen(suffix); @@ -431,7 +520,7 @@ string MothurOut::getRootName(string longName){ string rootName = longName; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) { int pos = rootName.find_last_of('.'); @@ -475,6 +564,21 @@ string MothurOut::getSimpleName(string longName){ /***********************************************************************/ +int MothurOut::getRandomIndex(int highest){ + try { + + int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0)); + + return random; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getRandomIndex"); + exit(1); + } + +} +/**********************************************************************/ + string MothurOut::getPathName(string longName){ try { string rootPathName = longName; @@ -571,7 +675,7 @@ string MothurOut::getFullPathName(string fileName){ string cwd; //get current working directory - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if (path.find("~") != -1) { //go to home directory string homeDir; @@ -624,7 +728,7 @@ string MothurOut::getFullPathName(string fileName){ }else if (path[(pos-1)] == '/') { //you want the current working dir ./ path = path.substr(0, pos); }else if (pos == 1) { break; //you are at the end - }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; } } for (int i = index; i >= 0; i--) { @@ -670,7 +774,7 @@ string MothurOut::getFullPathName(string fileName){ }else if (path[(pos-1)] == '\\') { //you want the current working dir ./ path = path.substr(0, pos); }else if (pos == 1) { break; //you are at the end - }else { cout << "cannot resolve path for " << fileName << endl; return fileName; } + }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; } } for (int i = index; i >= 0; i--) { @@ -694,7 +798,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){ try { //get full path name string completeFileName = getFullPathName(fileName); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION // check for gzipped or bzipped file if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { @@ -709,7 +813,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; system(command.c_str()); cerr << "Done decompressing " << completeFileName << "\n"; - remove(tempName.c_str()); + mothurRemove(tempName); exit(EXIT_SUCCESS); } else { cerr << "waiting on child process " << fork_result << "\n"; @@ -740,7 +844,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ //get full path name string completeFileName = getFullPathName(fileName); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION // check for gzipped or bzipped file if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { @@ -755,7 +859,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; system(command.c_str()); cerr << "Done decompressing " << completeFileName << "\n"; - remove(tempName.c_str()); + mothurRemove(tempName); exit(EXIT_SUCCESS); } else { cerr << "waiting on child process " << fork_result << "\n"; @@ -789,10 +893,10 @@ int MothurOut::renameFile(string oldName, string newName){ try { ifstream inTest; int exist = openInputFile(newName, inTest, ""); + inTest.close(); - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if (exist == 0) { //you could open it so you want to delete it - inTest.close(); string command = "rm " + newName; system(command.c_str()); } @@ -800,7 +904,7 @@ int MothurOut::renameFile(string oldName, string newName){ string command = "mv " + oldName + " " + newName; system(command.c_str()); #else - remove(newName.c_str()); + mothurRemove(newName); int renameOk = rename(oldName.c_str(), newName.c_str()); #endif return 0; @@ -818,7 +922,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){ try { string completeFileName = getFullPathName(fileName); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #ifdef USE_COMPRESSION // check for gzipped file if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { @@ -856,7 +960,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){ } /**************************************************************************************************/ -void MothurOut::appendFiles(string temp, string filename) { +int MothurOut::appendFiles(string temp, string filename) { try{ ofstream output; ifstream input; @@ -866,15 +970,18 @@ void MothurOut::appendFiles(string temp, string filename) { int ableToOpen = openInputFile(temp, input, "no error"); //int ableToOpen = openInputFile(temp, input); + int numLines = 0; if (ableToOpen == 0) { //you opened it while(char c = input.get()){ if(input.eof()) { break; } - else { output << c; } + else { output << c; if (c == '\n') {numLines++;} } } input.close(); } output.close(); + + return numLines; } catch(exception& e) { errorOut(e, "MothurOut", "appendFiles"); @@ -891,7 +998,7 @@ string MothurOut::sortFile(string distFile, string outputDir){ //if you can, use the unix sort since its been optimized for years - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n -k +3 " + distFile + " -o " + outfile; system(command.c_str()); #else //you are stuck with my best attempt... @@ -936,8 +1043,8 @@ string MothurOut::sortFile(string distFile, string outputDir){ output.close(); //remove temp files - remove(tempDistFile.c_str()); - remove(tempOutfile.c_str()); + mothurRemove(tempDistFile); + mothurRemove(tempOutfile); #endif return outfile; @@ -948,19 +1055,27 @@ string MothurOut::sortFile(string distFile, string outputDir){ } } /**************************************************************************************************/ -vector MothurOut::setFilePosFasta(string filename, int& num) { +vector MothurOut::setFilePosFasta(string filename, int& num) { try { - vector positions; + vector positions; ifstream inFASTA; - openInputFile(filename, inFASTA); + //openInputFile(filename, inFASTA); + inFASTA.open(filename.c_str(), ios::binary); string input; + unsigned long long count = 0; while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + //input = getline(inFASTA); + //cout << input << '\t' << inFASTA.tellg() << endl; + //if (input.length() != 0) { + // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; } + //} + //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions + char c = inFASTA.get(); count++; + if (c == '>') { + positions.push_back(count-1); + //cout << count << endl; } - gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions } inFASTA.close(); @@ -978,7 +1093,7 @@ vector MothurOut::setFilePosFasta(string filename, int& num) fclose (pFile); }*/ - unsigned long int size = positions[(positions.size()-1)]; + unsigned long long size = positions[(positions.size()-1)]; ifstream in; openInputFile(filename, in); @@ -991,6 +1106,7 @@ vector MothurOut::setFilePosFasta(string filename, int& num) in.close(); positions.push_back(size); + positions[0] = 0; return positions; } @@ -1000,31 +1116,51 @@ vector MothurOut::setFilePosFasta(string filename, int& num) } } /**************************************************************************************************/ -vector MothurOut::setFilePosEachLine(string filename, int& num) { +vector MothurOut::setFilePosEachLine(string filename, int& num) { try { filename = getFullPathName(filename); - vector positions; + vector positions; ifstream in; - openInputFile(filename, in); - + //openInputFile(filename, in); + in.open(filename.c_str(), ios::binary); + string input; + unsigned long long count = 0; + positions.push_back(0); + while(!in.eof()){ - unsigned long int lastpos = in.tellg(); - input = getline(in); - if (input.length() != 0) { - unsigned long int pos = in.tellg(); - if (pos != -1) { positions.push_back(pos - input.length() - 1); } - else { positions.push_back(lastpos); } + //unsigned long long lastpos = in.tellg(); + //input = getline(in); + //if (input.length() != 0) { + //unsigned long long pos = in.tellg(); + //if (pos != -1) { positions.push_back(pos - input.length() - 1); } + //else { positions.push_back(lastpos); } + //} + //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions + + + //getline counting reads + char d = in.get(); count++; + while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) { + //get next character + d = in.get(); + count++; + } + + if (!in.eof()) { + d=in.get(); count++; + while(isspace(d) && (d != in.eof())) { d=in.get(); count++;} } - gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions + positions.push_back(count-1); + //cout << count-1 << endl; } in.close(); - num = positions.size(); + num = positions.size()-1; FILE * pFile; - unsigned long int size; + unsigned long long size; //get num bytes in file pFile = fopen (filename.c_str(),"rb"); @@ -1035,7 +1171,7 @@ vector MothurOut::setFilePosEachLine(string filename, int& nu fclose (pFile); } - positions.push_back(size); + positions[(positions.size()-1)] = size; return positions; } @@ -1046,17 +1182,16 @@ vector MothurOut::setFilePosEachLine(string filename, int& nu } /**************************************************************************************************/ -vector MothurOut::divideFile(string filename, int& proc) { +vector MothurOut::divideFile(string filename, int& proc) { try{ - - vector filePos; + vector filePos; filePos.push_back(0); FILE * pFile; - unsigned long int size; + unsigned long long size; filename = getFullPathName(filename); - + //get num bytes in file pFile = fopen (filename.c_str(),"rb"); if (pFile==NULL) perror ("Error opening file"); @@ -1065,9 +1200,11 @@ vector MothurOut::divideFile(string filename, int& proc) { size=ftell (pFile); fclose (pFile); } - + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //estimate file breaks - unsigned long int chunkSize = 0; + unsigned long long chunkSize = 0; chunkSize = size / proc; //file to small to divide by processors @@ -1075,21 +1212,24 @@ vector MothurOut::divideFile(string filename, int& proc) { //for each process seekg to closest file break and search for next '>' char. make that the filebreak for (int i = 0; i < proc; i++) { - unsigned long int spot = (i+1) * chunkSize; + unsigned long long spot = (i+1) * chunkSize; ifstream in; openInputFile(filename, in); in.seekg(spot); //look for next '>' - unsigned long int newSpot = spot; + unsigned long long newSpot = spot; while (!in.eof()) { char c = in.get(); + if (c == '>') { in.putback(c); newSpot = in.tellg(); break; } + else if (int(c) == -1) { break; } + } //there was not another sequence before the end of the file - unsigned long int sanityPos = in.tellg(); + unsigned long long sanityPos = in.tellg(); if (sanityPos == -1) { break; } else { filePos.push_back(newSpot); } @@ -1099,14 +1239,18 @@ vector MothurOut::divideFile(string filename, int& proc) { //save end pos filePos.push_back(size); - + //sanity check filePos for (int i = 0; i < (filePos.size()-1); i++) { if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } } proc = (filePos.size() - 1); - +#else + mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine(); + proc=1; + filePos.push_back(size); +#endif return filePos; } catch(exception& e) { @@ -1114,7 +1258,43 @@ vector MothurOut::divideFile(string filename, int& proc) { exit(1); } } - +/**************************************************************************************************/ +int MothurOut::divideFile(string filename, int& proc, vector& files) { + try{ + + vector filePos = divideFile(filename, proc); + + for (int i = 0; i < (filePos.size()-1); i++) { + + //read file chunk + ifstream in; + openInputFile(filename, in); + in.seekg(filePos[i]); + unsigned long long size = filePos[(i+1)] - filePos[i]; + char* chunk = new char[size]; + in.read(chunk, size); + in.close(); + + //open new file + string fileChunkName = filename + "." + toString(i) + ".tmp"; + ofstream out; + openOutputFile(fileChunkName, out); + + out << chunk << endl; + out.close(); + delete[] chunk; + + //save name + files.push_back(fileChunkName); + } + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "divideFile"); + exit(1); + } +} /***********************************************************************/ bool MothurOut::isTrue(string f){ @@ -1154,6 +1334,61 @@ float MothurOut::ceilDist(float dist, int precision){ } } /**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map& nameMap) { + try { + + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); + + nameMap[firstCol] = secondCol; + } + in.close(); + + return nameMap.size(); + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map >& nameMap) { + try { + + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); + + vector temp; + splitAtComma(secondCol, temp); + + nameMap[firstCol] = temp; + } + in.close(); + + return nameMap.size(); + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ map MothurOut::readNames(string namefile) { try { @@ -1183,6 +1418,42 @@ map MothurOut::readNames(string namefile) { exit(1); } } +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { + try { + int error = 0; + + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); + + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + } + in.close(); + + return error; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} /***********************************************************************/ @@ -1206,7 +1477,123 @@ int MothurOut::getNumNames(string names){ exit(1); } } +/***********************************************************************/ +int MothurOut::getNumChar(string line, char c){ + try { + int count = 0; + + if(line != ""){ + for(int i=0;i > MothurOut::binomial(int maxOrder){ @@ -1748,6 +2135,46 @@ bool MothurOut::isContainingOnlyDigits(string input) { } } /**************************************************************************************************/ +int MothurOut::removeConfidences(string& tax) { + try { + + string taxon; + string newTax = ""; + + while (tax.find_first_of(';') != -1) { + + if (control_pressed) { return 0; } + + //get taxon + taxon = tax.substr(0,tax.find_first_of(';')); + + int pos = taxon.find_last_of('('); + if (pos != -1) { + //is it a number? + int pos2 = taxon.find_last_of(')'); + if (pos2 != -1) { + string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1))); + if (isNumeric1(confidenceScore)) { + taxon = taxon.substr(0, pos); //rip off confidence + } + } + } + taxon += ";"; + + tax = tax.substr(tax.find_first_of(';')+1, tax.length()); + newTax += taxon; + } + + tax = newTax; + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "removeConfidences"); + exit(1); + } +} +/**************************************************************************************************/