X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothurout.cpp;h=c80bff27e4ab69bf26d0fddaae534fe625f6df0b;hb=ae57e166b2ed7b475ec3f466106bd76fabadd063;hp=b30ee3ca630c91fea01343c9d873e709e027a4fb;hpb=8173238f9f94af9baab8471de58bed7c8830948d;p=mothur.git diff --git a/mothurout.cpp b/mothurout.cpp index b30ee3c..c80bff2 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -18,6 +18,103 @@ MothurOut* MothurOut::getInstance() { return _uniqueInstance; } /*********************************************************************************************/ +void MothurOut::printCurrentFiles() { + try { + if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); } + if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); } + if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); } + if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); } + if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); } + if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); } + if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); } + if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); } + if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); } + if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); } + if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); } + if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); } + if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); } + if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); } + if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); } + if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); } + if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); } + if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); } + if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); } + if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); } + if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); } + + } + catch(exception& e) { + errorOut(e, "MothurOut", "printCurrentFiles"); + exit(1); + } +} +/*********************************************************************************************/ +bool MothurOut::hasCurrentFiles() { + try { + bool hasCurrent = false; + + if (accnosfile != "") { return true; } + if (columnfile != "") { return true; } + if (designfile != "") { return true; } + if (fastafile != "") { return true; } + if (groupfile != "") { return true; } + if (listfile != "") { return true; } + if (namefile != "") { return true; } + if (oligosfile != "") { return true; } + if (orderfile != "") { return true; } + if (ordergroupfile != "") { return true; } + if (phylipfile != "") { return true; } + if (qualfile != "") { return true; } + if (rabundfile != "") { return true; } + if (relabundfile != "") { return true; } + if (sabundfile != "") { return true; } + if (sfffile != "") { return true; } + if (sharedfile != "") { return true; } + if (taxonomyfile != "") { return true; } + if (treefile != "") { return true; } + if (flowfile != "") { return true; } + if (processors != "1") { return true; } + + return hasCurrent; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "hasCurrentFiles"); + exit(1); + } +} + +/*********************************************************************************************/ +void MothurOut::clearCurrentFiles() { + try { + phylipfile = ""; + columnfile = ""; + listfile = ""; + rabundfile = ""; + sabundfile = ""; + namefile = ""; + groupfile = ""; + designfile = ""; + orderfile = ""; + treefile = ""; + sharedfile = ""; + ordergroupfile = ""; + relabundfile = ""; + fastafile = ""; + qualfile = ""; + sfffile = ""; + oligosfile = ""; + accnosfile = ""; + taxonomyfile = ""; + flowfile = ""; + processors = "1"; + } + catch(exception& e) { + errorOut(e, "MothurOut", "clearCurrentFiles"); + exit(1); + } +} +/*********************************************************************************************/ void MothurOut::setFileName(string filename) { try { logFileName = filename; @@ -150,6 +247,53 @@ void MothurOut::mothurOutEndLine() { } } /*********************************************************************************************/ +void MothurOut::mothurOut(string output, ofstream& outputFile) { + try { + +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen +#endif + + cout << output; + out << output; + outputFile << output; + +#ifdef USE_MPI + } +#endif + } + catch(exception& e) { + errorOut(e, "MothurOut", "MothurOut"); + exit(1); + } +} +/*********************************************************************************************/ +void MothurOut::mothurOutEndLine(ofstream& outputFile) { + try { +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen +#endif + + cout << endl; + out << endl; + outputFile << endl; + +#ifdef USE_MPI + } +#endif + } + catch(exception& e) { + errorOut(e, "MothurOut", "MothurOutEndLine"); + exit(1); + } +} +/*********************************************************************************************/ void MothurOut::mothurOutJustToLog(string output) { try { #ifdef USE_MPI @@ -341,12 +485,12 @@ string MothurOut::getline(ifstream& fileHandle) { string line = ""; - while (!fileHandle.eof()) { + while (fileHandle) { //get next character char c = fileHandle.get(); //are you at the end of the line - if ((c == '\n') || (c == '\r') || (c == '\f')){ break; } + if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; } else { line += c; } } @@ -418,6 +562,21 @@ string MothurOut::getSimpleName(string longName){ /***********************************************************************/ +int MothurOut::getRandomIndex(int highest){ + try { + + int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0)); + + return random; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getRandomIndex"); + exit(1); + } + +} +/**********************************************************************/ + string MothurOut::getPathName(string longName){ try { string rootPathName = longName; @@ -489,6 +648,7 @@ bool MothurOut::isBlank(string fileName){ //check for blank file gobble(fileHandle); if (fileHandle.eof()) { fileHandle.close(); return true; } + fileHandle.close(); } return false; } @@ -651,7 +811,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; system(command.c_str()); cerr << "Done decompressing " << completeFileName << "\n"; - remove(tempName.c_str()); + mothurRemove(tempName); exit(EXIT_SUCCESS); } else { cerr << "waiting on child process " << fork_result << "\n"; @@ -697,7 +857,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; system(command.c_str()); cerr << "Done decompressing " << completeFileName << "\n"; - remove(tempName.c_str()); + mothurRemove(tempName); exit(EXIT_SUCCESS); } else { cerr << "waiting on child process " << fork_result << "\n"; @@ -742,7 +902,7 @@ int MothurOut::renameFile(string oldName, string newName){ string command = "mv " + oldName + " " + newName; system(command.c_str()); #else - remove(newName.c_str()); + mothurRemove(newName); int renameOk = rename(oldName.c_str(), newName.c_str()); #endif return 0; @@ -878,8 +1038,8 @@ string MothurOut::sortFile(string distFile, string outputDir){ output.close(); //remove temp files - remove(tempDistFile.c_str()); - remove(tempOutfile.c_str()); + mothurRemove(tempDistFile); + mothurRemove(tempOutfile); #endif return outfile; @@ -890,19 +1050,27 @@ string MothurOut::sortFile(string distFile, string outputDir){ } } /**************************************************************************************************/ -vector MothurOut::setFilePosFasta(string filename, int& num) { +vector MothurOut::setFilePosFasta(string filename, int& num) { try { - vector positions; + vector positions; ifstream inFASTA; - openInputFile(filename, inFASTA); + //openInputFile(filename, inFASTA); + inFASTA.open(filename.c_str(), ios::binary); string input; + unsigned long long count = 0; while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + //input = getline(inFASTA); + //cout << input << '\t' << inFASTA.tellg() << endl; + //if (input.length() != 0) { + // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; } + //} + //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions + char c = inFASTA.get(); count++; + if (c == '>') { + positions.push_back(count-1); + //cout << count << endl; } - gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions } inFASTA.close(); @@ -920,19 +1088,20 @@ vector MothurOut::setFilePosFasta(string filename, int& num) fclose (pFile); }*/ - unsigned long int size = positions[(positions.size()-1)]; + unsigned long long size = positions[(positions.size()-1)]; ifstream in; openInputFile(filename, in); in.seekg(size); - while(char c = in.get()){ + while(in.get()){ if(in.eof()) { break; } else { size++; } } in.close(); positions.push_back(size); + positions[0] = 0; return positions; } @@ -942,31 +1111,51 @@ vector MothurOut::setFilePosFasta(string filename, int& num) } } /**************************************************************************************************/ -vector MothurOut::setFilePosEachLine(string filename, int& num) { +vector MothurOut::setFilePosEachLine(string filename, int& num) { try { filename = getFullPathName(filename); - vector positions; + vector positions; ifstream in; - openInputFile(filename, in); - + //openInputFile(filename, in); + in.open(filename.c_str(), ios::binary); + string input; + unsigned long long count = 0; + positions.push_back(0); + while(!in.eof()){ - unsigned long int lastpos = in.tellg(); - input = getline(in); - if (input.length() != 0) { - unsigned long int pos = in.tellg(); - if (pos != -1) { positions.push_back(pos - input.length() - 1); } - else { positions.push_back(lastpos); } + //unsigned long long lastpos = in.tellg(); + //input = getline(in); + //if (input.length() != 0) { + //unsigned long long pos = in.tellg(); + //if (pos != -1) { positions.push_back(pos - input.length() - 1); } + //else { positions.push_back(lastpos); } + //} + //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions + + + //getline counting reads + char d = in.get(); count++; + while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) { + //get next character + d = in.get(); + count++; + } + + if (!in.eof()) { + d=in.get(); count++; + while(isspace(d) && (d != in.eof())) { d=in.get(); count++;} } - gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions + positions.push_back(count-1); + cout << count-1 << endl; } in.close(); - num = positions.size(); + num = positions.size()-1; FILE * pFile; - unsigned long int size; + unsigned long long size; //get num bytes in file pFile = fopen (filename.c_str(),"rb"); @@ -977,7 +1166,7 @@ vector MothurOut::setFilePosEachLine(string filename, int& nu fclose (pFile); } - positions.push_back(size); + positions[(positions.size()-1)] = size; return positions; } @@ -988,14 +1177,14 @@ vector MothurOut::setFilePosEachLine(string filename, int& nu } /**************************************************************************************************/ -vector MothurOut::divideFile(string filename, int& proc) { +vector MothurOut::divideFile(string filename, int& proc) { try{ - vector filePos; + vector filePos; filePos.push_back(0); FILE * pFile; - unsigned long int size; + unsigned long long size; filename = getFullPathName(filename); @@ -1009,7 +1198,7 @@ vector MothurOut::divideFile(string filename, int& proc) { } //estimate file breaks - unsigned long int chunkSize = 0; + unsigned long long chunkSize = 0; chunkSize = size / proc; //file to small to divide by processors @@ -1017,21 +1206,21 @@ vector MothurOut::divideFile(string filename, int& proc) { //for each process seekg to closest file break and search for next '>' char. make that the filebreak for (int i = 0; i < proc; i++) { - unsigned long int spot = (i+1) * chunkSize; + unsigned long long spot = (i+1) * chunkSize; ifstream in; openInputFile(filename, in); in.seekg(spot); //look for next '>' - unsigned long int newSpot = spot; + unsigned long long newSpot = spot; while (!in.eof()) { char c = in.get(); if (c == '>') { in.putback(c); newSpot = in.tellg(); break; } } //there was not another sequence before the end of the file - unsigned long int sanityPos = in.tellg(); + unsigned long long sanityPos = in.tellg(); if (sanityPos == -1) { break; } else { filePos.push_back(newSpot); } @@ -1056,7 +1245,43 @@ vector MothurOut::divideFile(string filename, int& proc) { exit(1); } } - +/**************************************************************************************************/ +int MothurOut::divideFile(string filename, int& proc, vector& files) { + try{ + + vector filePos = divideFile(filename, proc); + + for (int i = 0; i < (filePos.size()-1); i++) { + + //read file chunk + ifstream in; + openInputFile(filename, in); + in.seekg(filePos[i]); + unsigned long long size = filePos[(i+1)] - filePos[i]; + char* chunk = new char[size]; + in.read(chunk, size); + in.close(); + + //open new file + string fileChunkName = filename + "." + toString(i) + ".tmp"; + ofstream out; + openOutputFile(fileChunkName, out); + + out << chunk << endl; + out.close(); + delete[] chunk; + + //save name + files.push_back(fileChunkName); + } + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "divideFile"); + exit(1); + } +} /***********************************************************************/ bool MothurOut::isTrue(string f){ @@ -1095,6 +1320,99 @@ float MothurOut::ceilDist(float dist, int precision){ exit(1); } } +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map& nameMap) { + try { + + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); + + nameMap[firstCol] = secondCol; + } + in.close(); + + return 0; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} + +/**********************************************************************************************************************/ +map MothurOut::readNames(string namefile) { + try { + + map nameMap; + + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); + + int num = getNumNames(secondCol); + + nameMap[firstCol] = num; + } + in.close(); + + return nameMap; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { + try { + int error = 0; + + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); + + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + } + in.close(); + + return error; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} /***********************************************************************/ @@ -1118,7 +1436,18 @@ int MothurOut::getNumNames(string names){ exit(1); } } +/***********************************************************************/ +void MothurOut::mothurRemove(string filename){ + try { + filename = getFullPathName(filename); + remove(filename.c_str()); + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurRemove"); + exit(1); + } +} /**************************************************************************************************/ vector > MothurOut::binomial(int maxOrder){ @@ -1155,7 +1484,90 @@ vector > MothurOut::binomial(int maxOrder){ exit(1); } } - +/**************************************************************************************************/ +unsigned int MothurOut::fromBase36(string base36){ + try { + unsigned int num = 0; + + map converts; + converts['A'] = 0; + converts['a'] = 0; + converts['B'] = 1; + converts['b'] = 1; + converts['C'] = 2; + converts['c'] = 2; + converts['D'] = 3; + converts['d'] = 3; + converts['E'] = 4; + converts['e'] = 4; + converts['F'] = 5; + converts['f'] = 5; + converts['G'] = 6; + converts['g'] = 6; + converts['H'] = 7; + converts['h'] = 7; + converts['I'] = 8; + converts['i'] = 8; + converts['J'] = 9; + converts['j'] = 9; + converts['K'] = 10; + converts['k'] = 10; + converts['L'] = 11; + converts['l'] = 11; + converts['M'] = 12; + converts['m'] = 12; + converts['N'] = 13; + converts['n'] = 13; + converts['O'] = 14; + converts['o'] = 14; + converts['P'] = 15; + converts['p'] = 15; + converts['Q'] = 16; + converts['q'] = 16; + converts['R'] = 17; + converts['r'] = 17; + converts['S'] = 18; + converts['s'] = 18; + converts['T'] = 19; + converts['t'] = 19; + converts['U'] = 20; + converts['u'] = 20; + converts['V'] = 21; + converts['v'] = 21; + converts['W'] = 22; + converts['w'] = 22; + converts['X'] = 23; + converts['x'] = 23; + converts['Y'] = 24; + converts['y'] = 24; + converts['Z'] = 25; + converts['z'] = 25; + converts['0'] = 26; + converts['1'] = 27; + converts['2'] = 28; + converts['3'] = 29; + converts['4'] = 30; + converts['5'] = 31; + converts['6'] = 32; + converts['7'] = 33; + converts['8'] = 34; + converts['9'] = 35; + + int i = 0; + while (i < base36.length()) { + char c = base36[i]; + num = 36 * num + converts[c]; + i++; + } + + return num; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "fromBase36"); + exit(1); + } +} /***********************************************************************/ int MothurOut::factorial(int num){ @@ -1221,17 +1633,6 @@ void MothurOut::splitAtChar(string& estim, vector& container, char symbo } container.push_back(individual); - /* - - while (estim.find_first_of(symbol) != -1) { - individual = estim.substr(0,estim.find_first_of(symbol)); - if ((estim.find_first_of(symbol)+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of(symbol)+1, estim.length()); - container.push_back(individual); - } - } - //get last one - container.push_back(estim); */ } catch(exception& e) { errorOut(e, "MothurOut", "splitAtChar"); @@ -1570,7 +1971,23 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) { exit(1); } } - +/**************************************************************************************************/ +bool MothurOut::isContainingOnlyDigits(string input) { + try{ + + //are you a digit in ascii code + for (int i = 0;i < input.length(); i++){ + if( input[i]>47 && input[i]<58){} + else { return false; } + } + + return true; + } + catch(exception& e) { + errorOut(e, "MothurOut", "isContainingOnlyDigits"); + exit(1); + } +} /**************************************************************************************************/