X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothurout.cpp;h=eacad376e1e91a519feff3c8e57727662042599b;hb=49d2b7459c5027557564b21e9487dadafbbbdc96;hp=0431d36796dd32cf4ac404dee2843150a4b5b110;hpb=dec4333b64891e0b923c862446cf2e3befa7e3d3;p=mothur.git diff --git a/mothurout.cpp b/mothurout.cpp index 0431d36..eacad37 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -18,8 +18,45 @@ MothurOut* MothurOut::getInstance() { return _uniqueInstance; } /*********************************************************************************************/ +set MothurOut::getCurrentTypes() { + try { + + set types; + types.insert("fasta"); + types.insert("accnos"); + types.insert("column"); + types.insert("design"); + types.insert("group"); + types.insert("list"); + types.insert("name"); + types.insert("oligos"); + types.insert("order"); + types.insert("ordergroup"); + types.insert("phylip"); + types.insert("qfile"); + types.insert("relabund"); + types.insert("sabund"); + types.insert("rabund"); + types.insert("sff"); + types.insert("shared"); + types.insert("taxonomy"); + types.insert("tree"); + types.insert("flow"); + types.insert("biom"); + types.insert("processors"); + + return types; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getCurrentTypes"); + exit(1); + } +} +/*********************************************************************************************/ void MothurOut::printCurrentFiles() { try { + + if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); } if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); } if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); } @@ -117,6 +154,81 @@ void MothurOut::clearCurrentFiles() { exit(1); } } +/***********************************************************************/ +string MothurOut::findProgramPath(string programName){ + try { + + string envPath = getenv("PATH"); + string pPath = ""; + + //delimiting path char + char delim; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + delim = ':'; +#else + delim = ';'; +#endif + + //break apart path variable by ':' + vector dirs; + splitAtChar(envPath, dirs, delim); + + if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); } + + //get path related to mothur + for (int i = 0; i < dirs.size(); i++) { + + if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); } + + //to lower so we can find it + string tempLower = ""; + for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); } + + //is this mothurs path? + if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; } + } + + if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); } + + if (pPath != "") { + //add programName so it looks like what argv would look like +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + pPath += "/" + programName; +#else + pPath += "\\" + programName; +#endif + }else { + //okay programName is not in the path, so the folder programName is in must be in the path + //lets find out which one + + //get path related to the program + for (int i = 0; i < dirs.size(); i++) { + + if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); } + + //is this the programs path? + ifstream in; + string tempIn = dirs[i]; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + tempIn += "/" + programName; +#else + tempIn += "\\" + programName; +#endif + openInputFile(tempIn, in, ""); + + //if this file exists + if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; } + } + } + + return pPath; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "findProgramPath"); + exit(1); + } +} /*********************************************************************************************/ void MothurOut::setFileName(string filename) { try { @@ -1017,11 +1129,14 @@ int MothurOut::appendFiles(string temp, string filename) { int numLines = 0; if (ableToOpen == 0) { //you opened it - while(!input.eof()){ - char c = input.get(); - if(input.eof()) { break; } - else { output << c; if (c == '\n') {numLines++;} } - } + + char buffer[4096]; + while (!input.eof()) { + input.read(buffer, 4096); + output.write(buffer, input.gcount()); + //count number of lines + for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} } + } input.close(); } @@ -1379,21 +1494,212 @@ float MothurOut::ceilDist(float dist, int precision){ exit(1); } } +/***********************************************************************/ + +vector MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){ + try { + vector pieces; + + for (int i = 0; i < size; i++) { + if (!isspace(buffer[i])) { rest += buffer[i]; } + else { + pieces.push_back(rest); rest = ""; + while (i < size) { //gobble white space + if (isspace(buffer[i])) { i++; } + else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitWhiteSpace"); + exit(1); + } +} +/***********************************************************************/ +vector MothurOut::splitWhiteSpace(string input){ + try { + vector pieces; + string rest = ""; + + for (int i = 0; i < input.length(); i++) { + if (!isspace(input[i])) { rest += input[i]; } + else { + pieces.push_back(rest); rest = ""; + while (i < input.length()) { //gobble white space + if (isspace(input[i])) { i++; } + else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + if (rest != "") { pieces.push_back(rest); } + + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitWhiteSpace"); + exit(1); + } +} +//********************************************************************************************************************** +int MothurOut::readTax(string namefile, map& taxMap) { + try { + + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //are there confidence scores, if so remove them + if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } + taxMap[firstCol] = secondCol; + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + pairDone = false; + } + } + } + in.close(); + + return taxMap.size(); + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readTax"); + exit(1); + } +} /**********************************************************************************************************************/ -int MothurOut::readNames(string namefile, map& nameMap) { +int MothurOut::readNames(string namefile, map& nameMap, bool redund) { try { //open input file ifstream in; openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + pairDone = false; + } + } + } + in.close(); + return nameMap.size(); + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map& nameMap, map& nameCount) { + try { + nameMap.clear(); nameCount.clear(); + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + nameCount[firstCol] = theseNames.size(); + pairDone = false; + } + } + } + in.close(); + + return nameMap.size(); + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, map& nameMap) { + try { + + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } - nameMap[firstCol] = secondCol; + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + } } in.close(); @@ -1413,21 +1719,33 @@ int MothurOut::readNames(string namefile, map >& nameMap) ifstream in; openInputFile(namefile, in); + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); - - vector temp; - splitAtComma(secondCol, temp); - - nameMap[firstCol] = temp; + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + vector temp; + splitAtComma(secondCol, temp); + nameMap[firstCol] = temp; + pairDone = false; + } + } } in.close(); - + return nameMap.size(); - } catch(exception& e) { errorOut(e, "MothurOut", "readNames"); @@ -1444,18 +1762,30 @@ map MothurOut::readNames(string namefile) { ifstream in; openInputFile(namefile, in); + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol; gobble(in); - in >> secondCol; gobble(in); - - int num = getNumNames(secondCol); - - nameMap[firstCol] = num; + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } } - in.close(); + in.close(); return nameMap; @@ -1474,34 +1804,103 @@ int MothurOut::readNames(string namefile, vector& nameVector, m ifstream in; openInputFile(namefile, in); + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); - - int num = getNumNames(secondCol); - - map::iterator it = fastamap.find(firstCol); - if (it == fastamap.end()) { - error = 1; - mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); - }else { - seqPriorityNode temp(num, it->second, firstCol); - nameVector.push_back(temp); - } + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } } - in.close(); - + in.close(); + return error; - } catch(exception& e) { errorOut(e, "MothurOut", "readNames"); exit(1); } } - +//********************************************************************************************************************** +set MothurOut::readAccnos(string accnosfile){ + try { + set names; + ifstream in; + openInputFile(accnosfile, in); + string name; + + string rest = ""; + char buffer[4096]; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } + } + in.close(); + + return names; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readAccnos"); + exit(1); + } +} +//********************************************************************************************************************** +int MothurOut::readAccnos(string accnosfile, vector& names){ + try { + names.clear(); + ifstream in; + openInputFile(accnosfile, in); + string name; + + string rest = ""; + char buffer[4096]; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + } + in.close(); + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readAccnos"); + exit(1); + } +} /***********************************************************************/ int MothurOut::getNumNames(string names){ @@ -1929,6 +2328,26 @@ void MothurOut::splitAtDash(string& estim, set& container) { exit(1); } } +/***********************************************************************/ +string MothurOut::makeList(vector& names) { + try { + string list = ""; + + if (names.size() == 0) { return list; } + + for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; } + + //get last name + list += names[names.size()-1]; + + return list; + } + catch(exception& e) { + errorOut(e, "MothurOut", "makeList"); + exit(1); + } +} + /***********************************************************************/ //This function parses the a string and puts peices in a vector void MothurOut::splitAtComma(string& estim, vector& container) {