+ vector<string> pieces;
+
+ for (int i = 0; i < size; i++) {
+ if (!isspace(buffer[i])) { rest += buffer[i]; }
+ else {
+ if (rest != "") { pieces.push_back(rest); rest = ""; }
+ while (i < size) { //gobble white space
+ if (isspace(buffer[i])) { i++; }
+ else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
+ }
+ }
+ }
+
+ return pieces;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "splitWhiteSpace");
+ exit(1);
+ }
+}
+/***********************************************************************/
+// Splits 'input' on runs of whitespace and returns the non-empty tokens in
+// their original order. Leading, trailing and repeated whitespace never
+// produce empty tokens. On an exception the error is reported through
+// errorOut and the program exits.
+vector<string> MothurOut::splitWhiteSpace(string input){
+    try {
+        vector<string> pieces;
+        string token = "";
+
+        for (size_t i = 0; i < input.length(); i++) {
+            //cast first: isspace is undefined for negative char values (bytes >= 0x80 where char is signed)
+            if (!isspace(static_cast<unsigned char>(input[i]))) { token += input[i]; }
+            else if (token != "") { pieces.push_back(token); token = ""; } //first blank after a token closes it
+        }
+
+        if (token != "") { pieces.push_back(token); } //input ended in the middle of a token
+
+        return pieces;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "splitWhiteSpace");
+        exit(1);
+    }
+}
+/***********************************************************************/
+// Splits 'input' on whitespace, except that everything from a quote character
+// (' or ") up to and including the next quote character is kept inside the
+// current token, whitespace and quotes included. The opening and closing
+// quote do not have to be the same character. An unterminated quote extends
+// to the end of the input. Inputs without any quote are handed to
+// splitWhiteSpace(input).
+vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
+    try {
+        //no quotes to worry about
+        if (input.find_first_of("'\"") == string::npos) { return splitWhiteSpace(input); }
+
+        vector<string> pieces;
+        string rest = "";
+        const size_t len = input.length();
+
+        size_t i = 0;
+        while (i < len) {
+            const char c = input[i];
+
+            if ((c == '\'') || (c == '\"')) { //grab everything til end or next ' or "
+                rest += c;
+                size_t j = i+1;
+                bool closed = false;
+                while ((j < len) && !closed) {
+                    rest += input[j];
+                    closed = ((input[j] == '\'') || (input[j] == '\"'));
+                    j++;
+                }
+                //resume right after the closing quote (or at the end when unterminated),
+                //so the following character is neither skipped nor copied twice
+                i = j;
+            }else if (!isspace(static_cast<unsigned char>(c))) { rest += c; i++; }
+            else {
+                if (rest != "") { pieces.push_back(rest); rest = ""; }
+                while ((i < len) && isspace(static_cast<unsigned char>(input[i]))) { i++; } //gobble white space
+            }
+        }
+
+        if (rest != "") { pieces.push_back(rest); }
+
+        return pieces;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "splitWhiteSpaceWithQuotes");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Reads whitespace-separated (name, taxonomy) token pairs from 'namefile'
+// into 'taxMap' and returns taxMap.size().
+//  - The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+//    carried over in 'rest' and completed by the next chunk.
+//  - Each name goes through checkName; a taxonomy containing '(' goes through
+//    removeConfidences.
+//  - A non-empty taxonomy that does not end in ';' is reported and not stored.
+//  - A name already present in taxMap is reported and sets control_pressed.
+// Reading stops once control_pressed is set or the stream is no longer
+// readable; the token still held in 'rest' is processed in either case.
+int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
+    try {
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                //are there confidence scores, if so remove them
+                if (secondCol.find_first_of('(') != string::npos) { removeConfidences(secondCol); }
+
+                if (taxMap.find(firstCol) == taxMap.end()) {
+                    bool ignore = false;
+                    if ((secondCol != "") && (secondCol[secondCol.length()-1] != ';')) {
+                        mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore = true;
+                    }
+                    if (!ignore) { taxMap[firstCol] = secondCol; }
+                    if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
+                }else {
+                    mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); control_pressed = true;
+                }
+            }
+        }
+
+        return taxMap.size();
+
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readTax");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Reads whitespace-separated (first column, second column) token pairs from
+// 'namefile'. The second column is split with splitAtComma and every
+// resulting name is mapped to the first column: nameMap[name] = firstCol.
+// Both columns go through checkName first. Returns nameMap.size().
+// 'redund' is not read by this function.
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
+    try {
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+
+                //parse names into vector
+                vector<string> theseNames;
+                splitAtComma(secondCol, theseNames);
+                for (size_t k = 0; k < theseNames.size(); k++) { nameMap[theseNames[k]] = firstCol; }
+            }
+        }
+
+        return nameMap.size();
+
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Reads whitespace-separated (first column, second column) token pairs from
+// 'namefile' and stores them reversed: nameMap[secondCol] = firstCol.
+// Both columns go through checkName first. Returns nameMap.size().
+// 'flip' is not read by this function.
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
+    try {
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+                nameMap[secondCol] = firstCol;
+            }
+        }
+
+        return nameMap.size();
+
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Clears 'nameMap' and 'nameCount', then reads whitespace-separated
+// (first column, second column) token pairs from 'namefile'. The second
+// column is split with splitAtComma; every resulting name is mapped to the
+// first column (nameMap[name] = firstCol) and nameCount[firstCol] is set to
+// the number of names produced. Both columns go through checkName first.
+// Returns nameMap.size().
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
+    try {
+        nameMap.clear(); nameCount.clear();
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+                //parse names into vector
+                vector<string> theseNames;
+                splitAtComma(secondCol, theseNames);
+                for (size_t k = 0; k < theseNames.size(); k++) { nameMap[theseNames[k]] = firstCol; }
+                nameCount[firstCol] = theseNames.size();
+            }
+        }
+
+        return nameMap.size();
+
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Reads whitespace-separated (first column, second column) token pairs from
+// 'namefile' and stores them as nameMap[firstCol] = secondCol; the second
+// column is stored as a single string. Both columns go through checkName
+// first. Returns nameMap.size().
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
+    try {
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+                nameMap[firstCol] = secondCol;
+            }
+        }
+
+        return nameMap.size();
+
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Reads whitespace-separated (first column, second column) token pairs from
+// 'namefile'. The second column is split with splitAtComma and the resulting
+// vector is stored as nameMap[firstCol]. Both columns go through checkName
+// first. Returns nameMap.size().
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
+    try {
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+                vector<string> temp;
+                splitAtComma(secondCol, temp);
+                nameMap[firstCol] = temp;
+            }
+        }
+
+        return nameMap.size();
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Reads whitespace-separated (first column, second column) token pairs from
+// 'namefile' and returns a map from the first column to
+// getNumNames(secondCol). Both columns go through checkName first.
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+map<string, int> MothurOut::readNames(string namefile) {
+    try {
+        map<string, int> nameMap;
+
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+                int num = getNumNames(secondCol);
+                nameMap[firstCol] = num;
+            }
+        }
+
+        return nameMap;
+
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Reads whitespace-separated (first column, second column) token pairs from
+// 'namefile' and returns a map from the first column to
+// getNumNames(secondCol). 'numSeqs' is reset to 0 and then incremented by
+// that count for every pair read. Both columns go through checkName first.
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
+    try {
+        map<string, int> nameMap;
+        numSeqs = 0;
+
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+                int num = getNumNames(secondCol);
+                nameMap[firstCol] = num;
+                numSeqs += num;
+            }
+        }
+
+        return nameMap;
+
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/************************************************************/
+// Rewrites 'name' in place, turning every ':' into '_'. Sets changedSeqNames
+// to true whenever at least one character was replaced. Always returns 0.
+int MothurOut::checkName(string& name) {
+    try {
+        //hop from one ':' to the next instead of inspecting every character
+        for (string::size_type colon = name.find(':'); colon != string::npos; colon = name.find(':', colon+1)) {
+            name[colon] = '_';
+            changedSeqNames = true;
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "checkName");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
+// Reads whitespace-separated (first column, second column) token pairs from
+// 'namefile'. For each pair the first column is looked up in 'fastamap':
+//  - found: a seqPriorityNode built from (getNumNames(secondCol), the mapped
+//    value, firstCol) is appended to 'nameVector';
+//  - missing: an error is printed and the return value becomes 1.
+// Both columns go through checkName first. Returns 0 when every first column
+// was found in 'fastamap', 1 otherwise.
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
+    try {
+        int error = 0;
+
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+
+        string rest = "";
+        char buffer[4096];
+        bool columnOne = true;
+        string firstCol, secondCol;
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) { firstCol = pieces[i]; columnOne = false; continue; }
+                secondCol = pieces[i]; columnOne = true;
+
+                checkName(firstCol);
+                checkName(secondCol);
+                int num = getNumNames(secondCol);
+
+                map<string, string>::iterator it = fastamap.find(firstCol);
+                if (it == fastamap.end()) {
+                    error = 1;
+                    mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
+                }else {
+                    seqPriorityNode temp(num, it->second, firstCol);
+                    nameVector.push_back(temp);
+                }
+            }
+        }
+
+        return error;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Reads every whitespace-separated token from 'accnosfile', passes it through
+// checkName and returns the tokens as a set (duplicates collapse).
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+set<string> MothurOut::readAccnos(string accnosfile){
+    try {
+        set<string> names;
+        ifstream in;
+        openInputFile(accnosfile, in);
+
+        string rest = "";
+        char buffer[4096];
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
+        }
+
+        return names;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readAccnos");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Clears 'names', then appends every whitespace-separated token from
+// 'accnosfile' in file order, after passing it through checkName.
+// Duplicates are kept. Always returns 0.
+// The file is read in 4096-byte chunks; a token cut by a chunk boundary is
+// carried in 'rest'. Reading stops once control_pressed is set or the stream
+// is no longer readable; the leftover token in 'rest' is still processed.
+int MothurOut::readAccnos(string accnosfile, vector<string>& names){
+    try {
+        names.clear();
+        ifstream in;
+        openInputFile(accnosfile, in);
+
+        string rest = "";
+        char buffer[4096];
+
+        bool finished = false;
+        while (!finished) {
+            vector<string> pieces;
+
+            //good() rather than !eof(): a stream that failed without reaching EOF would otherwise loop forever
+            if (in.good() && !control_pressed) {
+                in.read(buffer, sizeof(buffer));
+                pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            }else {
+                //last pass: flush the token left over from the final chunk
+                in.close();
+                if (rest != "") { pieces = splitWhiteSpace(rest); }
+                finished = true;
+            }
+
+            for (size_t i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
+        }
+
+        return 0;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readAccnos");
+        exit(1);
+    }
+}
+/***********************************************************************/
+
+int MothurOut::getNumNames(string names){
+ try {
+ int count = 0;
+
+ if(names != ""){
+ count = 1;
+ for(int i=0;i<names.size();i++){
+ if(names[i] == ','){
+ count++;
+ }
+ }
+ }
+