X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothurout.cpp;h=54cdd33bbe72cfecb241ed0d24d537bf4654af06;hb=4b54ce99af7db8019ea907cd7c2edf789369ada9;hp=64f0bc88713d2c3f56cfb6f98cc1cbc387ee7966;hpb=957d67f7d8bbadfd2930de061e89fd9b149270fd;p=mothur.git diff --git a/mothurout.cpp b/mothurout.cpp index 64f0bc8..54cdd33 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -43,6 +43,7 @@ set MothurOut::getCurrentTypes() { types.insert("tree"); types.insert("flow"); types.insert("biom"); + types.insert("count"); types.insert("processors"); return types; @@ -78,6 +79,7 @@ void MothurOut::printCurrentFiles() { if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); } if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); } if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); } + if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); } if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); } } @@ -112,6 +114,7 @@ bool MothurOut::hasCurrentFiles() { if (treefile != "") { return true; } if (flowfile != "") { return true; } if (biomfile != "") { return true; } + if (counttablefile != "") { return true; } if (processors != "1") { return true; } return hasCurrent; @@ -147,6 +150,7 @@ void MothurOut::clearCurrentFiles() { taxonomyfile = ""; flowfile = ""; biomfile = ""; + counttablefile = ""; processors = "1"; } catch(exception& e) { @@ -935,7 +939,7 @@ string MothurOut::getFullPathName(string fileName){ } for (int i = index; i >= 0; i--) { - newFileName = dirs[i] + "\\" + newFileName; + newFileName = dirs[i] + "\\\\" + newFileName; } return newFileName; @@ -1048,6 +1052,9 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ int MothurOut::renameFile(string oldName, string newName){ try { + + if (oldName == newName) { return 0; } + ifstream inTest; int exist = openInputFile(newName, inTest, ""); inTest.close(); @@ -1176,7 +1183,7 @@ string MothurOut::sortFile(string distFile, string outputDir){ string firstName, secondName; float dist; - while (input) { + while (!input.eof()) { input >> firstName >> secondName >> dist; output << dist << '\t' << firstName << '\t' << secondName << endl; gobble(input); @@ -1192,16 +1199,17 @@ string MothurOut::sortFile(string distFile, string outputDir){ //read in sorted file and put distance at end again ifstream input2; + ofstream output2; openInputFile(tempOutfile, input2); - openOutputFile(outfile, output); + openOutputFile(outfile, output2); - while (input2) { + while (!input2.eof()) { input2 >> dist >> firstName >> secondName; - output << firstName << '\t' << secondName << '\t' << dist << endl; + output2 << firstName << '\t' << secondName << '\t' << dist << endl; gobble(input2); } input2.close(); - output.close(); + output2.close(); //remove temp files mothurRemove(tempDistFile); @@ -1291,16 +1299,6 @@ vector MothurOut::setFilePosEachLine(string filename, int& n positions.push_back(0); while(!in.eof()){ - //unsigned long long lastpos = in.tellg(); - //input = getline(in); - //if (input.length() != 0) { - //unsigned long long pos = in.tellg(); - //if (pos != -1) { positions.push_back(pos - input.length() - 1); } - //else { positions.push_back(lastpos); } - //} - //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions - - //getline counting reads char d = in.get(); count++; while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) { @@ -1503,7 +1501,7 @@ vector MothurOut::splitWhiteSpace(string& rest, char buffer[], int size) for (int i = 0; i < size; i++) { if (!isspace(buffer[i])) { rest += buffer[i]; } else { - pieces.push_back(rest); rest = ""; + if (rest != "") { pieces.push_back(rest); rest = ""; } while (i < size) { //gobble white space if (isspace(buffer[i])) { i++; } else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; @@ -1527,7 +1525,7 @@ vector MothurOut::splitWhiteSpace(string input){ for (int i = 0; i < input.length(); i++) { if (!isspace(input[i])) { rest += input[i]; } else { - pieces.push_back(rest); rest = ""; + if (rest != "") { pieces.push_back(rest); rest = ""; } while (i < input.length()) { //gobble white space if (isspace(input[i])) { i++; } else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; @@ -1544,10 +1542,49 @@ vector MothurOut::splitWhiteSpace(string input){ exit(1); } } +/***********************************************************************/ +vector MothurOut::splitWhiteSpaceWithQuotes(string input){ + try { + vector pieces; + string rest = ""; + + int pos = input.find('\''); + int pos2 = input.find('\"'); + + if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about + else { + for (int i = 0; i < input.length(); i++) { + if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or " + rest += input[i]; + for (int j = i+1; j < input.length(); j++) { + if ((input[j] == '\'') || (input[j] == '\"')) { //then quit + rest += input[j]; + i = j+1; + j+=input.length(); + }else { rest += input[j]; } + } + }else if (!isspace(input[i])) { rest += input[i]; } + else { + if (rest != "") { pieces.push_back(rest); rest = ""; } + while (i < input.length()) { //gobble white space + if (isspace(input[i])) { i++; } + else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + if (rest != "") { pieces.push_back(rest); } + } + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitWhiteSpace"); + exit(1); + } +} //********************************************************************************************************************** int MothurOut::readTax(string namefile, map& taxMap) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1569,15 +1606,53 @@ int MothurOut::readTax(string namefile, map& taxMap) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); //are there confidence scores, if so remove them if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } - taxMap[firstCol] = secondCol; - if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true; + } pairDone = false; } } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + //are there confidence scores, if so remove them + if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true; + } + + pairDone = false; + } + } + } return taxMap.size(); @@ -1590,7 +1665,6 @@ int MothurOut::readTax(string namefile, map& taxMap) { /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, bool redund) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1612,6 +1686,9 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); + //parse names into vector vector theseNames; splitAtComma(secondCol, theseNames); @@ -1621,6 +1698,26 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + pairDone = false; + } + } + } return nameMap.size(); @@ -1633,7 +1730,6 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, int flip) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1655,12 +1751,30 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); nameMap[secondCol] = firstCol; pairDone = false; } } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[secondCol] = firstCol; + pairDone = false; + } + } + } return nameMap.size(); @@ -1673,7 +1787,7 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, map& nameCount) { try { - nameMap.clear(); nameCount.clear(); + nameMap.clear(); nameCount.clear(); //open input file ifstream in; openInputFile(namefile, in); @@ -1695,6 +1809,8 @@ int MothurOut::readNames(string namefile, map& nameMap, map theseNames; splitAtComma(secondCol, theseNames); @@ -1706,6 +1822,26 @@ int MothurOut::readNames(string namefile, map& nameMap, map pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + nameCount[firstCol] = theseNames.size(); + pairDone = false; + } + } + + } return nameMap.size(); } @@ -1717,7 +1853,6 @@ int MothurOut::readNames(string namefile, map& nameMap, map& nameMap) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1738,10 +1873,27 @@ int MothurOut::readNames(string namefile, map& nameMap) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } - if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[firstCol] = secondCol; pairDone = false; } } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[firstCol] = secondCol; pairDone = false; } + } + } return nameMap.size(); @@ -1753,8 +1905,7 @@ int MothurOut::readNames(string namefile, map& nameMap) { } /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map >& nameMap) { - try { - + try { //open input file ifstream in; openInputFile(namefile, in); @@ -1776,6 +1927,8 @@ int MothurOut::readNames(string namefile, map >& nameMap) else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); vector temp; splitAtComma(secondCol, temp); nameMap[firstCol] = temp; @@ -1785,6 +1938,24 @@ int MothurOut::readNames(string namefile, map >& nameMap) } in.close(); + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + vector temp; + splitAtComma(secondCol, temp); + nameMap[firstCol] = temp; + pairDone = false; + } + } + } + return nameMap.size(); } catch(exception& e) { @@ -1795,8 +1966,68 @@ int MothurOut::readNames(string namefile, map >& nameMap) /**********************************************************************************************************************/ map MothurOut::readNames(string namefile) { try { + map nameMap; + + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } + return nameMap; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +map MothurOut::readNames(string namefile, unsigned long int& numSeqs) { + try { map nameMap; + numSeqs = 0; //open input file ifstream in; @@ -1819,13 +2050,33 @@ map MothurOut::readNames(string namefile) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); nameMap[firstCol] = num; pairDone = false; + numSeqs += num; } } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + numSeqs += num; + } + } + } return nameMap; @@ -1835,6 +2086,19 @@ map MothurOut::readNames(string namefile) { exit(1); } } +/************************************************************/ +int MothurOut::checkName(string& name) { + try { + for (int i = 0; i < name.length(); i++) { + if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; } + } + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "checkName"); + exit(1); + } +} /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { try { @@ -1861,6 +2125,8 @@ int MothurOut::readNames(string namefile, vector& nameVector, m else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); map::iterator it = fastamap.find(firstCol); @@ -1878,6 +2144,31 @@ int MothurOut::readNames(string namefile, vector& nameVector, m } in.close(); + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } + } return error; } catch(exception& e) { @@ -1888,7 +2179,7 @@ int MothurOut::readNames(string namefile, vector& nameVector, m //********************************************************************************************************************** set MothurOut::readAccnos(string accnosfile){ try { - set names; + set names; ifstream in; openInputFile(accnosfile, in); string name; @@ -1902,10 +2193,14 @@ set MothurOut::readAccnos(string accnosfile){ in.read(buffer, 4096); vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); - for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); } } in.close(); + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); } + } return names; } catch(exception& e) { @@ -1930,9 +2225,14 @@ int MothurOut::readAccnos(string accnosfile, vector& names){ in.read(buffer, 4096); vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); - for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); } + } return 0; } @@ -1984,6 +2284,32 @@ int MothurOut::getNumChar(string line, char c){ exit(1); } } +//********************************************************************************************************************** +bool MothurOut::isSubset(vector bigset, vector subset) { + try { + + + if (subset.size() > bigset.size()) { return false; } + + //check if each guy in suset is also in bigset + for (int i = 0; i < subset.size(); i++) { + bool match = false; + for (int j = 0; j < bigset.size(); j++) { + if (subset[i] == bigset[j]) { match = true; break; } + } + + //you have a guy in subset that had no match in bigset + if (match == false) { return false; } + } + + return true; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "isSubset"); + exit(1); + } +} /***********************************************************************/ int MothurOut::mothurRemove(string filename){ try { @@ -2023,6 +2349,28 @@ bool MothurOut::mothurConvert(string item, int& num){ exit(1); } } +/***********************************************************************/ +bool MothurOut::mothurConvert(string item, intDist& num){ + try { + bool error = false; + + if (isNumeric1(item)) { + convert(item, num); + }else { + num = 0; + error = true; + mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine(); + commandInputsConvertError = true; + } + + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurConvert"); + exit(1); + } +} + /***********************************************************************/ bool MothurOut::isNumeric1(string stringToCheck){ try { @@ -2252,6 +2600,9 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){ //This function parses the estimator options and puts them in a vector void MothurOut::splitAtChar(string& estim, vector& container, char symbol) { try { + + if (symbol == '-') { splitAtDash(estim, container); return; } + string individual = ""; int estimLength = estim.size(); for(int i=0;i& container) { try { string individual = ""; int estimLength = estim.size(); + bool prevEscape = false; for(int i=0;i& container) { try { string individual = ""; int estimLength = estim.size(); + bool prevEscape = false; for(int i=0;i& container) { //This function parses the line options and puts them in a set void MothurOut::splitAtDash(string& estim, set& container) { try { - string individual; + string individual = ""; int lineNum; - - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); - convert(individual, lineNum); //convert the string to int - container.insert(lineNum); + int estimLength = estim.size(); + bool prevEscape = false; + for(int i=0;i Groups) { exit(1); } } +/**************************************************************************************************/ + +bool MothurOut::inUsersGroups(vector set, vector< vector > sets) { + try { + for (int i = 0; i < sets.size(); i++) { + if (set == sets[i]) { return true; } + } + return false; + } + catch(exception& e) { + errorOut(e, "MothurOut", "inUsersGroups"); + exit(1); + } +} +/**************************************************************************************************/ + +bool MothurOut::inUsersGroups(int groupname, vector Groups) { + try { + for (int i = 0; i < Groups.size(); i++) { + if (groupname == Groups[i]) { return true; } + } + return false; + } + catch(exception& e) { + errorOut(e, "MothurOut", "inUsersGroups"); + exit(1); + } +} + /**************************************************************************************************/ //returns true if any of the strings in first vector are in second vector bool MothurOut::inUsersGroups(vector groupnames, vector Groups) { @@ -2642,6 +3034,253 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) { exit(1); } } +/**************************************************************************************************/ +vector MothurOut::getAverages(vector< vector >& dists) { + try{ + vector averages; //averages.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { + for (int i = 0; i < dists[thisIter].size(); i++) { + averages[i] += dists[thisIter][i]; + } + } + + //finds average. + for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); } + + return averages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists) { + try{ + + vector averages = getAverages(dists); + + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists, vector& averages) { + try{ + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getAverages(vector< vector< vector > >& calcDistsTotals, string mode) { + try{ + + vector< vector > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + //calcAverages[i].resize(calcDistsTotals[0][i].size()); + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + calcAverages.push_back(temp); + } + + if (mode == "average") { + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist; + } + } + } + + for (int i = 0; i < calcAverages.size(); i++) { //finds average. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist /= (float) calcDistsTotals.size(); + } + } + }else { //find median + for (int i = 0; i < calcAverages.size(); i++) { //for each calc + for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison + vector dists; + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample + dists.push_back(calcDistsTotals[thisIter][i][j].dist); + } + sort(dists.begin(), dists.end()); + calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)]; + } + } + } + + return calcAverages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getAverages(vector< vector< vector > >& calcDistsTotals) { + try{ + + vector< vector > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + //calcAverages[i].resize(calcDistsTotals[0][i].size()); + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + calcAverages.push_back(temp); + } + + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist; + } + } + } + + for (int i = 0; i < calcAverages.size(); i++) { //finds average. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist /= (float) calcDistsTotals.size(); + } + } + + return calcAverages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getStandardDeviation(vector< vector< vector > >& calcDistsTotals) { + try{ + + vector< vector > calcAverages = getAverages(calcDistsTotals); + + //find standard deviation + vector< vector > stdDev; + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + stdDev.push_back(temp); + } + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int i = 0; i < stdDev.size(); i++) { + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist)); + } + } + } + + for (int i = 0; i < stdDev.size(); i++) { //finds average. + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist /= (float) calcDistsTotals.size(); + stdDev[i][j].dist = sqrt(stdDev[i][j].dist); + } + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getStandardDeviation(vector< vector< vector > >& calcDistsTotals, vector< vector >& calcAverages) { + try{ + //find standard deviation + vector< vector > stdDev; + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + stdDev.push_back(temp); + } + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int i = 0; i < stdDev.size(); i++) { + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist)); + } + } + } + + for (int i = 0; i < stdDev.size(); i++) { //finds average. + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist /= (float) calcDistsTotals.size(); + stdDev[i][j].dist = sqrt(stdDev[i][j].dist); + } + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} + /**************************************************************************************************/ bool MothurOut::isContainingOnlyDigits(string input) { try{ @@ -2700,8 +3339,53 @@ int MothurOut::removeConfidences(string& tax) { } } /**************************************************************************************************/ - - +string MothurOut::removeQuotes(string tax) { + try { + + string taxon; + string newTax = ""; + + for (int i = 0; i < tax.length(); i++) { + + if (control_pressed) { return newTax; } + + if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; } + + } + + return newTax; + } + catch(exception& e) { + errorOut(e, "MothurOut", "removeQuotes"); + exit(1); + } +} +/**************************************************************************************************/ +// function for calculating standard deviation +double MothurOut::getStandardDeviation(vector& featureVector){ + try { + //finds sum + double average = 0; + for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; } + average /= (double) featureVector.size(); + + //find standard deviation + double stdDev = 0; + for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each + stdDev += ((featureVector[i] - average) * (featureVector[i] - average)); + } + + stdDev /= (double) featureVector.size(); + stdDev = sqrt(stdDev); + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getStandardDeviation"); + exit(1); + } +} +/**************************************************************************************************/