X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothurout.cpp;h=54cdd33bbe72cfecb241ed0d24d537bf4654af06;hb=4b54ce99af7db8019ea907cd7c2edf789369ada9;hp=37c0916d4c49e1f8ed8ca979c7ff61b7e1a13ef3;hpb=5c5c0428f6d548c28a8b903ac80efed4f92d59db;p=mothur.git diff --git a/mothurout.cpp b/mothurout.cpp index 37c0916..54cdd33 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1183,7 +1183,7 @@ string MothurOut::sortFile(string distFile, string outputDir){ string firstName, secondName; float dist; - while (input) { + while (!input.eof()) { input >> firstName >> secondName >> dist; output << dist << '\t' << firstName << '\t' << secondName << endl; gobble(input); @@ -1199,16 +1199,17 @@ string MothurOut::sortFile(string distFile, string outputDir){ //read in sorted file and put distance at end again ifstream input2; + ofstream output2; openInputFile(tempOutfile, input2); - openOutputFile(outfile, output); + openOutputFile(outfile, output2); - while (input2) { + while (!input2.eof()) { input2 >> dist >> firstName >> secondName; - output << firstName << '\t' << secondName << '\t' << dist << endl; + output2 << firstName << '\t' << secondName << '\t' << dist << endl; gobble(input2); } input2.close(); - output.close(); + output2.close(); //remove temp files mothurRemove(tempDistFile); @@ -1605,10 +1606,20 @@ int MothurOut::readTax(string namefile, map& taxMap) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); //are there confidence scores, if so remove them if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } - taxMap[firstCol] = secondCol; - if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true; + } pairDone = false; } } @@ -1623,10 +1634,21 @@ int MothurOut::readTax(string namefile, map& taxMap) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); //are there confidence scores, if so remove them if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } - taxMap[firstCol] = secondCol; - if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true; + } + pairDone = false; } } @@ -1664,6 +1686,9 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); + //parse names into vector vector theseNames; splitAtComma(secondCol, theseNames); @@ -1682,10 +1707,13 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); + //parse names into vector vector theseNames; splitAtComma(secondCol, theseNames); - for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } pairDone = false; } } @@ -1723,6 +1751,8 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); nameMap[secondCol] = firstCol; pairDone = false; } @@ -1738,6 +1768,8 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); nameMap[secondCol] = firstCol; pairDone = false; } @@ -1777,6 +1809,8 @@ int MothurOut::readNames(string namefile, map& nameMap, map theseNames; splitAtComma(secondCol, theseNames); @@ -1796,6 +1830,8 @@ int MothurOut::readNames(string namefile, map& nameMap, map theseNames; splitAtComma(secondCol, theseNames); @@ -1837,7 +1873,10 @@ int MothurOut::readNames(string namefile, map& nameMap) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } - if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[firstCol] = secondCol; pairDone = false; } } } in.close(); @@ -1849,7 +1888,10 @@ int MothurOut::readNames(string namefile, map& nameMap) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } - if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[firstCol] = secondCol; pairDone = false; } } } @@ -1885,6 +1927,8 @@ int MothurOut::readNames(string namefile, map >& nameMap) else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); vector temp; splitAtComma(secondCol, temp); nameMap[firstCol] = temp; @@ -1902,6 +1946,8 @@ int MothurOut::readNames(string namefile, map >& nameMap) else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); vector temp; splitAtComma(secondCol, temp); nameMap[firstCol] = temp; @@ -1943,9 +1989,73 @@ map MothurOut::readNames(string namefile) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } + + return nameMap; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +map MothurOut::readNames(string namefile, unsigned long int& numSeqs) { + try { + map nameMap; + numSeqs = 0; + + //open input file + ifstream in; + openInputFile(namefile, in); + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); nameMap[firstCol] = num; pairDone = false; + numSeqs += num; } } } @@ -1958,9 +2068,12 @@ map MothurOut::readNames(string namefile) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); nameMap[firstCol] = num; pairDone = false; + numSeqs += num; } } } @@ -1973,6 +2086,19 @@ map MothurOut::readNames(string namefile) { exit(1); } } +/************************************************************/ +int MothurOut::checkName(string& name) { + try { + for (int i = 0; i < name.length(); i++) { + if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; } + } + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "checkName"); + exit(1); + } +} /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { try { @@ -1999,6 +2125,8 @@ int MothurOut::readNames(string namefile, vector& nameVector, m else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); map::iterator it = fastamap.find(firstCol); @@ -2024,6 +2152,8 @@ int MothurOut::readNames(string namefile, vector& nameVector, m else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); map::iterator it = fastamap.find(firstCol); @@ -2063,13 +2193,13 @@ set MothurOut::readAccnos(string accnosfile){ in.read(buffer, 4096); vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); - for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); } } in.close(); if (rest != "") { vector pieces = splitWhiteSpace(rest); - for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); } } return names; } @@ -2095,13 +2225,13 @@ int MothurOut::readAccnos(string accnosfile, vector& names){ in.read(buffer, 4096); vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); - for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); } } in.close(); if (rest != "") { vector pieces = splitWhiteSpace(rest); - for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); } } return 0; @@ -2470,6 +2600,9 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){ //This function parses the estimator options and puts them in a vector void MothurOut::splitAtChar(string& estim, vector& container, char symbol) { try { + + if (symbol == '-') { splitAtDash(estim, container); return; } + string individual = ""; int estimLength = estim.size(); for(int i=0;i MothurOut::getAverages(vector< vector >& dists) { + try{ + vector averages; //averages.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { + for (int i = 0; i < dists[thisIter].size(); i++) { + averages[i] += dists[thisIter][i]; + } + } + + //finds average. + for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); } + + return averages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists) { + try{ + + vector averages = getAverages(dists); + + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists, vector& averages) { + try{ + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getAverages(vector< vector< vector > >& calcDistsTotals, string mode) { + try{ + + vector< vector > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + //calcAverages[i].resize(calcDistsTotals[0][i].size()); + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + calcAverages.push_back(temp); + } + + if (mode == "average") { + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist; + } + } + } + + for (int i = 0; i < calcAverages.size(); i++) { //finds average. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist /= (float) calcDistsTotals.size(); + } + } + }else { //find median + for (int i = 0; i < calcAverages.size(); i++) { //for each calc + for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison + vector dists; + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample + dists.push_back(calcDistsTotals[thisIter][i][j].dist); + } + sort(dists.begin(), dists.end()); + calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)]; + } + } + } + + return calcAverages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getAverages(vector< vector< vector > >& calcDistsTotals) { + try{ + + vector< vector > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + //calcAverages[i].resize(calcDistsTotals[0][i].size()); + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + calcAverages.push_back(temp); + } + + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist; + } + } + } + + for (int i = 0; i < calcAverages.size(); i++) { //finds average. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist /= (float) calcDistsTotals.size(); + } + } + + return calcAverages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getStandardDeviation(vector< vector< vector > >& calcDistsTotals) { + try{ + + vector< vector > calcAverages = getAverages(calcDistsTotals); + + //find standard deviation + vector< vector > stdDev; + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + stdDev.push_back(temp); + } + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int i = 0; i < stdDev.size(); i++) { + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist)); + } + } + } + + for (int i = 0; i < stdDev.size(); i++) { //finds average. + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist /= (float) calcDistsTotals.size(); + stdDev[i][j].dist = sqrt(stdDev[i][j].dist); + } + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getStandardDeviation(vector< vector< vector > >& calcDistsTotals, vector< vector >& calcAverages) { + try{ + //find standard deviation + vector< vector > stdDev; + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + stdDev.push_back(temp); + } + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int i = 0; i < stdDev.size(); i++) { + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist)); + } + } + } + + for (int i = 0; i < stdDev.size(); i++) { //finds average. + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist /= (float) calcDistsTotals.size(); + stdDev[i][j].dist = sqrt(stdDev[i][j].dist); + } + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} + /**************************************************************************************************/ bool MothurOut::isContainingOnlyDigits(string input) { try{