else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
//are there confidence scores, if so remove them
if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
- taxMap[firstCol] = secondCol;
- if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
+ map<string, string>::iterator itTax = taxMap.find(firstCol);
+
+ if(itTax == taxMap.end()) {
+ //first time this name is seen: store its taxonomy unless the entry is malformed
+ bool ignore = false;
+ //a non-empty taxonomy must end with ';', otherwise the entry is reported and not stored
+ if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
+ }
+ if (!ignore) { taxMap[firstCol] = secondCol; }
+ if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
+ }else {
+ //duplicate name: report it (message now ends with a real newline escape rather than the literal "/n") and set control_pressed
+ mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); control_pressed = true;
+ }
pairDone = false;
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
//are there confidence scores, if so remove them
if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
- taxMap[firstCol] = secondCol;
- if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
+ map<string, string>::iterator itTax = taxMap.find(firstCol);
+
+ if(itTax == taxMap.end()) {
+ //first time this name is seen: store its taxonomy unless the entry is malformed
+ bool ignore = false;
+ //a non-empty taxonomy must end with ';', otherwise the entry is reported and not stored
+ if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
+ }
+ if (!ignore) { taxMap[firstCol] = secondCol; }
+ if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
+ }else {
+ //duplicate name: report it (message now ends with a real newline escape rather than the literal "/n") and set control_pressed
+ mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); control_pressed = true;
+ }
+
pairDone = false;
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
- for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
+ for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
pairDone = false;
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
nameMap[secondCol] = firstCol;
pairDone = false;
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
nameMap[secondCol] = firstCol;
pairDone = false;
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
if (columnOne) { firstCol = pieces[i]; columnOne=false; }
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ nameMap[firstCol] = secondCol; pairDone = false; }
}
}
in.close();
if (columnOne) { firstCol = pieces[i]; columnOne=false; }
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ nameMap[firstCol] = secondCol; pairDone = false; }
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
vector<string> temp;
splitAtComma(secondCol, temp);
nameMap[firstCol] = temp;
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
vector<string> temp;
splitAtComma(secondCol, temp);
nameMap[firstCol] = temp;
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ int num = getNumNames(secondCol);
+ nameMap[firstCol] = num;
+ pairDone = false;
+ }
+ }
+ }
+ in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ int num = getNumNames(secondCol);
+ nameMap[firstCol] = num;
+ pairDone = false;
+ }
+ }
+ }
+
+ return nameMap;
+
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readNames");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
+map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
+ try {
+ map<string, int> nameMap;
+ numSeqs = 0;
+
+ //open input file
+ ifstream in;
+ openInputFile(namefile, in);
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
+ while (!in.eof()) {
+ if (control_pressed) { break; }
+
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
nameMap[firstCol] = num;
pairDone = false;
+ numSeqs += num;
}
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
nameMap[firstCol] = num;
pairDone = false;
+ numSeqs += num;
}
}
}
exit(1);
}
}
+/************************************************************/
+//Replaces every ':' in name with '_', modifying the caller's string in place.
+//Sets changedSeqNames to true whenever at least one character is replaced.
+//Always returns 0.
+int MothurOut::checkName(string& name) {
+    try {
+        //hop from one ':' to the next instead of inspecting every character
+        string::size_type colonPos = name.find(':');
+        while (colonPos != string::npos) {
+            name[colonPos] = '_';
+            changedSeqNames = true;
+            colonPos = name.find(':', colonPos + 1);
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "checkName");
+        exit(1);
+    }
+}
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
try {
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
map<string, string>::iterator it = fastamap.find(firstCol);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
map<string, string>::iterator it = fastamap.find(firstCol);
in.read(buffer, 4096);
vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
- for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
}
in.close();
if (rest != "") {
vector<string> pieces = splitWhiteSpace(rest);
- for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
}
return names;
}
in.read(buffer, 4096);
vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
- for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
}
in.close();
if (rest != "") {
vector<string> pieces = splitWhiteSpace(rest);
- for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
}
return 0;
exit(1);
}
}
+/**************************************************************************************************/
+//Computes column-wise means: result[i] is the sum of dists[row][i] over all rows,
+//divided by the number of rows. The result has dists[0].size() entries.
+//An empty dists returns an empty vector (previously dists[0] was read unconditionally).
+//NOTE(review): rows are assumed to be no longer than dists[0] - confirm with callers.
+vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
+    try{
+        vector<double> averages;
+        if (dists.empty()) { return averages; } //nothing to average, and dists[0] would be invalid
+
+        averages.resize(dists[0].size(), 0.0);
+
+        //accumulate each column's total
+        for (size_t thisIter = 0; thisIter < dists.size(); thisIter++) {
+            for (size_t i = 0; i < dists[thisIter].size(); i++) {
+                averages[i] += dists[thisIter][i];
+            }
+        }
+
+        //turn totals into averages
+        for (size_t i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
+
+        return averages;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getAverages");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//Computes the column-wise standard deviation of dists around the column means
+//returned by getAverages(dists). The squared deviations are divided by the number
+//of rows (not rows - 1) before the square root is taken.
+//An empty dists returns an empty vector.
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
+    try{
+        if (dists.empty()) { vector<double> none; return none; } //avoid reading dists[0] on empty input
+
+        vector<double> averages = getAverages(dists);
+
+        //the overload taking precomputed averages performs the identical computation
+        return getStandardDeviation(dists, averages);
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//Computes the column-wise standard deviation of dists around the supplied averages:
+//result[j] = sqrt( sum over rows of (dists[row][j] - averages[j])^2 / number of rows ).
+//The result has dists[0].size() entries; an empty dists returns an empty vector.
+//NOTE(review): averages is assumed to have at least as many entries as the longest row - confirm with callers.
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
+    try{
+        vector<double> stdDev;
+        if (dists.empty()) { return stdDev; } //avoid reading dists[0] on empty input
+
+        stdDev.resize(dists[0].size(), 0.0);
+
+        //sum the squared difference of each dist from its column mean
+        for (size_t thisIter = 0; thisIter < dists.size(); thisIter++) {
+            for (size_t j = 0; j < dists[thisIter].size(); j++) {
+                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
+            }
+        }
+
+        //divide by the number of rows, then take the square root
+        for (size_t i = 0; i < stdDev.size(); i++) {
+            stdDev[i] /= (double) dists.size();
+            stdDev[i] = sqrt(stdDev[i]);
+        }
+
+        return stdDev;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//Combines the distances from several iterations into a single value per comparison.
+//calcDistsTotals is indexed [iteration][calculator][comparison]. The result is indexed
+//[calculator][comparison] and takes its shape and seq1/seq2 values from iteration 0.
+//  mode == "average": dist is the sum over iterations divided by the number of iterations.
+//  any other mode:    dist is element size/2 of the sorted per-iteration dists
+//                     (the upper of the two middle values when the count is even).
+//An empty calcDistsTotals returns an empty result.
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
+    try{
+
+        vector< vector<seqDist> > calcAverages;
+        if (calcDistsTotals.empty()) { return calcAverages; } //avoid reading calcDistsTotals[0] on empty input
+
+        //copy seq1/seq2 from the first iteration and start every dist at zero
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+
+        if (mode == "average") {
+            for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum the dists of every iteration
+                for (size_t i = 0; i < calcAverages.size(); i++) { //for each calc
+                    for (size_t j = 0; j < calcAverages[i].size(); j++) { //for each comparison
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+            }
+
+            for (size_t i = 0; i < calcAverages.size(); i++) { //finds average.
+                for (size_t j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+            }
+        }else { //find median
+            for (size_t i = 0; i < calcAverages.size(); i++) { //for each calc
+                for (size_t j = 0; j < calcAverages[i].size(); j++) { //for each comparison
+                    vector<double> dists;
+                    for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
+                        dists.push_back(calcDistsTotals[thisIter][i][j].dist);
+                    }
+                    sort(dists.begin(), dists.end());
+                    calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
+                }
+            }
+        }
+
+        return calcAverages;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getAverages");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//Averages the distances from several iterations. calcDistsTotals is indexed
+//[iteration][calculator][comparison]. The result is indexed [calculator][comparison],
+//takes its shape and seq1/seq2 values from iteration 0, and holds the mean dist over
+//all iterations. An empty calcDistsTotals returns an empty result.
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+    try{
+        if (calcDistsTotals.empty()) { vector< vector<seqDist> > none; return none; } //avoid reading calcDistsTotals[0] on empty input
+
+        //the "average" branch of the mode-taking overload performs the identical computation
+        return getAverages(calcDistsTotals, "average");
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getAverages");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//Computes, for every [calculator][comparison], the standard deviation of dist across
+//iterations around the mean returned by getAverages(calcDistsTotals). The squared
+//deviations are divided by the number of iterations (not iterations - 1).
+//An empty calcDistsTotals returns an empty result.
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+    try{
+        if (calcDistsTotals.empty()) { vector< vector<seqDist> > none; return none; } //avoid reading calcDistsTotals[0] on empty input
+
+        vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
+
+        //the overload taking precomputed averages performs the identical computation
+        return getStandardDeviation(calcDistsTotals, calcAverages);
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//Computes, for every [calculator][comparison], the standard deviation of dist across
+//iterations around the supplied calcAverages. calcDistsTotals is indexed
+//[iteration][calculator][comparison]. The result is indexed [calculator][comparison]
+//and takes its shape and seq1/seq2 values from iteration 0. The squared deviations
+//are divided by the number of iterations (not iterations - 1) before the square root.
+//An empty calcDistsTotals returns an empty result.
+//NOTE(review): calcAverages is assumed to have the same shape as calcDistsTotals[0] - confirm with callers.
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
+    try{
+        vector< vector<seqDist> > stdDev;
+        if (calcDistsTotals.empty()) { return stdDev; } //avoid reading calcDistsTotals[0] on empty input
+
+        //copy seq1/seq2 from the first iteration and start every dist at zero
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+
+        //sum the squared difference of each dist from its mean
+        for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) {
+            for (size_t i = 0; i < stdDev.size(); i++) {
+                for (size_t j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                }
+            }
+        }
+
+        //divide by the number of iterations, then take the square root
+        for (size_t i = 0; i < stdDev.size(); i++) {
+            for (size_t j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+
+        return stdDev;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+
/**************************************************************************************************/
bool MothurOut::isContainingOnlyDigits(string input) {
try{