From 20071b183e619c122bf9f63b4bb42722507c4e4a Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Fri, 18 Jan 2013 08:50:25 -0500 Subject: [PATCH] fixed seq. error align=f issue. added std and ave calcs to mothurout. working on unifrac bug report --- mothurout.cpp | 73 ++++++++++++++++++++++++++++++++++++ mothurout.h | 3 ++ refchimeratest.cpp | 3 +- seqerrorcommand.cpp | 7 +++- unifracunweightedcommand.cpp | 35 +++++++++++++---- 5 files changed, 110 insertions(+), 11 deletions(-) diff --git a/mothurout.cpp b/mothurout.cpp index 468c063..240e7ec 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -2925,6 +2925,79 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) { } } /**************************************************************************************************/ +vector MothurOut::getAverages(vector< vector >& dists) { + try{ + vector averages; //averages.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { + for (int i = 0; i < dists[thisIter].size(); i++) { + averages[i] += dists[thisIter][i]; + } + } + + //finds average. + for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); } + + return averages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists) { + try{ + + vector averages = getAverages(dists); + + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists, vector& averages) { + try{ + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ bool MothurOut::isContainingOnlyDigits(string input) { try{ diff --git a/mothurout.h b/mothurout.h index 0d2e86f..84f9be3 100644 --- a/mothurout.h +++ b/mothurout.h @@ -155,6 +155,9 @@ class MothurOut { unsigned int fromBase36(string); int getRandomIndex(int); //highest double getStandardDeviation(vector&); + vector getStandardDeviation(vector< vector >&); + vector getStandardDeviation(vector< vector >&, vector&); + vector getAverages(vector< vector >&); int control_pressed; bool executing, runParse, jumble, gui, mothurCalling, debug; diff --git a/refchimeratest.cpp b/refchimeratest.cpp index 701fb9e..c084bff 100644 --- a/refchimeratest.cpp +++ b/refchimeratest.cpp @@ -24,7 +24,8 @@ RefChimeraTest::RefChimeraTest(vector& refs, bool aligned) : aligned(a referenceSeqs.resize(numRefSeqs); referenceNames.resize(numRefSeqs); for(int i=0;imothurOut("we are ignoring the report file if your sequences are not aligned. we will check that the sequences in your fasta and and qual fileare the same length."); + m->mothurOut("we are ignoring the report file if your sequences are not aligned. we will check that the sequences in your fasta and and qual file are the same length."); m->mothurOutEndLine(); } } @@ -759,7 +759,10 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName, int numParentSeqs = -1; int closestRefIndex = -1; - numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned(), outChimeraReport); + string querySeq = query.getAligned(); + if (!aligned) { querySeq = query.getUnaligned(); } + + numParentSeqs = chimeraTest.analyzeQuery(query.getName(), querySeq, outChimeraReport); closestRefIndex = chimeraTest.getClosestRefIndex(); diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 5f3cffc..19a2ece 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -481,20 +481,29 @@ int UnifracUnweightedCommand::execute() { int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector >& dists, int treeNum) { try { //we need to find the average distance and standard deviation for each groups distance - //finds sum - vector averages; averages.resize(numComp, 0); + vector averages; //averages.resize(numComp, 0.0); + for (int i = 0; i < numComp; i++) { averages.push_back(0.0); } + + if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); } + for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { for (int i = 0; i < dists[thisIter].size(); i++) { averages[i] += dists[thisIter][i]; } } + if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); } + //finds average. - for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; } + for (int i = 0; i < averages.size(); i++) { + averages[i] /= (float) subsampleIters; + if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", averages[i] = " + toString(averages[i]) + "\n"); } + } //find standard deviation - vector stdDev; stdDev.resize(numComp, 0); + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < numComp; i++) { stdDev.push_back(0.0); } for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each for (int j = 0; j < dists[thisIter].size(); j++) { @@ -504,20 +513,28 @@ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector >& di for (int i = 0; i < stdDev.size(); i++) { stdDev[i] /= (float) subsampleIters; stdDev[i] = sqrt(stdDev[i]); + if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", stdDev[i] = " + toString(stdDev[i]) + "\n"); } } //make matrix with scores in it - vector< vector > avedists; avedists.resize(m->getNumGroups()); + vector< vector > avedists; //avedists.resize(m->getNumGroups()); for (int i = 0; i < m->getNumGroups(); i++) { - avedists[i].resize(m->getNumGroups(), 0.0); + vector temp; + for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); } + avedists.push_back(temp); } //make matrix with scores in it - vector< vector > stddists; stddists.resize(m->getNumGroups()); + vector< vector > stddists; //stddists.resize(m->getNumGroups()); for (int i = 0; i < m->getNumGroups(); i++) { - stddists[i].resize(m->getNumGroups(), 0.0); + vector temp; + for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); } + //stddists[i].resize(m->getNumGroups(), 0.0); + stddists.push_back(temp); } + if (m->debug) { m->mothurOut("[DEBUG]: about to fill matrix.\n"); } + //flip it so you can print it int count = 0; for (int r=0; rgetNumGroups(); r++) { @@ -530,6 +547,8 @@ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector >& di } } + if (m->debug) { m->mothurOut("[DEBUG]: done filling matrix.\n"); } + map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile)); variables["[tag]"] = toString(treeNum+1); -- 2.39.2