From e6c4a7223199d800d496356604eb34dfe273673d Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 21 Jan 2011 12:54:26 +0000 Subject: [PATCH] working on nmds --- linearalgebra.cpp | 54 ++++++++++++++++++++++++++++------- nmdscommand.cpp | 71 ++++++++++++++++++++++++++++++++--------------- pcacommand.cpp | 6 ++-- pcoacommand.cpp | 6 ++-- 4 files changed, 99 insertions(+), 38 deletions(-) diff --git a/linearalgebra.cpp b/linearalgebra.cpp index 6b56597..39e0848 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -329,45 +329,79 @@ vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vecto double LinearAlgebra::calcPearson(vector< vector >& euclidDists, vector< vector >& userDists){ try { + /* euclidDists.clear(); + userDists.clear(); + + euclidDists.resize(1); + userDists.resize(1); + + userDists[0].push_back(0.3070833); + userDists[0].push_back(0.3244475); + userDists[0].push_back(0.6055993); + userDists[0].push_back(0.3372481); + userDists[0].push_back(0.9151715); + userDists[0].push_back(0.6182255); + userDists[0].push_back(0.7748142); + userDists[0].push_back(0.08554735); + userDists[0].push_back(0.6343481); + userDists[0].push_back(0.4049274); + + euclidDists[0].push_back(0.3342815); + euclidDists[0].push_back(0.3173829); + euclidDists[0].push_back(0.6852404); + euclidDists[0].push_back(0.7819186); + euclidDists[0].push_back(0.5705242); + euclidDists[0].push_back(0.8007263); + euclidDists[0].push_back(0.8561724); + euclidDists[0].push_back(0.4901089); + euclidDists[0].push_back(0.7027247); + euclidDists[0].push_back(0.7669696);*/ + + //find average for - X + int count = 0; vector averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0); for (int i = 0; i < euclidDists.size(); i++) { for (int j = 0; j < euclidDists[i].size(); j++) { averageEuclid[i] += euclidDists[i][j]; + count++; } } - for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size(); } - + for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) count; } + //find average for - Y + count = 0; vector averageUser; averageUser.resize(userDists.size(), 0.0); for (int i = 0; i < userDists.size(); i++) { for (int j = 0; j < userDists[i].size(); j++) { - averageUser[i] += userDists[i][j]; + averageUser[i] += userDists[i][j]; + count++; } } - for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) userDists.size(); } - + for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) count; } + double numerator = 0.0; double denomTerm1 = 0.0; double denomTerm2 = 0.0; for (int i = 0; i < euclidDists.size(); i++) { - for (int k = 0; k < i; k++) { + for (int k = 0; k < euclidDists[i].size(); k++) { float Yi = userDists[i][k]; float Xi = euclidDists[i][k]; - numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k])); - denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k])); - denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k])); + numerator += ((Xi - averageEuclid[i]) * (Yi - averageUser[i])); + denomTerm1 += ((Xi - averageEuclid[i]) * (Xi - averageEuclid[i])); + denomTerm2 += ((Yi - averageUser[i]) * (Yi - averageUser[i])); } } double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); double r = numerator / denom; - + return r; + } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); diff --git a/nmdscommand.cpp b/nmdscommand.cpp index cfd70c2..4adb57c 100644 --- a/nmdscommand.cpp +++ b/nmdscommand.cpp @@ -30,6 +30,7 @@ NMDSCommand::NMDSCommand(){ vector tempOutNames; outputTypes["nmds"] = tempOutNames; outputTypes["stress"] = tempOutNames; + outputTypes["iters"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "NMDSCommand"); @@ -108,6 +109,7 @@ NMDSCommand::NMDSCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["nmds"] = tempOutNames; + outputTypes["iters"] = tempOutNames; outputTypes["stress"] = tempOutNames; //required parameters @@ -193,25 +195,28 @@ int NMDSCommand::execute(){ vector< vector > axes; if (axesfile != "") { axes = readAxes(names); } + string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.iters"; + string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds"; + outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName); + outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); + + ofstream out, out2; + m->openOutputFile(outputFileName, out); + m->openOutputFile(stressFileName, out2); + + out2.setf(ios::fixed, ios::floatfield); + out2.setf(ios::showpoint); + out.setf(ios::fixed, ios::floatfield); + out.setf(ios::showpoint); + + out2 << "Dimension\tIter\tStress\tCorr" << endl; + + double bestStress = 10000000; + vector< vector > bestConfig; + for (int i = mindim; i <= maxdim; i++) { m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine(); - string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".nmds"; - string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".stress.nmds"; - outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName); - outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); - - ofstream out, out2; - m->openOutputFile(outputFileName, out); - m->openOutputFile(stressFileName, out2); - - out2.setf(ios::fixed, ios::floatfield); - out2.setf(ios::showpoint); - out.setf(ios::fixed, ios::floatfield); - out.setf(ios::showpoint); - - out2 << "Iter\tStress\tCorr" << endl; - for (int j = 0; j < iters; j++) { m->mothurOut(toString(j+1)); m->mothurOutEndLine(); @@ -231,25 +236,47 @@ int NMDSCommand::execute(){ if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } //calc correlation between original distances and euclidean distances from this config - double corr = linearCalc.calcPearson(matrix, newEuclid); + double corr = linearCalc.calcPearson(newEuclid, matrix); corr *= corr; if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } //output results out << "Config" << (j+1) << '\t'; - for (int k = 0; k < i; k++) { out << "X" << (k+1) << '\t'; } + for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; } out << endl; - out2 << (j+1) << '\t' << stress << '\t' << corr << endl; + out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << corr << endl; output(endConfig, names, out); + //save best + if (stress < bestStress) { + bestStress = stress; + bestConfig = endConfig; + } + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } - } - - out.close(); out2.close(); } + out.close(); out2.close(); + + //output best config + string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.axes"; + outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName); + + ofstream outBest; + m->openOutputFile(BestFileName, outBest); + outBest.setf(ios::fixed, ios::floatfield); + outBest.setf(ios::showpoint); + + outBest << '\t'; + for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; } + outBest << endl; + + output(bestConfig, names, outBest); + + outBest.close(); + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } m->mothurOutEndLine(); diff --git a/pcacommand.cpp b/pcacommand.cpp index 0842c53..89e1cb7 100644 --- a/pcacommand.cpp +++ b/pcacommand.cpp @@ -364,11 +364,11 @@ void PCACommand::output(string fnameRoot, vector name_list, vector name_list, vector