// NOTE: this text is a unified-diff fragment. Lines starting with '+' are additions, lines
// starting with '-' are removals, and unmarked lines are unchanged context.
//
// LinearAlgebra::calcPearson
// Purpose: accumulates the centered cross-product of euclidDists (X) and userDists (Y) over
//          every [i][k] element and returns it divided by the product of the square roots of
//          the two centered sums of squares.
// Params:  euclidDists - X values; its row count and row lengths drive every loop bound.
//          userDists   - Y values; read at the same [i][k] positions as euclidDists.
// Returns: r = numerator / (sqrt(denomTerm1) * sqrt(denomTerm2)).
double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
try {
// Commented-out block: would discard both inputs and substitute ten hard-coded values each
// (presumably a manual test fixture -- confirm before deleting).
+ /* euclidDists.clear();
+ userDists.clear();
+
+ euclidDists.resize(1);
+ userDists.resize(1);
+
+ userDists[0].push_back(0.3070833);
+ userDists[0].push_back(0.3244475);
+ userDists[0].push_back(0.6055993);
+ userDists[0].push_back(0.3372481);
+ userDists[0].push_back(0.9151715);
+ userDists[0].push_back(0.6182255);
+ userDists[0].push_back(0.7748142);
+ userDists[0].push_back(0.08554735);
+ userDists[0].push_back(0.6343481);
+ userDists[0].push_back(0.4049274);
+
+ euclidDists[0].push_back(0.3342815);
+ euclidDists[0].push_back(0.3173829);
+ euclidDists[0].push_back(0.6852404);
+ euclidDists[0].push_back(0.7819186);
+ euclidDists[0].push_back(0.5705242);
+ euclidDists[0].push_back(0.8007263);
+ euclidDists[0].push_back(0.8561724);
+ euclidDists[0].push_back(0.4901089);
+ euclidDists[0].push_back(0.7027247);
+ euclidDists[0].push_back(0.7669696);*/
+
+
//find average for - X
// count tallies every element of every row; it is not reset between rows.
+ int count = 0;
// Per-row sums are accumulated in float although the inputs are double.
vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
for (int i = 0; i < euclidDists.size(); i++) {
for (int j = 0; j < euclidDists[i].size(); j++) {
averageEuclid[i] += euclidDists[i][j];
+ count++;
}
}
// NOTE(review): each per-row sum is divided by the total element count across all rows, so
// with more than one row averageEuclid[i] is neither that row's mean nor the overall mean --
// confirm this is intended. The removed line divided by the number of rows instead.
- for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size(); }
-
+ for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) count; }
+
//find average for - Y
// Same scheme as for X: count restarts at zero, then tallies every element of userDists.
+ count = 0;
vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
for (int i = 0; i < userDists.size(); i++) {
for (int j = 0; j < userDists[i].size(); j++) {
- averageUser[i] += userDists[i][j];
+ averageUser[i] += userDists[i][j];
+ count++;
}
}
// NOTE(review): same divisor concern as for averageEuclid above.
- for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) userDists.size(); }
-
+ for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) count; }
+
double numerator = 0.0;
double denomTerm1 = 0.0;
double denomTerm2 = 0.0;
// Loop bounds come from euclidDists only; userDists[i][k] is read unchecked, so this assumes
// userDists has at least the same shape -- TODO confirm with callers.
for (int i = 0; i < euclidDists.size(); i++) {
// The added line walks the full row; the removed line stopped at k < i.
- for (int k = 0; k < i; k++) {
+ for (int k = 0; k < euclidDists[i].size(); k++) {
// Values are narrowed from double to float before centering.
float Yi = userDists[i][k];
float Xi = euclidDists[i][k];
// The added lines center with the row index i; the removed lines used the column index k.
- numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
- denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
- denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
+ numerator += ((Xi - averageEuclid[i]) * (Yi - averageUser[i]));
+ denomTerm1 += ((Xi - averageEuclid[i]) * (Xi - averageEuclid[i]));
+ denomTerm2 += ((Yi - averageUser[i]) * (Yi - averageUser[i]));
}
}
// No guard for a zero denominator (e.g. when every X or every Y equals its average).
double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
double r = numerator / denom;
-
+
return r;
+
}
catch(exception& e) {
// NOTE(review): the function name passed to errorOut is "calculateEuclidianDistance", which
// does not match this function's name (calcPearson).
m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
// (The remainder of this catch block lies outside this diff hunk.)
// Diff-hunk fragments ('+' marks added lines); the enclosing definitions are not visible here.
// First fragment: registers an empty file list under the "nmds", "stress" and (newly added)
// "iters" keys of outputTypes.
vector<string> tempOutNames;
outputTypes["nmds"] = tempOutNames;
outputTypes["stress"] = tempOutNames;
+ outputTypes["iters"] = tempOutNames;
}
// Second fragment: a handler that reports under "NMDSCommand"/"NMDSCommand", followed by
// another hunk that performs the same three-key initialisation of outputTypes.
catch(exception& e) {
m->errorOut(e, "NMDSCommand", "NMDSCommand");
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["nmds"] = tempOutNames;
+ outputTypes["iters"] = tempOutNames;
outputTypes["stress"] = tempOutNames;
//required parameters
// Third fragment: axes is filled from readAxes(names) only when axesfile is non-empty;
// otherwise it stays empty.
vector< vector<double> > axes;
if (axesfile != "") { axes = readAxes(names); }
// Diff-hunk fragment ('+' added, '-' removed, unmarked = context). After this change the
// iteration file and the stress file are opened once, before the dimension loop, instead of
// once per dimension; a third file holding the lowest-stress configuration is written at the end.
// stress, endConfig and newEuclid are defined in lines not shown in this fragment.
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.iters";
+ string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds";
+ outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName);
+ outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
+
+ ofstream out, out2;
+ m->openOutputFile(outputFileName, out);
+ m->openOutputFile(stressFileName, out2);
+
// Both streams use fixed-point notation and always show the decimal point.
+ out2.setf(ios::fixed, ios::floatfield);
+ out2.setf(ios::showpoint);
+ out.setf(ios::fixed, ios::floatfield);
+ out.setf(ios::showpoint);
+
// Because one stress file now covers all dimensions, the header gains a "Dimension" column.
+ out2 << "Dimension\tIter\tStress\tCorr" << endl;
+
// bestStress starts at a large sentinel; the first configuration with stress below it wins.
// Both variables live outside the loops, so the best is tracked across all dimensions and iterations.
+ double bestStress = 10000000;
+ vector< vector<double> > bestConfig;
+
for (int i = mindim; i <= maxdim; i++) {
m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine();
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".nmds";
- string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".stress.nmds";
- outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName);
- outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
-
- ofstream out, out2;
- m->openOutputFile(outputFileName, out);
- m->openOutputFile(stressFileName, out2);
-
- out2.setf(ios::fixed, ios::floatfield);
- out2.setf(ios::showpoint);
- out.setf(ios::fixed, ios::floatfield);
- out.setf(ios::showpoint);
-
- out2 << "Iter\tStress\tCorr" << endl;
-
for (int j = 0; j < iters; j++) {
m->mothurOut(toString(j+1)); m->mothurOutEndLine();
// Abort path: close both streams, delete every file recorded in outputNames, return 0.
if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
//calc correlation between original distances and euclidean distances from this config
// The change swaps the argument order: newEuclid is now passed first and matrix second.
- double corr = linearCalc.calcPearson(matrix, newEuclid);
+ double corr = linearCalc.calcPearson(newEuclid, matrix);
// corr is squared in place, so the value written below is the squared correlation.
corr *= corr;
if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
//output results
// One "Config<j+1>" header row per iteration, with one column label per dimension (i labels).
out << "Config" << (j+1) << '\t';
- for (int k = 0; k < i; k++) { out << "X" << (k+1) << '\t'; }
+ for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; }
out << endl;
- out2 << (j+1) << '\t' << stress << '\t' << corr << endl;
+ out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << corr << endl;
output(endConfig, names, out);
+ //save best
// Strict '<' keeps the earliest configuration when two have equal stress.
+ if (stress < bestStress) {
+ bestStress = stress;
+ bestConfig = endConfig;
+ }
+
if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
-
}
-
- out.close(); out2.close();
}
// The streams are now closed once, after every dimension has been processed.
+ out.close(); out2.close();
+
+ //output best config
+ string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.axes";
+ outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName);
+
+ ofstream outBest;
+ m->openOutputFile(BestFileName, outBest);
+ outBest.setf(ios::fixed, ios::floatfield);
+ outBest.setf(ios::showpoint);
+
// NOTE(review): the header here emits bestConfig.size() labels, whereas the per-iteration
// header above emits i labels. Whether bestConfig.size() is the number of axes depends on
// the layout of endConfig, which is not visible here -- confirm. If no iteration ever ran
// (or none beat the sentinel), bestConfig is empty when passed to output().
+ outBest << '\t';
+ for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
+ outBest << endl;
+
+ output(bestConfig, names, outBest);
+
+ outBest.close();
+
// Final abort check happens after the best-config file is written; that file is already in
// outputNames, so it is removed along with the others.
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
m->mothurOutEndLine();
}
}
// Diff-hunk fragment ('+' added, '-' removed); the enclosing function is not visible here.
// The change renames the PCA data file from <fnameRoot>.pca to <fnameRoot>.pca.axes and
// registers the new name in outputNames and under outputTypes["pca"].
// ios::trunc discards any existing content of the file when it is opened.
- ofstream pcaData((fnameRoot+".pca").c_str(), ios::trunc);
+ ofstream pcaData((fnameRoot+".pca.axes").c_str(), ios::trunc);
pcaData.setf(ios::fixed, ios::floatfield);
pcaData.setf(ios::showpoint);
- outputNames.push_back(fnameRoot+".pca");
- outputTypes["pca"].push_back(fnameRoot+".pca");
+ outputNames.push_back(fnameRoot+".pca.axes");
+ outputTypes["pca"].push_back(fnameRoot+".pca.axes");
// The loadings file name (<fnameRoot>.pca.loadings) is unchanged by this hunk.
ofstream pcaLoadings((fnameRoot+".pca.loadings").c_str(), ios::trunc);
pcaLoadings.setf(ios::fixed, ios::floatfield);
}
}
// Diff-hunk fragment ('+' added, '-' removed); the enclosing function is not visible here.
// The change renames the PCoA data file from <fnameRoot>pcoa to <fnameRoot>pcoa.axes and
// registers the new name in outputNames and under outputTypes["pcoa"].
// NOTE(review): unlike the PCA hunk, no '.' separates fnameRoot from "pcoa" -- presumably
// fnameRoot already ends with a dot in this function; confirm against where it is built.
- ofstream pcaData((fnameRoot+"pcoa").c_str(), ios::trunc);
+ ofstream pcaData((fnameRoot+"pcoa.axes").c_str(), ios::trunc);
pcaData.setf(ios::fixed, ios::floatfield);
pcaData.setf(ios::showpoint);
- outputNames.push_back(fnameRoot+"pcoa");
- outputTypes["pcoa"].push_back(fnameRoot+"pcoa");
+ outputNames.push_back(fnameRoot+"pcoa.axes");
+ outputTypes["pcoa"].push_back(fnameRoot+"pcoa.axes");
// The loadings file name (<fnameRoot>pcoa.loadings) is unchanged by this hunk.
ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
pcaLoadings.setf(ios::fixed, ios::floatfield);