5 * Created by westcott on 1/11/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "nmdscommand.h"
11 #include "readphylipvector.h"
13 //**********************************************************************************************************************
// Builds the vector of parameter names that the nmds command accepts.
// Any failure is reported through m->errorOut, tagged with this class and method name.
14 vector<string> NMDSCommand::getValidParameters(){
16 string Array[] = {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir","inputdir"};
// sizeof(Array)/sizeof(string) is the element count, so the vector copies the whole array
17 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
21 m->errorOut(e, "NMDSCommand", "getValidParameters");
25 //**********************************************************************************************************************
// Default constructor: registers the three output categories (nmds, stress, iters)
// in outputTypes, each starting with an empty list of file names.
26 NMDSCommand::NMDSCommand(){
29 //initialize outputTypes
30 vector<string> tempOutNames;
31 outputTypes["nmds"] = tempOutNames;
32 outputTypes["stress"] = tempOutNames;
33 outputTypes["iters"] = tempOutNames;
36 m->errorOut(e, "NMDSCommand", "NMDSCommand");
40 //**********************************************************************************************************************
// Builds the vector of parameters the user must supply; phylip is the only entry.
// Any failure is reported through m->errorOut, tagged with this class and method name.
41 vector<string> NMDSCommand::getRequiredParameters(){
43 string Array[] = {"phylip"};
// same array-to-vector idiom as the other parameter lists: the length comes from sizeof
44 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
48 m->errorOut(e, "NMDSCommand", "getRequiredParameters");
52 //**********************************************************************************************************************
// Declares the vector of files this command requires.
// NOTE(review): nothing is added to the vector in this block, so the list appears to stay empty - confirm.
53 vector<string> NMDSCommand::getRequiredFiles(){
55 vector<string> myArray;
59 m->errorOut(e, "NMDSCommand", "getRequiredFiles");
63 //**********************************************************************************************************************
// Builds an nmds command from the option string supplied by the user.
// Steps: validate every parameter name, prefix inputdir onto phylip/axes file names that
// carry no path, register the output categories, open-check the phylip and axes files, and
// read mindim, maxiters, iters, maxdim and epsilon (defaults 2, 500, 10, 2 and 0.000000000001).
// abort is set when help is requested or when any check fails.
65 NMDSCommand::NMDSCommand(string option) {
69 //allow user to run help
70 if(option == "help") { help(); abort = true; }
73 //valid parameters for this command
74 string Array[] = {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir", "inputdir"};
75 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// split the option string into name/value pairs
77 OptionParser parser(option);
78 map<string, string> parameters = parser. getParameters();
80 ValidParameters validParameter;
81 map<string, string>::iterator it;
83 //check to make sure all parameters are valid for command
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
87 //if the user changes the input directory command factory will send this info to us in the output parameter
88 string inputDir = validParameter.validFile(parameters, "inputdir", false);
89 if (inputDir == "not found"){ inputDir = ""; }
92 it = parameters.find("phylip");
93 //user has given a phylip distance file
94 if(it != parameters.end()){
95 path = m->hasPath(it->second);
96 //if the user has not given a path then, add inputdir. else leave path alone.
97 if (path == "") { parameters["phylip"] = inputDir + it->second; }
100 it = parameters.find("axes");
101 //user has given an axes file
102 if(it != parameters.end()){
103 path = m->hasPath(it->second);
104 //if the user has not given a path then, add inputdir. else leave path alone.
105 if (path == "") { parameters["axes"] = inputDir + it->second; }
109 //initialize outputTypes
110 vector<string> tempOutNames;
111 outputTypes["nmds"] = tempOutNames;
112 outputTypes["iters"] = tempOutNames;
113 outputTypes["stress"] = tempOutNames;
115 //required parameters
116 phylipfile = validParameter.validFile(parameters, "phylip", true);
117 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
118 else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; }
// the axes file is optional: a missing value is fine, but a file that cannot be opened aborts
120 axesfile = validParameter.validFile(parameters, "axes", true);
121 if (axesfile == "not open") { axesfile = ""; abort = true; }
122 else if (axesfile == "not found") { axesfile = ""; }
124 //if the user changes the output directory command factory will send this info to us in the output parameter
125 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
127 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it
// numeric options: each falls back to a default string when absent, then is converted into its member
130 string temp = validParameter.validFile(parameters, "mindim", false); if (temp == "not found") { temp = "2"; }
131 convert(temp, mindim);
133 temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "500"; }
134 convert(temp, maxIters);
136 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "10"; }
137 convert(temp, iters);
139 temp = validParameter.validFile(parameters, "maxdim", false); if (temp == "not found") { temp = "2"; }
140 convert(temp, maxdim);
142 temp = validParameter.validFile(parameters, "epsilon", false); if (temp == "not found") { temp = "0.000000000001"; }
143 convert(temp, epsilon);
// sanity checks on the dimension range
145 if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; }
// NOTE(review): only maxdim < mindim is rejected, so equal values are accepted although the message says greater than
146 if (maxdim < mindim) { m->mothurOut("maxdim must be greater than mindim."); m->mothurOutEndLine(); abort = true; }
150 catch(exception& e) {
151 m->errorOut(e, "NMDSCommand", "NMDSCommand");
155 //**********************************************************************************************************************
// Prints the user-facing help text for the nmds command through m->mothurOut:
// where the algorithm comes from, each parameter with its default, and a usage example.
156 void NMDSCommand::help(){
158 m->mothurOut("The nmds command is modelled after the nmds code written in R by Sarah Goslee, using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling."); m->mothurOutEndLine();
159 m->mothurOut("The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters and epsilon."); m->mothurOutEndLine();
160 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
161 m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine();
162 m->mothurOut("The maxdim parameter allows you to select how maximum dimensions to use. Default=2"); m->mothurOutEndLine();
163 m->mothurOut("The mindim parameter allows you to select how minimum dimensions to use. Default=2"); m->mothurOutEndLine();
164 m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500"); m->mothurOutEndLine();
165 m->mothurOut("The iters parameter allows you to select the number of random configuration to try. Default=10"); m->mothurOutEndLine();
166 m->mothurOut("The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12."); m->mothurOutEndLine();
// the last two messages embed their own newlines instead of calling mothurOutEndLine
167 m->mothurOut("Example nmds(phylip=yourDistanceFile).\n");
168 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
170 catch(exception& e) {
171 m->errorOut(e, "NMDSCommand", "help");
175 //**********************************************************************************************************************
// Destructor: the body is intentionally empty.
176 NMDSCommand::~NMDSCommand(){}
177 //**********************************************************************************************************************
// Runs the nmds analysis.
// Reads the phylip distance matrix, optionally loads a starting configuration from the axes
// file, then for every dimension from mindim to maxdim performs iters trials. Each trial writes
// its final configuration to the nmds.iters file and one line (dimension, trial, stress,
// Pearson correlation) to the stress.nmds file. A configuration whose stress beats bestStress
// is kept as bestConfig and finally written to the nmds.axes file.
// Returns 0 when the command was aborted or interrupted; on interruption the output files
// recorded in outputNames are removed first.
178 int NMDSCommand::execute(){
// nothing to do when the constructor flagged a problem or help was requested
181 if (abort == true) { return 0; }
// print floating point values on cout in fixed notation with a decimal point
183 cout.setf(ios::fixed, ios::floatfield);
184 cout.setf(ios::showpoint);
186 vector<string> names;
187 vector< vector< double> > matrix;
189 //read in phylip file
190 ReadPhylipVector readFile(phylipfile);
// read fills matrix and hands back the names found in the distance file
191 names = readFile.read(matrix);
192 if (m->control_pressed) { return 0; }
// optional user supplied starting configuration
195 vector< vector<double> > axes;
196 if (axesfile != "") { axes = readAxes(names); }
// both per-trial output files are named after the phylip file and registered as outputs
198 string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.iters";
199 string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds";
200 outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName);
201 outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
204 m->openOutputFile(outputFileName, out);
205 m->openOutputFile(stressFileName, out2);
// same fixed-point formatting for both files
207 out2.setf(ios::fixed, ios::floatfield);
208 out2.setf(ios::showpoint);
209 out.setf(ios::fixed, ios::floatfield);
210 out.setf(ios::showpoint);
// header row of the stress file
212 out2 << "Dimension\tIter\tStress\tCorr" << endl;
// large starting value so that a computed stress below it replaces it
214 double bestStress = 10000000;
215 vector< vector<double> > bestConfig;
// outer loop: dimensions; inner loop: trials per dimension
217 for (int i = mindim; i <= maxdim; i++) {
218 m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine();
220 for (int j = 0; j < iters; j++) {
221 m->mothurOut(toString(j+1)); m->mothurOutEndLine();
223 //get configuration - either randomly generate or resize to this dimension
224 vector< vector<double> > thisConfig;
225 if (axesfile == "") { thisConfig = generateStartingConfiguration(names.size(), i); }
226 else { thisConfig = getConfiguration(axes, i); }
// interruption handling used throughout: close both streams, delete every recorded output file, stop
227 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
229 //calc nmds for this dimension
// nmdsCalc receives stress by reference and fills it in
231 vector< vector<double> > endConfig = nmdsCalc(matrix, thisConfig, stress);
232 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
234 //calc euclid distances for new config
235 vector< vector<double> > newEuclid = linearCalc.calculateEuclidianDistance(endConfig);
236 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
238 //calc correlation between original distances and euclidean distances from this config
239 double corr = linearCalc.calcPearson(newEuclid, matrix);
241 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
// header for this trial in the iters file: trial label followed by one label per axis
244 out << "Config" << (j+1) << '\t';
245 for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; }
// one summary line per trial in the stress file
247 out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << corr << endl;
249 output(endConfig, names, out);
// remember the configuration when its stress is lower than bestStress
252 if (stress < bestStress) {
254 bestConfig = endConfig;
257 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; }
261 out.close(); out2.close();
// write the remembered configuration to its own file
264 string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.axes";
265 outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName);
268 m->openOutputFile(BestFileName, outBest);
269 outBest.setf(ios::fixed, ios::floatfield);
270 outBest.setf(ios::showpoint);
// bestConfig holds one row per axis, so its size gives the number of axis labels
273 for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
276 output(bestConfig, names, outBest);
280 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// report every file that was produced
282 m->mothurOutEndLine();
283 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
284 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
285 m->mothurOutEndLine();
289 catch(exception& e) {
290 m->errorOut(e, "NMDSCommand", "execute");
294 //**********************************************************************************************************************
// Iteratively refines a starting configuration so that its euclidean distances approach the
// supplied distance matrix.
//   matrix  - the distances to be reproduced
//   config  - starting configuration; it is copied, not modified
//   stress1 - written by this routine; execute reports it as the stress of the run
// The loop runs while count is below maxIters and the two stress values differ by more than
// epsilon. The working configuration is returned early when the run is interrupted.
// Failures are reported through m->errorOut under this function name.
295 vector< vector<double> > NMDSCommand::nmdsCalc(vector< vector<double> >& matrix, vector< vector<double> >& config, double& stress1) {
298 vector< vector<double> > newConfig = config;
300 //calc euclid distances
301 vector< vector<double> > euclid = linearCalc.calculateEuclidianDistance(newConfig);
302 if (m->control_pressed) { return newConfig; }
304 double stress2 = calculateStress(matrix, euclid);
// seed stress1 so that the first convergence test cannot succeed
305 stress1 = stress2 + 1.0 + epsilon;
308 while ((count < maxIters) && (abs(stress1 - stress2) > epsilon)) {
313 if (m->control_pressed) { return newConfig; }
// b has the same shape as euclid and starts out as all zeros
315 vector< vector<double> > b; b.resize(euclid.size());
316 for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0.0); }
318 vector<double> columnSums; columnSums.resize(euclid.size(), 0.0);
319 for (int i = 0; i < euclid.size(); i++) {
320 for (int j = 0; j < euclid[i].size(); j++) {
321 //eliminate divide by zero error
322 if (euclid[i][j] != 0) {
// ratio of target distance to current distance, accumulated per column
323 b[i][j] = matrix[i][j] / euclid[i][j];
324 columnSums[j] += b[i][j];
330 //put in diagonal sums
331 for (int i = 0; i < euclid.size(); i++) { b[i][i] = columnSums[i]; }
// n is recovered from the lower-triangle count; algebraically this is the matrix dimension
333 int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0;
334 double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0;
// new configuration = (old configuration times b), scaled by 1/n
337 newConfig = linearCalc.matrix_mult(newConfig, b);
338 for (int i = 0; i < newConfig.size(); i++) {
339 for (int j = 0; j < newConfig[i].size(); j++) {
340 newConfig[i][j] *= (1.0 / n);
// re-evaluate distances and stress for the updated configuration
344 euclid = linearCalc.calculateEuclidianDistance(newConfig);
346 stress2 = calculateStress(matrix, euclid);
351 catch(exception& e) {
// report under this function name so the log points at the right routine
352 m->errorOut(e, "NMDSCommand", "nmdsCalc");
357 //**********************************************************************************************************************
358 //generate random config
// Creates a random starting configuration with dimension rows (one per axis) and numNames
// entries per row. Each entry is a signed fraction built from two rand() draws: one for the
// magnitude and one whose parity picks the sign. The partly filled configuration is returned
// at once when the run is interrupted.
359 vector< vector<double> > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) {
361 vector< vector<double> > axes; axes.resize(dimension);
362 for (int i = 0; i < axes.size(); i++) { axes[i].resize(numNames); }
364 //generate random number between -1 and 1, precision 6
365 for (int i = 0; i < axes.size(); i++) {
366 for (int j = 0; j < axes[i].size(); j++) {
368 if (m->control_pressed) { return axes; }
370 //generate random int between 0 and 99999
// scales rand() down by dividing by (RAND_MAX / 99998) + 1 and truncating to int
371 int myrand = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
373 //generate random sign
374 int mysign = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
376 //if mysign is even then sign = positive, else sign = negative
// NOTE(review): mysign is an int, so the double literals below are converted to 1 and -1
377 if ((mysign % 2) == 0) { mysign = 1.0; }
378 else { mysign = -1.0; }
// dividing by 100000 maps the integer magnitude to a fraction
380 axes[i][j] = mysign * myrand / (float) 100000;
386 catch(exception& e) {
387 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
391 //**********************************************************************************************************************
392 //normalize configuration
// Normalizes a configuration in place: every axis is centred on its own mean, then all
// entries are divided by the square root of the mean squared deviation over the whole
// configuration.
//   axes      - configuration, one row per axis; modified in place
//   numNames  - divisor used for the per-axis mean and for the overall mean squared deviation
//   dimension - size given to the per-axis mean vector
// NOTE(review): averageAxes is sized by dimension but indexed up to axes.size(); the two are
// presumably equal - confirm against callers.
393 int NMDSCommand::normalizeConfiguration(vector< vector<double> >& axes, int numNames, int dimension) {
395 vector<double> averageAxes; averageAxes.resize(dimension, 0.0);
// mean of each axis
398 for (int i = 0; i < axes.size(); i++) {
399 for (int j = 0; j < axes[i].size(); j++) { averageAxes[i] += axes[i][j]; }
401 averageAxes[i] /= (float) numNames;
// sum of squared deviations from the axis means, over all axes
405 double sumDenom = 0.0;
406 for (int i = 0; i < axes.size(); i++) {
407 for (int j = 0; j < axes[i].size(); j++) {
408 sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i]));
412 double denom = sqrt((sumDenom / (float) (axes.size() * numNames)));
// NOTE(review): denom is zero when every entry equals its axis mean; the division below is not guarded
414 for (int i = 0; i < axes.size(); i++) {
415 for (int j = 0; j < axes[i].size(); j++) {
416 axes[i][j] = (axes[i][j] - averageAxes[i]) / denom;
422 catch(exception& e) {
423 m->errorOut(e, "NMDSCommand", "normalizeConfiguration");
427 //**********************************************************************************************************************
// Builds a configuration limited to the requested number of axes by copying the first
// dimension rows of axes into newAxes.
// NOTE(review): axes[i] is read without a bounds check, so dimension must not exceed axes.size().
429 vector< vector<double> > NMDSCommand::getConfiguration(vector< vector<double> >& axes, int dimension) {
431 vector< vector<double> > newAxes; newAxes.resize(dimension);
433 for (int i = 0; i < dimension; i++) {
434 newAxes[i] = axes[i];
439 catch(exception& e) {
440 m->errorOut(e, "NMDSCommand", "getConfiguration");
444 //**********************************************************************************************************************
445 //find raw stress, and normalize using the sum of squared config values
// Computes a normalized stress between two matrices of identical shape.
//   matrix - the reference distances
//   config - the distances being compared (nmdsCalc passes the euclidean distances here)
// rawStress accumulates the squared element-wise differences and denom the squared config
// values; normStress becomes sqrt(rawStress / denom) when both sums are non-zero and stays
// 0.0 otherwise. The current normStress is returned at once when the run is interrupted.
446 double NMDSCommand::calculateStress(vector< vector<double> >& matrix, vector< vector<double> >& config) {
448 double normStress = 0.0;
450 double rawStress = 0.0;
453 for (int i = 0; i < matrix.size(); i++) {
454 for (int j = 0; j < matrix[i].size(); j++) {
455 if (m->control_pressed) { return normStress; }
457 rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j]));
458 denom += (config[i][j] * config[i][j]);
// skip the division when either sum is zero, which also avoids dividing by zero
463 if ((rawStress != 0.0) && (denom != 0.0)) {
464 normStress = sqrt((rawStress / denom));
469 catch(exception& e) {
470 m->errorOut(e, "NMDSCommand", "calculateStress");
475 //**********************************************************************************************************************
// Writes a configuration to out, tab separated: for every name, the name itself followed by
// that name's coordinate on each axis.
//   config - one row per axis, indexed as config[axis][name]
//   names  - labels, in the same order as the columns of config
//   out    - destination stream, already opened by the caller
476 int NMDSCommand::output(vector< vector<double> >& config, vector<string>& names, ofstream& out) {
479 for (int i = 0; i < names.size(); i++) {
481 out << names[i] << '\t';
// config is stored axis-major, so the name index is the second subscript
483 for (int j = 0; j < config.size(); j++) {
484 out << config[j][i] << '\t';
494 catch(exception& e) {
495 m->errorOut(e, "NMDSCommand", "output");
499 /*****************************************************************/
// Reads a starting configuration from axesfile and returns it axis-major (axes[axis][name])
// with the names in the same order as the distance file.
//   names - labels from the distance file (passed by value)
// The header line is scanned for occurrences of the word axis to find how many columns the
// file holds. Groups that are not in names are reported and ignored. When the remaining
// groups do not match names exactly, an error is printed, m->control_pressed is set and the
// configuration built so far is returned.
500 vector< vector<double> > NMDSCommand::readAxes(vector<string> names){
503 m->openInputFile(axesfile, in);
505 string headerLine = m->getline(in); m->gobble(in);
507 //count the number of axis you are reading
// each hit is cut off the front of the header so the search can continue behind it
511 int pos = headerLine.find("axis");
512 if (pos != string::npos) {
514 headerLine = headerLine.substr(pos+4);
515 }else { done = true; }
// the file may hold fewer axes than the user asked for
518 if (maxdim > count) {
519 m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine();
// NOTE(review): the message prints maxdim-1; verify that this is the value actually given to mindim
521 if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + "."); m->mothurOutEndLine(); }
// result matrix: maxdim rows, one zero-initialised column per name
524 vector< vector<double> > axes; axes.resize(maxdim);
525 for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); }
// values are first collected per group, then reordered to follow names
527 map <string, vector<double> > orderedAxes;
528 map <string, vector<double> >::iterator it;
532 if (m->control_pressed) { in.close(); return axes; }
535 in >> group; m->gobble(in);
538 if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); }
540 vector<double> thisGroupsAxes;
541 for (int i = 0; i < count; i++) {
545 //only save the axis we want
546 if (i < maxdim) { thisGroupsAxes.push_back(temp); }
549 if (!ignore) { orderedAxes[group] = thisGroupsAxes; }
// every name of the distance file must have been found in the axes file
556 if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
558 //put axes info in same order as distance file, just in case
559 for (int i = 0; i < names.size(); i++) {
560 it = orderedAxes.find(names[i]);
562 if (it != orderedAxes.end()) {
563 vector<double> thisGroupsAxes = it->second;
// transpose while copying: per-group values become column i of the axis-major result
565 for (int j = 0; j < thisGroupsAxes.size(); j++) {
566 axes[j][i] = thisGroupsAxes[j];
569 }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
574 catch(exception& e) {
575 m->errorOut(e, "NMDSCommand", "readAxes");
579 /**********************************************************************************************************************/