5 * Created by westcott on 1/11/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "nmdscommand.h"
11 #include "readphylipvector.h"
13 //**********************************************************************************************************************
// Declares the parameters accepted by the nmds command. Each CommandParameter is appended
// to the `parameters` member and the parameter names are gathered into myArray.
// The string-option constructor passes the result of this call to
// ValidParameters::isValidParameter to vet the options supplied by the user.
// NOTE(review): the try opener, the return of myArray and the closing braces are not
// visible in this listing - presumably elided; confirm against the full file.
14 vector<string> NMDSCommand::setParameters(){
// File inputs: axes and phylip.
// NOTE(review): the meaning of the positional CommandParameter arguments is defined
// elsewhere; the 4th argument looks like the default value (it matches the defaults
// quoted in getHelpString) - confirm.
16 CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paxes);
17 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
// Numeric options: dimension range, number of configurations, iteration cap, tolerance.
18 CommandParameter pmaxdim("maxdim", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxdim);
19 CommandParameter pmindim("mindim", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmindim);
20 CommandParameter piters("iters", "Number", "", "10", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pmaxiters("maxiters", "Number", "", "500", "", "", "",false,false); parameters.push_back(pmaxiters);
22 CommandParameter pepsilon("epsilon", "Number", "", "0.000000000001", "", "", "",false,false); parameters.push_back(pepsilon);
// Directory overrides.
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect only the names of the declared parameters.
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error report for the enclosing handler (handler header not visible in this listing).
31 m->errorOut(e, "NMDSCommand", "setParameters");
35 //**********************************************************************************************************************
// Builds the help text for the nmds command: a description of the method, the list of
// parameters with their defaults, and a usage example. The text is accumulated in
// helpString one line at a time.
// NOTE(review): the return statement and the try/catch scaffolding are not visible in
// this listing - presumably elided; confirm against the full file.
36 string NMDSCommand::getHelpString(){
38 string helpString = "";
// Method description and parameter overview.
39 helpString += "The nmds command is modelled after the nmds code written in R by Sarah Goslee, using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling.\n";
40 helpString += "The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters and epsilon.\n";
// One line per parameter; the quoted defaults mirror the values used by the constructor.
41 helpString += "The phylip parameter allows you to enter your distance file.\n";
42 helpString += "The axes parameter allows you to enter a file containing a starting configuration.\n";
43 helpString += "The maxdim parameter allows you to select the maximum dimensions to use. Default=2\n";
44 helpString += "The mindim parameter allows you to select the minimum dimensions to use. Default=2\n";
45 helpString += "The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500\n";
46 helpString += "The iters parameter allows you to select the number of random configuration to try. Default=10\n";
47 helpString += "The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12.\n";
// Usage example and syntax reminder.
48 helpString += "Example nmds(phylip=yourDistanceFile).\n";
49 helpString += "Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n";
// Error report for the enclosing handler (handler header not visible in this listing).
53 m->errorOut(e, "NMDSCommand", "getHelpString");
57 //**********************************************************************************************************************
// Maps an output type (nmds, stress or iters) to the file name tag that execute() appends
// to the root name of the phylip file.
//   type      - output type to look up.
//   inputName - not referenced by any line visible in this listing.
// Returns the tag, or an empty string when no tag is defined for the type.
// Logs an error when the type is not a key of outputTypes. When the type has no tag
// defined it logs an error and sets m->control_pressed. An unknown type therefore
// produces both messages, because the first check does not return early.
58 string NMDSCommand::getOutputFileNameTag(string type, string inputName=""){
60 string outputFileName = "";
61 map<string, vector<string> >::iterator it;
63 //is this a type this command creates
64 it = outputTypes.find(type);
65 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// Tag lookup; the fall-through branch flags the run as cancelled via control_pressed.
67 if (type == "nmds") { outputFileName = "nmds.axes"; }
68 else if (type == "stress") { outputFileName = "nmds.stress"; }
69 else if (type == "iters") { outputFileName = "nmds.iters"; }
70 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
72 return outputFileName;
// Error report for the enclosing handler (handler header not visible in this listing).
75 m->errorOut(e, "NMDSCommand", "getOutputFileNameTag");
79 //**********************************************************************************************************************
// Default constructor. Sets abort and calledHelp to true, so execute() returns 0 without
// doing any work, and registers the three output types (nmds, stress, iters) with empty
// file lists.
80 NMDSCommand::NMDSCommand(){
82 abort = true; calledHelp = true;
// Every output type starts with no files recorded.
84 vector<string> tempOutNames;
85 outputTypes["nmds"] = tempOutNames;
86 outputTypes["stress"] = tempOutNames;
87 outputTypes["iters"] = tempOutNames;
// Error report for the enclosing handler (handler header not visible in this listing).
90 m->errorOut(e, "NMDSCommand", "NMDSCommand");
94 //**********************************************************************************************************************
// Constructs the command from the option string typed by the user.
// Handles the help and citation requests, validates the supplied parameter names against
// setParameters(), resolves the phylip and axes file paths, and converts the numeric
// options (mindim, maxdim, iters, maxiters, epsilon), applying defaults when absent.
// On any validation failure abort is set to true so that execute() does nothing.
// NOTE(review): several lines are not visible in this listing (for example the else that
// presumably guards the parsing section, the declaration of `path`, and closing braces);
// confirm the exact nesting against the full file.
96 NMDSCommand::NMDSCommand(string option) {
98 abort = false; calledHelp = false;
100 //allow user to run help
101 if(option == "help") { help(); abort = true; calledHelp = true; }
102 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of all parameters this command accepts.
105 vector<string> myArray = setParameters();
// Note: this local map shadows the `parameters` member that setParameters() fills.
107 OptionParser parser(option);
108 map<string, string> parameters = parser. getParameters();
110 ValidParameters validParameter;
111 map<string, string>::iterator it;
113 //check to make sure all parameters are valid for command
114 for (it = parameters.begin(); it != parameters.end(); it++) {
115 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
117 //if the user changes the input directory command factory will send this info to us in the output parameter
118 string inputDir = validParameter.validFile(parameters, "inputdir", false);
119 if (inputDir == "not found"){ inputDir = ""; }
// Prefix inputDir to file parameters that were given without a path.
122 it = parameters.find("phylip");
123 //user has given a phylip file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["phylip"] = inputDir + it->second; }
130 it = parameters.find("axes");
131 //user has given an axes file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["axes"] = inputDir + it->second; }
139 //initialize outputTypes
140 vector<string> tempOutNames;
141 outputTypes["nmds"] = tempOutNames;
142 outputTypes["iters"] = tempOutNames;
143 outputTypes["stress"] = tempOutNames;
145 //required parameters
// phylip: abort if the file cannot be opened; fall back to the current phylip file when
// the parameter was not supplied; otherwise remember it as the current phylip file.
146 phylipfile = validParameter.validFile(parameters, "phylip", true);
147 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
148 else if (phylipfile == "not found") {
149 //if there is a current phylip file, use it
150 phylipfile = m->getPhylipFile();
151 if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
152 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
153 }else { m->setPhylipFile(phylipfile); }
// axes is optional: an empty axesfile makes execute() generate random configurations.
155 axesfile = validParameter.validFile(parameters, "axes", true);
156 if (axesfile == "not open") { axesfile = ""; abort = true; }
157 else if (axesfile == "not found") { axesfile = ""; }
159 //if the user changes the output directory command factory will send this info to us in the output parameter
160 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
162 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it
// Numeric options: a missing parameter takes the default shown in getHelpString.
165 string temp = validParameter.validFile(parameters, "mindim", false); if (temp == "not found") { temp = "2"; }
166 m->mothurConvert(temp, mindim);
168 temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "500"; }
169 m->mothurConvert(temp, maxIters);
171 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "10"; }
172 m->mothurConvert(temp, iters);
174 temp = validParameter.validFile(parameters, "maxdim", false); if (temp == "not found") { temp = "2"; }
175 m->mothurConvert(temp, maxdim);
177 temp = validParameter.validFile(parameters, "epsilon", false); if (temp == "not found") { temp = "0.000000000001"; }
178 m->mothurConvert(temp, epsilon);
// Sanity checks: mindim must be positive, and maxdim is raised to mindim when smaller.
180 if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; }
181 if (maxdim < mindim) { maxdim = mindim; }
185 catch(exception& e) {
186 m->errorOut(e, "NMDSCommand", "NMDSCommand");
190 //**********************************************************************************************************************
// Runs the nmds analysis.
// Reads the phylip distance matrix, then for every dimension from mindim to maxdim runs
// `iters` configurations through nmdsCalc. Every resulting configuration is written to the
// iters file and its stress and squared Pearson correlation to the stress file; the
// lowest-stress configuration is written to the nmds axes file.
// Returns 0 when help was requested or the run was cancelled via m->control_pressed, and
// 2 when the command was aborted during construction.
// NOTE(review): the declarations of out, out2, outBest, stress and bestDim, the updates
// of bestDim/bestStress/bestR2, the final return and several closing braces are not
// visible in this listing - presumably elided; confirm against the full file.
191 int NMDSCommand::execute(){
194 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Fixed-point formatting with a visible decimal point for console output.
196 cout.setf(ios::fixed, ios::floatfield);
197 cout.setf(ios::showpoint);
199 vector<string> names;
200 vector< vector< double> > matrix;
202 //read in phylip file
203 ReadPhylipVector readFile(phylipfile);
204 names = readFile.read(matrix);
205 if (m->control_pressed) { return 0; }
// Optional user-supplied starting configuration.
208 vector< vector<double> > axes;
209 if (axesfile != "") { axes = readAxes(names); }
// Output file names are the phylip root name plus the tag for each output type.
211 string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("iters");
212 string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("stress");
213 outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName);
214 outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
217 m->openOutputFile(outputFileName, out);
218 m->openOutputFile(stressFileName, out2);
220 out2.setf(ios::fixed, ios::floatfield);
221 out2.setf(ios::showpoint);
222 out.setf(ios::fixed, ios::floatfield);
223 out.setf(ios::showpoint);
225 out2 << "Dimension\tIter\tStress\tRsq" << endl;
// Sentinels for the best result. They are printed unchanged if no iteration ever runs.
227 double bestStress = 10000000;
228 double bestR2 = 10000000;
229 vector< vector<double> > bestConfig;
// Outer loop over dimensions, inner loop over configurations.
232 for (int i = mindim; i <= maxdim; i++) {
233 m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine();
235 for (int j = 0; j < iters; j++) {
236 m->mothurOut(toString(j+1)); m->mothurOutEndLine();
238 //get configuration - either randomly generate or resize to this dimension
239 vector< vector<double> > thisConfig;
240 if (axesfile == "") { thisConfig = generateStartingConfiguration(names.size(), i); }
241 else { thisConfig = getConfiguration(axes, i); }
// On cancellation: close both streams, delete every file created so far, and stop.
242 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; }
244 //calc nmds for this dimension
246 vector< vector<double> > endConfig = nmdsCalc(matrix, thisConfig, stress);
247 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; }
249 //calc euclid distances for new config
250 vector< vector<double> > newEuclid = linearCalc.calculateEuclidianDistance(endConfig);
251 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; }
253 //calc correlation between original distances and euclidean distances from this config
254 double rsquared = linearCalc.calcPearson(newEuclid, matrix);
255 rsquared *= rsquared;
256 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; }
// Header for this configuration in the iters file: ConfigN followed by one label per axis.
259 out << "Config" << (j+1) << '\t';
260 for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; }
// One row per (dimension, configuration) in the stress file.
262 out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << rsquared << endl;
264 output(endConfig, names, out);
// Keep the configuration with the lowest stress seen so far.
267 if (stress < bestStress) {
271 bestConfig = endConfig;
274 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; }
278 out.close(); out2.close();
// Report and write the best configuration.
281 string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("nmds");
282 outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName);
284 m->mothurOut("\nNumber of dimensions:\t" + toString(bestDim) + "\n");
285 m->mothurOut("Lowest stress :\t" + toString(bestStress) + "\n");
286 m->mothurOut("R-squared for configuration:\t" + toString(bestR2) + "\n");
289 m->openOutputFile(BestFileName, outBest);
290 outBest.setf(ios::fixed, ios::floatfield);
291 outBest.setf(ios::showpoint);
// bestConfig holds one row per axis, so its size is the number of axis columns.
294 for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
297 output(bestConfig, names, outBest);
301 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
// List every file produced by this run.
303 m->mothurOutEndLine();
304 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
305 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
306 m->mothurOutEndLine();
310 catch(exception& e) {
311 m->errorOut(e, "NMDSCommand", "execute");
315 //**********************************************************************************************************************
// Iteratively refines a starting configuration so that the euclidean distances between
// its points approach the target distances in `matrix`.
//   matrix  - target distance matrix.
//   config  - starting configuration; it is copied, not modified.
//   stress1 - output parameter holding a stress value for the caller.
//             NOTE(review): the line that updates stress1 inside the loop is not visible
//             in this listing, so its exact final value cannot be stated from here.
// Returns the refined configuration, or the current one if m->control_pressed is set.
// The loop stops after maxIters passes or when two successive stress values differ by no
// more than epsilon.
// Changes in this revision: the convergence test uses fabs so the difference of two
// doubles can never be routed through the integer abs overload, and the error handler
// now reports this function instead of generateStartingConfiguration.
// NOTE(review): the try opener, the declaration and increment of `count`, the final
// return and closing braces are not visible in this listing - presumably elided.
316 vector< vector<double> > NMDSCommand::nmdsCalc(vector< vector<double> >& matrix, vector< vector<double> >& config, double& stress1) {
319 vector< vector<double> > newConfig = config;
321 //calc euclid distances
322 vector< vector<double> > euclid = linearCalc.calculateEuclidianDistance(newConfig);
323 if (m->control_pressed) { return newConfig; }
// Seed stress1 so that the first convergence test cannot succeed (difference is 1 + epsilon).
325 double stress2 = calculateStress(matrix, euclid);
326 stress1 = stress2 + 1.0 + epsilon;
329 while ((count < maxIters) && (fabs(stress1 - stress2) > epsilon)) {
334 if (m->control_pressed) { return newConfig; }
// b starts as a zero matrix with the same shape as euclid.
336 vector< vector<double> > b; b.resize(euclid.size());
337 for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0.0); }
// Off-diagonal entries are target distance / current distance; column sums are kept
// for the diagonal.
339 vector<double> columnSums; columnSums.resize(euclid.size(), 0.0);
340 for (int i = 0; i < euclid.size(); i++) {
341 for (int j = 0; j < euclid[i].size(); j++) {
342 //eliminate divide by zero error
343 if (euclid[i][j] != 0) {
344 b[i][j] = matrix[i][j] / euclid[i][j];
345 columnSums[j] += b[i][j];
351 //put in diagonal sums
352 for (int i = 0; i < euclid.size(); i++) { b[i][i] = columnSums[i]; }
// n is recovered from the lower-triangle count; algebraically it equals matrix.size().
354 int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0;
355 double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0;
// Update step: multiply the configuration by b and scale by 1/n.
358 newConfig = linearCalc.matrix_mult(newConfig, b);
359 for (int i = 0; i < newConfig.size(); i++) {
360 for (int j = 0; j < newConfig[i].size(); j++) {
361 newConfig[i][j] *= (1.0 / n);
// Re-evaluate distances and stress for the next convergence test.
365 euclid = linearCalc.calculateEuclidianDistance(newConfig);
367 stress2 = calculateStress(matrix, euclid);
372 catch(exception& e) {
373 m->errorOut(e, "NMDSCommand", "nmdsCalc");
378 //**********************************************************************************************************************
379 //generate random config
// Builds a random starting configuration with `dimension` rows (one per axis) and
// `numNames` columns (one per item), using rand() for both magnitude and sign.
// Returns the partially filled configuration if m->control_pressed is set.
// NOTE(review): the try opener, the final return and closing braces are not visible in
// this listing - presumably elided; confirm against the full file.
380 vector< vector<double> > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) {
382 vector< vector<double> > axes; axes.resize(dimension);
383 for (int i = 0; i < axes.size(); i++) { axes[i].resize(numNames); }
385 //generate random number between -1 and 1, precision 6
386 for (int i = 0; i < axes.size(); i++) {
387 for (int j = 0; j < axes[i].size(); j++) {
389 if (m->control_pressed) { return axes; }
391 //generate random int between 0 and 99999
392 int myrand = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
// A second draw is used only for its parity.
394 //generate random sign
395 int mysign = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
397 //if mysign is even then sign = positive, else sign = negative
// mysign is an int, so the floating-point literals below are stored as 1 and -1.
398 if ((mysign % 2) == 0) { mysign = 1.0; }
399 else { mysign = -1.0; }
// Integer product first, then a single-precision division by 100000.
401 axes[i][j] = mysign * myrand / (float) 100000;
407 catch(exception& e) {
408 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
412 //**********************************************************************************************************************
413 //normalize configuration
// Centres and scales a configuration in place. Each axis has its own mean subtracted,
// and every value is divided by one common factor: the square root of the mean squared
// deviation taken over all axes and items.
//   axes      - configuration, one row per axis; modified in place.
//   numNames  - number of items, used as the divisor for the per-axis mean.
//   dimension - size of the per-axis mean vector; rows are indexed up to axes.size(),
//               so axes.size() must not exceed dimension.
// If every value equals its axis mean, or the configuration is empty, denom is 0 and the
// final division yields non-finite values; there is no guard for that case.
// NOTE(review): the return value and the try opener are not visible in this listing.
414 int NMDSCommand::normalizeConfiguration(vector< vector<double> >& axes, int numNames, int dimension) {
416 vector<double> averageAxes; averageAxes.resize(dimension, 0.0);
// Per-axis mean.
419 for (int i = 0; i < axes.size(); i++) {
420 for (int j = 0; j < axes[i].size(); j++) { averageAxes[i] += axes[i][j]; }
422 averageAxes[i] /= (float) numNames;
// Sum of squared deviations from the axis means, over the whole configuration.
426 double sumDenom = 0.0;
427 for (int i = 0; i < axes.size(); i++) {
428 for (int j = 0; j < axes[i].size(); j++) {
429 sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i]));
433 double denom = sqrt((sumDenom / (float) (axes.size() * numNames)));
// Centre each value on its axis mean and scale by the common factor.
435 for (int i = 0; i < axes.size(); i++) {
436 for (int j = 0; j < axes[i].size(); j++) {
437 axes[i][j] = (axes[i][j] - averageAxes[i]) / denom;
443 catch(exception& e) {
444 m->errorOut(e, "NMDSCommand", "normalizeConfiguration");
448 //**********************************************************************************************************************
// Returns a configuration made of the first `dimension` rows (axes) of `axes`.
// There is no bounds check: `dimension` must not exceed axes.size().
// NOTE(review): the return statement and the try opener are not visible in this listing.
450 vector< vector<double> > NMDSCommand::getConfiguration(vector< vector<double> >& axes, int dimension) {
452 vector< vector<double> > newAxes; newAxes.resize(dimension);
// Copy one axis row at a time.
454 for (int i = 0; i < dimension; i++) {
455 newAxes[i] = axes[i];
460 catch(exception& e) {
461 m->errorOut(e, "NMDSCommand", "getConfiguration");
465 //**********************************************************************************************************************
466 //find raw stress, and normalize it by the sum of the squared config values
// Computes a normalized stress between two matrices of the same shape.
//   matrix - target distances.
//   config - values compared against them; nmdsCalc passes the euclidean distances of
//            the current configuration here.
// Raw stress is the sum of squared differences, and the result is
// sqrt(rawStress / sum of squared config values).
// Returns 0.0 when either sum is zero or when m->control_pressed is set mid-computation.
// NOTE(review): the declaration of denom, the final return and the try opener are not
// visible in this listing - presumably elided; confirm against the full file.
467 double NMDSCommand::calculateStress(vector< vector<double> >& matrix, vector< vector<double> >& config) {
469 double normStress = 0.0;
471 double rawStress = 0.0;
// Accumulate over every cell; config is indexed with the bounds of matrix.
474 for (int i = 0; i < matrix.size(); i++) {
475 for (int j = 0; j < matrix[i].size(); j++) {
476 if (m->control_pressed) { return normStress; }
478 rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j]));
479 denom += (config[i][j] * config[i][j]);
// Skipping the division when either sum is zero leaves normStress at 0.0.
484 if ((rawStress != 0.0) && (denom != 0.0)) {
485 normStress = sqrt((rawStress / denom));
490 catch(exception& e) {
491 m->errorOut(e, "NMDSCommand", "calculateStress");
496 //**********************************************************************************************************************
// Writes a configuration to `out`: for each name, the name followed by that item's value
// on every axis, all tab separated.
//   config - configuration indexed as config[axis][item].
//   names  - item labels; names.size() must not exceed the length of any config row.
//   out    - destination stream, already opened by the caller.
// NOTE(review): the statements that end each row, the return value and the try opener
// are not visible in this listing - presumably elided; confirm against the full file.
497 int NMDSCommand::output(vector< vector<double> >& config, vector<string>& names, ofstream& out) {
500 for (int i = 0; i < names.size(); i++) {
502 out << names[i] << '\t';
// One column per axis for item i.
504 for (int j = 0; j < config.size(); j++) {
505 out << config[j][i] << '\t';
515 catch(exception& e) {
516 m->errorOut(e, "NMDSCommand", "output");
520 /*****************************************************************/
// Reads a starting configuration from axesfile and returns it indexed as
// axes[axis][item], with items in the same order as `names`.
//   names - item labels from the distance file; passed by value.
// The header line is scanned for occurrences of the text axis to find how many axes the
// file holds. Rows whose label is not in `names` are reported and ignored. If the labels
// kept do not match `names` exactly, an error is logged, m->control_pressed is set and
// the zero-filled matrix built so far is returned.
// NOTE(review): the declarations of in, count, done, group, ignore and temp, the loop
// headers, the reads of temp and the final return are not visible in this listing -
// presumably elided; confirm against the full file.
// NOTE(review): the message below says mindim is being adjusted, but no visible line
// assigns mindim (nor maxdim); verify that the members are really updated, otherwise the
// dimension loop in execute() may run zero times.
521 vector< vector<double> > NMDSCommand::readAxes(vector<string> names){
524 m->openInputFile(axesfile, in);
526 string headerLine = m->getline(in); m->gobble(in);
528 //count the number of axis you are reading
// Consume the header one axis label at a time, continuing after each match.
532 int pos = headerLine.find("axis");
533 if (pos != string::npos) {
535 headerLine = headerLine.substr(pos+4);
536 }else { done = true; }
// The file cannot supply more axes than it contains.
539 if (maxdim > count) {
540 m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine();
542 if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + "."); m->mothurOutEndLine(); }
// Result matrix: maxdim rows of names.size() zeros.
545 vector< vector<double> > axes; axes.resize(maxdim);
546 for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); }
// Rows are first collected by label so they can be re-ordered afterwards.
548 map <string, vector<double> > orderedAxes;
549 map <string, vector<double> >::iterator it;
553 if (m->control_pressed) { in.close(); return axes; }
556 in >> group; m->gobble(in);
559 if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); }
// All `count` columns are visited, but only the first maxdim are stored.
561 vector<double> thisGroupsAxes;
562 for (int i = 0; i < count; i++) {
566 //only save the axis we want
567 if (i < maxdim) { thisGroupsAxes.push_back(temp); }
570 if (!ignore) { orderedAxes[group] = thisGroupsAxes; }
// The labels kept must match the distance file one to one.
577 if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
579 //put axes info in same order as distance file, just in case
580 for (int i = 0; i < names.size(); i++) {
581 it = orderedAxes.find(names[i]);
583 if (it != orderedAxes.end()) {
584 vector<double> thisGroupsAxes = it->second;
// Transpose from per-item rows into the axes[axis][item] layout.
586 for (int j = 0; j < thisGroupsAxes.size(); j++) {
587 axes[j][i] = thisGroupsAxes[j];
590 }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
595 catch(exception& e) {
596 m->errorOut(e, "NMDSCommand", "readAxes");
600 /**********************************************************************************************************************/