5 * Created by westcott on 1/11/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "nmdscommand.h"
11 #include "readphylipvector.h"
13 //**********************************************************************************************************************
14 vector<string> NMDSCommand::getValidParameters(){
16 string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir","inputdir"};
17 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
21 m->errorOut(e, "NMDSCommand", "getValidParameters");
25 //**********************************************************************************************************************
26 NMDSCommand::NMDSCommand(){
29 //initialize outputTypes
30 vector<string> tempOutNames;
31 outputTypes["nmds"] = tempOutNames;
32 outputTypes["stress"] = tempOutNames;
35 m->errorOut(e, "NMDSCommand", "NMDSCommand");
39 //**********************************************************************************************************************
40 vector<string> NMDSCommand::getRequiredParameters(){
42 string Array[] = {"phylip"};
43 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
47 m->errorOut(e, "NMDSCommand", "getRequiredParameters");
51 //**********************************************************************************************************************
52 vector<string> NMDSCommand::getRequiredFiles(){
54 vector<string> myArray;
58 m->errorOut(e, "NMDSCommand", "getRequiredFiles");
62 //**********************************************************************************************************************
64 NMDSCommand::NMDSCommand(string option) {
68 //allow user to run help
69 if(option == "help") { help(); abort = true; }
72 //valid paramters for this command
73 string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir", "inputdir"};
74 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
76 OptionParser parser(option);
77 map<string, string> parameters = parser. getParameters();
79 ValidParameters validParameter;
80 map<string, string>::iterator it;
82 //check to make sure all parameters are valid for command
83 for (it = parameters.begin(); it != parameters.end(); it++) {
84 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
86 //if the user changes the input directory command factory will send this info to us in the output parameter
87 string inputDir = validParameter.validFile(parameters, "inputdir", false);
88 if (inputDir == "not found"){ inputDir = ""; }
91 it = parameters.find("phylip");
92 //user has given a template file
93 if(it != parameters.end()){
94 path = m->hasPath(it->second);
95 //if the user has not given a path then, add inputdir. else leave path alone.
96 if (path == "") { parameters["phylip"] = inputDir + it->second; }
99 it = parameters.find("axes");
100 //user has given a template file
101 if(it != parameters.end()){
102 path = m->hasPath(it->second);
103 //if the user has not given a path then, add inputdir. else leave path alone.
104 if (path == "") { parameters["axes"] = inputDir + it->second; }
108 //initialize outputTypes
109 vector<string> tempOutNames;
110 outputTypes["nmds"] = tempOutNames;
111 outputTypes["stress"] = tempOutNames;
113 //required parameters
114 phylipfile = validParameter.validFile(parameters, "phylip", true);
115 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
116 else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; }
118 axesfile = validParameter.validFile(parameters, "axes", true);
119 if (axesfile == "not open") { axesfile = ""; abort = true; }
120 else if (axesfile == "not found") { axesfile = ""; }
122 //if the user changes the output directory command factory will send this info to us in the output parameter
123 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
125 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it
128 string temp = validParameter.validFile(parameters, "dimension", false); if (temp == "not found") { temp = "2"; }
129 convert(temp, dimension);
131 temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "1000"; }
132 convert(temp, maxIters);
134 temp = validParameter.validFile(parameters, "step", false); if (temp == "not found") { temp = "0.2"; }
137 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "2"; }
138 convert(temp, cutoff);
143 catch(exception& e) {
144 m->errorOut(e, "NMDSCommand", "NMDSCommand");
148 //**********************************************************************************************************************
149 void NMDSCommand::help(){
152 m->mothurOut("The nmds command parameters are phylip, axes, dimension, maxiters, cutoff and step."); m->mothurOutEndLine();
153 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
154 m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine();
155 m->mothurOut("The dimension parameter allows you to select how many dimensions to use. Default=2"); m->mothurOutEndLine();
156 m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try. Default=1000"); m->mothurOutEndLine();
157 m->mothurOut("The cutoff parameter allows you to select set an acceptable percentage of magnitude. Default=2, meaning when magnitude of g reaches 2% of it's starting value the process will stop."); m->mothurOutEndLine();
158 m->mothurOut("The step parameter allows you to set a starting step. Default=0.2"); m->mothurOutEndLine();
159 m->mothurOut("Example nmds(phylip=yourDistanceFile).\n");
160 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
162 catch(exception& e) {
163 m->errorOut(e, "NMDSCommand", "help");
167 //**********************************************************************************************************************
168 NMDSCommand::~NMDSCommand(){}
169 //**********************************************************************************************************************
170 int NMDSCommand::execute(){
173 if (abort == true) { return 0; }
175 cout.setf(ios::fixed, ios::floatfield);
176 cout.setf(ios::showpoint);
177 cerr.setf(ios::fixed, ios::floatfield);
178 cerr.setf(ios::showpoint);
180 vector<string> names;
181 vector<seqDist> matrix; //seqDist = int, int, float - index of seq1 in names, index of seq2 in names, their distance
183 //read in phylip file
184 ReadPhylipVector readFile(phylipfile);
185 names = readFile.read(matrix);
186 if (m->control_pressed) { return 0; }
188 //randomly generate the starting configuration - step 2
189 vector< vector<double> > axes;
190 if (axesfile == "") { axes = generateStartingConfiguration(names.size()); }
191 else { axes = readAxes(names); }
192 if (m->control_pressed) { return 0; }
194 //sort matrix from smallest distance to largest - step 5
195 sort(matrix.begin(), matrix.end(), compareSequenceDistance);
199 vector<double> previousStresses;
200 vector< vector<double> > previousGradient = axes;
201 double initialMagnitude;
202 m->mothurOutEndLine(); m->mothurOut("Iter\tStress\tMagnitude"); m->mothurOutEndLine();
203 while ((count != maxIters) && (!stable)) {
206 //normalize axes - step 3
207 normalizeConfiguration(axes, names.size());
208 if (m->control_pressed) { return 0; }
210 //calculate Euclidean distances - step 4
211 vector< vector<double> > euclid = linearCalc.calculateEuclidianDistance(axes);
212 if (m->control_pressed) { return 0; }
214 //order euclid elements in same order as matrix - step 6
215 //if there are ties in the matrix we want to arrange the euclid distances in the best way so we do not to add unnecessary stress
216 vector<seqDist> eDists;
217 vector<seqDist> ties;
218 for (int i = 0; i < matrix.size(); i++) {
220 seqDist temp(matrix[i].seq1, matrix[i].seq2, euclid[matrix[i].seq1][matrix[i].seq2]);
221 ties.push_back(temp);
223 if (i != matrix.size()-1) { // you are not the last so you can look ahead
224 if (matrix[i].dist != matrix[i+1].dist) { // you are done with ties, sort and save them, then continue
225 sort(ties.begin(), ties.end(), compareSequenceDistance);
226 for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); }
229 }else { // you are the last one
230 sort(ties.begin(), ties.end(), compareSequenceDistance);
231 for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); }
235 for (int i = 0; i < euclid.size(); i++) { euclid[i].clear(); } euclid.clear();
236 if (m->control_pressed) { return 0; }
238 //find D - from step 7
239 vector<seqDist> D = satisfyMonotonicity(eDists);
240 if (m->control_pressed) { return 0; }
242 //calculate the raw stress and normalize it - steps 8 and 9
244 double stress = calculateStress(eDists, D, rawStress);
245 previousStresses.push_back(stress);
246 if (stress == 0) { m->mothurOut("Stress reached zero after " + toString(count) + " iters, stopping."); m->mothurOutEndLine(); break; }
247 if (m->control_pressed) { return 0; }
249 //calculate stress gradient - step 10
250 vector< vector<double> > stressGradient = calculateStressGradientVector(eDists, D, rawStress, stress, axes);
251 if (m->control_pressed) { return 0; }
253 //calculate magnitude
254 double magnitude = calculateMagnitude(stressGradient);
255 if (count == 1) { initialMagnitude = magnitude; }
256 if (m->control_pressed) { return 0; }
258 //save gradient before adjusting config.
259 previousGradient = stressGradient;
261 if ((count % 100) == 0) { m->mothurOut(toString(count) + "\t" + toString(previousStresses[previousStresses.size()-1]) + "\t" + toString(magnitude)); m->mothurOutEndLine(); }
263 //are we done - we are done if percentage of magnitude compared to initial magnitude is less than cutoff
264 double percentage = magnitude / initialMagnitude;
265 if (percentage < cutoff) { stable = true; }
268 //calculate new step size
269 step = calculateStep(previousGradient, stressGradient, previousStresses);
270 cout << "count = " << count << '\t' << step << endl;
271 if (m->control_pressed) { return 0; }
274 axes = calculateNewConfiguration(magnitude, axes, stressGradient);
275 if (m->control_pressed) { return 0; }
279 if (m->control_pressed) { return 0; }
281 string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds";
282 string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds";
283 outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName);
284 outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
286 output(outputFileName, stressFileName, previousGradient, previousStresses, names);
288 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
290 m->mothurOutEndLine();
291 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
292 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
293 m->mothurOutEndLine();
297 catch(exception& e) {
298 m->errorOut(e, "NMDSCommand", "execute");
302 //**********************************************************************************************************************
303 //generate random config
304 vector< vector<double> > NMDSCommand::generateStartingConfiguration(int numNames) {
306 vector< vector<double> > axes; axes.resize(dimension);
307 for (int i = 0; i < axes.size(); i++) { axes[i].resize(numNames); }
309 //generate random number between -1 and 1, precision 6
310 for (int i = 0; i < axes.size(); i++) {
311 for (int j = 0; j < axes[i].size(); j++) {
313 if (m->control_pressed) { return axes; }
315 //generate random int between 0 and 99999
316 int myrand = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
318 //generate random sign
319 int mysign = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
321 //if mysign is even then sign = positive, else sign = negative
322 if ((mysign % 2) == 0) { mysign = 1.0; }
323 else { mysign = -1.0; }
325 axes[i][j] = mysign * myrand / (float) 100000;
331 catch(exception& e) {
332 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
336 //**********************************************************************************************************************
337 //normalize configuration
338 int NMDSCommand::normalizeConfiguration(vector< vector<double> >& axes, int numNames) {
340 vector<double> averageAxes; averageAxes.resize(dimension, 0.0);
343 for (int i = 0; i < axes.size(); i++) {
344 for (int j = 0; j < axes[i].size(); j++) { averageAxes[i] += axes[i][j]; }
346 averageAxes[i] /= (float) numNames;
350 double sumDenom = 0.0;
351 for (int i = 0; i < axes.size(); i++) {
352 for (int j = 0; j < axes[i].size(); j++) {
353 sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i]));
357 double denom = sqrt((sumDenom / (float) (axes.size() * numNames)));
359 for (int i = 0; i < axes.size(); i++) {
360 for (int j = 0; j < axes[i].size(); j++) {
361 axes[i][j] = (axes[i][j] - averageAxes[i]) / denom;
367 catch(exception& e) {
368 m->errorOut(e, "NMDSCommand", "normalizeConfiguration");
372 //**********************************************************************************************************************
373 //adjust eDists so that it creates monotonically increasing series of succesive values that increase or stay the same, but never decrease
374 vector<seqDist> NMDSCommand::satisfyMonotonicity(vector<seqDist> eDists) {
377 vector<seqDist> D = eDists;
379 for (int i = 0; i < (D.size()-1); i++) {
381 if (m->control_pressed) { return D; }
383 //is the distance in i+1 smaller than i, if yes then adjust
384 if (D[i+1].dist < D[i].dist) { D[i+1].dist = D[i].dist; }
389 catch(exception& e) {
390 m->errorOut(e, "NMDSCommand", "satisfyMonotonicity");
394 //**********************************************************************************************************************
395 //find raw stress, and normalize using
396 double NMDSCommand::calculateStress(vector<seqDist>& eDists, vector<seqDist>& D, double& rawStress) {
398 double normStress = 0.0;
403 for (int i = 0; i < D.size(); i++) {
405 if (m->control_pressed) { return normStress; }
407 rawStress += ((eDists[i].dist - D[i].dist) * (eDists[i].dist - D[i].dist));
408 denom += (eDists[i].dist * eDists[i].dist);
412 if (rawStress != 0.0) {
413 normStress = 100 * sqrt((rawStress / denom));
418 catch(exception& e) {
419 m->errorOut(e, "NMDSCommand", "calculateStress");
423 //**********************************************************************************************************************
424 vector< vector<double> > NMDSCommand::calculateStressGradientVector(vector<seqDist>& eDists, vector<seqDist>& D, double rawStress, double stress, vector< vector<double> >& axes) {
426 vector< vector<double> > gradient; gradient.resize(dimension);
427 for (int i = 0; i < gradient.size(); i++) { gradient[i].resize(axes[0].size(), 0.0); }
430 for (int i = 0; i < eDists.size(); i++) { sumDij += (eDists[i].dist * eDists[i].dist); }
432 for (int i = 0; i < eDists.size(); i++) {
434 for (int j = 0; j < dimension; j++) {
436 if (m->control_pressed) { return gradient; }
438 double firstTerm1 = (stress / rawStress) * (eDists[i].dist - D[i].dist);
439 double firstTerm2 = eDists[i].dist * (stress / sumDij);
440 double firstTerm = firstTerm1 - firstTerm2;
442 double secondTerm = (axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]) / eDists[i].dist;
444 double results = (firstTerm * secondTerm);
446 gradient[j][eDists[i].seq1] += results;
447 gradient[j][eDists[i].seq2] -= results;
453 catch(exception& e) {
454 m->errorOut(e, "NMDSCommand", "calculateStressGradientVector");
458 //**********************************************************************************************************************
459 double NMDSCommand::calculateMagnitude(vector< vector<double> >& gradient) {
461 double magnitude = 0.0;
464 for (int i = 0; i < gradient.size(); i++) {
465 for (int j = 0; j < gradient[i].size(); j++) {
466 sum += (gradient[i][j] * gradient[i][j]);
470 magnitude = sqrt(((1.0/(float)gradient[0].size()) * sum));
474 catch(exception& e) {
475 m->errorOut(e, "NMDSCommand", "calculateMagnitude");
479 //**********************************************************************************************************************
480 //described in Kruskal paper page 121 + 122
481 double NMDSCommand::calculateStep(vector< vector<double> >& prevGrad, vector< vector<double> >& grad, vector<double>& prevStress) {
483 double newStep = step;
487 double sumDenom1 = 0.0;
488 double sumDenom2 = 0.0;
489 for (int i = 0; i < prevGrad.size(); i++) {
490 for (int j = 0; j < prevGrad[i].size(); j++) {
491 sumDenom1 += (grad[i][j] * grad[i][j]);
492 sumDenom2 += (prevGrad[i][j] * prevGrad[i][j]);
493 sumNum += (grad[i][j] * prevGrad[i][j]);
497 double cosTheta = sumNum / (sqrt(sumDenom1) * sqrt(sumDenom2));
498 cosTheta *= cosTheta;
501 double angle = pow(4.0, cosTheta);
504 double currentStress = prevStress[prevStress.size()-1];
505 double lastStress = prevStress[0];
506 if (prevStress.size() > 1) { lastStress = prevStress[prevStress.size()-2]; }
507 double fivePrevStress = prevStress[0];
508 if (prevStress.size() > 5) { fivePrevStress = prevStress[prevStress.size()-6]; }
510 double fiveStepRatio = min(1.0, (currentStress / fivePrevStress));
512 //calc relaxation factor
513 double relaxation = 1.3 / (1.0 + pow(fiveStepRatio, 5.0));
515 //calc good luck factor
516 double goodLuck = min(1.0, (currentStress / lastStress));
519 cout << "\ncos = " << cosTheta << " step = " << step << " angle = " << angle << " relaxation = " << relaxation << " goodluck = " << goodLuck << endl;
520 newStep = step * angle * relaxation * goodLuck;
524 catch(exception& e) {
525 m->errorOut(e, "NMDSCommand", "calculateStep");
529 //**********************************************************************************************************************
530 vector< vector<double> > NMDSCommand::calculateNewConfiguration(double magnitude, vector< vector<double> >& axes, vector< vector<double> >& gradient) {
533 vector< vector<double> > newAxes = axes;
535 for (int i = 0; i < newAxes.size(); i++) {
537 if (m->control_pressed) { return newAxes; }
539 for (int j = 0; j < newAxes[i].size(); j++) {
540 newAxes[i][j] = axes[i][j] + ((step / magnitude) * gradient[i][j]);
546 catch(exception& e) {
547 m->errorOut(e, "NMDSCommand", "calculateNewConfiguration");
551 //**********************************************************************************************************************
552 int NMDSCommand::output(string outputFileName, string stressFileName, vector< vector<double> >& config, vector<double>& stresses, vector<string>& names) {
556 m->openOutputFile(outputFileName, out);
557 m->openOutputFile(stressFileName, out2);
561 for (int i = 0; i < dimension; i++) { out << "axis" << (i+1) << '\t'; }
564 out2 << "Iter\tStress" << endl;
567 for (int i = 0; i < config[0].size(); i++) {
569 if (m->control_pressed) { out.close(); out2.close(); return 0; }
571 out << names[i] << '\t';
573 for (int j = 0; j < config.size(); j++) {
574 out << config[j][i] << '\t';
582 for (int j = 0; j < stresses.size(); j++) {
583 if (m->control_pressed) { out2.close(); return 0; }
585 out2 << (j+1) << '\t' << stresses[j] << endl;
592 catch(exception& e) {
593 m->errorOut(e, "NMDSCommand", "output");
597 /*****************************************************************/
598 vector< vector<double> > NMDSCommand::readAxes(vector<string> names){
600 vector< vector<double> > axes;
603 m->openInputFile(axesfile, in);
605 string headerLine = m->getline(in); m->gobble(in);
607 //count the number of axis you are reading
611 int pos = headerLine.find("axis");
612 if (pos != string::npos) {
614 headerLine = headerLine.substr(pos+4);
615 }else { done = true; }
618 if (dimension > count) { m->mothurOut("You requested " + toString(dimension) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); dimension = count; }
622 if (m->control_pressed) { in.close(); return axes; }
625 in >> group; m->gobble(in);
628 if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); }
630 vector<double> thisGroupsAxes;
631 for (int i = 0; i < count; i++) {
635 //only save the axis we want
636 if (i < dimension) { thisGroupsAxes.push_back(temp); }
639 if (!ignore) { axes.push_back(thisGroupsAxes); }
646 if (names.size() != axes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
650 catch(exception& e) {
651 m->errorOut(e, "NMDSCommand", "readAxes");
655 //**********************************************************************************************************************