]> git.donarmstrong.com Git - mothur.git/blob - nmdscommand.cpp
nmds command done
[mothur.git] / nmdscommand.cpp
/*
 *  nmdscommand.cpp
 *  mothur
 *
 *  Created by westcott on 1/11/11.
 *  Copyright 2011 Schloss Lab. All rights reserved.
 *
 */
9
10 #include "nmdscommand.h"
11 #include "readphylipvector.h"
12
13 //**********************************************************************************************************************
14 vector<string> NMDSCommand::getValidParameters(){       
15         try {
16                 string Array[] =  {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "NMDSCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 NMDSCommand::NMDSCommand(){     
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["nmds"] = tempOutNames;
32                 outputTypes["stress"] = tempOutNames;
33                 outputTypes["iters"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "NMDSCommand", "NMDSCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> NMDSCommand::getRequiredParameters(){    
42         try {
43                 string Array[] =  {"phylip"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "NMDSCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> NMDSCommand::getRequiredFiles(){ 
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "NMDSCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 NMDSCommand::NMDSCommand(string option)  {
66         try {
67                 abort = false;
68                 
69                 //allow user to run help
70                 if(option == "help") { help(); abort = true; }
71                 
72                 else {
73                         //valid paramters for this command
74                         string Array[] =  {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir", "inputdir"};
75                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string, string> parameters = parser. getParameters();
79                         
80                         ValidParameters validParameter;
81                         map<string, string>::iterator it;
82                         
83                         //check to make sure all parameters are valid for command
84                         for (it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         //if the user changes the input directory command factory will send this info to us in the output parameter 
88                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
89                         if (inputDir == "not found"){   inputDir = "";          }
90                         else {
91                                 string path;
92                                 it = parameters.find("phylip");
93                                 //user has given a template file
94                                 if(it != parameters.end()){ 
95                                         path = m->hasPath(it->second);
96                                         //if the user has not given a path then, add inputdir. else leave path alone.
97                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
98                                 }
99                                 
100                                 it = parameters.find("axes");
101                                 //user has given a template file
102                                 if(it != parameters.end()){ 
103                                         path = m->hasPath(it->second);
104                                         //if the user has not given a path then, add inputdir. else leave path alone.
105                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
106                                 }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["nmds"] = tempOutNames;
112                         outputTypes["iters"] = tempOutNames;
113                         outputTypes["stress"] = tempOutNames;
114                         
115                         //required parameters
116                         phylipfile = validParameter.validFile(parameters, "phylip", true);
117                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
118                         else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; }        
119                         
120                         axesfile = validParameter.validFile(parameters, "axes", true);
121                         if (axesfile == "not open") { axesfile = ""; abort = true; }
122                         else if (axesfile == "not found") { axesfile = "";  }                           
123                         
124                         //if the user changes the output directory command factory will send this info to us in the output parameter 
125                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
126                                 outputDir = ""; 
127                                 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it      
128                         }
129                         
130                         string temp = validParameter.validFile(parameters, "mindim", false);    if (temp == "not found") {      temp = "2";     }
131                         convert(temp, mindim);
132                         
133                         temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") {      temp = "500";   }
134                         convert(temp, maxIters);
135                         
136                         temp = validParameter.validFile(parameters, "iters", false);    if (temp == "not found") {      temp = "10";    }
137                         convert(temp, iters);
138                         
139                         temp = validParameter.validFile(parameters, "maxdim", false);   if (temp == "not found") {      temp = "2";     }
140                         convert(temp, maxdim);
141                         
142                         temp = validParameter.validFile(parameters, "epsilon", false);  if (temp == "not found") {      temp = "0.000000000001";        }
143                         convert(temp, epsilon); 
144                         
145                         if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; }
146                         if (maxdim < mindim) { m->mothurOut("maxdim must be greater than mindim."); m->mothurOutEndLine(); abort = true; }
147                 }
148                 
149         }
150         catch(exception& e) {
151                 m->errorOut(e, "NMDSCommand", "NMDSCommand");
152                 exit(1);
153         }
154 }
155 //**********************************************************************************************************************
156 void NMDSCommand::help(){
157         try {
158                 m->mothurOut("The nmds command is modelled after the nmds code written in R by Sarah Goslee, using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling."); m->mothurOutEndLine();
159                 m->mothurOut("The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters and epsilon."); m->mothurOutEndLine();
160                 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
161                 m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine();
162                 m->mothurOut("The maxdim parameter allows you to select how maximum dimensions to use. Default=2"); m->mothurOutEndLine();
163                 m->mothurOut("The mindim parameter allows you to select how minimum dimensions to use. Default=2"); m->mothurOutEndLine();
164                 m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500"); m->mothurOutEndLine();
165                 m->mothurOut("The iters parameter allows you to select the number of random configuration to try. Default=10"); m->mothurOutEndLine();
166                 m->mothurOut("The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12."); m->mothurOutEndLine();
167                 m->mothurOut("Example nmds(phylip=yourDistanceFile).\n");
168                 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
169         }
170         catch(exception& e) {
171                 m->errorOut(e, "NMDSCommand", "help");
172                 exit(1);
173         }
174 }
175 //**********************************************************************************************************************
176 NMDSCommand::~NMDSCommand(){}
177 //**********************************************************************************************************************
178 int NMDSCommand::execute(){
179         try {
180                 
181                 if (abort == true) { return 0; }
182                 
183                 cout.setf(ios::fixed, ios::floatfield);
184                 cout.setf(ios::showpoint);
185                 
186                 vector<string> names;
187                 vector< vector< double> > matrix; 
188                 
189                 //read in phylip file
190                 ReadPhylipVector readFile(phylipfile);
191                 names = readFile.read(matrix);
192                 if (m->control_pressed) { return 0; }
193                 
194                 //read axes
195                 vector< vector<double> > axes;
196                 if (axesfile != "") {  axes = readAxes(names);          }
197                 
198                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.iters";
199                 string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds";
200                 outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName);
201                 outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
202                 
203                 ofstream out, out2;
204                 m->openOutputFile(outputFileName, out);
205                 m->openOutputFile(stressFileName, out2);
206                 
207                 out2.setf(ios::fixed, ios::floatfield);
208                 out2.setf(ios::showpoint);
209                 out.setf(ios::fixed, ios::floatfield);
210                 out.setf(ios::showpoint);
211                 
212                 out2 << "Dimension\tIter\tStress\tCorr" << endl;
213                 
214                 double bestStress = 10000000;
215                 vector< vector<double> > bestConfig;
216                 
217                 for (int i = mindim; i <= maxdim; i++) {
218                         m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine();
219                         
220                         for (int j = 0; j < iters; j++) {
221                                 m->mothurOut(toString(j+1)); m->mothurOutEndLine(); 
222                                 
223                                 //get configuration - either randomly generate or resize to this dimension
224                                 vector< vector<double> > thisConfig;
225                                 if (axesfile == "") {   thisConfig = generateStartingConfiguration(names.size(), i);            }
226                                 else                            {       thisConfig = getConfiguration(axes, i);                                                         }
227                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
228                                 
229                                 //calc nmds for this dimension
230                                 double stress;
231                                 vector< vector<double> > endConfig = nmdsCalc(matrix, thisConfig, stress);
232                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
233                                 
234                                 //calc euclid distances for new config
235                                 vector< vector<double> > newEuclid = linearCalc.calculateEuclidianDistance(endConfig);
236                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
237                                 
238                                 //calc correlation between original distances and euclidean distances from this config
239                                 double corr = linearCalc.calcPearson(newEuclid, matrix);
240                                 corr *= corr;
241                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
242                                 
243                                 //output results
244                                 out << "Config" << (j+1) << '\t';
245                                 for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; }
246                                 out << endl;
247                                 out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << corr << endl;
248                                 
249                                 output(endConfig, names, out);
250                                 
251                                 //save best
252                                 if (stress < bestStress) {
253                                         bestStress = stress;
254                                         bestConfig = endConfig;
255                                 }
256                                 
257                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
258                         }
259                 }
260                 
261                 out.close(); out2.close();
262                 
263                 //output best config
264                 string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.axes";
265                 outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName);
266                 
267                 ofstream outBest;
268                 m->openOutputFile(BestFileName, outBest);
269                 outBest.setf(ios::fixed, ios::floatfield);
270                 outBest.setf(ios::showpoint);
271                 
272                 outBest << '\t';
273                 for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
274                 outBest << endl;
275                 
276                 output(bestConfig, names, outBest);
277                 
278                 outBest.close();
279                 
280                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
281                 
282                 m->mothurOutEndLine();
283                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
284                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
285                 m->mothurOutEndLine();
286                 
287                 return 0;
288         }
289         catch(exception& e) {
290                 m->errorOut(e, "NMDSCommand", "execute");
291                 exit(1);
292         }
293 }
294 //**********************************************************************************************************************
295 vector< vector<double> > NMDSCommand::nmdsCalc(vector< vector<double> >& matrix, vector< vector<double> >& config, double& stress1) {
296         try {
297                 
298                 vector< vector<double> > newConfig = config;
299                 
300                 //calc euclid distances
301                 vector< vector<double> > euclid = linearCalc.calculateEuclidianDistance(newConfig);
302                 if (m->control_pressed) { return newConfig; }           
303                 
304                 double stress2 = calculateStress(matrix, euclid);
305                 stress1 = stress2 + 1.0 + epsilon;
306                 
307                 int count = 0;
308                 while ((count < maxIters) && (abs(stress1 - stress2) > epsilon)) {
309                         count++;
310                         
311                         stress1 = stress2;
312                         
313                         if (m->control_pressed) { return newConfig; }
314                         
315                         vector< vector<double> > b; b.resize(euclid.size());
316                         for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0.0); }
317                         
318                         vector<double> columnSums; columnSums.resize(euclid.size(), 0.0);
319                         for (int i = 0; i < euclid.size(); i++) {
320                                 for (int j = 0; j < euclid[i].size(); j++) {
321                                         //eliminate divide by zero error
322                                         if (euclid[i][j] != 0) { 
323                                                 b[i][j] = matrix[i][j] / euclid[i][j];
324                                                 columnSums[j] += b[i][j];
325                                                 b[i][j] *= -1.0;
326                                         }
327                                 }
328                         }
329                         
330                         //put in diagonal sums
331                         for (int i = 0; i < euclid.size(); i++) {  b[i][i] = columnSums[i]; }
332                         
333                         int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0;
334                         double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0;
335                         
336                         //matrix mult
337                         newConfig = linearCalc.matrix_mult(newConfig, b);
338                         for (int i = 0; i < newConfig.size(); i++) {
339                                 for (int j = 0; j < newConfig[i].size(); j++) {
340                                         newConfig[i][j] *= (1.0 / n);
341                                 }
342                         }
343                         
344                         euclid = linearCalc.calculateEuclidianDistance(newConfig);
345                         
346                         stress2 = calculateStress(matrix, euclid);
347                 }
348                 
349                 return newConfig;
350         }
351         catch(exception& e) {
352                 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
353                 exit(1);
354         }
355 }
356
357 //**********************************************************************************************************************
358 //generate random config
359 vector< vector<double> > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) {
360         try {
361                 vector< vector<double> > axes;  axes.resize(dimension);
362                 for (int i = 0; i < axes.size(); i++) {  axes[i].resize(numNames); }
363                 
364                 //generate random number between -1 and 1, precision 6
365                 for (int i = 0; i < axes.size(); i++) {
366                         for (int j = 0; j < axes[i].size(); j++) {
367                                 
368                                 if (m->control_pressed) { return axes; }
369                                 
370                                 //generate random int between 0 and 99999
371                                 int myrand = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
372                                 
373                                 //generate random sign
374                                 int mysign = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
375                                 
376                                 //if mysign is even then sign = positive, else sign = negative
377                                 if ((mysign % 2) == 0) { mysign = 1.0; }
378                                 else { mysign = -1.0; }
379                                 
380                                 axes[i][j] = mysign * myrand / (float) 100000;
381                         }
382                 }
383
384                 return axes;
385         }
386         catch(exception& e) {
387                 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
388                 exit(1);
389         }
390 }
391 //**********************************************************************************************************************
392 //normalize configuration
393 int NMDSCommand::normalizeConfiguration(vector< vector<double> >& axes, int numNames, int dimension) {
394         try {
395                 vector<double> averageAxes; averageAxes.resize(dimension, 0.0);
396                 
397                 //find average
398                 for (int i = 0; i < axes.size(); i++) {
399                         for (int j = 0; j < axes[i].size(); j++) {      averageAxes[i] += axes[i][j];   }
400                         
401                         averageAxes[i] /= (float) numNames;
402                 }
403                 
404                 //normalize axes
405                 double sumDenom = 0.0;
406                 for (int i = 0; i < axes.size(); i++) {
407                         for (int j = 0; j < axes[i].size(); j++) {
408                                 sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i]));
409                         }
410                 }
411                 
412                 double denom = sqrt((sumDenom / (float) (axes.size() * numNames)));
413                 
414                 for (int i = 0; i < axes.size(); i++) {
415                         for (int j = 0; j < axes[i].size(); j++) {
416                                 axes[i][j] = (axes[i][j] - averageAxes[i]) / denom;
417                         }
418                 }
419                 
420                 return 0;
421         }
422         catch(exception& e) {
423                 m->errorOut(e, "NMDSCommand", "normalizeConfiguration");
424                 exit(1);
425         }
426 }
427 //**********************************************************************************************************************
428 //get configuration
429 vector< vector<double> > NMDSCommand::getConfiguration(vector< vector<double> >& axes, int dimension) {
430         try {
431                 vector< vector<double> > newAxes; newAxes.resize(dimension);
432                 
433                 for (int i = 0; i < dimension; i++) {
434                         newAxes[i] = axes[i];
435                 }
436                                 
437                 return newAxes;
438         }
439         catch(exception& e) {
440                 m->errorOut(e, "NMDSCommand", "getConfiguration");
441                 exit(1);
442         }
443 }
444 //**********************************************************************************************************************
445 //find raw stress, and normalize using
446 double NMDSCommand::calculateStress(vector< vector<double> >& matrix, vector< vector<double> >& config) {
447         try {
448                 double normStress = 0.0;
449                 double denom = 0.0;
450                 double rawStress = 0.0;
451                 
452                 //find raw stress
453                 for (int i = 0; i < matrix.size(); i++) {
454                         for (int j = 0; j < matrix[i].size(); j++) {
455                                 if (m->control_pressed) { return normStress; }
456                                 
457                                 rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j]));
458                                 denom += (config[i][j] * config[i][j]);
459                         }
460                 }
461                 
462                 //normalize stress
463                 if ((rawStress != 0.0) && (denom != 0.0)) {
464                         normStress = sqrt((rawStress / denom));
465                 }
466
467                 return normStress;
468         }
469         catch(exception& e) {
470                 m->errorOut(e, "NMDSCommand", "calculateStress");
471                 exit(1);
472         }
473 }
474
475 //**********************************************************************************************************************
476 int NMDSCommand::output(vector< vector<double> >& config, vector<string>& names, ofstream& out) {
477         try {
478                 
479                 for (int i = 0; i < names.size(); i++) {
480                         
481                         out << names[i] << '\t';
482                         
483                         for (int j = 0; j < config.size(); j++) {
484                                 out << config[j][i] << '\t';
485                         }
486                         
487                         out << endl;
488                 }
489                 
490                 out << endl << endl;
491                         
492                 return 0;
493         }
494         catch(exception& e) {
495                 m->errorOut(e, "NMDSCommand", "output");
496                 exit(1);
497         }
498 }
499 /*****************************************************************/
500 vector< vector<double> > NMDSCommand::readAxes(vector<string> names){
501         try {
502                 ifstream in;
503                 m->openInputFile(axesfile, in);
504                 
505                 string headerLine = m->getline(in); m->gobble(in);
506                 
507                 //count the number of axis you are reading
508                 bool done = false;
509                 int count = 0;
510                 while (!done) {
511                         int pos = headerLine.find("axis");
512                         if (pos != string::npos) {
513                                 count++;
514                                 headerLine = headerLine.substr(pos+4);
515                         }else { done = true; }
516                 }
517                 
518                 if (maxdim > count) { 
519                         m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); 
520                         maxdim = count; 
521                         if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + "."); m->mothurOutEndLine(); }
522                 }
523                 
524                 vector< vector<double> > axes;  axes.resize(maxdim);
525                 for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); }
526                 
527                 map <string, vector<double> > orderedAxes;
528                 map     <string, vector<double> >::iterator it;
529                 
530                 while (!in.eof()) {
531                         
532                         if (m->control_pressed) { in.close(); return axes; }
533                         
534                         string group = "";
535                         in >> group; m->gobble(in);
536                         
537                         bool ignore = false;
538                         if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); }
539                         
540                         vector<double> thisGroupsAxes;
541                         for (int i = 0; i < count; i++) {
542                                 float temp = 0.0;
543                                 in >> temp; 
544                                 
545                                 //only save the axis we want
546                                 if (i < maxdim) {  thisGroupsAxes.push_back(temp); }
547                         }
548                         
549                         if (!ignore) {  orderedAxes[group] = thisGroupsAxes; }
550                         
551                         m->gobble(in);
552                 }
553                 in.close();
554                                 
555                 //sanity check
556                 if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
557                 
558                 //put axes info in same order as distance file, just in case
559                 for (int i = 0; i < names.size(); i++) {
560                         it = orderedAxes.find(names[i]);
561                         
562                         if (it != orderedAxes.end()) {
563                                 vector<double> thisGroupsAxes = it->second;
564                                 
565                                 for (int j = 0; j < thisGroupsAxes.size(); j++) {
566                                         axes[j][i] = thisGroupsAxes[j];
567                                 }
568                                 
569                         }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
570                 }
571                 
572                 return axes;
573         }
574         catch(exception& e) {
575                 m->errorOut(e, "NMDSCommand", "readAxes");      
576                 exit(1);
577         }
578 }
579 /**********************************************************************************************************************/
580
581
582