]> git.donarmstrong.com Git - mothur.git/blob - nmdscommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / nmdscommand.cpp
1 /*
2  *  nmdscommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/11/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "nmdscommand.h"
11 #include "readphylipvector.h"
12
13 //**********************************************************************************************************************
14 vector<string> NMDSCommand::getValidParameters(){       
15         try {
16                 string Array[] =  {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "NMDSCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 NMDSCommand::NMDSCommand(){     
27         try {
28                 abort = true; calledHelp = true; 
29                 vector<string> tempOutNames;
30                 outputTypes["nmds"] = tempOutNames;
31                 outputTypes["stress"] = tempOutNames;
32                 outputTypes["iters"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "NMDSCommand", "NMDSCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> NMDSCommand::getRequiredParameters(){    
41         try {
42                 string Array[] =  {"phylip"};
43                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "NMDSCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> NMDSCommand::getRequiredFiles(){ 
53         try {
54                 vector<string> myArray;
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "NMDSCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 NMDSCommand::NMDSCommand(string option)  {
65         try {
66                 abort = false; calledHelp = false;   
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; calledHelp = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir", "inputdir"};
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string, string> parameters = parser. getParameters();
78                         
79                         ValidParameters validParameter;
80                         map<string, string>::iterator it;
81                         
82                         //check to make sure all parameters are valid for command
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         //if the user changes the input directory command factory will send this info to us in the output parameter 
87                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
88                         if (inputDir == "not found"){   inputDir = "";          }
89                         else {
90                                 string path;
91                                 it = parameters.find("phylip");
92                                 //user has given a template file
93                                 if(it != parameters.end()){ 
94                                         path = m->hasPath(it->second);
95                                         //if the user has not given a path then, add inputdir. else leave path alone.
96                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
97                                 }
98                                 
99                                 it = parameters.find("axes");
100                                 //user has given a template file
101                                 if(it != parameters.end()){ 
102                                         path = m->hasPath(it->second);
103                                         //if the user has not given a path then, add inputdir. else leave path alone.
104                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
105                                 }
106                         }
107                         
108                         //initialize outputTypes
109                         vector<string> tempOutNames;
110                         outputTypes["nmds"] = tempOutNames;
111                         outputTypes["iters"] = tempOutNames;
112                         outputTypes["stress"] = tempOutNames;
113                         
114                         //required parameters
115                         phylipfile = validParameter.validFile(parameters, "phylip", true);
116                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
117                         else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; }        
118                         
119                         axesfile = validParameter.validFile(parameters, "axes", true);
120                         if (axesfile == "not open") { axesfile = ""; abort = true; }
121                         else if (axesfile == "not found") { axesfile = "";  }                           
122                         
123                         //if the user changes the output directory command factory will send this info to us in the output parameter 
124                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
125                                 outputDir = ""; 
126                                 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it      
127                         }
128                         
129                         string temp = validParameter.validFile(parameters, "mindim", false);    if (temp == "not found") {      temp = "2";     }
130                         convert(temp, mindim);
131                         
132                         temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") {      temp = "500";   }
133                         convert(temp, maxIters);
134                         
135                         temp = validParameter.validFile(parameters, "iters", false);    if (temp == "not found") {      temp = "10";    }
136                         convert(temp, iters);
137                         
138                         temp = validParameter.validFile(parameters, "maxdim", false);   if (temp == "not found") {      temp = "2";     }
139                         convert(temp, maxdim);
140                         
141                         temp = validParameter.validFile(parameters, "epsilon", false);  if (temp == "not found") {      temp = "0.000000000001";        }
142                         convert(temp, epsilon); 
143                         
144                         if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; }
145                         if (maxdim < mindim) { m->mothurOut("maxdim must be greater than mindim."); m->mothurOutEndLine(); abort = true; }
146                 }
147                 
148         }
149         catch(exception& e) {
150                 m->errorOut(e, "NMDSCommand", "NMDSCommand");
151                 exit(1);
152         }
153 }
154 //**********************************************************************************************************************
155 void NMDSCommand::help(){
156         try {
157                 m->mothurOut("The nmds command is modelled after the nmds code written in R by Sarah Goslee, using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling."); m->mothurOutEndLine();
158                 m->mothurOut("The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters and epsilon."); m->mothurOutEndLine();
159                 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
160                 m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine();
161                 m->mothurOut("The maxdim parameter allows you to select the maximum dimensions to use. Default=2"); m->mothurOutEndLine();
162                 m->mothurOut("The mindim parameter allows you to select the minimum dimensions to use. Default=2"); m->mothurOutEndLine();
163                 m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500"); m->mothurOutEndLine();
164                 m->mothurOut("The iters parameter allows you to select the number of random configuration to try. Default=10"); m->mothurOutEndLine();
165                 m->mothurOut("The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12."); m->mothurOutEndLine();
166                 m->mothurOut("Example nmds(phylip=yourDistanceFile).\n");
167                 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
168         }
169         catch(exception& e) {
170                 m->errorOut(e, "NMDSCommand", "help");
171                 exit(1);
172         }
173 }
174 //**********************************************************************************************************************
175 NMDSCommand::~NMDSCommand(){}
176 //**********************************************************************************************************************
177 int NMDSCommand::execute(){
178         try {
179                 
180                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
181                 
182                 cout.setf(ios::fixed, ios::floatfield);
183                 cout.setf(ios::showpoint);
184                 
185                 vector<string> names;
186                 vector< vector< double> > matrix; 
187                 
188                 //read in phylip file
189                 ReadPhylipVector readFile(phylipfile);
190                 names = readFile.read(matrix);
191                 if (m->control_pressed) { return 0; }
192                 
193                 //read axes
194                 vector< vector<double> > axes;
195                 if (axesfile != "") {  axes = readAxes(names);          }
196                 
197                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.iters";
198                 string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds";
199                 outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName);
200                 outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
201                 
202                 ofstream out, out2;
203                 m->openOutputFile(outputFileName, out);
204                 m->openOutputFile(stressFileName, out2);
205                 
206                 out2.setf(ios::fixed, ios::floatfield);
207                 out2.setf(ios::showpoint);
208                 out.setf(ios::fixed, ios::floatfield);
209                 out.setf(ios::showpoint);
210                 
211                 out2 << "Dimension\tIter\tStress\tCorr" << endl;
212                 
213                 double bestStress = 10000000;
214                 vector< vector<double> > bestConfig;
215                 
216                 for (int i = mindim; i <= maxdim; i++) {
217                         m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine();
218                         
219                         for (int j = 0; j < iters; j++) {
220                                 m->mothurOut(toString(j+1)); m->mothurOutEndLine(); 
221                                 
222                                 //get configuration - either randomly generate or resize to this dimension
223                                 vector< vector<double> > thisConfig;
224                                 if (axesfile == "") {   thisConfig = generateStartingConfiguration(names.size(), i);            }
225                                 else                            {       thisConfig = getConfiguration(axes, i);                                                         }
226                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
227                                 
228                                 //calc nmds for this dimension
229                                 double stress;
230                                 vector< vector<double> > endConfig = nmdsCalc(matrix, thisConfig, stress);
231                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
232                                 
233                                 //calc euclid distances for new config
234                                 vector< vector<double> > newEuclid = linearCalc.calculateEuclidianDistance(endConfig);
235                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
236                                 
237                                 //calc correlation between original distances and euclidean distances from this config
238                                 double corr = linearCalc.calcPearson(newEuclid, matrix);
239                                 corr *= corr;
240                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
241                                 
242                                 //output results
243                                 out << "Config" << (j+1) << '\t';
244                                 for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; }
245                                 out << endl;
246                                 out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << corr << endl;
247                                 
248                                 output(endConfig, names, out);
249                                 
250                                 //save best
251                                 if (stress < bestStress) {
252                                         bestStress = stress;
253                                         bestConfig = endConfig;
254                                 }
255                                 
256                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
257                         }
258                 }
259                 
260                 out.close(); out2.close();
261                 
262                 //output best config
263                 string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.axes";
264                 outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName);
265                 
266                 ofstream outBest;
267                 m->openOutputFile(BestFileName, outBest);
268                 outBest.setf(ios::fixed, ios::floatfield);
269                 outBest.setf(ios::showpoint);
270                 
271                 outBest << '\t';
272                 for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
273                 outBest << endl;
274                 
275                 output(bestConfig, names, outBest);
276                 
277                 outBest.close();
278                 
279                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
280                 
281                 m->mothurOutEndLine();
282                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
283                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
284                 m->mothurOutEndLine();
285                 
286                 return 0;
287         }
288         catch(exception& e) {
289                 m->errorOut(e, "NMDSCommand", "execute");
290                 exit(1);
291         }
292 }
293 //**********************************************************************************************************************
294 vector< vector<double> > NMDSCommand::nmdsCalc(vector< vector<double> >& matrix, vector< vector<double> >& config, double& stress1) {
295         try {
296                 
297                 vector< vector<double> > newConfig = config;
298                 
299                 //calc euclid distances
300                 vector< vector<double> > euclid = linearCalc.calculateEuclidianDistance(newConfig);
301                 if (m->control_pressed) { return newConfig; }           
302                 
303                 double stress2 = calculateStress(matrix, euclid);
304                 stress1 = stress2 + 1.0 + epsilon;
305                 
306                 int count = 0;
307                 while ((count < maxIters) && (abs(stress1 - stress2) > epsilon)) {
308                         count++;
309                         
310                         stress1 = stress2;
311                         
312                         if (m->control_pressed) { return newConfig; }
313                         
314                         vector< vector<double> > b; b.resize(euclid.size());
315                         for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0.0); }
316                         
317                         vector<double> columnSums; columnSums.resize(euclid.size(), 0.0);
318                         for (int i = 0; i < euclid.size(); i++) {
319                                 for (int j = 0; j < euclid[i].size(); j++) {
320                                         //eliminate divide by zero error
321                                         if (euclid[i][j] != 0) { 
322                                                 b[i][j] = matrix[i][j] / euclid[i][j];
323                                                 columnSums[j] += b[i][j];
324                                                 b[i][j] *= -1.0;
325                                         }
326                                 }
327                         }
328                         
329                         //put in diagonal sums
330                         for (int i = 0; i < euclid.size(); i++) {  b[i][i] = columnSums[i]; }
331                         
332                         int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0;
333                         double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0;
334                         
335                         //matrix mult
336                         newConfig = linearCalc.matrix_mult(newConfig, b);
337                         for (int i = 0; i < newConfig.size(); i++) {
338                                 for (int j = 0; j < newConfig[i].size(); j++) {
339                                         newConfig[i][j] *= (1.0 / n);
340                                 }
341                         }
342                         
343                         euclid = linearCalc.calculateEuclidianDistance(newConfig);
344                         
345                         stress2 = calculateStress(matrix, euclid);
346                 }
347                 
348                 return newConfig;
349         }
350         catch(exception& e) {
351                 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
352                 exit(1);
353         }
354 }
355
356 //**********************************************************************************************************************
357 //generate random config
358 vector< vector<double> > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) {
359         try {
360                 vector< vector<double> > axes;  axes.resize(dimension);
361                 for (int i = 0; i < axes.size(); i++) {  axes[i].resize(numNames); }
362                 
363                 //generate random number between -1 and 1, precision 6
364                 for (int i = 0; i < axes.size(); i++) {
365                         for (int j = 0; j < axes[i].size(); j++) {
366                                 
367                                 if (m->control_pressed) { return axes; }
368                                 
369                                 //generate random int between 0 and 99999
370                                 int myrand = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
371                                 
372                                 //generate random sign
373                                 int mysign = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
374                                 
375                                 //if mysign is even then sign = positive, else sign = negative
376                                 if ((mysign % 2) == 0) { mysign = 1.0; }
377                                 else { mysign = -1.0; }
378                                 
379                                 axes[i][j] = mysign * myrand / (float) 100000;
380                         }
381                 }
382
383                 return axes;
384         }
385         catch(exception& e) {
386                 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
387                 exit(1);
388         }
389 }
390 //**********************************************************************************************************************
391 //normalize configuration
392 int NMDSCommand::normalizeConfiguration(vector< vector<double> >& axes, int numNames, int dimension) {
393         try {
394                 vector<double> averageAxes; averageAxes.resize(dimension, 0.0);
395                 
396                 //find average
397                 for (int i = 0; i < axes.size(); i++) {
398                         for (int j = 0; j < axes[i].size(); j++) {      averageAxes[i] += axes[i][j];   }
399                         
400                         averageAxes[i] /= (float) numNames;
401                 }
402                 
403                 //normalize axes
404                 double sumDenom = 0.0;
405                 for (int i = 0; i < axes.size(); i++) {
406                         for (int j = 0; j < axes[i].size(); j++) {
407                                 sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i]));
408                         }
409                 }
410                 
411                 double denom = sqrt((sumDenom / (float) (axes.size() * numNames)));
412                 
413                 for (int i = 0; i < axes.size(); i++) {
414                         for (int j = 0; j < axes[i].size(); j++) {
415                                 axes[i][j] = (axes[i][j] - averageAxes[i]) / denom;
416                         }
417                 }
418                 
419                 return 0;
420         }
421         catch(exception& e) {
422                 m->errorOut(e, "NMDSCommand", "normalizeConfiguration");
423                 exit(1);
424         }
425 }
426 //**********************************************************************************************************************
427 //get configuration
428 vector< vector<double> > NMDSCommand::getConfiguration(vector< vector<double> >& axes, int dimension) {
429         try {
430                 vector< vector<double> > newAxes; newAxes.resize(dimension);
431                 
432                 for (int i = 0; i < dimension; i++) {
433                         newAxes[i] = axes[i];
434                 }
435                                 
436                 return newAxes;
437         }
438         catch(exception& e) {
439                 m->errorOut(e, "NMDSCommand", "getConfiguration");
440                 exit(1);
441         }
442 }
443 //**********************************************************************************************************************
444 //find raw stress, and normalize using
445 double NMDSCommand::calculateStress(vector< vector<double> >& matrix, vector< vector<double> >& config) {
446         try {
447                 double normStress = 0.0;
448                 double denom = 0.0;
449                 double rawStress = 0.0;
450                 
451                 //find raw stress
452                 for (int i = 0; i < matrix.size(); i++) {
453                         for (int j = 0; j < matrix[i].size(); j++) {
454                                 if (m->control_pressed) { return normStress; }
455                                 
456                                 rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j]));
457                                 denom += (config[i][j] * config[i][j]);
458                         }
459                 }
460                 
461                 //normalize stress
462                 if ((rawStress != 0.0) && (denom != 0.0)) {
463                         normStress = sqrt((rawStress / denom));
464                 }
465
466                 return normStress;
467         }
468         catch(exception& e) {
469                 m->errorOut(e, "NMDSCommand", "calculateStress");
470                 exit(1);
471         }
472 }
473
474 //**********************************************************************************************************************
475 int NMDSCommand::output(vector< vector<double> >& config, vector<string>& names, ofstream& out) {
476         try {
477                 
478                 for (int i = 0; i < names.size(); i++) {
479                         
480                         out << names[i] << '\t';
481                         
482                         for (int j = 0; j < config.size(); j++) {
483                                 out << config[j][i] << '\t';
484                         }
485                         
486                         out << endl;
487                 }
488                 
489                 out << endl << endl;
490                         
491                 return 0;
492         }
493         catch(exception& e) {
494                 m->errorOut(e, "NMDSCommand", "output");
495                 exit(1);
496         }
497 }
498 /*****************************************************************/
499 vector< vector<double> > NMDSCommand::readAxes(vector<string> names){
500         try {
501                 ifstream in;
502                 m->openInputFile(axesfile, in);
503                 
504                 string headerLine = m->getline(in); m->gobble(in);
505                 
506                 //count the number of axis you are reading
507                 bool done = false;
508                 int count = 0;
509                 while (!done) {
510                         int pos = headerLine.find("axis");
511                         if (pos != string::npos) {
512                                 count++;
513                                 headerLine = headerLine.substr(pos+4);
514                         }else { done = true; }
515                 }
516                 
517                 if (maxdim > count) { 
518                         m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); 
519                         maxdim = count; 
520                         if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + "."); m->mothurOutEndLine(); }
521                 }
522                 
523                 vector< vector<double> > axes;  axes.resize(maxdim);
524                 for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); }
525                 
526                 map <string, vector<double> > orderedAxes;
527                 map     <string, vector<double> >::iterator it;
528                 
529                 while (!in.eof()) {
530                         
531                         if (m->control_pressed) { in.close(); return axes; }
532                         
533                         string group = "";
534                         in >> group; m->gobble(in);
535                         
536                         bool ignore = false;
537                         if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); }
538                         
539                         vector<double> thisGroupsAxes;
540                         for (int i = 0; i < count; i++) {
541                                 float temp = 0.0;
542                                 in >> temp; 
543                                 
544                                 //only save the axis we want
545                                 if (i < maxdim) {  thisGroupsAxes.push_back(temp); }
546                         }
547                         
548                         if (!ignore) {  orderedAxes[group] = thisGroupsAxes; }
549                         
550                         m->gobble(in);
551                 }
552                 in.close();
553                                 
554                 //sanity check
555                 if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
556                 
557                 //put axes info in same order as distance file, just in case
558                 for (int i = 0; i < names.size(); i++) {
559                         it = orderedAxes.find(names[i]);
560                         
561                         if (it != orderedAxes.end()) {
562                                 vector<double> thisGroupsAxes = it->second;
563                                 
564                                 for (int j = 0; j < thisGroupsAxes.size(); j++) {
565                                         axes[j][i] = thisGroupsAxes[j];
566                                 }
567                                 
568                         }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
569                 }
570                 
571                 return axes;
572         }
573         catch(exception& e) {
574                 m->errorOut(e, "NMDSCommand", "readAxes");      
575                 exit(1);
576         }
577 }
578 /**********************************************************************************************************************/
579
580
581