]> git.donarmstrong.com Git - mothur.git/blob - nmdscommand.cpp
working on nmds command
[mothur.git] / nmdscommand.cpp
1 /*
2  *  nmdscommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/11/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "nmdscommand.h"
11 #include "readphylipvector.h"
12
13 //**********************************************************************************************************************
14 vector<string> NMDSCommand::getValidParameters(){       
15         try {
16                 string Array[] =  {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "NMDSCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 NMDSCommand::NMDSCommand(){     
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["nmds"] = tempOutNames;
32                 outputTypes["stress"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "NMDSCommand", "NMDSCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> NMDSCommand::getRequiredParameters(){    
41         try {
42                 string Array[] =  {"phylip"};
43                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "NMDSCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> NMDSCommand::getRequiredFiles(){ 
53         try {
54                 vector<string> myArray;
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "NMDSCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 NMDSCommand::NMDSCommand(string option)  {
65         try {
66                 abort = false;
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  {"phylip","axes","mindim","maxdim","iters","maxiters","epsilon","outputdir", "inputdir"};
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string, string> parameters = parser. getParameters();
78                         
79                         ValidParameters validParameter;
80                         map<string, string>::iterator it;
81                         
82                         //check to make sure all parameters are valid for command
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         //if the user changes the input directory command factory will send this info to us in the output parameter 
87                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
88                         if (inputDir == "not found"){   inputDir = "";          }
89                         else {
90                                 string path;
91                                 it = parameters.find("phylip");
92                                 //user has given a template file
93                                 if(it != parameters.end()){ 
94                                         path = m->hasPath(it->second);
95                                         //if the user has not given a path then, add inputdir. else leave path alone.
96                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
97                                 }
98                                 
99                                 it = parameters.find("axes");
100                                 //user has given a template file
101                                 if(it != parameters.end()){ 
102                                         path = m->hasPath(it->second);
103                                         //if the user has not given a path then, add inputdir. else leave path alone.
104                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
105                                 }
106                         }
107                         
108                         //initialize outputTypes
109                         vector<string> tempOutNames;
110                         outputTypes["nmds"] = tempOutNames;
111                         outputTypes["stress"] = tempOutNames;
112                         
113                         //required parameters
114                         phylipfile = validParameter.validFile(parameters, "phylip", true);
115                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
116                         else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; }        
117                         
118                         axesfile = validParameter.validFile(parameters, "axes", true);
119                         if (axesfile == "not open") { axesfile = ""; abort = true; }
120                         else if (axesfile == "not found") { axesfile = "";  }                           
121                         
122                         //if the user changes the output directory command factory will send this info to us in the output parameter 
123                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
124                                 outputDir = ""; 
125                                 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it      
126                         }
127                         
128                         string temp = validParameter.validFile(parameters, "mindim", false);    if (temp == "not found") {      temp = "1";     }
129                         convert(temp, mindim);
130                         
131                         temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") {      temp = "500";   }
132                         convert(temp, maxIters);
133                         
134                         temp = validParameter.validFile(parameters, "iters", false);    if (temp == "not found") {      temp = "10";    }
135                         convert(temp, iters);
136                         
137                         temp = validParameter.validFile(parameters, "maxdim", false);   if (temp == "not found") {      temp = "2";     }
138                         convert(temp, maxdim);
139                         
140                         temp = validParameter.validFile(parameters, "epsilon", false);  if (temp == "not found") {      temp = "0.000000000001";        }
141                         convert(temp, epsilon); 
142                         
143                         if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; }
144                         if (maxdim < mindim) { m->mothurOut("maxdim must be greater than mindim."); m->mothurOutEndLine(); abort = true; }
145                 }
146                 
147         }
148         catch(exception& e) {
149                 m->errorOut(e, "NMDSCommand", "NMDSCommand");
150                 exit(1);
151         }
152 }
153 //**********************************************************************************************************************
154 void NMDSCommand::help(){
155         try {
156                 m->mothurOut("The nmds command is modelled after the nmds code written in R by Sarah Goslee, using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling."); m->mothurOutEndLine();
157                 m->mothurOut("The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters and epsilon."); m->mothurOutEndLine();
158                 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
159                 m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine();
160                 m->mothurOut("The maxdim parameter allows you to select how maximum dimensions to use. Default=2"); m->mothurOutEndLine();
161                 m->mothurOut("The mindim parameter allows you to select how minimum dimensions to use. Default=1"); m->mothurOutEndLine();
162                 m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500"); m->mothurOutEndLine();
163                 m->mothurOut("The iters parameter allows you to select the number of random configuration to try. Default=10"); m->mothurOutEndLine();
164                 m->mothurOut("The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12."); m->mothurOutEndLine();
165                 m->mothurOut("Example nmds(phylip=yourDistanceFile).\n");
166                 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "NMDSCommand", "help");
170                 exit(1);
171         }
172 }
173 //**********************************************************************************************************************
174 NMDSCommand::~NMDSCommand(){}
175 //**********************************************************************************************************************
176 int NMDSCommand::execute(){
177         try {
178                 
179                 if (abort == true) { return 0; }
180                 
181                 cout.setf(ios::fixed, ios::floatfield);
182                 cout.setf(ios::showpoint);
183                 
184                 vector<string> names;
185                 vector< vector< double> > matrix; 
186                 
187                 //read in phylip file
188                 ReadPhylipVector readFile(phylipfile);
189                 names = readFile.read(matrix);
190                 if (m->control_pressed) { return 0; }
191                 
192                 //read axes
193                 vector< vector<double> > axes;
194                 if (axesfile != "") {  axes = readAxes(names);          }
195                 
196                 for (int i = mindim; i <= maxdim; i++) {
197                         m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine();
198                         
199                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".nmds";
200                         string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".stress.nmds";
201                         outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName);
202                         outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
203                         
204                         ofstream out, out2;
205                         m->openOutputFile(outputFileName, out);
206                         m->openOutputFile(stressFileName, out2);
207                         
208                         out2.setf(ios::fixed, ios::floatfield);
209                         out2.setf(ios::showpoint);
210                         out.setf(ios::fixed, ios::floatfield);
211                         out.setf(ios::showpoint);
212                         
213                         out2 << "Iter\tStress\tCorr" << endl;
214                         
215                         for (int j = 0; j < iters; j++) {
216                                 m->mothurOut(toString(j+1)); m->mothurOutEndLine(); 
217                                 
218                                 //get configuration - either randomly generate or resize to this dimension
219                                 vector< vector<double> > thisConfig;
220                                 if (axesfile == "") {   thisConfig = generateStartingConfiguration(names.size(), i);            }
221                                 else                            {       thisConfig = getConfiguration(axes, i);                                                         }
222                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
223                                 
224                                 //calc nmds for this dimension
225                                 double stress;
226                                 vector< vector<double> > endConfig = nmdsCalc(matrix, thisConfig, stress);
227                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
228                                 
229                                 //calc euclid distances for new config
230                                 vector< vector<double> > newEuclid = linearCalc.calculateEuclidianDistance(endConfig);
231                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
232                                 
233                                 //calc correlation between original distances and euclidean distances from this config
234                                 double corr = linearCalc.calcPearson(matrix, newEuclid);
235                                 corr *= corr;
236                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
237                                 
238                                 //output results
239                                 out << "Config" << (j+1) << '\t';
240                                 for (int k = 0; k < i; k++) { out << "X" << (k+1) << '\t'; }
241                                 out << endl;
242                                 out2 << (j+1) << '\t' << stress << '\t' << corr << endl;
243                                 
244                                 output(endConfig, names, out);
245                                 
246                                 if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
247
248                         }
249                         
250                         out.close(); out2.close();
251                 }
252                 
253                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
254                 
255                 m->mothurOutEndLine();
256                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
257                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
258                 m->mothurOutEndLine();
259                 
260                 return 0;
261         }
262         catch(exception& e) {
263                 m->errorOut(e, "NMDSCommand", "execute");
264                 exit(1);
265         }
266 }
267 //**********************************************************************************************************************
268 vector< vector<double> > NMDSCommand::nmdsCalc(vector< vector<double> >& matrix, vector< vector<double> >& config, double& stress1) {
269         try {
270                 
271                 vector< vector<double> > newConfig = config;
272                 
273                 //calc euclid distances
274                 vector< vector<double> > euclid = linearCalc.calculateEuclidianDistance(newConfig);
275                 if (m->control_pressed) { return newConfig; }           
276                 
277                 double stress2 = calculateStress(matrix, euclid);
278                 stress1 = stress2 + 1.0 + epsilon;
279                 
280                 int count = 0;
281                 while ((count < maxIters) && (abs(stress1 - stress2) > epsilon)) {
282                         count++;
283                         
284                         stress1 = stress2;
285                         
286                         if (m->control_pressed) { return newConfig; }
287                         
288                         vector< vector<double> > b; b.resize(euclid.size());
289                         for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0.0); }
290                         
291                         vector<double> columnSums; columnSums.resize(euclid.size(), 0.0);
292                         for (int i = 0; i < euclid.size(); i++) {
293                                 for (int j = 0; j < euclid[i].size(); j++) {
294                                         //eliminate divide by zero error
295                                         if (euclid[i][j] != 0) { 
296                                                 b[i][j] = matrix[i][j] / euclid[i][j];
297                                                 columnSums[j] += b[i][j];
298                                                 b[i][j] *= -1.0;
299                                         }
300                                 }
301                         }
302                         
303                         //put in diagonal sums
304                         for (int i = 0; i < euclid.size(); i++) {  b[i][i] = columnSums[i]; }
305                         
306                         int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0;
307                         double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0;
308                         
309                         //matrix mult
310                         newConfig = linearCalc.matrix_mult(newConfig, b);
311                         for (int i = 0; i < newConfig.size(); i++) {
312                                 for (int j = 0; j < newConfig[i].size(); j++) {
313                                         newConfig[i][j] *= (1.0 / n);
314                                 }
315                         }
316                         
317                         euclid = linearCalc.calculateEuclidianDistance(newConfig);
318                         
319                         stress2 = calculateStress(matrix, euclid);
320                 }
321                 
322                 return newConfig;
323         }
324         catch(exception& e) {
325                 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
326                 exit(1);
327         }
328 }
329
330 //**********************************************************************************************************************
331 //generate random config
332 vector< vector<double> > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) {
333         try {
334                 vector< vector<double> > axes;  axes.resize(dimension);
335                 for (int i = 0; i < axes.size(); i++) {  axes[i].resize(numNames); }
336                 
337                 //generate random number between -1 and 1, precision 6
338                 for (int i = 0; i < axes.size(); i++) {
339                         for (int j = 0; j < axes[i].size(); j++) {
340                                 
341                                 if (m->control_pressed) { return axes; }
342                                 
343                                 //generate random int between 0 and 99999
344                                 int myrand = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
345                                 
346                                 //generate random sign
347                                 int mysign = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1));
348                                 
349                                 //if mysign is even then sign = positive, else sign = negative
350                                 if ((mysign % 2) == 0) { mysign = 1.0; }
351                                 else { mysign = -1.0; }
352                                 
353                                 axes[i][j] = mysign * myrand / (float) 100000;
354                         }
355                 }
356
357                 return axes;
358         }
359         catch(exception& e) {
360                 m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
361                 exit(1);
362         }
363 }
364 //**********************************************************************************************************************
365 //normalize configuration
366 int NMDSCommand::normalizeConfiguration(vector< vector<double> >& axes, int numNames, int dimension) {
367         try {
368                 vector<double> averageAxes; averageAxes.resize(dimension, 0.0);
369                 
370                 //find average
371                 for (int i = 0; i < axes.size(); i++) {
372                         for (int j = 0; j < axes[i].size(); j++) {      averageAxes[i] += axes[i][j];   }
373                         
374                         averageAxes[i] /= (float) numNames;
375                 }
376                 
377                 //normalize axes
378                 double sumDenom = 0.0;
379                 for (int i = 0; i < axes.size(); i++) {
380                         for (int j = 0; j < axes[i].size(); j++) {
381                                 sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i]));
382                         }
383                 }
384                 
385                 double denom = sqrt((sumDenom / (float) (axes.size() * numNames)));
386                 
387                 for (int i = 0; i < axes.size(); i++) {
388                         for (int j = 0; j < axes[i].size(); j++) {
389                                 axes[i][j] = (axes[i][j] - averageAxes[i]) / denom;
390                         }
391                 }
392                 
393                 return 0;
394         }
395         catch(exception& e) {
396                 m->errorOut(e, "NMDSCommand", "normalizeConfiguration");
397                 exit(1);
398         }
399 }
400 //**********************************************************************************************************************
401 //get configuration
402 vector< vector<double> > NMDSCommand::getConfiguration(vector< vector<double> >& axes, int dimension) {
403         try {
404                 vector< vector<double> > newAxes; newAxes.resize(dimension);
405                 
406                 for (int i = 0; i < dimension; i++) {
407                         newAxes[i] = axes[i];
408                 }
409                                 
410                 return newAxes;
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "NMDSCommand", "getConfiguration");
414                 exit(1);
415         }
416 }
417 //**********************************************************************************************************************
418 //find raw stress, and normalize using
419 double NMDSCommand::calculateStress(vector< vector<double> >& matrix, vector< vector<double> >& config) {
420         try {
421                 double normStress = 0.0;
422                 double denom = 0.0;
423                 double rawStress = 0.0;
424                 
425                 //find raw stress
426                 for (int i = 0; i < matrix.size(); i++) {
427                         for (int j = 0; j < matrix[i].size(); j++) {
428                                 if (m->control_pressed) { return normStress; }
429                                 
430                                 rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j]));
431                                 denom += (config[i][j] * config[i][j]);
432                         }
433                 }
434                 
435                 //normalize stress
436                 if ((rawStress != 0.0) && (denom != 0.0)) {
437                         normStress = sqrt((rawStress / denom));
438                 }
439
440                 return normStress;
441         }
442         catch(exception& e) {
443                 m->errorOut(e, "NMDSCommand", "calculateStress");
444                 exit(1);
445         }
446 }
447
448 //**********************************************************************************************************************
449 int NMDSCommand::output(vector< vector<double> >& config, vector<string>& names, ofstream& out) {
450         try {
451                 
452                 for (int i = 0; i < names.size(); i++) {
453                         
454                         out << names[i] << '\t';
455                         
456                         for (int j = 0; j < config.size(); j++) {
457                                 out << config[j][i] << '\t';
458                         }
459                         
460                         out << endl;
461                 }
462                 
463                 out << endl << endl;
464                         
465                 return 0;
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "NMDSCommand", "output");
469                 exit(1);
470         }
471 }
472 /*****************************************************************/
473 vector< vector<double> > NMDSCommand::readAxes(vector<string> names){
474         try {
475                 ifstream in;
476                 m->openInputFile(axesfile, in);
477                 
478                 string headerLine = m->getline(in); m->gobble(in);
479                 
480                 //count the number of axis you are reading
481                 bool done = false;
482                 int count = 0;
483                 while (!done) {
484                         int pos = headerLine.find("axis");
485                         if (pos != string::npos) {
486                                 count++;
487                                 headerLine = headerLine.substr(pos+4);
488                         }else { done = true; }
489                 }
490                 
491                 if (maxdim > count) { 
492                         m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); 
493                         maxdim = count; 
494                         if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + "."); m->mothurOutEndLine(); }
495                 }
496                 
497                 vector< vector<double> > axes;  axes.resize(maxdim);
498                 for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); }
499                 
500                 map <string, vector<double> > orderedAxes;
501                 map     <string, vector<double> >::iterator it;
502                 
503                 while (!in.eof()) {
504                         
505                         if (m->control_pressed) { in.close(); return axes; }
506                         
507                         string group = "";
508                         in >> group; m->gobble(in);
509                         
510                         bool ignore = false;
511                         if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); }
512                         
513                         vector<double> thisGroupsAxes;
514                         for (int i = 0; i < count; i++) {
515                                 float temp = 0.0;
516                                 in >> temp; 
517                                 
518                                 //only save the axis we want
519                                 if (i < maxdim) {  thisGroupsAxes.push_back(temp); }
520                         }
521                         
522                         if (!ignore) {  orderedAxes[group] = thisGroupsAxes; }
523                         
524                         m->gobble(in);
525                 }
526                 in.close();
527                                 
528                 //sanity check
529                 if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
530                 
531                 //put axes info in same order as distance file, just in case
532                 for (int i = 0; i < names.size(); i++) {
533                         it = orderedAxes.find(names[i]);
534                         
535                         if (it != orderedAxes.end()) {
536                                 vector<double> thisGroupsAxes = it->second;
537                                 
538                                 for (int j = 0; j < thisGroupsAxes.size(); j++) {
539                                         axes[j][i] = thisGroupsAxes[j];
540                                 }
541                                 
542                         }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; }
543                 }
544                 
545                 return axes;
546         }
547         catch(exception& e) {
548                 m->errorOut(e, "NMDSCommand", "readAxes");      
549                 exit(1);
550         }
551 }
552 /**********************************************************************************************************************/
553
554
555