]> git.donarmstrong.com Git - mothur.git/blob - corraxescommand.cpp
working on kendall
[mothur.git] / corraxescommand.cpp
1 /*
2  *  corraxescommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "corraxescommand.h"
11 #include "sharedutilities.h"
12
13 //********************************************************************************************************************
14 //sorts highest to lowest
15 inline bool compareSpearman(spearmanRank left, spearmanRank right){
16         return (left.score > right.score);      
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
21         return (left.score < right.score);      
22
23 //**********************************************************************************************************************
24 vector<string> CorrAxesCommand::getValidParameters(){   
25         try {
26                 string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
27                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "CorrAxesCommand", "getValidParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 vector<string> CorrAxesCommand::getRequiredParameters(){        
37         try {
38                 string Array[] =  {"axes"};
39                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
40                 return myArray;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "CorrAxesCommand", "getRequiredParameters");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 CorrAxesCommand::CorrAxesCommand(){     
49         try {
50                 abort = true;
51                 //initialize outputTypes
52                 vector<string> tempOutNames;
53                 outputTypes["corr.axes"] = tempOutNames;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
57                 exit(1);
58         }
59 }
60
61 //**********************************************************************************************************************
62 vector<string> CorrAxesCommand::getRequiredFiles(){     
63         try {
64                 vector<string> myArray;
65                 return myArray;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "CorrAxesCommand", "getRequiredFiles");
69                 exit(1);
70         }
71 }
72 //**********************************************************************************************************************
73 CorrAxesCommand::CorrAxesCommand(string option)  {
74         try {
75                 abort = false;
76                 globaldata = GlobalData::getInstance();
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; }
80                 
81                 else {
82                         //valid paramters for this command
83                         string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
84                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
85                         
86                         OptionParser parser(option);
87                         map<string, string> parameters = parser.getParameters();
88                         
89                         ValidParameters validParameter;
90                         map<string, string>::iterator it;
91                         
92                         //check to make sure all parameters are valid for command
93                         for (it = parameters.begin(); it != parameters.end(); it++) { 
94                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
95                         }
96                         
97                         vector<string> tempOutNames;
98                         outputTypes["corr.axes"] = tempOutNames;
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("axes");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
111                                 }
112                                 
113                                 it = parameters.find("shared");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
119                                 }
120                                 
121                                 it = parameters.find("relabund");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
127                                 }
128                                 
129                                 it = parameters.find("metadata");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
135                                 }
136                         }
137                         
138                         
139                         //check for required parameters
140                         axesfile = validParameter.validFile(parameters, "axes", true);
141                         if (axesfile == "not open") { abort = true; }
142                         else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command."); m->mothurOutEndLine(); abort = true;  }     
143                         
144                         sharedfile = validParameter.validFile(parameters, "shared", true);
145                         if (sharedfile == "not open") { abort = true; }
146                         else if (sharedfile == "not found") { sharedfile = ""; }
147                         else { inputFileName = sharedfile; }
148                         
149                         relabundfile = validParameter.validFile(parameters, "relabund", true);
150                         if (relabundfile == "not open") { abort = true; }
151                         else if (relabundfile == "not found") { relabundfile = ""; }
152                         else { inputFileName = relabundfile; }
153                         
154                         metadatafile = validParameter.validFile(parameters, "metadata", true);
155                         if (metadatafile == "not open") { abort = true; }
156                         else if (metadatafile == "not found") { metadatafile = ""; }
157                         else { inputFileName = metadatafile; }
158                         
159                         groups = validParameter.validFile(parameters, "groups", false);                 
160                         if (groups == "not found") { groups = "";  pickedGroups = false;  }
161                         else { 
162                                 pickedGroups = true;
163                                 m->splitAtDash(groups, Groups); 
164                         }                       
165                         globaldata->Groups = Groups;
166                         
167                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputFileName);  }
168                         
169                         label = validParameter.validFile(parameters, "label", false);                   
170                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
171                         
172                         if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) { m->mothurOut("You must provide either a shared, relabund, or metadata file."); m->mothurOutEndLine(); abort = true;  }
173                         
174                         if (metadatafile != "") {
175                                 if ((relabundfile != "") || (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
176                         }else {
177                                 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
178                         }
179                         string temp;
180                         temp = validParameter.validFile(parameters, "numaxes", false);          if (temp == "not found"){       temp = "3";                             }
181                         convert(temp, numaxes); 
182                         
183                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "pearson";             }
184                         
185                         if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
186                         
187                 }
188         }
189         catch(exception& e) {
190                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");           
191                 exit(1);
192         }
193 }
194 //**********************************************************************************************************************
195
196 void CorrAxesCommand::help(){
197         try {
198                 m->mothurOut("The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n");
199                 m->mothurOut("The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label.  The shared, relabund or metadata and axes parameters are required.  If shared is given the relative abundance is calculated.\n");
200                 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n");
201                 m->mothurOut("The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n");
202                 m->mothurOut("The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n");
203                 m->mothurOut("The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n");
204                 m->mothurOut("The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n");
205                 m->mothurOut("Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n");
206                 m->mothurOut("The corr.axes command outputs a .corr.axes file.\n");
207                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
208         }
209         catch(exception& e) {
210                 m->errorOut(e, "CorrAxesCommand", "help");      
211                 exit(1);
212         }
213 }
214
215 //**********************************************************************************************************************
216
217 CorrAxesCommand::~CorrAxesCommand(){}
218
219 //**********************************************************************************************************************
220
221 int CorrAxesCommand::execute(){
222         try {
223                 
224                 if (abort == true) { return 0; }
225                 
226                 /*************************************************************************************/
227                 // use smart distancing to get right sharedRabund and convert to relabund if needed  //
228                 /************************************************************************************/
229                 if (sharedfile != "") {  
230                         InputData* input = new InputData(sharedfile, "sharedfile");
231                         getSharedFloat(input); 
232                         delete input;
233                         
234                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
235                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
236                         
237                 }else if (relabundfile != "") { 
238                         InputData* input = new InputData(relabundfile, "relabund");
239                         getSharedFloat(input); 
240                         delete input;
241                         
242                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
243                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
244                         
245                 }else if (metadatafile != "") { 
246                         getMetadata();  //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later
247                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
248                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading metadata file."); m->mothurOutEndLine(); return 0; }
249                         
250                         if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
251                         
252                 }else { m->mothurOut("[ERROR]: no file given."); m->mothurOutEndLine(); return 0; }
253                 
254                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
255                 
256                 //this is for a sanity check to make sure the axes file and shared file match
257                 for (int i = 0; i < lookupFloat.size(); i++) { names.insert(lookupFloat[i]->getGroup()); }
258                 
259                 /*************************************************************************************/
260                 // read axes file and check for file mismatches with shared or relabund file         //
261                 /************************************************************************************/
262                 
263                 //read axes file
264                 map<string, vector<float> > axes = readAxes();
265                 
266                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
267                 
268                 //sanity check, the read only adds groups that are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone
269                 if (axes.size() != lookupFloat.size()) { 
270                         map<string, vector<float> >::iterator it;
271                         for (int i = 0; i < lookupFloat.size(); i++) {
272                                 it = axes.find(lookupFloat[i]->getGroup());
273                                 if (it == axes.end()) { m->mothurOut(lookupFloat[i]->getGroup() + " is in your shared of relabund file but not in your axes file, please correct."); m->mothurOutEndLine(); }
274                         }
275                         m->control_pressed = true;
276                 }
277                 
278                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
279                 
280                 /*************************************************************************************/
281                 // calc the r values                                                                                                                            //
282                 /************************************************************************************/
283                 
284                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
285                 outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);      
286                 ofstream out;
287                 m->openOutputFile(outputFileName, out);
288                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
289                 
290                 //output headings
291                 if (metadatafile == "") {  out << "OTU";        }
292                 else {  out << "Feature";                                               }
293
294                 for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1); }
295                 out << "\tlength" << endl;
296                 
297                 if (method == "pearson")                {  calcPearson(axes, out);      }
298                 else if (method == "spearman")  {  calcSpearman(axes, out); }
299                 else if (method == "kendall")   {  calcKendall(axes, out);      }
300                 else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
301                 
302                 out.close();
303                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
304                 
305                 if (m->control_pressed) {  return 0; }
306
307                 m->mothurOutEndLine();
308                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
309                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
310                 m->mothurOutEndLine();
311                 
312                 return 0;
313         }
314         catch(exception& e) {
315                 m->errorOut(e, "CorrAxesCommand", "execute");   
316                 exit(1);
317         }
318 }
319 //**********************************************************************************************************************
320 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
321    try {
322            
323            //find average of each axis - X
324            vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
325            for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
326                    vector<float> temp = it->second;
327                    for (int i = 0; i < temp.size(); i++) {
328                            averageAxes[i] += temp[i];  
329                    }
330            }
331            
332            for (int i = 0; i < averageAxes.size(); i++) {  averageAxes[i] = averageAxes[i] / (float) axes.size(); }
333            
334            //for each otu
335            for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
336                    
337                    if (metadatafile == "") {  out << i+1;       }
338                    else {  out << metadataLabels[i];            }
339                                    
340                    //find the averages this otu - Y
341                    float sumOtu = 0.0;
342                    for (int j = 0; j < lookupFloat.size(); j++) {
343                            sumOtu += lookupFloat[j]->getAbundance(i);
344                    }
345                    float Ybar = sumOtu / (float) lookupFloat.size();
346                    
347                    vector<float> rValues(averageAxes.size());
348
349                    //find r value for each axis
350                    for (int k = 0; k < averageAxes.size(); k++) {
351                            
352                            double r = 0.0;
353                            double numerator = 0.0;
354                            double denomTerm1 = 0.0;
355                            double denomTerm2 = 0.0;
356                            for (int j = 0; j < lookupFloat.size(); j++) {
357                                    float Yi = lookupFloat[j]->getAbundance(i);
358                                    float Xi = axes[lookupFloat[j]->getGroup()][k];
359                                    
360                                    numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
361                                    denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
362                                    denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
363                            }
364                            
365                            double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
366                            
367                            r = numerator / denom;
368                            rValues[k] = r;
369                            out << '\t' << r; 
370                    }
371                    
372                    double sum = 0;
373                    for(int k=0;k<rValues.size();k++){
374                            sum += rValues[k] * rValues[k];
375                    }
376                    out << '\t' << sqrt(sum) << endl;
377            }
378                    
379            return 0;
380    }
381    catch(exception& e) {
382            m->errorOut(e, "CorrAxesCommand", "calcPearson");
383            exit(1);
384    }
385 }
386 //**********************************************************************************************************************
387 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
388         try {
389                 
390                 /*axes.clear();
391                 axes["1a"].push_back(1);
392                 axes["1b"].push_back(1);
393                 axes["1c"].push_back(1);
394                 axes["2a"].push_back(2);
395                 axes["2b"].push_back(2);
396                 axes["2c"].push_back(2);
397                 axes["3a"].push_back(3);
398                 axes["3b"].push_back(3);
399                 axes["3c"].push_back(3);
400                 axes["4a"].push_back(4);
401                 axes["4b"].push_back(4);
402                 axes["4c"].push_back(4);
403                 axes["5a"].push_back(5);
404                 axes["5b"].push_back(5);
405                 axes["5c"].push_back(5);*/
406                 
407                 //format data
408                 vector< map<float, int> > tableX; tableX.resize(numaxes);
409                 map<float, int>::iterator itTable;
410                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
411                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
412                         vector<float> temp = it->second;
413                         for (int i = 0; i < temp.size(); i++) {
414                                 spearmanRank member(it->first, temp[i]);
415                                 scores[i].push_back(member);  
416                                 
417                                 //count number of repeats
418                                 itTable = tableX[i].find(temp[i]);
419                                 if (itTable == tableX[i].end()) { 
420                                         tableX[i][temp[i]] = 1;
421                                 }else {
422                                         tableX[i][temp[i]]++;
423                                 }
424                         }
425                 }
426                 
427                 //calc LX
428                 //for each axis
429                 vector<double> Lx; Lx.resize(numaxes, 0.0);
430                 for (int i = 0; i < numaxes; i++) {
431                         for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) {
432                                 double tx = (double) itTable->second;
433                                 Lx[i] += ((pow(tx, 3.0) - tx) / 12.0);
434                         }
435                 }
436                 
437                 //sort each axis
438                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
439                 
440                 //find ranks of xi in each axis
441                 map<string, vector<float> > rankAxes;
442                 for (int i = 0; i < numaxes; i++) {
443                         
444                         vector<spearmanRank> ties;
445                         int rankTotal = 0;
446                         for (int j = 0; j < scores[i].size(); j++) {
447                                 rankTotal += (j+1);
448                                 ties.push_back(scores[i][j]);
449                                 
450                                 if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead
451                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
452
453                                                 for (int k = 0; k < ties.size(); k++) {
454                                                         float thisrank = rankTotal / (float) ties.size();
455                                                         rankAxes[ties[k].name].push_back(thisrank);
456                                                 }
457                                                 ties.clear();
458                                                 rankTotal = 0;
459                                         }
460                                 }else { // you are the last one
461                                         
462                                         for (int k = 0; k < ties.size(); k++) {
463                                                 float thisrank = rankTotal / (float) ties.size();
464                                                 rankAxes[ties[k].name].push_back(thisrank);
465                                                 
466                                         }
467                                 }
468                         }
469                 }
470                 
471                                 
472                 //for each otu
473                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
474                         
475                         if (metadatafile == "") {  out << i+1;  }
476                         else {  out << metadataLabels[i];               }
477                         
478                         //find the ranks of this otu - Y
479                         vector<spearmanRank> otuScores;
480                         map<float, int> tableY;
481                         for (int j = 0; j < lookupFloat.size(); j++) {
482                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
483                                 otuScores.push_back(member);
484                                 
485                                 itTable = tableY.find(member.score);
486                                 if (itTable == tableY.end()) { 
487                                         tableY[member.score] = 1;
488                                 }else {
489                                         tableY[member.score]++;
490                                 }
491                                 
492                         }
493                         
494                         //calc Ly
495                         double Ly = 0.0;
496                         for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
497                                 double ty = (double) itTable->second;
498                                 Ly += ((pow(ty, 3.0) - ty) / 12.0);
499                         }
500                         
501                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
502                         
503                         map<string, float> rankOtus;
504                         vector<spearmanRank> ties;
505                         int rankTotal = 0;
506                         for (int j = 0; j < otuScores.size(); j++) {
507                                 rankTotal += (j+1);
508                                 ties.push_back(otuScores[j]);
509                                 
510                                 if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead
511                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
512                                                 
513                                                 for (int k = 0; k < ties.size(); k++) {
514                                                         float thisrank = rankTotal / (float) ties.size();
515                                                         rankOtus[ties[k].name] = thisrank;
516                                                 }
517                                                 ties.clear();
518                                                 rankTotal = 0;
519                                         }
520                                 }else { // you are the last one
521                                         
522                                         for (int k = 0; k < ties.size(); k++) {
523                                                 float thisrank = rankTotal / (float) ties.size();
524                                                 rankOtus[ties[k].name] = thisrank;
525                                         }
526                                 }
527                         }
528                         vector<double> pValues(numaxes);        
529
530                         //calc spearman ranks for each axis for this otu
531                         for (int j = 0; j < numaxes; j++) {
532                                 
533                                 double di = 0.0;
534                                 for (int k = 0; k < lookupFloat.size(); k++) {
535                                         
536                                         float xi = rankAxes[lookupFloat[k]->getGroup()][j];
537                                         float yi = rankOtus[lookupFloat[k]->getGroup()];
538                                         
539                                         di += ((xi - yi) * (xi - yi));
540                                 }
541                                 
542                                 double p = 0.0;
543                                 
544                                 double n = (double) lookupFloat.size();
545                                 
546                                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j];
547                                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
548                                 
549                                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
550                                 
551                                 out  << '\t' << p;
552                                 
553                                 pValues[j] = p;
554                         }
555
556                         double sum = 0;
557                         for(int k=0;k<numaxes;k++){
558                                 sum += pValues[k] * pValues[k];
559                         }
560                         out << '\t' << sqrt(sum) << endl;
561                 }
562                 
563                 return 0;
564         }
565         catch(exception& e) {
566                 m->errorOut(e, "CorrAxesCommand", "calcSpearman");
567                 exit(1);
568         }
569 }
570 //**********************************************************************************************************************
571 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
572         try {
573                 
574                 numaxes = 1;
575                 axes.clear();
576                  axes["1a"].push_back(1);
577                  axes["1b"].push_back(1);
578                  axes["1c"].push_back(1);
579                  axes["2a"].push_back(2);
580                  axes["2b"].push_back(2);
581                  axes["2c"].push_back(2);
582                  axes["3a"].push_back(3);
583                  axes["3b"].push_back(3);
584                  axes["3c"].push_back(3);
585                  axes["4a"].push_back(4);
586                  axes["4b"].push_back(4);
587                  axes["4c"].push_back(4);
588                  axes["5a"].push_back(5);
589                  axes["5b"].push_back(5);
590                  axes["5c"].push_back(5);
591                 
592                 
593                 //format data
594                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
595                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
596                         vector<float> temp = it->second;
597                         for (int i = 0; i < temp.size(); i++) {
598                                 spearmanRank member(it->first, temp[i]);
599                                 scores[i].push_back(member);  
600                         }
601                 }
602                 
603                 //sort each axis
604                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
605                 
606                 //convert scores to ranks of xi in each axis
607                 for (int i = 0; i < numaxes; i++) {
608                         
609                         vector<spearmanRank*> ties;
610                         int rankTotal = 0;
611                         for (int j = 0; j < scores[i].size(); j++) {
612                                 rankTotal += (j+1);
613                                 ties.push_back(&(scores[i][j]));
614                                 
615                                 if (j != scores[i].size()-1) { // you are not the last so you can look ahead
616                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
617                                                 for (int k = 0; k < ties.size(); k++) {
618                                                         float thisrank = rankTotal / (float) ties.size();
619                                                         (*ties[k]).score = thisrank;
620                                                 }
621                                                 ties.clear();
622                                                 rankTotal = 0;
623                                         }
624                                 }else { // you are the last one
625                                         for (int k = 0; k < ties.size(); k++) {
626                                                 float thisrank = rankTotal / (float) ties.size();
627                                                 (*ties[k]).score = thisrank;
628                                         }
629                                 }
630                         }
631                 }
632                 cout << "axes ranks = ";
633                 for(int i = 0; i < scores[0].size(); i++) {  cout << scores[0][i].score << '\t'; }
634                 cout << endl;   
635                  lookupFloat.clear();
636                  lookupFloat.resize(15);
637                  for (int i = 0; i < lookupFloat.size(); i++) {
638                  lookupFloat[i] = new SharedRAbundFloatVector();
639                  }
640                  lookupFloat[0]->push_back(0.2288227, "1a");  lookupFloat[0]->setGroup("1a");
641                  lookupFloat[1]->push_back(0.7394062, "1b");  lookupFloat[1]->setGroup("1b");
642                  lookupFloat[2]->push_back(0.4521187, "1c");  lookupFloat[2]->setGroup("1c");
643                  lookupFloat[3]->push_back(0.1598630, "2a");  lookupFloat[3]->setGroup("2a");
644                  lookupFloat[4]->push_back(0.09588156, "2b");  lookupFloat[4]->setGroup("2b");
645                  
646                  lookupFloat[5]->push_back(0.933174, "2c");  lookupFloat[5]->setGroup("2c");
647                  lookupFloat[6]->push_back(0.3958304, "3a");  lookupFloat[6]->setGroup("3a");;
648                  lookupFloat[7]->push_back(0.2364419, "3b");  lookupFloat[7]->setGroup("3b");
649                  lookupFloat[8]->push_back(0.1697712, "3c");  lookupFloat[8]->setGroup("3c");
650                  lookupFloat[9]->push_back(0.4077173, "4a");  lookupFloat[9]->setGroup("4a");
651                  
652                  lookupFloat[10]->push_back(0.6116547, "4b");  lookupFloat[10]->setGroup("4b");
653                  lookupFloat[11]->push_back(0.9374322, "4c");  lookupFloat[11]->setGroup("4c");
654                  lookupFloat[12]->push_back(0.852184, "5a");  lookupFloat[12]->setGroup("5a");
655                  lookupFloat[13]->push_back(0.845094, "5b");  lookupFloat[13]->setGroup("5b");
656                  lookupFloat[14]->push_back(0.5795778, "5c");  lookupFloat[14]->setGroup("5c");
657                 
658                 
659                 //for each otu
660                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
661                 
662                         if (metadatafile == "") {  out << i+1;  }
663                         else {  out << metadataLabels[i];               }
664                         
665                         //find the ranks of this otu - Y
666                         vector<spearmanRank> otuScores;
667                         for (int j = 0; j < lookupFloat.size(); j++) {
668                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
669                                 otuScores.push_back(member);
670                         }
671                                                 
672                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
673                         
674                         map<string, float> rankOtus;
675                         vector<spearmanRank> ties;
676                         int rankTotal = 0;
677                         for (int j = 0; j < otuScores.size(); j++) {
678                                 rankTotal += (j+1);
679                                 ties.push_back(otuScores[j]);
680                                 
681                                 if (j != otuScores.size()-1) { // you are not the last so you can look ahead
682                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
683                                                 for (int k = 0; k < ties.size(); k++) {
684                                                         float thisrank = rankTotal / (float) ties.size();
685                                                         rankOtus[ties[k].name] = thisrank;
686                                                 }
687                                                 ties.clear();
688                                                 rankTotal = 0;
689                                         }
690                                 }else { // you are the last one
691                                         for (int k = 0; k < ties.size(); k++) {
692                                                 float thisrank = rankTotal / (float) ties.size();
693                                                 rankOtus[ties[k].name] = thisrank;
694                                         }
695                                 }
696                         }
697                         
698                         
699                         vector<double> pValues(numaxes);
700                         
701                         //calc spearman ranks for each axis for this otu
702                         for (int j = 0; j < numaxes; j++) {
703                         
704                                 int numCoor = 0;
705                                 int numDisCoor = 0;
706                                 
707                                 //assemble otus ranks in same order as axis ranks, if ties, put in the best order possible
708                                 //NOTE: after this ordering the scores[j] indexes may not match the otus indexes within tied sections
709                                 //since we do not use the axes ranks except to order the otu ranks, I did not take the time to reorder the axes ranks
710                                 vector<spearmanRank> otus; 
711                                 vector<spearmanRank> otusTemp;
712                                 for (int l = 0; l < scores[j].size(); l++) {   
713                                         spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
714                                         otusTemp.push_back(member);
715                                         
716                                         if (l != (scores[j].size()-1)) { // you are not the last so you can look ahead
717                                                 if (scores[j][l].score != scores[j][l+1].score) { // you are done with ties, order them and continue
718                                                         //order otus within tied section in the best way possible to make coor pairs
719                                                         sort(otusTemp.begin(), otusTemp.end(), compareSpearmanReverse);
720                                                         
721                                                         //save order in otus
722                                                         for (int h = 0; h < otusTemp.size(); h++) { ; otus.push_back(otusTemp[h]); }
723                                                         
724                                                         otusTemp.clear();
725                                                 }
726                                         }else { // you are the last one
727                                                 //order otus within tied section in the best way possible to make coor pairs
728                                                 sort(otusTemp.begin(), otusTemp.end(), compareSpearmanReverse);
729                                         
730                                                 //save order in otus
731                                                 for (int h = 0; h < otusTemp.size(); h++) {  otus.push_back(otusTemp[h]); }
732                                         }
733                                 }
734                                 
735                                 cout << "otu ranks = ";
736                                 for (int h = 0; h < otus.size(); h++ ) { cout << otus[h].score << '\t'; }
737                                 cout << endl;
738                                 
739                                 int count = 0;
740                                 for (int l = 0; l < scores[j].size(); l++) {
741                                         
742                                         int numWithHigherRank = 0;
743                                         int numWithLowerRank = 0;
744                                         float thisrank = otus[l].score;
745                                         
746                                         for (int u = l; u < scores[j].size(); u++) {
747                                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
748                                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
749                                                 count++;
750                                         }
751                                         
752                                         numCoor += numWithHigherRank;
753                                         numDisCoor += numWithLowerRank;
754                                 }
755                                 
756                                 //sample size
757                                 //int n = lookupFloat.size();
758                                 //comparing to yourself
759                                 count -= lookupFloat.size();
760                                 
761                                 //double p = ( (4 * P) / (float) ((n * (n - 1)) - numTies) ) - 1.0;
762                                 double p = (numCoor - numDisCoor) / (float) count;
763                                 cout << "numCoor = " << numCoor << " numDisCoor = " << numDisCoor << " p = " << p << " count = " << count << endl;
764                                 out << '\t' << p;
765                                 pValues[j] = p;
766
767                         }
768                         
769                         double sum = 0;
770                         for(int k=0;k<numaxes;k++){
771                                 sum += pValues[k] * pValues[k];
772                         }
773                         out << '\t' << sqrt(sum) << endl;
774                 }
775                 
776                 return 0;
777         }
778         catch(exception& e) {
779                 m->errorOut(e, "CorrAxesCommand", "calcKendall");
780                 exit(1);
781         }
782 }
783 //**********************************************************************************************************************
784 int CorrAxesCommand::getSharedFloat(InputData* input){
785         try {
786                 lookupFloat = input->getSharedRAbundFloatVectors();
787                 string lastLabel = lookupFloat[0]->getLabel();
788                 
789                 if (label == "") { label = lastLabel;  return 0; }
790                 
791                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
792                 set<string> labels; labels.insert(label);
793                 set<string> processedLabels;
794                 set<string> userLabels = labels;
795                 
796                 //as long as you are not at the end of the file or done wih the lines you want
797                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
798                         
799                         if (m->control_pressed) {  return 0;  }
800                         
801                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
802                                 processedLabels.insert(lookupFloat[0]->getLabel());
803                                 userLabels.erase(lookupFloat[0]->getLabel());
804                                 break;
805                         }
806                         
807                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
808                                 string saveLabel = lookupFloat[0]->getLabel();
809                                 
810                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
811                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
812                                 
813                                 processedLabels.insert(lookupFloat[0]->getLabel());
814                                 userLabels.erase(lookupFloat[0]->getLabel());
815                                 
816                                 //restore real lastlabel to save below
817                                 lookupFloat[0]->setLabel(saveLabel);
818                                 break;
819                         }
820                         
821                         lastLabel = lookupFloat[0]->getLabel();                 
822                         
823                         //get next line to process
824                         //prevent memory leak
825                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
826                         lookupFloat = input->getSharedRAbundFloatVectors();
827                 }
828                 
829                 
830                 if (m->control_pressed) { return 0;  }
831                 
832                 //output error messages about any remaining user labels
833                 set<string>::iterator it;
834                 bool needToRun = false;
835                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
836                         m->mothurOut("Your file does not include the label " + *it); 
837                         if (processedLabels.count(lastLabel) != 1) {
838                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
839                                 needToRun = true;
840                         }else {
841                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
842                         }
843                 }
844                 
845                 //run last label if you need to
846                 if (needToRun == true)  {
847                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
848                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
849                 }       
850                 
851                 return 0;
852         }
853         catch(exception& e) {
854                 m->errorOut(e, "CorrAxesCommand", "getSharedFloat");    
855                 exit(1);
856         }
857 }
858 //**********************************************************************************************************************
859 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
860         try {
861                 
862                 vector<SharedRAbundFloatVector*> newLookup;
863                 for (int i = 0; i < thislookup.size(); i++) {
864                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
865                         temp->setLabel(thislookup[i]->getLabel());
866                         temp->setGroup(thislookup[i]->getGroup());
867                         newLookup.push_back(temp);
868                 }
869                 
870                 //for each bin
871                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
872                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
873                         
874                         //look at each sharedRabund and make sure they are not all zero
875                         bool allZero = true;
876                         for (int j = 0; j < thislookup.size(); j++) {
877                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
878                         }
879                         
880                         //if they are not all zero add this bin
881                         if (!allZero) {
882                                 for (int j = 0; j < thislookup.size(); j++) {
883                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
884                                 }
885                         }
886                 }
887                 
888                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
889                 
890                 thislookup = newLookup;
891                 
892                 return 0;
893                 
894         }
895         catch(exception& e) {
896                 m->errorOut(e, "CorrAxesCommand", "eliminateZeroOTUS");
897                 exit(1);
898         }
899 }
900 /*****************************************************************/
901 map<string, vector<float> > CorrAxesCommand::readAxes(){
902         try {
903                 map<string, vector<float> > axes;
904                 
905                 ifstream in;
906                 m->openInputFile(axesfile, in);
907                 
908                 string headerLine = m->getline(in); m->gobble(in);
909                 
910                 //count the number of axis you are reading
911                 bool done = false;
912                 int count = 0;
913                 while (!done) {
914                         int pos = headerLine.find("axis");
915                         if (pos != string::npos) {
916                                 count++;
917                                 headerLine = headerLine.substr(pos+4);
918                         }else { done = true; }
919                 }
920                 
921                 if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
922                 
923                 while (!in.eof()) {
924                         
925                         if (m->control_pressed) { in.close(); return axes; }
926                         
927                         string group = "";
928                         in >> group; m->gobble(in);
929                         
930                         vector<float> thisGroupsAxes;
931                         for (int i = 0; i < count; i++) {
932                                 float temp = 0.0;
933                                 in >> temp; 
934                                 
935                                 //only save the axis we want
936                                 if (i < numaxes) {  thisGroupsAxes.push_back(temp); }
937                         }
938                         
939                         //save group if its one the user selected
940                         if (names.count(group) != 0) {
941                                 map<string, vector<float> >::iterator it = axes.find(group);
942                                 
943                                 if (it == axes.end()) {
944                                         axes[group] = thisGroupsAxes;
945                                 }else {
946                                         m->mothurOut(group + " is already in your axes file, using first definition."); m->mothurOutEndLine();
947                                 }
948                         }
949                         
950                         m->gobble(in);
951                 }
952                 in.close();
953                 
954                 return axes;
955         }
956         catch(exception& e) {
957                 m->errorOut(e, "CorrAxesCommand", "readAxes");  
958                 exit(1);
959         }
960 }
961 /*****************************************************************/
962 int CorrAxesCommand::getMetadata(){
963         try {
964                 vector<string> groupNames;
965                 
966                 ifstream in;
967                 m->openInputFile(axesfile, in);
968                 
969                 string headerLine = m->getline(in); m->gobble(in);
970                 istringstream iss (headerLine,istringstream::in);
971                 
972                 //read the first label, because it refers to the groups
973                 string columnLabel;
974                 iss >> columnLabel; m->gobble(iss); 
975                 
976                 //save names of columns you are reading
977                 while (!iss.eof()) {
978                         iss >> columnLabel; m->gobble(iss);
979                         metadataLabels.push_back(columnLabel);
980                 }
981                 int count = metadataLabels.size();
982                 
983                 //read rest of file
984                 while (!in.eof()) {
985                         
986                         if (m->control_pressed) { in.close(); return 0; }
987                         
988                         string group = "";
989                         in >> group; m->gobble(in);
990                         groupNames.push_back(group);
991                         
992                         SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
993                         tempLookup->setGroup(group);
994                         tempLookup->setLabel("1");
995                         
996                         for (int i = 0; i < count; i++) {
997                                 float temp = 0.0;
998                                 in >> temp; 
999                                 
1000                                 tempLookup->push_back(temp, group);
1001                         }
1002                         
1003                         lookupFloat.push_back(tempLookup);
1004                         
1005                         m->gobble(in);
1006                 }
1007                 in.close();
1008                 
1009                 //remove any groups the user does not want, and set globaldata->groups with only valid groups
1010                 SharedUtil* util;
1011                 util = new SharedUtil();
1012                 
1013                 util->setGroups(globaldata->Groups, groupNames);
1014                 
1015                 for (int i = 0; i < lookupFloat.size(); i++) {
1016                         //if this sharedrabund is not from a group the user wants then delete it.
1017                         if (util->isValidGroup(lookupFloat[i]->getGroup(), globaldata->Groups) == false) { 
1018                                 delete lookupFloat[i]; lookupFloat[i] = NULL;
1019                                 lookupFloat.erase(lookupFloat.begin()+i); 
1020                                 i--; 
1021                         }
1022                 }
1023                 
1024                 delete util;
1025                 
1026                 return 0;
1027         }
1028         catch(exception& e) {
1029                 m->errorOut(e, "CorrAxesCommand", "getMetadata");       
1030                 exit(1);
1031         }
1032 }
1033 /*****************************************************************/
1034
1035
1036
1037
1038
1039