]> git.donarmstrong.com Git - mothur.git/blob - corraxescommand.cpp
changes while testing
[mothur.git] / corraxescommand.cpp
1 /*
2  *  corraxescommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "corraxescommand.h"
11 #include "sharedutilities.h"
12 #include "linearalgebra.h"
13
14 //**********************************************************************************************************************
15 vector<string> CorrAxesCommand::setParameters(){        
16         try {
17                 CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none","corraxes",false,true,true); parameters.push_back(paxes);
18                 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(pshared);
19                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(prelabund);
20                 CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false); parameters.push_back(pmetadata);
21                 CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pnumaxes);
22                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24                 CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "","",false,false); parameters.push_back(pmethod);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "CorrAxesCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string CorrAxesCommand::getHelpString(){        
39         try {
40                 string helpString = "";
41                 helpString += "The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n";
42                 helpString += "The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label.  The shared, relabund or metadata and axes parameters are required.  If shared is given the relative abundance is calculated.\n";
43                 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
44                 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
45                 helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
46                 helpString += "The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n";
47                 helpString += "The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n";
48                 helpString += "Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n";
49                 helpString += "The corr.axes command outputs a .corr.axes file.\n";
50                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
51                 return helpString;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "CorrAxesCommand", "getHelpString");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 string CorrAxesCommand::getOutputPattern(string type) {
60     try {
61         string pattern = "";
62         
63         if (type == "corraxes") {  pattern = "[filename],[tag],corr.axes"; }
64         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
65         
66         return pattern;
67     }
68     catch(exception& e) {
69         m->errorOut(e, "CorrAxesCommand", "getOutputPattern");
70         exit(1);
71     }
72 }
73
74 //**********************************************************************************************************************
75 CorrAxesCommand::CorrAxesCommand(){     
76         try {
77                 abort = true; calledHelp = true; 
78                 setParameters();
79                 vector<string> tempOutNames;
80                 outputTypes["corraxes"] = tempOutNames;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 CorrAxesCommand::CorrAxesCommand(string option)  {
89         try {
90                 abort = false; calledHelp = false;   
91                 
92                 //allow user to run help
93                 if(option == "help") { help(); abort = true; calledHelp = true; }
94                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
95                 
96                 else {
97                         vector<string> myArray = setParameters();
98                         
99                         OptionParser parser(option);
100                         map<string, string> parameters = parser.getParameters();
101                         
102                         ValidParameters validParameter;
103                         map<string, string>::iterator it;
104                         
105                         //check to make sure all parameters are valid for command
106                         for (it = parameters.begin(); it != parameters.end(); it++) { 
107                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
108                         }
109                         
110                         vector<string> tempOutNames;
111                         outputTypes["corraxes"] = tempOutNames;
112                         
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("axes");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
124                                 }
125                                 
126                                 it = parameters.find("shared");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
132                                 }
133                                 
134                                 it = parameters.find("relabund");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
140                                 }
141                                 
142                                 it = parameters.find("metadata");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
148                                 }
149                         }
150                         
151                         
152                         //check for required parameters
153                         axesfile = validParameter.validFile(parameters, "axes", true);
154                         if (axesfile == "not open") { abort = true; }
155                         else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command."); m->mothurOutEndLine(); abort = true;  }     
156                         
157                         sharedfile = validParameter.validFile(parameters, "shared", true);
158                         if (sharedfile == "not open") { abort = true; }
159                         else if (sharedfile == "not found") { sharedfile = ""; }
160                         else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
161                         
162                         relabundfile = validParameter.validFile(parameters, "relabund", true);
163                         if (relabundfile == "not open") { abort = true; }
164                         else if (relabundfile == "not found") { relabundfile = ""; }
165                         else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
166                         
167                         metadatafile = validParameter.validFile(parameters, "metadata", true);
168                         if (metadatafile == "not open") { abort = true; }
169                         else if (metadatafile == "not found") { metadatafile = ""; }
170                         else { inputFileName = metadatafile; }
171                         
172                         groups = validParameter.validFile(parameters, "groups", false);                 
173                         if (groups == "not found") { groups = "";  pickedGroups = false;  }
174                         else { 
175                                 pickedGroups = true;
176                                 m->splitAtDash(groups, Groups); 
177                         }                       
178                         m->setGroups(Groups);
179                         
180                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputFileName);  }
181                         
182                         label = validParameter.validFile(parameters, "label", false);                   
183                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
184                         
185                         if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) { 
186                                 //is there are current file available for any of these?
187                                 //give priority to shared, then relabund
188                                 //if there is a current shared file, use it
189                                 sharedfile = m->getSharedFile(); 
190                                 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
191                                 else { 
192                                         relabundfile = m->getRelAbundFile(); 
193                                         if (relabundfile != "") { inputFileName = relabundfile;  m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
194                                         else { 
195                                                 m->mothurOut("You must provide either a shared, relabund, or metadata file."); m->mothurOutEndLine(); abort = true; 
196                                         }
197                                 }
198                         }       
199                         
200                         if (metadatafile != "") {
201                                 if ((relabundfile != "") || (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
202                         }else {
203                                 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
204                         }
205                         string temp;
206                         temp = validParameter.validFile(parameters, "numaxes", false);          if (temp == "not found"){       temp = "3";                             }
207                         m->mothurConvert(temp, numaxes); 
208                         
209                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "pearson";             }
210                         
211                         if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
212                         
213                 }
214         }
215         catch(exception& e) {
216                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");           
217                 exit(1);
218         }
219 }
220 //**********************************************************************************************************************
221
222 int CorrAxesCommand::execute(){
223         try {
224                 
225                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
226                 
227                 /*************************************************************************************/
228                 // use smart distancing to get right sharedRabund and convert to relabund if needed  //
229                 /************************************************************************************/
230                 if (sharedfile != "") {  
231                         InputData* input = new InputData(sharedfile, "sharedfile");
232                         getSharedFloat(input); 
233                         delete input;
234                         
235                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
236                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
237                         
238                 }else if (relabundfile != "") { 
239                         InputData* input = new InputData(relabundfile, "relabund");
240                         getSharedFloat(input); 
241                         delete input;
242                         
243                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
244                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
245                         
246                 }else if (metadatafile != "") { 
247                         getMetadata();  //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later
248                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
249                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading metadata file."); m->mothurOutEndLine(); return 0; }
250                         
251                         if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
252                         
253                 }else { m->mothurOut("[ERROR]: no file given."); m->mothurOutEndLine(); return 0; }
254                 
255                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
256                 
257                 //this is for a sanity check to make sure the axes file and shared file match
258                 for (int i = 0; i < lookupFloat.size(); i++) { names.insert(lookupFloat[i]->getGroup()); }
259                 
260                 /*************************************************************************************/
261                 // read axes file and check for file mismatches with shared or relabund file         //
262                 /************************************************************************************/
263                 
264                 //read axes file
265                 map<string, vector<float> > axes = readAxes();
266                 
267                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
268                 
269                 //sanity check, the read only adds groups that are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone
270                 if (axes.size() != lookupFloat.size()) { 
271                         map<string, vector<float> >::iterator it;
272                         for (int i = 0; i < lookupFloat.size(); i++) {
273                                 it = axes.find(lookupFloat[i]->getGroup());
274                                 if (it == axes.end()) { m->mothurOut(lookupFloat[i]->getGroup() + " is in your shared of relabund file but not in your axes file, please correct."); m->mothurOutEndLine(); }
275                         }
276                         m->control_pressed = true;
277                 }
278                 
279                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
280                 
281                 /*************************************************************************************/
282                 // calc the r values                                                                                                                            //
283                 /************************************************************************************/
284         map<string, string> variables; 
285         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileName));
286         variables["[tag]"] = method;
287                 string outputFileName = getOutputFileName("corraxes", variables);
288                 outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);       
289                 ofstream out;
290                 m->openOutputFile(outputFileName, out);
291                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
292                 
293                 //output headings
294                 if (metadatafile == "") {  out << "OTU";        }
295                 else {  out << "Feature";                                               }
296
297                 for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1) << "\tp-value"; }
298                 out << "\tlength" << endl;
299                 
300                 if (method == "pearson")                {  calcPearson(axes, out);      }
301                 else if (method == "spearman")  {  calcSpearman(axes, out); }
302                 else if (method == "kendall")   {  calcKendall(axes, out);      }
303                 else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
304                 
305                 out.close();
306                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
307                 
308                 if (m->control_pressed) {  return 0; }
309
310                 m->mothurOutEndLine();
311                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
312                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
313                 m->mothurOutEndLine();
314                 
315                 return 0;
316         }
317         catch(exception& e) {
318                 m->errorOut(e, "CorrAxesCommand", "execute");   
319                 exit(1);
320         }
321 }
322 //**********************************************************************************************************************
323 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
324    try {
325            
326        LinearAlgebra linear;
327        
328            //find average of each axis - X
329            vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
330            for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
331                    vector<float> temp = it->second;
332                    for (int i = 0; i < temp.size(); i++) {
333                            averageAxes[i] += temp[i];  
334                    }
335            }
336            
337            for (int i = 0; i < averageAxes.size(); i++) {  averageAxes[i] = averageAxes[i] / (float) axes.size(); }
338            
339            //for each otu
340            for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
341                    
342                    if (metadatafile == "") {  out << m->currentBinLabels[i];    }
343                    else {  out << metadataLabels[i];            }
344                                    
345                    //find the averages this otu - Y
346                    float sumOtu = 0.0;
347                    for (int j = 0; j < lookupFloat.size(); j++) {
348                            sumOtu += lookupFloat[j]->getAbundance(i);
349                    }
350                    float Ybar = sumOtu / (float) lookupFloat.size();
351                    
352                    vector<float> rValues(averageAxes.size());
353
354                    //find r value for each axis
355                    for (int k = 0; k < averageAxes.size(); k++) {
356                            
357                            double r = 0.0;
358                            double numerator = 0.0;
359                            double denomTerm1 = 0.0;
360                            double denomTerm2 = 0.0;
361                            for (int j = 0; j < lookupFloat.size(); j++) {
362                                    float Yi = lookupFloat[j]->getAbundance(i);
363                                    float Xi = axes[lookupFloat[j]->getGroup()][k];
364                                    
365                                    numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
366                                    denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
367                                    denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
368                            }
369                            
370                            double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
371                            
372                            r = numerator / denom;
373                
374                if (isnan(r) || isinf(r)) { r = 0.0; }
375                
376                            rValues[k] = r;
377                            out << '\t' << r; 
378                
379                double sig = linear.calcPearsonSig(lookupFloat.size(), r);
380                
381                out << '\t' << sig;
382                    }
383                    
384                    double sum = 0;
385                    for(int k=0;k<rValues.size();k++){
386                            sum += rValues[k] * rValues[k];
387                    }
388                    out << '\t' << sqrt(sum) << endl;
389            }
390                    
391            return 0;
392    }
393    catch(exception& e) {
394            m->errorOut(e, "CorrAxesCommand", "calcPearson");
395            exit(1);
396    }
397 }
398 //**********************************************************************************************************************
399 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
400         try {
401                 
402         LinearAlgebra linear;
403         vector<double> sf; 
404         
405                 //format data
406                 vector< map<float, int> > tableX; tableX.resize(numaxes);
407                 map<float, int>::iterator itTable;
408                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
409                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
410                         vector<float> temp = it->second;
411                         for (int i = 0; i < temp.size(); i++) {
412                                 spearmanRank member(it->first, temp[i]);
413                                 scores[i].push_back(member);  
414                                 
415                                 //count number of repeats
416                                 itTable = tableX[i].find(temp[i]);
417                                 if (itTable == tableX[i].end()) { 
418                                         tableX[i][temp[i]] = 1;
419                                 }else {
420                                         tableX[i][temp[i]]++;
421                                 }
422                         }
423                 }
424                 
425                 //calc LX
426                 //for each axis
427                 vector<double> Lx; Lx.resize(numaxes, 0.0);
428                 for (int i = 0; i < numaxes; i++) {
429                         for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) {
430                                 double tx = (double) itTable->second;
431                                 Lx[i] += ((pow(tx, 3.0) - tx) / 12.0);
432                         }
433                 }
434                 
435                 //sort each axis
436                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
437                 
438                 //find ranks of xi in each axis
439                 map<string, vector<float> > rankAxes;
440                 for (int i = 0; i < numaxes; i++) {
441                         
442                         vector<spearmanRank> ties;
443                         int rankTotal = 0;
444             double sfTemp = 0.0;
445                         for (int j = 0; j < scores[i].size(); j++) {
446                                 rankTotal += (j+1);
447                                 ties.push_back(scores[i][j]);
448                                 
449                                 if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead
450                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
451
452                                                 for (int k = 0; k < ties.size(); k++) {
453                                                         float thisrank = rankTotal / (float) ties.size();
454                                                         rankAxes[ties[k].name].push_back(thisrank);
455                                                 }
456                         int t = ties.size();
457                         sfTemp += (t*t*t-t);
458                                                 ties.clear();
459                                                 rankTotal = 0;
460                                         }
461                                 }else { // you are the last one
462                                         
463                                         for (int k = 0; k < ties.size(); k++) {
464                                                 float thisrank = rankTotal / (float) ties.size();
465                                                 rankAxes[ties[k].name].push_back(thisrank);
466                                                 
467                                         }
468                                 }
469                         }
470             sf.push_back(sfTemp);
471                 }
472                 
473                                 
474                 //for each otu
475                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
476                         
477                         if (metadatafile == "") {  out << m->currentBinLabels[i];       }
478                         else {  out << metadataLabels[i];               }
479                         
480                         //find the ranks of this otu - Y
481                         vector<spearmanRank> otuScores;
482                         map<float, int> tableY;
483                         for (int j = 0; j < lookupFloat.size(); j++) {
484                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
485                                 otuScores.push_back(member);
486                                 
487                                 itTable = tableY.find(member.score);
488                                 if (itTable == tableY.end()) { 
489                                         tableY[member.score] = 1;
490                                 }else {
491                                         tableY[member.score]++;
492                                 }
493                                 
494                         }
495                         
496                         //calc Ly
497                         double Ly = 0.0;
498                         for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
499                                 double ty = (double) itTable->second;
500                                 Ly += ((pow(ty, 3.0) - ty) / 12.0);
501                         }
502                         
503                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
504                         
505             double sg = 0.0;
506                         map<string, float> rankOtus;
507                         vector<spearmanRank> ties;
508                         int rankTotal = 0;
509                         for (int j = 0; j < otuScores.size(); j++) {
510                                 rankTotal += (j+1);
511                                 ties.push_back(otuScores[j]);
512                                 
513                                 if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead
514                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
515                                                 
516                                                 for (int k = 0; k < ties.size(); k++) {
517                                                         float thisrank = rankTotal / (float) ties.size();
518                                                         rankOtus[ties[k].name] = thisrank;
519                                                 }
520                         int t = ties.size();
521                         sg += (t*t*t-t);
522                                                 ties.clear();
523                                                 rankTotal = 0;
524                                         }
525                                 }else { // you are the last one
526                                         
527                                         for (int k = 0; k < ties.size(); k++) {
528                                                 float thisrank = rankTotal / (float) ties.size();
529                                                 rankOtus[ties[k].name] = thisrank;
530                                         }
531                                 }
532                         }
533                         vector<double> pValues(numaxes);        
534
535                         //calc spearman ranks for each axis for this otu
536                         for (int j = 0; j < numaxes; j++) {
537                                 
538                                 double di = 0.0;
539                                 for (int k = 0; k < lookupFloat.size(); k++) {
540                                         
541                                         float xi = rankAxes[lookupFloat[k]->getGroup()][j];
542                                         float yi = rankOtus[lookupFloat[k]->getGroup()];
543                                         
544                                         di += ((xi - yi) * (xi - yi));
545                                 }
546                                 
547                                 double p = 0.0;
548                                 
549                                 double n = (double) lookupFloat.size();
550                                 
551                                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j];
552                                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
553                                 
554                                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
555                                 
556                 if (isnan(p) || isinf(p)) { p = 0.0; }
557                 
558                                 out  << '\t' << p;
559                                 
560                                 pValues[j] = p;
561                 
562                 double sig = linear.calcSpearmanSig(n, sf[j], sg, di);            
563                 out  << '\t' << sig;
564                 
565                         }
566
567                         double sum = 0;
568                         for(int k=0;k<numaxes;k++){
569                                 sum += pValues[k] * pValues[k];
570                         }
571                         out << '\t' << sqrt(sum) << endl;
572                 }
573                 
574                 return 0;
575         }
576         catch(exception& e) {
577                 m->errorOut(e, "CorrAxesCommand", "calcSpearman");
578                 exit(1);
579         }
580 }
581 //**********************************************************************************************************************
582 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
583         try {
584                 
585         LinearAlgebra linear;
586         
587                 //format data
588                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
589                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
590                         vector<float> temp = it->second;
591                         for (int i = 0; i < temp.size(); i++) {
592                                 spearmanRank member(it->first, temp[i]);
593                                 scores[i].push_back(member);  
594                         }
595                 }
596                 
597                 //sort each axis
598                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
599                 
600                 //convert scores to ranks of xi in each axis
601                 for (int i = 0; i < numaxes; i++) {
602                         
603                         vector<spearmanRank*> ties;
604                         int rankTotal = 0;
605                         for (int j = 0; j < scores[i].size(); j++) {
606                                 rankTotal += (j+1);
607                                 ties.push_back(&(scores[i][j]));
608                                 
609                                 if (j != scores[i].size()-1) { // you are not the last so you can look ahead
610                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
611                                                 for (int k = 0; k < ties.size(); k++) {
612                                                         float thisrank = rankTotal / (float) ties.size();
613                                                         (*ties[k]).score = thisrank;
614                                                 }
615                                                 ties.clear();
616                                                 rankTotal = 0;
617                                         }
618                                 }else { // you are the last one
619                                         for (int k = 0; k < ties.size(); k++) {
620                                                 float thisrank = rankTotal / (float) ties.size();
621                                                 (*ties[k]).score = thisrank;
622                                         }
623                                 }
624                         }
625                 }
626                 
627                 //for each otu
628                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
629                 
630                         if (metadatafile == "") {  out << m->currentBinLabels[i];       }
631                         else {  out << metadataLabels[i];               }
632                         
633                         //find the ranks of this otu - Y
634                         vector<spearmanRank> otuScores;
635                         for (int j = 0; j < lookupFloat.size(); j++) {
636                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
637                                 otuScores.push_back(member);
638                         }
639                                                 
640                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
641                         
642                         map<string, float> rankOtus;
643                         vector<spearmanRank> ties;
644                         int rankTotal = 0;
645                         for (int j = 0; j < otuScores.size(); j++) {
646                                 rankTotal += (j+1);
647                                 ties.push_back(otuScores[j]);
648                                 
649                                 if (j != otuScores.size()-1) { // you are not the last so you can look ahead
650                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
651                                                 for (int k = 0; k < ties.size(); k++) {
652                                                         float thisrank = rankTotal / (float) ties.size();
653                                                         rankOtus[ties[k].name] = thisrank;
654                                                 }
655                                                 ties.clear();
656                                                 rankTotal = 0;
657                                         }
658                                 }else { // you are the last one
659                                         for (int k = 0; k < ties.size(); k++) {
660                                                 float thisrank = rankTotal / (float) ties.size();
661                                                 rankOtus[ties[k].name] = thisrank;
662                                         }
663                                 }
664                         }
665                         
666                         
667                         vector<double> pValues(numaxes);
668                         
669                         //calc spearman ranks for each axis for this otu
670                         for (int j = 0; j < numaxes; j++) {
671                         
672                                 int numCoor = 0;
673                                 int numDisCoor = 0;
674                                 
675                                 vector<spearmanRank> otus; 
676                                 vector<spearmanRank> otusTemp;
677                                 for (int l = 0; l < scores[j].size(); l++) {   
678                                         spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
679                                         otus.push_back(member);
680                                 }
681                                 
682                                 int count = 0;
683                                 for (int l = 0; l < scores[j].size(); l++) {
684                                         
685                                         int numWithHigherRank = 0;
686                                         int numWithLowerRank = 0;
687                                         float thisrank = otus[l].score;
688                                         
689                                         for (int u = l+1; u < scores[j].size(); u++) {
690                                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
691                                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
692                                                 count++;
693                                         }
694                                         
695                                         numCoor += numWithHigherRank;
696                                         numDisCoor += numWithLowerRank;
697                                 }
698                                 
699                                 double p = (numCoor - numDisCoor) / (float) count;
700                  if (isnan(p) || isinf(p)) { p = 0.0; }
701                 
702                                 out << '\t' << p;
703                                 pValues[j] = p;
704                 
705                 double sig = linear.calcKendallSig(scores[j].size(), p);
706                 
707                 out << '\t' << sig;
708                         }
709                         
710                         double sum = 0;
711                         for(int k=0;k<numaxes;k++){
712                                 sum += pValues[k] * pValues[k];
713                         }
714                         out << '\t' << sqrt(sum) << endl;
715                 }
716                 
717                 return 0;
718         }
719         catch(exception& e) {
720                 m->errorOut(e, "CorrAxesCommand", "calcKendall");
721                 exit(1);
722         }
723 }
724 //**********************************************************************************************************************
725 int CorrAxesCommand::getSharedFloat(InputData* input){
726         try {
727                 lookupFloat = input->getSharedRAbundFloatVectors();
728                 string lastLabel = lookupFloat[0]->getLabel();
729                 
730                 if (label == "") { label = lastLabel;  return 0; }
731                 
732                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
733                 set<string> labels; labels.insert(label);
734                 set<string> processedLabels;
735                 set<string> userLabels = labels;
736                 
737                 //as long as you are not at the end of the file or done wih the lines you want
738                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
739                         
740                         if (m->control_pressed) {  return 0;  }
741                         
742                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
743                                 processedLabels.insert(lookupFloat[0]->getLabel());
744                                 userLabels.erase(lookupFloat[0]->getLabel());
745                                 break;
746                         }
747                         
748                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
749                                 string saveLabel = lookupFloat[0]->getLabel();
750                                 
751                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
752                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
753                                 
754                                 processedLabels.insert(lookupFloat[0]->getLabel());
755                                 userLabels.erase(lookupFloat[0]->getLabel());
756                                 
757                                 //restore real lastlabel to save below
758                                 lookupFloat[0]->setLabel(saveLabel);
759                                 break;
760                         }
761                         
762                         lastLabel = lookupFloat[0]->getLabel();                 
763                         
764                         //get next line to process
765                         //prevent memory leak
766                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
767                         lookupFloat = input->getSharedRAbundFloatVectors();
768                 }
769                 
770                 
771                 if (m->control_pressed) { return 0;  }
772                 
773                 //output error messages about any remaining user labels
774                 set<string>::iterator it;
775                 bool needToRun = false;
776                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
777                         m->mothurOut("Your file does not include the label " + *it); 
778                         if (processedLabels.count(lastLabel) != 1) {
779                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
780                                 needToRun = true;
781                         }else {
782                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
783                         }
784                 }
785                 
786                 //run last label if you need to
787                 if (needToRun == true)  {
788                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
789                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
790                 }       
791                 
792                 return 0;
793         }
794         catch(exception& e) {
795                 m->errorOut(e, "CorrAxesCommand", "getSharedFloat");    
796                 exit(1);
797         }
798 }
799 //**********************************************************************************************************************
800 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
801         try {
802                 
803                 vector<SharedRAbundFloatVector*> newLookup;
804                 for (int i = 0; i < thislookup.size(); i++) {
805                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
806                         temp->setLabel(thislookup[i]->getLabel());
807                         temp->setGroup(thislookup[i]->getGroup());
808                         newLookup.push_back(temp);
809                 }
810                 
811                 //for each bin
812                 vector<string> newBinLabels;
813                 string snumBins = toString(thislookup[0]->getNumBins());
814                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
815                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
816                         
817                         //look at each sharedRabund and make sure they are not all zero
818                         bool allZero = true;
819                         for (int j = 0; j < thislookup.size(); j++) {
820                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
821                         }
822                         
823                         //if they are not all zero add this bin
824                         if (!allZero) {
825                                 for (int j = 0; j < thislookup.size(); j++) {
826                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
827                                 }
828                                 
829                                 //if there is a bin label use it otherwise make one
830                                 string binLabel = "Otu";
831                                 string sbinNumber = toString(i+1);
832                                 if (sbinNumber.length() < snumBins.length()) { 
833                                         int diff = snumBins.length() - sbinNumber.length();
834                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
835                                 }
836                                 binLabel += sbinNumber; 
837                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
838                                 
839                                 newBinLabels.push_back(binLabel);
840                         }
841                 }
842                 
843                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
844                 
845                 thislookup = newLookup;
846                 m->currentBinLabels = newBinLabels;
847                 
848                 return 0;
849                 
850         }
851         catch(exception& e) {
852                 m->errorOut(e, "CorrAxesCommand", "eliminateZeroOTUS");
853                 exit(1);
854         }
855 }
856 /*****************************************************************/
857 map<string, vector<float> > CorrAxesCommand::readAxes(){
858         try {
859                 map<string, vector<float> > axes;
860                 
861                 ifstream in;
862                 m->openInputFile(axesfile, in);
863                 
864                 string headerLine = m->getline(in); m->gobble(in);
865                 
866                 //count the number of axis you are reading
867                 bool done = false;
868                 int count = 0;
869                 while (!done) {
870                         int pos = headerLine.find("axis");
871                         if (pos != string::npos) {
872                                 count++;
873                                 headerLine = headerLine.substr(pos+4);
874                         }else { done = true; }
875                 }
876                 
877                 if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
878                 
879                 while (!in.eof()) {
880                         
881                         if (m->control_pressed) { in.close(); return axes; }
882                         
883                         string group = "";
884                         in >> group; m->gobble(in);
885                         
886                         vector<float> thisGroupsAxes;
887                         for (int i = 0; i < count; i++) {
888                                 float temp = 0.0;
889                                 in >> temp; 
890                                 
891                                 //only save the axis we want
892                                 if (i < numaxes) {  thisGroupsAxes.push_back(temp); }
893                         }
894                         
895                         //save group if its one the user selected
896                         if (names.count(group) != 0) {
897                                 map<string, vector<float> >::iterator it = axes.find(group);
898                                 
899                                 if (it == axes.end()) {
900                                         axes[group] = thisGroupsAxes;
901                                 }else {
902                                         m->mothurOut(group + " is already in your axes file, using first definition."); m->mothurOutEndLine();
903                                 }
904                         }
905                         
906                         m->gobble(in);
907                 }
908                 in.close();
909                 
910                 return axes;
911         }
912         catch(exception& e) {
913                 m->errorOut(e, "CorrAxesCommand", "readAxes");  
914                 exit(1);
915         }
916 }
917 /*****************************************************************/
918 int CorrAxesCommand::getMetadata(){
919         try {
920                 vector<string> groupNames;
921                 
922                 ifstream in;
923                 m->openInputFile(metadatafile, in);
924                 
925                 string headerLine = m->getline(in); m->gobble(in);
926                 vector<string> pieces = m->splitWhiteSpace(headerLine);
927                 
928                 //save names of columns you are reading
929                 for (int i = 1; i < pieces.size(); i++) {
930                         metadataLabels.push_back(pieces[i]);
931                 }
932                 int count = metadataLabels.size();
933                         
934                 //read rest of file
935                 while (!in.eof()) {
936                         
937                         if (m->control_pressed) { in.close(); return 0; }
938                         
939                         string group = "";
940                         in >> group; m->gobble(in);
941                         groupNames.push_back(group);
942                                 
943                         SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
944                         tempLookup->setGroup(group);
945                         tempLookup->setLabel("1");
946                         
947                         for (int i = 0; i < count; i++) {
948                                 float temp = 0.0;
949                                 in >> temp; 
950                                 tempLookup->push_back(temp, group);
951                         }
952                         
953                         lookupFloat.push_back(tempLookup);
954                         
955                         m->gobble(in);
956                 }
957                 in.close();
958                 
959                 //remove any groups the user does not want, and set globaldata->groups with only valid groups
960                 SharedUtil* util;
961                 util = new SharedUtil();
962                 Groups = m->getGroups();
963                 util->setGroups(Groups, groupNames);
964                 m->setGroups(Groups);
965                 
966                 for (int i = 0; i < lookupFloat.size(); i++) {
967                         //if this sharedrabund is not from a group the user wants then delete it.
968                         if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) { 
969                                 delete lookupFloat[i]; lookupFloat[i] = NULL;
970                                 lookupFloat.erase(lookupFloat.begin()+i); 
971                                 i--; 
972                         }
973                 }
974                 
975                 delete util;
976                 
977                 return 0;
978         }
979         catch(exception& e) {
980                 m->errorOut(e, "CorrAxesCommand", "getMetadata");       
981                 exit(1);
982         }
983 }
984 /*****************************************************************/
985
986
987
988
989
990