]> git.donarmstrong.com Git - mothur.git/blob - corraxescommand.cpp
added significance scores to core.axes. added design file to get.groups and remove...
[mothur.git] / corraxescommand.cpp
1 /*
2  *  corraxescommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "corraxescommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> CorrAxesCommand::setParameters(){        
15         try {
16                 CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paxes);
17                 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
18                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
19                 CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
20                 CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "",false,false); parameters.push_back(pnumaxes);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23                 CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "CorrAxesCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string CorrAxesCommand::getHelpString(){        
38         try {
39                 string helpString = "";
40                 helpString += "The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n";
41                 helpString += "The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label.  The shared, relabund or metadata and axes parameters are required.  If shared is given the relative abundance is calculated.\n";
42                 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
43                 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
44                 helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
45                 helpString += "The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n";
46                 helpString += "The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n";
47                 helpString += "Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n";
48                 helpString += "The corr.axes command outputs a .corr.axes file.\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "CorrAxesCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 CorrAxesCommand::CorrAxesCommand(){     
59         try {
60                 abort = true; calledHelp = true; 
61                 setParameters();
62                 vector<string> tempOutNames;
63                 outputTypes["corr.axes"] = tempOutNames;
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
67                 exit(1);
68         }
69 }
70 //**********************************************************************************************************************
71 CorrAxesCommand::CorrAxesCommand(string option)  {
72         try {
73                 abort = false; calledHelp = false;   
74                 
75                 //allow user to run help
76                 if(option == "help") { help(); abort = true; calledHelp = true; }
77                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
78                 
79                 else {
80                         vector<string> myArray = setParameters();
81                         
82                         OptionParser parser(option);
83                         map<string, string> parameters = parser.getParameters();
84                         
85                         ValidParameters validParameter;
86                         map<string, string>::iterator it;
87                         
88                         //check to make sure all parameters are valid for command
89                         for (it = parameters.begin(); it != parameters.end(); it++) { 
90                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
91                         }
92                         
93                         vector<string> tempOutNames;
94                         outputTypes["corr.axes"] = tempOutNames;
95                         
96                         //if the user changes the input directory command factory will send this info to us in the output parameter 
97                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
98                         if (inputDir == "not found"){   inputDir = "";          }
99                         else {
100                                 string path;
101                                 it = parameters.find("axes");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
107                                 }
108                                 
109                                 it = parameters.find("shared");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
115                                 }
116                                 
117                                 it = parameters.find("relabund");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
123                                 }
124                                 
125                                 it = parameters.find("metadata");
126                                 //user has given a template file
127                                 if(it != parameters.end()){ 
128                                         path = m->hasPath(it->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
131                                 }
132                         }
133                         
134                         
135                         //check for required parameters
136                         axesfile = validParameter.validFile(parameters, "axes", true);
137                         if (axesfile == "not open") { abort = true; }
138                         else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command."); m->mothurOutEndLine(); abort = true;  }     
139                         
140                         sharedfile = validParameter.validFile(parameters, "shared", true);
141                         if (sharedfile == "not open") { abort = true; }
142                         else if (sharedfile == "not found") { sharedfile = ""; }
143                         else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
144                         
145                         relabundfile = validParameter.validFile(parameters, "relabund", true);
146                         if (relabundfile == "not open") { abort = true; }
147                         else if (relabundfile == "not found") { relabundfile = ""; }
148                         else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
149                         
150                         metadatafile = validParameter.validFile(parameters, "metadata", true);
151                         if (metadatafile == "not open") { abort = true; }
152                         else if (metadatafile == "not found") { metadatafile = ""; }
153                         else { inputFileName = metadatafile; }
154                         
155                         groups = validParameter.validFile(parameters, "groups", false);                 
156                         if (groups == "not found") { groups = "";  pickedGroups = false;  }
157                         else { 
158                                 pickedGroups = true;
159                                 m->splitAtDash(groups, Groups); 
160                         }                       
161                         m->setGroups(Groups);
162                         
163                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputFileName);  }
164                         
165                         label = validParameter.validFile(parameters, "label", false);                   
166                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
167                         
168                         if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) { 
169                                 //is there are current file available for any of these?
170                                 //give priority to shared, then relabund
171                                 //if there is a current shared file, use it
172                                 sharedfile = m->getSharedFile(); 
173                                 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
174                                 else { 
175                                         relabundfile = m->getRelAbundFile(); 
176                                         if (relabundfile != "") { inputFileName = relabundfile;  m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
177                                         else { 
178                                                 m->mothurOut("You must provide either a shared, relabund, or metadata file."); m->mothurOutEndLine(); abort = true; 
179                                         }
180                                 }
181                         }       
182                         
183                         if (metadatafile != "") {
184                                 if ((relabundfile != "") || (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
185                         }else {
186                                 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
187                         }
188                         string temp;
189                         temp = validParameter.validFile(parameters, "numaxes", false);          if (temp == "not found"){       temp = "3";                             }
190                         m->mothurConvert(temp, numaxes); 
191                         
192                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "pearson";             }
193                         
194                         if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
195                         
196                 }
197         }
198         catch(exception& e) {
199                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");           
200                 exit(1);
201         }
202 }
203 //**********************************************************************************************************************
204
205 int CorrAxesCommand::execute(){
206         try {
207                 
208                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
209                 
210                 /*************************************************************************************/
211                 // use smart distancing to get right sharedRabund and convert to relabund if needed  //
212                 /************************************************************************************/
213                 if (sharedfile != "") {  
214                         InputData* input = new InputData(sharedfile, "sharedfile");
215                         getSharedFloat(input); 
216                         delete input;
217                         
218                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
219                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
220                         
221                 }else if (relabundfile != "") { 
222                         InputData* input = new InputData(relabundfile, "relabund");
223                         getSharedFloat(input); 
224                         delete input;
225                         
226                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
227                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
228                         
229                 }else if (metadatafile != "") { 
230                         getMetadata();  //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later
231                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
232                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading metadata file."); m->mothurOutEndLine(); return 0; }
233                         
234                         if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
235                         
236                 }else { m->mothurOut("[ERROR]: no file given."); m->mothurOutEndLine(); return 0; }
237                 
238                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
239                 
240                 //this is for a sanity check to make sure the axes file and shared file match
241                 for (int i = 0; i < lookupFloat.size(); i++) { names.insert(lookupFloat[i]->getGroup()); }
242                 
243                 /*************************************************************************************/
244                 // read axes file and check for file mismatches with shared or relabund file         //
245                 /************************************************************************************/
246                 
247                 //read axes file
248                 map<string, vector<float> > axes = readAxes();
249                 
250                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
251                 
252                 //sanity check, the read only adds groups that are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone
253                 if (axes.size() != lookupFloat.size()) { 
254                         map<string, vector<float> >::iterator it;
255                         for (int i = 0; i < lookupFloat.size(); i++) {
256                                 it = axes.find(lookupFloat[i]->getGroup());
257                                 if (it == axes.end()) { m->mothurOut(lookupFloat[i]->getGroup() + " is in your shared of relabund file but not in your axes file, please correct."); m->mothurOutEndLine(); }
258                         }
259                         m->control_pressed = true;
260                 }
261                 
262                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
263                 
264                 /*************************************************************************************/
265                 // calc the r values                                                                                                                            //
266                 /************************************************************************************/
267                 
268                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
269                 outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);      
270                 ofstream out;
271                 m->openOutputFile(outputFileName, out);
272                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
273                 
274                 //output headings
275                 if (metadatafile == "") {  out << "OTU";        }
276                 else {  out << "Feature";                                               }
277
278                 for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1) << "\tp-value"; }
279                 out << "\tlength" << endl;
280                 
281                 if (method == "pearson")                {  calcPearson(axes, out);      }
282                 else if (method == "spearman")  {  calcSpearman(axes, out); }
283                 else if (method == "kendall")   {  calcKendall(axes, out);      }
284                 else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
285                 
286                 out.close();
287                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
288                 
289                 if (m->control_pressed) {  return 0; }
290
291                 m->mothurOutEndLine();
292                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
293                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
294                 m->mothurOutEndLine();
295                 
296                 return 0;
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "CorrAxesCommand", "execute");   
300                 exit(1);
301         }
302 }
303 //**********************************************************************************************************************
304 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
305    try {
306            
307            //find average of each axis - X
308            vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
309            for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
310                    vector<float> temp = it->second;
311                    for (int i = 0; i < temp.size(); i++) {
312                            averageAxes[i] += temp[i];  
313                    }
314            }
315            
316            for (int i = 0; i < averageAxes.size(); i++) {  averageAxes[i] = averageAxes[i] / (float) axes.size(); }
317            
318            //for each otu
319            for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
320                    
321                    if (metadatafile == "") {  out << i+1;       }
322                    else {  out << metadataLabels[i];            }
323                                    
324                    //find the averages this otu - Y
325                    float sumOtu = 0.0;
326                    for (int j = 0; j < lookupFloat.size(); j++) {
327                            sumOtu += lookupFloat[j]->getAbundance(i);
328                    }
329                    float Ybar = sumOtu / (float) lookupFloat.size();
330                    
331                    vector<float> rValues(averageAxes.size());
332
333                    //find r value for each axis
334                    for (int k = 0; k < averageAxes.size(); k++) {
335                            
336                            double r = 0.0;
337                            double numerator = 0.0;
338                            double denomTerm1 = 0.0;
339                            double denomTerm2 = 0.0;
340                            for (int j = 0; j < lookupFloat.size(); j++) {
341                                    float Yi = lookupFloat[j]->getAbundance(i);
342                                    float Xi = axes[lookupFloat[j]->getGroup()][k];
343                                    
344                                    numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
345                                    denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
346                                    denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
347                            }
348                            
349                            double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
350                            
351                            r = numerator / denom;
352                
353                if (isnan(r) || isinf(r)) { r = 0.0; }
354                
355                            rValues[k] = r;
356                            out << '\t' << r; 
357                
358                //signifigance calc - http://faculty.vassar.edu/lowry/ch4apx.html
359                double temp =  (1- (r*r)) / (double) (lookupFloat.size()-2);
360                temp = sqrt(temp);
361                double sig = r / temp;
362                if (isnan(sig) || isinf(sig)) { sig = 0.0; }
363                
364                out << '\t' << sig;
365                    }
366                    
367                    double sum = 0;
368                    for(int k=0;k<rValues.size();k++){
369                            sum += rValues[k] * rValues[k];
370                    }
371                    out << '\t' << sqrt(sum) << endl;
372            }
373                    
374            return 0;
375    }
376    catch(exception& e) {
377            m->errorOut(e, "CorrAxesCommand", "calcPearson");
378            exit(1);
379    }
380 }
381 //**********************************************************************************************************************
382 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
383         try {
384                 
385                 //format data
386                 vector< map<float, int> > tableX; tableX.resize(numaxes);
387                 map<float, int>::iterator itTable;
388                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
389                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
390                         vector<float> temp = it->second;
391                         for (int i = 0; i < temp.size(); i++) {
392                                 spearmanRank member(it->first, temp[i]);
393                                 scores[i].push_back(member);  
394                                 
395                                 //count number of repeats
396                                 itTable = tableX[i].find(temp[i]);
397                                 if (itTable == tableX[i].end()) { 
398                                         tableX[i][temp[i]] = 1;
399                                 }else {
400                                         tableX[i][temp[i]]++;
401                                 }
402                         }
403                 }
404                 
405                 //calc LX
406                 //for each axis
407                 vector<double> Lx; Lx.resize(numaxes, 0.0);
408                 for (int i = 0; i < numaxes; i++) {
409                         for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) {
410                                 double tx = (double) itTable->second;
411                                 Lx[i] += ((pow(tx, 3.0) - tx) / 12.0);
412                         }
413                 }
414                 
415                 //sort each axis
416                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
417                 
418                 //find ranks of xi in each axis
419                 map<string, vector<float> > rankAxes;
420                 for (int i = 0; i < numaxes; i++) {
421                         
422                         vector<spearmanRank> ties;
423                         int rankTotal = 0;
424                         for (int j = 0; j < scores[i].size(); j++) {
425                                 rankTotal += (j+1);
426                                 ties.push_back(scores[i][j]);
427                                 
428                                 if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead
429                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
430
431                                                 for (int k = 0; k < ties.size(); k++) {
432                                                         float thisrank = rankTotal / (float) ties.size();
433                                                         rankAxes[ties[k].name].push_back(thisrank);
434                                                 }
435                                                 ties.clear();
436                                                 rankTotal = 0;
437                                         }
438                                 }else { // you are the last one
439                                         
440                                         for (int k = 0; k < ties.size(); k++) {
441                                                 float thisrank = rankTotal / (float) ties.size();
442                                                 rankAxes[ties[k].name].push_back(thisrank);
443                                                 
444                                         }
445                                 }
446                         }
447                 }
448                 
449                                 
450                 //for each otu
451                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
452                         
453                         if (metadatafile == "") {  out << i+1;  }
454                         else {  out << metadataLabels[i];               }
455                         
456                         //find the ranks of this otu - Y
457                         vector<spearmanRank> otuScores;
458                         map<float, int> tableY;
459                         for (int j = 0; j < lookupFloat.size(); j++) {
460                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
461                                 otuScores.push_back(member);
462                                 
463                                 itTable = tableY.find(member.score);
464                                 if (itTable == tableY.end()) { 
465                                         tableY[member.score] = 1;
466                                 }else {
467                                         tableY[member.score]++;
468                                 }
469                                 
470                         }
471                         
472                         //calc Ly
473                         double Ly = 0.0;
474                         for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
475                                 double ty = (double) itTable->second;
476                                 Ly += ((pow(ty, 3.0) - ty) / 12.0);
477                         }
478                         
479                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
480                         
481                         map<string, float> rankOtus;
482                         vector<spearmanRank> ties;
483                         int rankTotal = 0;
484                         for (int j = 0; j < otuScores.size(); j++) {
485                                 rankTotal += (j+1);
486                                 ties.push_back(otuScores[j]);
487                                 
488                                 if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead
489                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
490                                                 
491                                                 for (int k = 0; k < ties.size(); k++) {
492                                                         float thisrank = rankTotal / (float) ties.size();
493                                                         rankOtus[ties[k].name] = thisrank;
494                                                 }
495                                                 ties.clear();
496                                                 rankTotal = 0;
497                                         }
498                                 }else { // you are the last one
499                                         
500                                         for (int k = 0; k < ties.size(); k++) {
501                                                 float thisrank = rankTotal / (float) ties.size();
502                                                 rankOtus[ties[k].name] = thisrank;
503                                         }
504                                 }
505                         }
506                         vector<double> pValues(numaxes);        
507
508                         //calc spearman ranks for each axis for this otu
509                         for (int j = 0; j < numaxes; j++) {
510                                 
511                                 double di = 0.0;
512                                 for (int k = 0; k < lookupFloat.size(); k++) {
513                                         
514                                         float xi = rankAxes[lookupFloat[k]->getGroup()][j];
515                                         float yi = rankOtus[lookupFloat[k]->getGroup()];
516                                         
517                                         di += ((xi - yi) * (xi - yi));
518                                 }
519                                 
520                                 double p = 0.0;
521                                 
522                                 double n = (double) lookupFloat.size();
523                                 
524                                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j];
525                                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
526                                 
527                                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
528                                 
529                 if (isnan(p) || isinf(p)) { p = 0.0; }
530                 
531                                 out  << '\t' << p;
532                                 
533                                 pValues[j] = p;
534                 
535                 //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
536                 double temp = (lookupFloat.size()-2) / (double) (1- (p*p));
537                 temp = sqrt(temp);
538                 double sig = p*temp;
539                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
540                 
541                 out  << '\t' << sig;
542                 
543                         }
544
545                         double sum = 0;
546                         for(int k=0;k<numaxes;k++){
547                                 sum += pValues[k] * pValues[k];
548                         }
549                         out << '\t' << sqrt(sum) << endl;
550                 }
551                 
552                 return 0;
553         }
554         catch(exception& e) {
555                 m->errorOut(e, "CorrAxesCommand", "calcSpearman");
556                 exit(1);
557         }
558 }
559 //**********************************************************************************************************************
560 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
561         try {
562                 
563                 //format data
564                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
565                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
566                         vector<float> temp = it->second;
567                         for (int i = 0; i < temp.size(); i++) {
568                                 spearmanRank member(it->first, temp[i]);
569                                 scores[i].push_back(member);  
570                         }
571                 }
572                 
573                 //sort each axis
574                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
575                 
576                 //convert scores to ranks of xi in each axis
577                 for (int i = 0; i < numaxes; i++) {
578                         
579                         vector<spearmanRank*> ties;
580                         int rankTotal = 0;
581                         for (int j = 0; j < scores[i].size(); j++) {
582                                 rankTotal += (j+1);
583                                 ties.push_back(&(scores[i][j]));
584                                 
585                                 if (j != scores[i].size()-1) { // you are not the last so you can look ahead
586                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
587                                                 for (int k = 0; k < ties.size(); k++) {
588                                                         float thisrank = rankTotal / (float) ties.size();
589                                                         (*ties[k]).score = thisrank;
590                                                 }
591                                                 ties.clear();
592                                                 rankTotal = 0;
593                                         }
594                                 }else { // you are the last one
595                                         for (int k = 0; k < ties.size(); k++) {
596                                                 float thisrank = rankTotal / (float) ties.size();
597                                                 (*ties[k]).score = thisrank;
598                                         }
599                                 }
600                         }
601                 }
602                 
603                 //for each otu
604                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
605                 
606                         if (metadatafile == "") {  out << i+1;  }
607                         else {  out << metadataLabels[i];               }
608                         
609                         //find the ranks of this otu - Y
610                         vector<spearmanRank> otuScores;
611                         for (int j = 0; j < lookupFloat.size(); j++) {
612                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
613                                 otuScores.push_back(member);
614                         }
615                                                 
616                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
617                         
618                         map<string, float> rankOtus;
619                         vector<spearmanRank> ties;
620                         int rankTotal = 0;
621                         for (int j = 0; j < otuScores.size(); j++) {
622                                 rankTotal += (j+1);
623                                 ties.push_back(otuScores[j]);
624                                 
625                                 if (j != otuScores.size()-1) { // you are not the last so you can look ahead
626                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
627                                                 for (int k = 0; k < ties.size(); k++) {
628                                                         float thisrank = rankTotal / (float) ties.size();
629                                                         rankOtus[ties[k].name] = thisrank;
630                                                 }
631                                                 ties.clear();
632                                                 rankTotal = 0;
633                                         }
634                                 }else { // you are the last one
635                                         for (int k = 0; k < ties.size(); k++) {
636                                                 float thisrank = rankTotal / (float) ties.size();
637                                                 rankOtus[ties[k].name] = thisrank;
638                                         }
639                                 }
640                         }
641                         
642                         
643                         vector<double> pValues(numaxes);
644                         
645                         //calc spearman ranks for each axis for this otu
646                         for (int j = 0; j < numaxes; j++) {
647                         
648                                 int numCoor = 0;
649                                 int numDisCoor = 0;
650                                 
651                                 vector<spearmanRank> otus; 
652                                 vector<spearmanRank> otusTemp;
653                                 for (int l = 0; l < scores[j].size(); l++) {   
654                                         spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
655                                         otus.push_back(member);
656                                 }
657                                 
658                                 int count = 0;
659                                 for (int l = 0; l < scores[j].size(); l++) {
660                                         
661                                         int numWithHigherRank = 0;
662                                         int numWithLowerRank = 0;
663                                         float thisrank = otus[l].score;
664                                         
665                                         for (int u = l+1; u < scores[j].size(); u++) {
666                                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
667                                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
668                                                 count++;
669                                         }
670                                         
671                                         numCoor += numWithHigherRank;
672                                         numDisCoor += numWithLowerRank;
673                                 }
674                                 
675                                 double p = (numCoor - numDisCoor) / (float) count;
676                  if (isnan(p) || isinf(p)) { p = 0.0; }
677                 
678                                 out << '\t' << p;
679                                 pValues[j] = p;
680                 
681                 //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests
682                 double numer = 3.0 * (numCoor - numDisCoor);
683                 int n = scores[j].size();
684                 double denom = n * (n-1) * (2*n + 5) / (double) 2.0;
685                 denom = sqrt(denom);
686                 double sig = numer / denom;
687                 
688                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
689                 
690                 out << '\t' << sig;
691                         }
692                         
693                         double sum = 0;
694                         for(int k=0;k<numaxes;k++){
695                                 sum += pValues[k] * pValues[k];
696                         }
697                         out << '\t' << sqrt(sum) << endl;
698                 }
699                 
700                 return 0;
701         }
702         catch(exception& e) {
703                 m->errorOut(e, "CorrAxesCommand", "calcKendall");
704                 exit(1);
705         }
706 }
707 //**********************************************************************************************************************
708 int CorrAxesCommand::getSharedFloat(InputData* input){
709         try {
710                 lookupFloat = input->getSharedRAbundFloatVectors();
711                 string lastLabel = lookupFloat[0]->getLabel();
712                 
713                 if (label == "") { label = lastLabel;  return 0; }
714                 
715                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
716                 set<string> labels; labels.insert(label);
717                 set<string> processedLabels;
718                 set<string> userLabels = labels;
719                 
720                 //as long as you are not at the end of the file or done wih the lines you want
721                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
722                         
723                         if (m->control_pressed) {  return 0;  }
724                         
725                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
726                                 processedLabels.insert(lookupFloat[0]->getLabel());
727                                 userLabels.erase(lookupFloat[0]->getLabel());
728                                 break;
729                         }
730                         
731                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
732                                 string saveLabel = lookupFloat[0]->getLabel();
733                                 
734                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
735                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
736                                 
737                                 processedLabels.insert(lookupFloat[0]->getLabel());
738                                 userLabels.erase(lookupFloat[0]->getLabel());
739                                 
740                                 //restore real lastlabel to save below
741                                 lookupFloat[0]->setLabel(saveLabel);
742                                 break;
743                         }
744                         
745                         lastLabel = lookupFloat[0]->getLabel();                 
746                         
747                         //get next line to process
748                         //prevent memory leak
749                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
750                         lookupFloat = input->getSharedRAbundFloatVectors();
751                 }
752                 
753                 
754                 if (m->control_pressed) { return 0;  }
755                 
756                 //output error messages about any remaining user labels
757                 set<string>::iterator it;
758                 bool needToRun = false;
759                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
760                         m->mothurOut("Your file does not include the label " + *it); 
761                         if (processedLabels.count(lastLabel) != 1) {
762                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
763                                 needToRun = true;
764                         }else {
765                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
766                         }
767                 }
768                 
769                 //run last label if you need to
770                 if (needToRun == true)  {
771                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
772                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
773                 }       
774                 
775                 return 0;
776         }
777         catch(exception& e) {
778                 m->errorOut(e, "CorrAxesCommand", "getSharedFloat");    
779                 exit(1);
780         }
781 }
782 //**********************************************************************************************************************
783 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
784         try {
785                 
786                 vector<SharedRAbundFloatVector*> newLookup;
787                 for (int i = 0; i < thislookup.size(); i++) {
788                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
789                         temp->setLabel(thislookup[i]->getLabel());
790                         temp->setGroup(thislookup[i]->getGroup());
791                         newLookup.push_back(temp);
792                 }
793                 
794                 //for each bin
795                 vector<string> newBinLabels;
796                 string snumBins = toString(thislookup[0]->getNumBins());
797                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
798                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
799                         
800                         //look at each sharedRabund and make sure they are not all zero
801                         bool allZero = true;
802                         for (int j = 0; j < thislookup.size(); j++) {
803                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
804                         }
805                         
806                         //if they are not all zero add this bin
807                         if (!allZero) {
808                                 for (int j = 0; j < thislookup.size(); j++) {
809                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
810                                 }
811                                 
812                                 //if there is a bin label use it otherwise make one
813                                 string binLabel = "Otu";
814                                 string sbinNumber = toString(i+1);
815                                 if (sbinNumber.length() < snumBins.length()) { 
816                                         int diff = snumBins.length() - sbinNumber.length();
817                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
818                                 }
819                                 binLabel += sbinNumber; 
820                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
821                                 
822                                 newBinLabels.push_back(binLabel);
823                         }
824                 }
825                 
826                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
827                 
828                 thislookup = newLookup;
829                 m->currentBinLabels = newBinLabels;
830                 
831                 return 0;
832                 
833         }
834         catch(exception& e) {
835                 m->errorOut(e, "CorrAxesCommand", "eliminateZeroOTUS");
836                 exit(1);
837         }
838 }
839 /*****************************************************************/
840 map<string, vector<float> > CorrAxesCommand::readAxes(){
841         try {
842                 map<string, vector<float> > axes;
843                 
844                 ifstream in;
845                 m->openInputFile(axesfile, in);
846                 
847                 string headerLine = m->getline(in); m->gobble(in);
848                 
849                 //count the number of axis you are reading
850                 bool done = false;
851                 int count = 0;
852                 while (!done) {
853                         int pos = headerLine.find("axis");
854                         if (pos != string::npos) {
855                                 count++;
856                                 headerLine = headerLine.substr(pos+4);
857                         }else { done = true; }
858                 }
859                 
860                 if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
861                 
862                 while (!in.eof()) {
863                         
864                         if (m->control_pressed) { in.close(); return axes; }
865                         
866                         string group = "";
867                         in >> group; m->gobble(in);
868                         
869                         vector<float> thisGroupsAxes;
870                         for (int i = 0; i < count; i++) {
871                                 float temp = 0.0;
872                                 in >> temp; 
873                                 
874                                 //only save the axis we want
875                                 if (i < numaxes) {  thisGroupsAxes.push_back(temp); }
876                         }
877                         
878                         //save group if its one the user selected
879                         if (names.count(group) != 0) {
880                                 map<string, vector<float> >::iterator it = axes.find(group);
881                                 
882                                 if (it == axes.end()) {
883                                         axes[group] = thisGroupsAxes;
884                                 }else {
885                                         m->mothurOut(group + " is already in your axes file, using first definition."); m->mothurOutEndLine();
886                                 }
887                         }
888                         
889                         m->gobble(in);
890                 }
891                 in.close();
892                 
893                 return axes;
894         }
895         catch(exception& e) {
896                 m->errorOut(e, "CorrAxesCommand", "readAxes");  
897                 exit(1);
898         }
899 }
900 /*****************************************************************/
901 int CorrAxesCommand::getMetadata(){
902         try {
903                 vector<string> groupNames;
904                 
905                 ifstream in;
906                 m->openInputFile(metadatafile, in);
907                 
908                 string headerLine = m->getline(in); m->gobble(in);
909                 istringstream iss (headerLine,istringstream::in);
910                 
911                 //read the first label, because it refers to the groups
912                 string columnLabel;
913                 iss >> columnLabel; m->gobble(iss); 
914                 
915                 //save names of columns you are reading
916                 while (!iss.eof()) {
917                         iss >> columnLabel; m->gobble(iss);
918                         metadataLabels.push_back(columnLabel);
919                 }
920                 int count = metadataLabels.size();
921                         
922                 //read rest of file
923                 while (!in.eof()) {
924                         
925                         if (m->control_pressed) { in.close(); return 0; }
926                         
927                         string group = "";
928                         in >> group; m->gobble(in);
929                         groupNames.push_back(group);
930                                 
931                         SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
932                         tempLookup->setGroup(group);
933                         tempLookup->setLabel("1");
934                         
935                         for (int i = 0; i < count; i++) {
936                                 float temp = 0.0;
937                                 in >> temp; 
938                                 tempLookup->push_back(temp, group);
939                         }
940                         
941                         lookupFloat.push_back(tempLookup);
942                         
943                         m->gobble(in);
944                 }
945                 in.close();
946                 
947                 //remove any groups the user does not want, and set globaldata->groups with only valid groups
948                 SharedUtil* util;
949                 util = new SharedUtil();
950                 Groups = m->getGroups();
951                 util->setGroups(Groups, groupNames);
952                 m->setGroups(Groups);
953                 
954                 for (int i = 0; i < lookupFloat.size(); i++) {
955                         //if this sharedrabund is not from a group the user wants then delete it.
956                         if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) { 
957                                 delete lookupFloat[i]; lookupFloat[i] = NULL;
958                                 lookupFloat.erase(lookupFloat.begin()+i); 
959                                 i--; 
960                         }
961                 }
962                 
963                 delete util;
964                 
965                 return 0;
966         }
967         catch(exception& e) {
968                 m->errorOut(e, "CorrAxesCommand", "getMetadata");       
969                 exit(1);
970         }
971 }
972 /*****************************************************************/
973
974
975
976
977
978