]> git.donarmstrong.com Git - mothur.git/blob - corraxescommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / corraxescommand.cpp
1 /*
2  *  corraxescommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "corraxescommand.h"
11 #include "sharedutilities.h"
12 #include "linearalgebra.h"
13
14 //**********************************************************************************************************************
15 vector<string> CorrAxesCommand::setParameters(){        
16         try {
17                 CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paxes);
18                 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
19                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
20                 CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
21                 CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "",false,false); parameters.push_back(pnumaxes);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24                 CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "CorrAxesCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string CorrAxesCommand::getHelpString(){        
39         try {
40                 string helpString = "";
41                 helpString += "The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n";
42                 helpString += "The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label.  The shared, relabund or metadata and axes parameters are required.  If shared is given the relative abundance is calculated.\n";
43                 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
44                 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
45                 helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
46                 helpString += "The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n";
47                 helpString += "The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n";
48                 helpString += "Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n";
49                 helpString += "The corr.axes command outputs a .corr.axes file.\n";
50                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
51                 return helpString;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "CorrAxesCommand", "getHelpString");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 CorrAxesCommand::CorrAxesCommand(){     
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["corr.axes"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 CorrAxesCommand::CorrAxesCommand(string option)  {
73         try {
74                 abort = false; calledHelp = false;   
75                 
76                 //allow user to run help
77                 if(option == "help") { help(); abort = true; calledHelp = true; }
78                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
79                 
80                 else {
81                         vector<string> myArray = setParameters();
82                         
83                         OptionParser parser(option);
84                         map<string, string> parameters = parser.getParameters();
85                         
86                         ValidParameters validParameter;
87                         map<string, string>::iterator it;
88                         
89                         //check to make sure all parameters are valid for command
90                         for (it = parameters.begin(); it != parameters.end(); it++) { 
91                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
92                         }
93                         
94                         vector<string> tempOutNames;
95                         outputTypes["corr.axes"] = tempOutNames;
96                         
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
99                         if (inputDir == "not found"){   inputDir = "";          }
100                         else {
101                                 string path;
102                                 it = parameters.find("axes");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("shared");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
116                                 }
117                                 
118                                 it = parameters.find("relabund");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
124                                 }
125                                 
126                                 it = parameters.find("metadata");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
132                                 }
133                         }
134                         
135                         
136                         //check for required parameters
137                         axesfile = validParameter.validFile(parameters, "axes", true);
138                         if (axesfile == "not open") { abort = true; }
139                         else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command."); m->mothurOutEndLine(); abort = true;  }     
140                         
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { abort = true; }
143                         else if (sharedfile == "not found") { sharedfile = ""; }
144                         else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
145                         
146                         relabundfile = validParameter.validFile(parameters, "relabund", true);
147                         if (relabundfile == "not open") { abort = true; }
148                         else if (relabundfile == "not found") { relabundfile = ""; }
149                         else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
150                         
151                         metadatafile = validParameter.validFile(parameters, "metadata", true);
152                         if (metadatafile == "not open") { abort = true; }
153                         else if (metadatafile == "not found") { metadatafile = ""; }
154                         else { inputFileName = metadatafile; }
155                         
156                         groups = validParameter.validFile(parameters, "groups", false);                 
157                         if (groups == "not found") { groups = "";  pickedGroups = false;  }
158                         else { 
159                                 pickedGroups = true;
160                                 m->splitAtDash(groups, Groups); 
161                         }                       
162                         m->setGroups(Groups);
163                         
164                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputFileName);  }
165                         
166                         label = validParameter.validFile(parameters, "label", false);                   
167                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
168                         
169                         if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) { 
170                                 //is there are current file available for any of these?
171                                 //give priority to shared, then relabund
172                                 //if there is a current shared file, use it
173                                 sharedfile = m->getSharedFile(); 
174                                 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
175                                 else { 
176                                         relabundfile = m->getRelAbundFile(); 
177                                         if (relabundfile != "") { inputFileName = relabundfile;  m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
178                                         else { 
179                                                 m->mothurOut("You must provide either a shared, relabund, or metadata file."); m->mothurOutEndLine(); abort = true; 
180                                         }
181                                 }
182                         }       
183                         
184                         if (metadatafile != "") {
185                                 if ((relabundfile != "") || (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
186                         }else {
187                                 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
188                         }
189                         string temp;
190                         temp = validParameter.validFile(parameters, "numaxes", false);          if (temp == "not found"){       temp = "3";                             }
191                         m->mothurConvert(temp, numaxes); 
192                         
193                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "pearson";             }
194                         
195                         if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
196                         
197                 }
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");           
201                 exit(1);
202         }
203 }
204 //**********************************************************************************************************************
205
206 int CorrAxesCommand::execute(){
207         try {
208                 
209                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
210                 
211                 /*************************************************************************************/
212                 // use smart distancing to get right sharedRabund and convert to relabund if needed  //
213                 /************************************************************************************/
214                 if (sharedfile != "") {  
215                         InputData* input = new InputData(sharedfile, "sharedfile");
216                         getSharedFloat(input); 
217                         delete input;
218                         
219                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
220                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
221                         
222                 }else if (relabundfile != "") { 
223                         InputData* input = new InputData(relabundfile, "relabund");
224                         getSharedFloat(input); 
225                         delete input;
226                         
227                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
228                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
229                         
230                 }else if (metadatafile != "") { 
231                         getMetadata();  //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later
232                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
233                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading metadata file."); m->mothurOutEndLine(); return 0; }
234                         
235                         if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
236                         
237                 }else { m->mothurOut("[ERROR]: no file given."); m->mothurOutEndLine(); return 0; }
238                 
239                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
240                 
241                 //this is for a sanity check to make sure the axes file and shared file match
242                 for (int i = 0; i < lookupFloat.size(); i++) { names.insert(lookupFloat[i]->getGroup()); }
243                 
244                 /*************************************************************************************/
245                 // read axes file and check for file mismatches with shared or relabund file         //
246                 /************************************************************************************/
247                 
248                 //read axes file
249                 map<string, vector<float> > axes = readAxes();
250                 
251                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
252                 
253                 //sanity check, the read only adds groups that are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone
254                 if (axes.size() != lookupFloat.size()) { 
255                         map<string, vector<float> >::iterator it;
256                         for (int i = 0; i < lookupFloat.size(); i++) {
257                                 it = axes.find(lookupFloat[i]->getGroup());
258                                 if (it == axes.end()) { m->mothurOut(lookupFloat[i]->getGroup() + " is in your shared of relabund file but not in your axes file, please correct."); m->mothurOutEndLine(); }
259                         }
260                         m->control_pressed = true;
261                 }
262                 
263                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
264                 
265                 /*************************************************************************************/
266                 // calc the r values                                                                                                                            //
267                 /************************************************************************************/
268                 
269                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
270                 outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);      
271                 ofstream out;
272                 m->openOutputFile(outputFileName, out);
273                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
274                 
275                 //output headings
276                 if (metadatafile == "") {  out << "OTU";        }
277                 else {  out << "Feature";                                               }
278
279                 for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1) << "\tp-value"; }
280                 out << "\tlength" << endl;
281                 
282                 if (method == "pearson")                {  calcPearson(axes, out);      }
283                 else if (method == "spearman")  {  calcSpearman(axes, out); }
284                 else if (method == "kendall")   {  calcKendall(axes, out);      }
285                 else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
286                 
287                 out.close();
288                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
289                 
290                 if (m->control_pressed) {  return 0; }
291
292                 m->mothurOutEndLine();
293                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
294                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
295                 m->mothurOutEndLine();
296                 
297                 return 0;
298         }
299         catch(exception& e) {
300                 m->errorOut(e, "CorrAxesCommand", "execute");   
301                 exit(1);
302         }
303 }
304 //**********************************************************************************************************************
305 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
306    try {
307            
308        LinearAlgebra linear;
309        
310            //find average of each axis - X
311            vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
312            for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
313                    vector<float> temp = it->second;
314                    for (int i = 0; i < temp.size(); i++) {
315                            averageAxes[i] += temp[i];  
316                    }
317            }
318            
319            for (int i = 0; i < averageAxes.size(); i++) {  averageAxes[i] = averageAxes[i] / (float) axes.size(); }
320            
321            //for each otu
322            for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
323                    
324                    if (metadatafile == "") {  out << m->currentBinLabels[i];    }
325                    else {  out << metadataLabels[i];            }
326                                    
327                    //find the averages this otu - Y
328                    float sumOtu = 0.0;
329                    for (int j = 0; j < lookupFloat.size(); j++) {
330                            sumOtu += lookupFloat[j]->getAbundance(i);
331                    }
332                    float Ybar = sumOtu / (float) lookupFloat.size();
333                    
334                    vector<float> rValues(averageAxes.size());
335
336                    //find r value for each axis
337                    for (int k = 0; k < averageAxes.size(); k++) {
338                            
339                            double r = 0.0;
340                            double numerator = 0.0;
341                            double denomTerm1 = 0.0;
342                            double denomTerm2 = 0.0;
343                            for (int j = 0; j < lookupFloat.size(); j++) {
344                                    float Yi = lookupFloat[j]->getAbundance(i);
345                                    float Xi = axes[lookupFloat[j]->getGroup()][k];
346                                    
347                                    numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
348                                    denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
349                                    denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
350                            }
351                            
352                            double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
353                            
354                            r = numerator / denom;
355                
356                if (isnan(r) || isinf(r)) { r = 0.0; }
357                
358                            rValues[k] = r;
359                            out << '\t' << r; 
360                
361                double sig = linear.calcPearsonSig(lookupFloat.size(), r);
362                
363                out << '\t' << sig;
364                    }
365                    
366                    double sum = 0;
367                    for(int k=0;k<rValues.size();k++){
368                            sum += rValues[k] * rValues[k];
369                    }
370                    out << '\t' << sqrt(sum) << endl;
371            }
372                    
373            return 0;
374    }
375    catch(exception& e) {
376            m->errorOut(e, "CorrAxesCommand", "calcPearson");
377            exit(1);
378    }
379 }
380 //**********************************************************************************************************************
381 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
382         try {
383                 
384         LinearAlgebra linear;
385         vector<double> sf; 
386         
387                 //format data
388                 vector< map<float, int> > tableX; tableX.resize(numaxes);
389                 map<float, int>::iterator itTable;
390                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
391                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
392                         vector<float> temp = it->second;
393                         for (int i = 0; i < temp.size(); i++) {
394                                 spearmanRank member(it->first, temp[i]);
395                                 scores[i].push_back(member);  
396                                 
397                                 //count number of repeats
398                                 itTable = tableX[i].find(temp[i]);
399                                 if (itTable == tableX[i].end()) { 
400                                         tableX[i][temp[i]] = 1;
401                                 }else {
402                                         tableX[i][temp[i]]++;
403                                 }
404                         }
405                 }
406                 
407                 //calc LX
408                 //for each axis
409                 vector<double> Lx; Lx.resize(numaxes, 0.0);
410                 for (int i = 0; i < numaxes; i++) {
411                         for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) {
412                                 double tx = (double) itTable->second;
413                                 Lx[i] += ((pow(tx, 3.0) - tx) / 12.0);
414                         }
415                 }
416                 
417                 //sort each axis
418                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
419                 
420                 //find ranks of xi in each axis
421                 map<string, vector<float> > rankAxes;
422                 for (int i = 0; i < numaxes; i++) {
423                         
424                         vector<spearmanRank> ties;
425                         int rankTotal = 0;
426             double sfTemp = 0.0;
427                         for (int j = 0; j < scores[i].size(); j++) {
428                                 rankTotal += (j+1);
429                                 ties.push_back(scores[i][j]);
430                                 
431                                 if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead
432                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
433
434                                                 for (int k = 0; k < ties.size(); k++) {
435                                                         float thisrank = rankTotal / (float) ties.size();
436                                                         rankAxes[ties[k].name].push_back(thisrank);
437                                                 }
438                         int t = ties.size();
439                         sfTemp += (t*t*t-t);
440                                                 ties.clear();
441                                                 rankTotal = 0;
442                                         }
443                                 }else { // you are the last one
444                                         
445                                         for (int k = 0; k < ties.size(); k++) {
446                                                 float thisrank = rankTotal / (float) ties.size();
447                                                 rankAxes[ties[k].name].push_back(thisrank);
448                                                 
449                                         }
450                                 }
451                         }
452             sf.push_back(sfTemp);
453                 }
454                 
455                                 
456                 //for each otu
457                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
458                         
459                         if (metadatafile == "") {  out << m->currentBinLabels[i];       }
460                         else {  out << metadataLabels[i];               }
461                         
462                         //find the ranks of this otu - Y
463                         vector<spearmanRank> otuScores;
464                         map<float, int> tableY;
465                         for (int j = 0; j < lookupFloat.size(); j++) {
466                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
467                                 otuScores.push_back(member);
468                                 
469                                 itTable = tableY.find(member.score);
470                                 if (itTable == tableY.end()) { 
471                                         tableY[member.score] = 1;
472                                 }else {
473                                         tableY[member.score]++;
474                                 }
475                                 
476                         }
477                         
478                         //calc Ly
479                         double Ly = 0.0;
480                         for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
481                                 double ty = (double) itTable->second;
482                                 Ly += ((pow(ty, 3.0) - ty) / 12.0);
483                         }
484                         
485                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
486                         
487             double sg = 0.0;
488                         map<string, float> rankOtus;
489                         vector<spearmanRank> ties;
490                         int rankTotal = 0;
491                         for (int j = 0; j < otuScores.size(); j++) {
492                                 rankTotal += (j+1);
493                                 ties.push_back(otuScores[j]);
494                                 
495                                 if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead
496                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
497                                                 
498                                                 for (int k = 0; k < ties.size(); k++) {
499                                                         float thisrank = rankTotal / (float) ties.size();
500                                                         rankOtus[ties[k].name] = thisrank;
501                                                 }
502                         int t = ties.size();
503                         sg += (t*t*t-t);
504                                                 ties.clear();
505                                                 rankTotal = 0;
506                                         }
507                                 }else { // you are the last one
508                                         
509                                         for (int k = 0; k < ties.size(); k++) {
510                                                 float thisrank = rankTotal / (float) ties.size();
511                                                 rankOtus[ties[k].name] = thisrank;
512                                         }
513                                 }
514                         }
515                         vector<double> pValues(numaxes);        
516
517                         //calc spearman ranks for each axis for this otu
518                         for (int j = 0; j < numaxes; j++) {
519                                 
520                                 double di = 0.0;
521                                 for (int k = 0; k < lookupFloat.size(); k++) {
522                                         
523                                         float xi = rankAxes[lookupFloat[k]->getGroup()][j];
524                                         float yi = rankOtus[lookupFloat[k]->getGroup()];
525                                         
526                                         di += ((xi - yi) * (xi - yi));
527                                 }
528                                 
529                                 double p = 0.0;
530                                 
531                                 double n = (double) lookupFloat.size();
532                                 
533                                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j];
534                                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
535                                 
536                                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
537                                 
538                 if (isnan(p) || isinf(p)) { p = 0.0; }
539                 
540                                 out  << '\t' << p;
541                                 
542                                 pValues[j] = p;
543                 
544                 double sig = linear.calcSpearmanSig(n, sf[j], sg, di);            
545                 out  << '\t' << sig;
546                 
547                         }
548
549                         double sum = 0;
550                         for(int k=0;k<numaxes;k++){
551                                 sum += pValues[k] * pValues[k];
552                         }
553                         out << '\t' << sqrt(sum) << endl;
554                 }
555                 
556                 return 0;
557         }
558         catch(exception& e) {
559                 m->errorOut(e, "CorrAxesCommand", "calcSpearman");
560                 exit(1);
561         }
562 }
563 //**********************************************************************************************************************
564 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
565         try {
566                 
567         LinearAlgebra linear;
568         
569                 //format data
570                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
571                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
572                         vector<float> temp = it->second;
573                         for (int i = 0; i < temp.size(); i++) {
574                                 spearmanRank member(it->first, temp[i]);
575                                 scores[i].push_back(member);  
576                         }
577                 }
578                 
579                 //sort each axis
580                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
581                 
582                 //convert scores to ranks of xi in each axis
583                 for (int i = 0; i < numaxes; i++) {
584                         
585                         vector<spearmanRank*> ties;
586                         int rankTotal = 0;
587                         for (int j = 0; j < scores[i].size(); j++) {
588                                 rankTotal += (j+1);
589                                 ties.push_back(&(scores[i][j]));
590                                 
591                                 if (j != scores[i].size()-1) { // you are not the last so you can look ahead
592                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
593                                                 for (int k = 0; k < ties.size(); k++) {
594                                                         float thisrank = rankTotal / (float) ties.size();
595                                                         (*ties[k]).score = thisrank;
596                                                 }
597                                                 ties.clear();
598                                                 rankTotal = 0;
599                                         }
600                                 }else { // you are the last one
601                                         for (int k = 0; k < ties.size(); k++) {
602                                                 float thisrank = rankTotal / (float) ties.size();
603                                                 (*ties[k]).score = thisrank;
604                                         }
605                                 }
606                         }
607                 }
608                 
609                 //for each otu
610                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
611                 
612                         if (metadatafile == "") {  out << m->currentBinLabels[i];       }
613                         else {  out << metadataLabels[i];               }
614                         
615                         //find the ranks of this otu - Y
616                         vector<spearmanRank> otuScores;
617                         for (int j = 0; j < lookupFloat.size(); j++) {
618                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
619                                 otuScores.push_back(member);
620                         }
621                                                 
622                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
623                         
624                         map<string, float> rankOtus;
625                         vector<spearmanRank> ties;
626                         int rankTotal = 0;
627                         for (int j = 0; j < otuScores.size(); j++) {
628                                 rankTotal += (j+1);
629                                 ties.push_back(otuScores[j]);
630                                 
631                                 if (j != otuScores.size()-1) { // you are not the last so you can look ahead
632                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
633                                                 for (int k = 0; k < ties.size(); k++) {
634                                                         float thisrank = rankTotal / (float) ties.size();
635                                                         rankOtus[ties[k].name] = thisrank;
636                                                 }
637                                                 ties.clear();
638                                                 rankTotal = 0;
639                                         }
640                                 }else { // you are the last one
641                                         for (int k = 0; k < ties.size(); k++) {
642                                                 float thisrank = rankTotal / (float) ties.size();
643                                                 rankOtus[ties[k].name] = thisrank;
644                                         }
645                                 }
646                         }
647                         
648                         
649                         vector<double> pValues(numaxes);
650                         
651                         //calc spearman ranks for each axis for this otu
652                         for (int j = 0; j < numaxes; j++) {
653                         
654                                 int numCoor = 0;
655                                 int numDisCoor = 0;
656                                 
657                                 vector<spearmanRank> otus; 
658                                 vector<spearmanRank> otusTemp;
659                                 for (int l = 0; l < scores[j].size(); l++) {   
660                                         spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
661                                         otus.push_back(member);
662                                 }
663                                 
664                                 int count = 0;
665                                 for (int l = 0; l < scores[j].size(); l++) {
666                                         
667                                         int numWithHigherRank = 0;
668                                         int numWithLowerRank = 0;
669                                         float thisrank = otus[l].score;
670                                         
671                                         for (int u = l+1; u < scores[j].size(); u++) {
672                                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
673                                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
674                                                 count++;
675                                         }
676                                         
677                                         numCoor += numWithHigherRank;
678                                         numDisCoor += numWithLowerRank;
679                                 }
680                                 
681                                 double p = (numCoor - numDisCoor) / (float) count;
682                  if (isnan(p) || isinf(p)) { p = 0.0; }
683                 
684                                 out << '\t' << p;
685                                 pValues[j] = p;
686                 
687                 double sig = linear.calcKendallSig(scores[j].size(), p);
688                 
689                 out << '\t' << sig;
690                         }
691                         
692                         double sum = 0;
693                         for(int k=0;k<numaxes;k++){
694                                 sum += pValues[k] * pValues[k];
695                         }
696                         out << '\t' << sqrt(sum) << endl;
697                 }
698                 
699                 return 0;
700         }
701         catch(exception& e) {
702                 m->errorOut(e, "CorrAxesCommand", "calcKendall");
703                 exit(1);
704         }
705 }
706 //**********************************************************************************************************************
707 int CorrAxesCommand::getSharedFloat(InputData* input){
708         try {
709                 lookupFloat = input->getSharedRAbundFloatVectors();
710                 string lastLabel = lookupFloat[0]->getLabel();
711                 
712                 if (label == "") { label = lastLabel;  return 0; }
713                 
714                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
715                 set<string> labels; labels.insert(label);
716                 set<string> processedLabels;
717                 set<string> userLabels = labels;
718                 
719                 //as long as you are not at the end of the file or done wih the lines you want
720                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
721                         
722                         if (m->control_pressed) {  return 0;  }
723                         
724                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
725                                 processedLabels.insert(lookupFloat[0]->getLabel());
726                                 userLabels.erase(lookupFloat[0]->getLabel());
727                                 break;
728                         }
729                         
730                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
731                                 string saveLabel = lookupFloat[0]->getLabel();
732                                 
733                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
734                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
735                                 
736                                 processedLabels.insert(lookupFloat[0]->getLabel());
737                                 userLabels.erase(lookupFloat[0]->getLabel());
738                                 
739                                 //restore real lastlabel to save below
740                                 lookupFloat[0]->setLabel(saveLabel);
741                                 break;
742                         }
743                         
744                         lastLabel = lookupFloat[0]->getLabel();                 
745                         
746                         //get next line to process
747                         //prevent memory leak
748                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
749                         lookupFloat = input->getSharedRAbundFloatVectors();
750                 }
751                 
752                 
753                 if (m->control_pressed) { return 0;  }
754                 
755                 //output error messages about any remaining user labels
756                 set<string>::iterator it;
757                 bool needToRun = false;
758                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
759                         m->mothurOut("Your file does not include the label " + *it); 
760                         if (processedLabels.count(lastLabel) != 1) {
761                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
762                                 needToRun = true;
763                         }else {
764                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
765                         }
766                 }
767                 
768                 //run last label if you need to
769                 if (needToRun == true)  {
770                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
771                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
772                 }       
773                 
774                 return 0;
775         }
776         catch(exception& e) {
777                 m->errorOut(e, "CorrAxesCommand", "getSharedFloat");    
778                 exit(1);
779         }
780 }
781 //**********************************************************************************************************************
782 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
783         try {
784                 
785                 vector<SharedRAbundFloatVector*> newLookup;
786                 for (int i = 0; i < thislookup.size(); i++) {
787                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
788                         temp->setLabel(thislookup[i]->getLabel());
789                         temp->setGroup(thislookup[i]->getGroup());
790                         newLookup.push_back(temp);
791                 }
792                 
793                 //for each bin
794                 vector<string> newBinLabels;
795                 string snumBins = toString(thislookup[0]->getNumBins());
796                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
797                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
798                         
799                         //look at each sharedRabund and make sure they are not all zero
800                         bool allZero = true;
801                         for (int j = 0; j < thislookup.size(); j++) {
802                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
803                         }
804                         
805                         //if they are not all zero add this bin
806                         if (!allZero) {
807                                 for (int j = 0; j < thislookup.size(); j++) {
808                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
809                                 }
810                                 
811                                 //if there is a bin label use it otherwise make one
812                                 string binLabel = "Otu";
813                                 string sbinNumber = toString(i+1);
814                                 if (sbinNumber.length() < snumBins.length()) { 
815                                         int diff = snumBins.length() - sbinNumber.length();
816                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
817                                 }
818                                 binLabel += sbinNumber; 
819                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
820                                 
821                                 newBinLabels.push_back(binLabel);
822                         }
823                 }
824                 
825                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
826                 
827                 thislookup = newLookup;
828                 m->currentBinLabels = newBinLabels;
829                 
830                 return 0;
831                 
832         }
833         catch(exception& e) {
834                 m->errorOut(e, "CorrAxesCommand", "eliminateZeroOTUS");
835                 exit(1);
836         }
837 }
838 /*****************************************************************/
839 map<string, vector<float> > CorrAxesCommand::readAxes(){
840         try {
841                 map<string, vector<float> > axes;
842                 
843                 ifstream in;
844                 m->openInputFile(axesfile, in);
845                 
846                 string headerLine = m->getline(in); m->gobble(in);
847                 
848                 //count the number of axis you are reading
849                 bool done = false;
850                 int count = 0;
851                 while (!done) {
852                         int pos = headerLine.find("axis");
853                         if (pos != string::npos) {
854                                 count++;
855                                 headerLine = headerLine.substr(pos+4);
856                         }else { done = true; }
857                 }
858                 
859                 if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
860                 
861                 while (!in.eof()) {
862                         
863                         if (m->control_pressed) { in.close(); return axes; }
864                         
865                         string group = "";
866                         in >> group; m->gobble(in);
867                         
868                         vector<float> thisGroupsAxes;
869                         for (int i = 0; i < count; i++) {
870                                 float temp = 0.0;
871                                 in >> temp; 
872                                 
873                                 //only save the axis we want
874                                 if (i < numaxes) {  thisGroupsAxes.push_back(temp); }
875                         }
876                         
877                         //save group if its one the user selected
878                         if (names.count(group) != 0) {
879                                 map<string, vector<float> >::iterator it = axes.find(group);
880                                 
881                                 if (it == axes.end()) {
882                                         axes[group] = thisGroupsAxes;
883                                 }else {
884                                         m->mothurOut(group + " is already in your axes file, using first definition."); m->mothurOutEndLine();
885                                 }
886                         }
887                         
888                         m->gobble(in);
889                 }
890                 in.close();
891                 
892                 return axes;
893         }
894         catch(exception& e) {
895                 m->errorOut(e, "CorrAxesCommand", "readAxes");  
896                 exit(1);
897         }
898 }
899 /*****************************************************************/
900 int CorrAxesCommand::getMetadata(){
901         try {
902                 vector<string> groupNames;
903                 
904                 ifstream in;
905                 m->openInputFile(metadatafile, in);
906                 
907                 string headerLine = m->getline(in); m->gobble(in);
908                 istringstream iss (headerLine,istringstream::in);
909                 
910                 //read the first label, because it refers to the groups
911                 string columnLabel;
912                 iss >> columnLabel; m->gobble(iss); 
913                 
914                 //save names of columns you are reading
915                 while (!iss.eof()) {
916                         iss >> columnLabel; m->gobble(iss);
917                         metadataLabels.push_back(columnLabel);
918                 }
919                 int count = metadataLabels.size();
920                         
921                 //read rest of file
922                 while (!in.eof()) {
923                         
924                         if (m->control_pressed) { in.close(); return 0; }
925                         
926                         string group = "";
927                         in >> group; m->gobble(in);
928                         groupNames.push_back(group);
929                                 
930                         SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
931                         tempLookup->setGroup(group);
932                         tempLookup->setLabel("1");
933                         
934                         for (int i = 0; i < count; i++) {
935                                 float temp = 0.0;
936                                 in >> temp; 
937                                 tempLookup->push_back(temp, group);
938                         }
939                         
940                         lookupFloat.push_back(tempLookup);
941                         
942                         m->gobble(in);
943                 }
944                 in.close();
945                 
946                 //remove any groups the user does not want, and set globaldata->groups with only valid groups
947                 SharedUtil* util;
948                 util = new SharedUtil();
949                 Groups = m->getGroups();
950                 util->setGroups(Groups, groupNames);
951                 m->setGroups(Groups);
952                 
953                 for (int i = 0; i < lookupFloat.size(); i++) {
954                         //if this sharedrabund is not from a group the user wants then delete it.
955                         if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) { 
956                                 delete lookupFloat[i]; lookupFloat[i] = NULL;
957                                 lookupFloat.erase(lookupFloat.begin()+i); 
958                                 i--; 
959                         }
960                 }
961                 
962                 delete util;
963                 
964                 return 0;
965         }
966         catch(exception& e) {
967                 m->errorOut(e, "CorrAxesCommand", "getMetadata");       
968                 exit(1);
969         }
970 }
971 /*****************************************************************/
972
973
974
975
976
977