]> git.donarmstrong.com Git - mothur.git/blob - corraxescommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / corraxescommand.cpp
1 /*
2  *  corraxescommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "corraxescommand.h"
11 #include "sharedutilities.h"
12 #include "linearalgebra.h"
13
14 //**********************************************************************************************************************
15 vector<string> CorrAxesCommand::setParameters(){        
16         try {
17                 CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paxes);
18                 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
19                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
20                 CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
21                 CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "",false,false); parameters.push_back(pnumaxes);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24                 CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "CorrAxesCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string CorrAxesCommand::getHelpString(){        
39         try {
40                 string helpString = "";
41                 helpString += "The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n";
42                 helpString += "The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label.  The shared, relabund or metadata and axes parameters are required.  If shared is given the relative abundance is calculated.\n";
43                 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
44                 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
45                 helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
46                 helpString += "The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n";
47                 helpString += "The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n";
48                 helpString += "Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n";
49                 helpString += "The corr.axes command outputs a .corr.axes file.\n";
50                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
51                 return helpString;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "CorrAxesCommand", "getHelpString");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 string CorrAxesCommand::getOutputFileNameTag(string type, string inputName=""){ 
60         try {
61         string outputFileName = "";
62                 map<string, vector<string> >::iterator it;
63         
64         //is this a type this command creates
65         it = outputTypes.find(type);
66         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
67         else {
68             if (type == "corraxes") {  outputFileName =  "corr.axes"; }
69             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
70         }
71         return outputFileName;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "CorrAxesCommand", "getOutputFileNameTag");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 CorrAxesCommand::CorrAxesCommand(){     
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["corraxes"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 CorrAxesCommand::CorrAxesCommand(string option)  {
93         try {
94                 abort = false; calledHelp = false;   
95                 
96                 //allow user to run help
97                 if(option == "help") { help(); abort = true; calledHelp = true; }
98                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
99                 
100                 else {
101                         vector<string> myArray = setParameters();
102                         
103                         OptionParser parser(option);
104                         map<string, string> parameters = parser.getParameters();
105                         
106                         ValidParameters validParameter;
107                         map<string, string>::iterator it;
108                         
109                         //check to make sure all parameters are valid for command
110                         for (it = parameters.begin(); it != parameters.end(); it++) { 
111                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
112                         }
113                         
114                         vector<string> tempOutNames;
115                         outputTypes["corraxes"] = tempOutNames;
116                         
117                         //if the user changes the input directory command factory will send this info to us in the output parameter 
118                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
119                         if (inputDir == "not found"){   inputDir = "";          }
120                         else {
121                                 string path;
122                                 it = parameters.find("axes");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
128                                 }
129                                 
130                                 it = parameters.find("shared");
131                                 //user has given a template file
132                                 if(it != parameters.end()){ 
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
136                                 }
137                                 
138                                 it = parameters.find("relabund");
139                                 //user has given a template file
140                                 if(it != parameters.end()){ 
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
144                                 }
145                                 
146                                 it = parameters.find("metadata");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
152                                 }
153                         }
154                         
155                         
156                         //check for required parameters
157                         axesfile = validParameter.validFile(parameters, "axes", true);
158                         if (axesfile == "not open") { abort = true; }
159                         else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command."); m->mothurOutEndLine(); abort = true;  }     
160                         
161                         sharedfile = validParameter.validFile(parameters, "shared", true);
162                         if (sharedfile == "not open") { abort = true; }
163                         else if (sharedfile == "not found") { sharedfile = ""; }
164                         else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
165                         
166                         relabundfile = validParameter.validFile(parameters, "relabund", true);
167                         if (relabundfile == "not open") { abort = true; }
168                         else if (relabundfile == "not found") { relabundfile = ""; }
169                         else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
170                         
171                         metadatafile = validParameter.validFile(parameters, "metadata", true);
172                         if (metadatafile == "not open") { abort = true; }
173                         else if (metadatafile == "not found") { metadatafile = ""; }
174                         else { inputFileName = metadatafile; }
175                         
176                         groups = validParameter.validFile(parameters, "groups", false);                 
177                         if (groups == "not found") { groups = "";  pickedGroups = false;  }
178                         else { 
179                                 pickedGroups = true;
180                                 m->splitAtDash(groups, Groups); 
181                         }                       
182                         m->setGroups(Groups);
183                         
184                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputFileName);  }
185                         
186                         label = validParameter.validFile(parameters, "label", false);                   
187                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
188                         
189                         if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) { 
190                                 //is there are current file available for any of these?
191                                 //give priority to shared, then relabund
192                                 //if there is a current shared file, use it
193                                 sharedfile = m->getSharedFile(); 
194                                 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
195                                 else { 
196                                         relabundfile = m->getRelAbundFile(); 
197                                         if (relabundfile != "") { inputFileName = relabundfile;  m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
198                                         else { 
199                                                 m->mothurOut("You must provide either a shared, relabund, or metadata file."); m->mothurOutEndLine(); abort = true; 
200                                         }
201                                 }
202                         }       
203                         
204                         if (metadatafile != "") {
205                                 if ((relabundfile != "") || (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
206                         }else {
207                                 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true;  }
208                         }
209                         string temp;
210                         temp = validParameter.validFile(parameters, "numaxes", false);          if (temp == "not found"){       temp = "3";                             }
211                         m->mothurConvert(temp, numaxes); 
212                         
213                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "pearson";             }
214                         
215                         if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
216                         
217                 }
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");           
221                 exit(1);
222         }
223 }
224 //**********************************************************************************************************************
225
226 int CorrAxesCommand::execute(){
227         try {
228                 
229                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
230                 
231                 /*************************************************************************************/
232                 // use smart distancing to get right sharedRabund and convert to relabund if needed  //
233                 /************************************************************************************/
234                 if (sharedfile != "") {  
235                         InputData* input = new InputData(sharedfile, "sharedfile");
236                         getSharedFloat(input); 
237                         delete input;
238                         
239                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
240                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
241                         
242                 }else if (relabundfile != "") { 
243                         InputData* input = new InputData(relabundfile, "relabund");
244                         getSharedFloat(input); 
245                         delete input;
246                         
247                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
248                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
249                         
250                 }else if (metadatafile != "") { 
251                         getMetadata();  //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later
252                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
253                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading metadata file."); m->mothurOutEndLine(); return 0; }
254                         
255                         if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
256                         
257                 }else { m->mothurOut("[ERROR]: no file given."); m->mothurOutEndLine(); return 0; }
258                 
259                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
260                 
261                 //this is for a sanity check to make sure the axes file and shared file match
262                 for (int i = 0; i < lookupFloat.size(); i++) { names.insert(lookupFloat[i]->getGroup()); }
263                 
264                 /*************************************************************************************/
265                 // read axes file and check for file mismatches with shared or relabund file         //
266                 /************************************************************************************/
267                 
268                 //read axes file
269                 map<string, vector<float> > axes = readAxes();
270                 
271                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
272                 
273                 //sanity check, the read only adds groups that are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone
274                 if (axes.size() != lookupFloat.size()) { 
275                         map<string, vector<float> >::iterator it;
276                         for (int i = 0; i < lookupFloat.size(); i++) {
277                                 it = axes.find(lookupFloat[i]->getGroup());
278                                 if (it == axes.end()) { m->mothurOut(lookupFloat[i]->getGroup() + " is in your shared of relabund file but not in your axes file, please correct."); m->mothurOutEndLine(); }
279                         }
280                         m->control_pressed = true;
281                 }
282                 
283                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
284                 
285                 /*************************************************************************************/
286                 // calc the r values                                                                                                                            //
287                 /************************************************************************************/
288                 
289                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + "." + getOutputFileNameTag("corraxes");
290                 outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);       
291                 ofstream out;
292                 m->openOutputFile(outputFileName, out);
293                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
294                 
295                 //output headings
296                 if (metadatafile == "") {  out << "OTU";        }
297                 else {  out << "Feature";                                               }
298
299                 for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1) << "\tp-value"; }
300                 out << "\tlength" << endl;
301                 
302                 if (method == "pearson")                {  calcPearson(axes, out);      }
303                 else if (method == "spearman")  {  calcSpearman(axes, out); }
304                 else if (method == "kendall")   {  calcKendall(axes, out);      }
305                 else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
306                 
307                 out.close();
308                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
309                 
310                 if (m->control_pressed) {  return 0; }
311
312                 m->mothurOutEndLine();
313                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
314                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
315                 m->mothurOutEndLine();
316                 
317                 return 0;
318         }
319         catch(exception& e) {
320                 m->errorOut(e, "CorrAxesCommand", "execute");   
321                 exit(1);
322         }
323 }
324 //**********************************************************************************************************************
325 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
326    try {
327            
328        LinearAlgebra linear;
329        
330            //find average of each axis - X
331            vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
332            for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
333                    vector<float> temp = it->second;
334                    for (int i = 0; i < temp.size(); i++) {
335                            averageAxes[i] += temp[i];  
336                    }
337            }
338            
339            for (int i = 0; i < averageAxes.size(); i++) {  averageAxes[i] = averageAxes[i] / (float) axes.size(); }
340            
341            //for each otu
342            for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
343                    
344                    if (metadatafile == "") {  out << m->currentBinLabels[i];    }
345                    else {  out << metadataLabels[i];            }
346                                    
347                    //find the averages this otu - Y
348                    float sumOtu = 0.0;
349                    for (int j = 0; j < lookupFloat.size(); j++) {
350                            sumOtu += lookupFloat[j]->getAbundance(i);
351                    }
352                    float Ybar = sumOtu / (float) lookupFloat.size();
353                    
354                    vector<float> rValues(averageAxes.size());
355
356                    //find r value for each axis
357                    for (int k = 0; k < averageAxes.size(); k++) {
358                            
359                            double r = 0.0;
360                            double numerator = 0.0;
361                            double denomTerm1 = 0.0;
362                            double denomTerm2 = 0.0;
363                            for (int j = 0; j < lookupFloat.size(); j++) {
364                                    float Yi = lookupFloat[j]->getAbundance(i);
365                                    float Xi = axes[lookupFloat[j]->getGroup()][k];
366                                    
367                                    numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
368                                    denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
369                                    denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
370                            }
371                            
372                            double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
373                            
374                            r = numerator / denom;
375                
376                if (isnan(r) || isinf(r)) { r = 0.0; }
377                
378                            rValues[k] = r;
379                            out << '\t' << r; 
380                
381                double sig = linear.calcPearsonSig(lookupFloat.size(), r);
382                
383                out << '\t' << sig;
384                    }
385                    
386                    double sum = 0;
387                    for(int k=0;k<rValues.size();k++){
388                            sum += rValues[k] * rValues[k];
389                    }
390                    out << '\t' << sqrt(sum) << endl;
391            }
392                    
393            return 0;
394    }
395    catch(exception& e) {
396            m->errorOut(e, "CorrAxesCommand", "calcPearson");
397            exit(1);
398    }
399 }
400 //**********************************************************************************************************************
401 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
402         try {
403                 
404         LinearAlgebra linear;
405         vector<double> sf; 
406         
407                 //format data
408                 vector< map<float, int> > tableX; tableX.resize(numaxes);
409                 map<float, int>::iterator itTable;
410                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
411                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
412                         vector<float> temp = it->second;
413                         for (int i = 0; i < temp.size(); i++) {
414                                 spearmanRank member(it->first, temp[i]);
415                                 scores[i].push_back(member);  
416                                 
417                                 //count number of repeats
418                                 itTable = tableX[i].find(temp[i]);
419                                 if (itTable == tableX[i].end()) { 
420                                         tableX[i][temp[i]] = 1;
421                                 }else {
422                                         tableX[i][temp[i]]++;
423                                 }
424                         }
425                 }
426                 
427                 //calc LX
428                 //for each axis
429                 vector<double> Lx; Lx.resize(numaxes, 0.0);
430                 for (int i = 0; i < numaxes; i++) {
431                         for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) {
432                                 double tx = (double) itTable->second;
433                                 Lx[i] += ((pow(tx, 3.0) - tx) / 12.0);
434                         }
435                 }
436                 
437                 //sort each axis
438                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
439                 
440                 //find ranks of xi in each axis
441                 map<string, vector<float> > rankAxes;
442                 for (int i = 0; i < numaxes; i++) {
443                         
444                         vector<spearmanRank> ties;
445                         int rankTotal = 0;
446             double sfTemp = 0.0;
447                         for (int j = 0; j < scores[i].size(); j++) {
448                                 rankTotal += (j+1);
449                                 ties.push_back(scores[i][j]);
450                                 
451                                 if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead
452                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
453
454                                                 for (int k = 0; k < ties.size(); k++) {
455                                                         float thisrank = rankTotal / (float) ties.size();
456                                                         rankAxes[ties[k].name].push_back(thisrank);
457                                                 }
458                         int t = ties.size();
459                         sfTemp += (t*t*t-t);
460                                                 ties.clear();
461                                                 rankTotal = 0;
462                                         }
463                                 }else { // you are the last one
464                                         
465                                         for (int k = 0; k < ties.size(); k++) {
466                                                 float thisrank = rankTotal / (float) ties.size();
467                                                 rankAxes[ties[k].name].push_back(thisrank);
468                                                 
469                                         }
470                                 }
471                         }
472             sf.push_back(sfTemp);
473                 }
474                 
475                                 
476                 //for each otu
477                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
478                         
479                         if (metadatafile == "") {  out << m->currentBinLabels[i];       }
480                         else {  out << metadataLabels[i];               }
481                         
482                         //find the ranks of this otu - Y
483                         vector<spearmanRank> otuScores;
484                         map<float, int> tableY;
485                         for (int j = 0; j < lookupFloat.size(); j++) {
486                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
487                                 otuScores.push_back(member);
488                                 
489                                 itTable = tableY.find(member.score);
490                                 if (itTable == tableY.end()) { 
491                                         tableY[member.score] = 1;
492                                 }else {
493                                         tableY[member.score]++;
494                                 }
495                                 
496                         }
497                         
498                         //calc Ly
499                         double Ly = 0.0;
500                         for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
501                                 double ty = (double) itTable->second;
502                                 Ly += ((pow(ty, 3.0) - ty) / 12.0);
503                         }
504                         
505                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
506                         
507             double sg = 0.0;
508                         map<string, float> rankOtus;
509                         vector<spearmanRank> ties;
510                         int rankTotal = 0;
511                         for (int j = 0; j < otuScores.size(); j++) {
512                                 rankTotal += (j+1);
513                                 ties.push_back(otuScores[j]);
514                                 
515                                 if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead
516                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
517                                                 
518                                                 for (int k = 0; k < ties.size(); k++) {
519                                                         float thisrank = rankTotal / (float) ties.size();
520                                                         rankOtus[ties[k].name] = thisrank;
521                                                 }
522                         int t = ties.size();
523                         sg += (t*t*t-t);
524                                                 ties.clear();
525                                                 rankTotal = 0;
526                                         }
527                                 }else { // you are the last one
528                                         
529                                         for (int k = 0; k < ties.size(); k++) {
530                                                 float thisrank = rankTotal / (float) ties.size();
531                                                 rankOtus[ties[k].name] = thisrank;
532                                         }
533                                 }
534                         }
535                         vector<double> pValues(numaxes);        
536
537                         //calc spearman ranks for each axis for this otu
538                         for (int j = 0; j < numaxes; j++) {
539                                 
540                                 double di = 0.0;
541                                 for (int k = 0; k < lookupFloat.size(); k++) {
542                                         
543                                         float xi = rankAxes[lookupFloat[k]->getGroup()][j];
544                                         float yi = rankOtus[lookupFloat[k]->getGroup()];
545                                         
546                                         di += ((xi - yi) * (xi - yi));
547                                 }
548                                 
549                                 double p = 0.0;
550                                 
551                                 double n = (double) lookupFloat.size();
552                                 
553                                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j];
554                                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
555                                 
556                                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
557                                 
558                 if (isnan(p) || isinf(p)) { p = 0.0; }
559                 
560                                 out  << '\t' << p;
561                                 
562                                 pValues[j] = p;
563                 
564                 double sig = linear.calcSpearmanSig(n, sf[j], sg, di);            
565                 out  << '\t' << sig;
566                 
567                         }
568
569                         double sum = 0;
570                         for(int k=0;k<numaxes;k++){
571                                 sum += pValues[k] * pValues[k];
572                         }
573                         out << '\t' << sqrt(sum) << endl;
574                 }
575                 
576                 return 0;
577         }
578         catch(exception& e) {
579                 m->errorOut(e, "CorrAxesCommand", "calcSpearman");
580                 exit(1);
581         }
582 }
583 //**********************************************************************************************************************
584 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
585         try {
586                 
587         LinearAlgebra linear;
588         
589                 //format data
590                 vector< vector<spearmanRank> > scores; scores.resize(numaxes);
591                 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
592                         vector<float> temp = it->second;
593                         for (int i = 0; i < temp.size(); i++) {
594                                 spearmanRank member(it->first, temp[i]);
595                                 scores[i].push_back(member);  
596                         }
597                 }
598                 
599                 //sort each axis
600                 for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
601                 
602                 //convert scores to ranks of xi in each axis
603                 for (int i = 0; i < numaxes; i++) {
604                         
605                         vector<spearmanRank*> ties;
606                         int rankTotal = 0;
607                         for (int j = 0; j < scores[i].size(); j++) {
608                                 rankTotal += (j+1);
609                                 ties.push_back(&(scores[i][j]));
610                                 
611                                 if (j != scores[i].size()-1) { // you are not the last so you can look ahead
612                                         if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
613                                                 for (int k = 0; k < ties.size(); k++) {
614                                                         float thisrank = rankTotal / (float) ties.size();
615                                                         (*ties[k]).score = thisrank;
616                                                 }
617                                                 ties.clear();
618                                                 rankTotal = 0;
619                                         }
620                                 }else { // you are the last one
621                                         for (int k = 0; k < ties.size(); k++) {
622                                                 float thisrank = rankTotal / (float) ties.size();
623                                                 (*ties[k]).score = thisrank;
624                                         }
625                                 }
626                         }
627                 }
628                 
629                 //for each otu
630                 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
631                 
632                         if (metadatafile == "") {  out << m->currentBinLabels[i];       }
633                         else {  out << metadataLabels[i];               }
634                         
635                         //find the ranks of this otu - Y
636                         vector<spearmanRank> otuScores;
637                         for (int j = 0; j < lookupFloat.size(); j++) {
638                                 spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
639                                 otuScores.push_back(member);
640                         }
641                                                 
642                         sort(otuScores.begin(), otuScores.end(), compareSpearman);
643                         
644                         map<string, float> rankOtus;
645                         vector<spearmanRank> ties;
646                         int rankTotal = 0;
647                         for (int j = 0; j < otuScores.size(); j++) {
648                                 rankTotal += (j+1);
649                                 ties.push_back(otuScores[j]);
650                                 
651                                 if (j != otuScores.size()-1) { // you are not the last so you can look ahead
652                                         if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
653                                                 for (int k = 0; k < ties.size(); k++) {
654                                                         float thisrank = rankTotal / (float) ties.size();
655                                                         rankOtus[ties[k].name] = thisrank;
656                                                 }
657                                                 ties.clear();
658                                                 rankTotal = 0;
659                                         }
660                                 }else { // you are the last one
661                                         for (int k = 0; k < ties.size(); k++) {
662                                                 float thisrank = rankTotal / (float) ties.size();
663                                                 rankOtus[ties[k].name] = thisrank;
664                                         }
665                                 }
666                         }
667                         
668                         
669                         vector<double> pValues(numaxes);
670                         
671                         //calc spearman ranks for each axis for this otu
672                         for (int j = 0; j < numaxes; j++) {
673                         
674                                 int numCoor = 0;
675                                 int numDisCoor = 0;
676                                 
677                                 vector<spearmanRank> otus; 
678                                 vector<spearmanRank> otusTemp;
679                                 for (int l = 0; l < scores[j].size(); l++) {   
680                                         spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
681                                         otus.push_back(member);
682                                 }
683                                 
684                                 int count = 0;
685                                 for (int l = 0; l < scores[j].size(); l++) {
686                                         
687                                         int numWithHigherRank = 0;
688                                         int numWithLowerRank = 0;
689                                         float thisrank = otus[l].score;
690                                         
691                                         for (int u = l+1; u < scores[j].size(); u++) {
692                                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
693                                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
694                                                 count++;
695                                         }
696                                         
697                                         numCoor += numWithHigherRank;
698                                         numDisCoor += numWithLowerRank;
699                                 }
700                                 
701                                 double p = (numCoor - numDisCoor) / (float) count;
702                  if (isnan(p) || isinf(p)) { p = 0.0; }
703                 
704                                 out << '\t' << p;
705                                 pValues[j] = p;
706                 
707                 double sig = linear.calcKendallSig(scores[j].size(), p);
708                 
709                 out << '\t' << sig;
710                         }
711                         
712                         double sum = 0;
713                         for(int k=0;k<numaxes;k++){
714                                 sum += pValues[k] * pValues[k];
715                         }
716                         out << '\t' << sqrt(sum) << endl;
717                 }
718                 
719                 return 0;
720         }
721         catch(exception& e) {
722                 m->errorOut(e, "CorrAxesCommand", "calcKendall");
723                 exit(1);
724         }
725 }
726 //**********************************************************************************************************************
727 int CorrAxesCommand::getSharedFloat(InputData* input){
728         try {
729                 lookupFloat = input->getSharedRAbundFloatVectors();
730                 string lastLabel = lookupFloat[0]->getLabel();
731                 
732                 if (label == "") { label = lastLabel;  return 0; }
733                 
734                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
735                 set<string> labels; labels.insert(label);
736                 set<string> processedLabels;
737                 set<string> userLabels = labels;
738                 
739                 //as long as you are not at the end of the file or done wih the lines you want
740                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
741                         
742                         if (m->control_pressed) {  return 0;  }
743                         
744                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
745                                 processedLabels.insert(lookupFloat[0]->getLabel());
746                                 userLabels.erase(lookupFloat[0]->getLabel());
747                                 break;
748                         }
749                         
750                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
751                                 string saveLabel = lookupFloat[0]->getLabel();
752                                 
753                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
754                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
755                                 
756                                 processedLabels.insert(lookupFloat[0]->getLabel());
757                                 userLabels.erase(lookupFloat[0]->getLabel());
758                                 
759                                 //restore real lastlabel to save below
760                                 lookupFloat[0]->setLabel(saveLabel);
761                                 break;
762                         }
763                         
764                         lastLabel = lookupFloat[0]->getLabel();                 
765                         
766                         //get next line to process
767                         //prevent memory leak
768                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
769                         lookupFloat = input->getSharedRAbundFloatVectors();
770                 }
771                 
772                 
773                 if (m->control_pressed) { return 0;  }
774                 
775                 //output error messages about any remaining user labels
776                 set<string>::iterator it;
777                 bool needToRun = false;
778                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
779                         m->mothurOut("Your file does not include the label " + *it); 
780                         if (processedLabels.count(lastLabel) != 1) {
781                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
782                                 needToRun = true;
783                         }else {
784                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
785                         }
786                 }
787                 
788                 //run last label if you need to
789                 if (needToRun == true)  {
790                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
791                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
792                 }       
793                 
794                 return 0;
795         }
796         catch(exception& e) {
797                 m->errorOut(e, "CorrAxesCommand", "getSharedFloat");    
798                 exit(1);
799         }
800 }
801 //**********************************************************************************************************************
802 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
803         try {
804                 
805                 vector<SharedRAbundFloatVector*> newLookup;
806                 for (int i = 0; i < thislookup.size(); i++) {
807                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
808                         temp->setLabel(thislookup[i]->getLabel());
809                         temp->setGroup(thislookup[i]->getGroup());
810                         newLookup.push_back(temp);
811                 }
812                 
813                 //for each bin
814                 vector<string> newBinLabels;
815                 string snumBins = toString(thislookup[0]->getNumBins());
816                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
817                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
818                         
819                         //look at each sharedRabund and make sure they are not all zero
820                         bool allZero = true;
821                         for (int j = 0; j < thislookup.size(); j++) {
822                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
823                         }
824                         
825                         //if they are not all zero add this bin
826                         if (!allZero) {
827                                 for (int j = 0; j < thislookup.size(); j++) {
828                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
829                                 }
830                                 
831                                 //if there is a bin label use it otherwise make one
832                                 string binLabel = "Otu";
833                                 string sbinNumber = toString(i+1);
834                                 if (sbinNumber.length() < snumBins.length()) { 
835                                         int diff = snumBins.length() - sbinNumber.length();
836                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
837                                 }
838                                 binLabel += sbinNumber; 
839                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
840                                 
841                                 newBinLabels.push_back(binLabel);
842                         }
843                 }
844                 
845                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
846                 
847                 thislookup = newLookup;
848                 m->currentBinLabels = newBinLabels;
849                 
850                 return 0;
851                 
852         }
853         catch(exception& e) {
854                 m->errorOut(e, "CorrAxesCommand", "eliminateZeroOTUS");
855                 exit(1);
856         }
857 }
858 /*****************************************************************/
859 map<string, vector<float> > CorrAxesCommand::readAxes(){
860         try {
861                 map<string, vector<float> > axes;
862                 
863                 ifstream in;
864                 m->openInputFile(axesfile, in);
865                 
866                 string headerLine = m->getline(in); m->gobble(in);
867                 
868                 //count the number of axis you are reading
869                 bool done = false;
870                 int count = 0;
871                 while (!done) {
872                         int pos = headerLine.find("axis");
873                         if (pos != string::npos) {
874                                 count++;
875                                 headerLine = headerLine.substr(pos+4);
876                         }else { done = true; }
877                 }
878                 
879                 if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
880                 
881                 while (!in.eof()) {
882                         
883                         if (m->control_pressed) { in.close(); return axes; }
884                         
885                         string group = "";
886                         in >> group; m->gobble(in);
887                         
888                         vector<float> thisGroupsAxes;
889                         for (int i = 0; i < count; i++) {
890                                 float temp = 0.0;
891                                 in >> temp; 
892                                 
893                                 //only save the axis we want
894                                 if (i < numaxes) {  thisGroupsAxes.push_back(temp); }
895                         }
896                         
897                         //save group if its one the user selected
898                         if (names.count(group) != 0) {
899                                 map<string, vector<float> >::iterator it = axes.find(group);
900                                 
901                                 if (it == axes.end()) {
902                                         axes[group] = thisGroupsAxes;
903                                 }else {
904                                         m->mothurOut(group + " is already in your axes file, using first definition."); m->mothurOutEndLine();
905                                 }
906                         }
907                         
908                         m->gobble(in);
909                 }
910                 in.close();
911                 
912                 return axes;
913         }
914         catch(exception& e) {
915                 m->errorOut(e, "CorrAxesCommand", "readAxes");  
916                 exit(1);
917         }
918 }
919 /*****************************************************************/
920 int CorrAxesCommand::getMetadata(){
921         try {
922                 vector<string> groupNames;
923                 
924                 ifstream in;
925                 m->openInputFile(metadatafile, in);
926                 
927                 string headerLine = m->getline(in); m->gobble(in);
928                 istringstream iss (headerLine,istringstream::in);
929                 
930                 //read the first label, because it refers to the groups
931                 string columnLabel;
932                 iss >> columnLabel; m->gobble(iss); 
933                 
934                 //save names of columns you are reading
935                 while (!iss.eof()) {
936                         iss >> columnLabel; m->gobble(iss);
937                         metadataLabels.push_back(columnLabel);
938                 }
939                 int count = metadataLabels.size();
940                         
941                 //read rest of file
942                 while (!in.eof()) {
943                         
944                         if (m->control_pressed) { in.close(); return 0; }
945                         
946                         string group = "";
947                         in >> group; m->gobble(in);
948                         groupNames.push_back(group);
949                                 
950                         SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
951                         tempLookup->setGroup(group);
952                         tempLookup->setLabel("1");
953                         
954                         for (int i = 0; i < count; i++) {
955                                 float temp = 0.0;
956                                 in >> temp; 
957                                 tempLookup->push_back(temp, group);
958                         }
959                         
960                         lookupFloat.push_back(tempLookup);
961                         
962                         m->gobble(in);
963                 }
964                 in.close();
965                 
966                 //remove any groups the user does not want, and set globaldata->groups with only valid groups
967                 SharedUtil* util;
968                 util = new SharedUtil();
969                 Groups = m->getGroups();
970                 util->setGroups(Groups, groupNames);
971                 m->setGroups(Groups);
972                 
973                 for (int i = 0; i < lookupFloat.size(); i++) {
974                         //if this sharedrabund is not from a group the user wants then delete it.
975                         if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) { 
976                                 delete lookupFloat[i]; lookupFloat[i] = NULL;
977                                 lookupFloat.erase(lookupFloat.begin()+i); 
978                                 i--; 
979                         }
980                 }
981                 
982                 delete util;
983                 
984                 return 0;
985         }
986         catch(exception& e) {
987                 m->errorOut(e, "CorrAxesCommand", "getMetadata");       
988                 exit(1);
989         }
990 }
991 /*****************************************************************/
992
993
994
995
996
997