]> git.donarmstrong.com Git - mothur.git/blob - corraxescommand.cpp
chimera.slayer fix
[mothur.git] / corraxescommand.cpp
1 /*
2  *  corraxescommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "corraxescommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> CorrAxesCommand::getValidParameters(){   
14         try {
15                 string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metastats","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "CorrAxesCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 vector<string> CorrAxesCommand::getRequiredParameters(){        
26         try {
27                 string Array[] =  {"axes"};
28                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "CorrAxesCommand", "getRequiredParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 CorrAxesCommand::CorrAxesCommand(){     
38         try {
39                 abort = true;
40                 //initialize outputTypes
41                 vector<string> tempOutNames;
42                 outputTypes["corr.axes"] = tempOutNames;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
46                 exit(1);
47         }
48 }
49
50 //**********************************************************************************************************************
51 vector<string> CorrAxesCommand::getRequiredFiles(){     
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "CorrAxesCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 CorrAxesCommand::CorrAxesCommand(string option)  {
63         try {
64                 abort = false;
65                 globaldata = GlobalData::getInstance();
66                 
67                 //allow user to run help
68                 if(option == "help") { help(); abort = true; }
69                 
70                 else {
71                         //valid paramters for this command
72                         string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metastats","outputdir","inputdir"};
73                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
74                         
75                         OptionParser parser(option);
76                         map<string, string> parameters = parser.getParameters();
77                         
78                         ValidParameters validParameter;
79                         map<string, string>::iterator it;
80                         
81                         //check to make sure all parameters are valid for command
82                         for (it = parameters.begin(); it != parameters.end(); it++) { 
83                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
84                         }
85                         
86                         vector<string> tempOutNames;
87                         outputTypes["corr.axes"] = tempOutNames;
88                         
89                         //if the user changes the input directory command factory will send this info to us in the output parameter 
90                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
91                         if (inputDir == "not found"){   inputDir = "";          }
92                         else {
93                                 string path;
94                                 it = parameters.find("axes");
95                                 //user has given a template file
96                                 if(it != parameters.end()){ 
97                                         path = m->hasPath(it->second);
98                                         //if the user has not given a path then, add inputdir. else leave path alone.
99                                         if (path == "") {       parameters["axes"] = inputDir + it->second;             }
100                                 }
101                                 
102                                 it = parameters.find("shared");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
108                                 }
109                                 
110                                 it = parameters.find("relabund");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
116                                 }
117                         }
118                         
119                         
120                         //check for required parameters
121                         axesfile = validParameter.validFile(parameters, "axes", true);
122                         if (axesfile == "not open") { abort = true; }
123                         else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command."); m->mothurOutEndLine(); abort = true;  }     
124                         
125                         sharedfile = validParameter.validFile(parameters, "shared", true);
126                         if (sharedfile == "not open") { abort = true; }
127                         else if (sharedfile == "not found") { sharedfile = ""; }
128                         else { inputFileName = sharedfile; }
129                         
130                         relabundfile = validParameter.validFile(parameters, "relabund", true);
131                         if (relabundfile == "not open") { abort = true; }
132                         else if (relabundfile == "not found") { relabundfile = ""; }
133                         else { inputFileName = relabundfile; }
134                         
135                         groups = validParameter.validFile(parameters, "groups", false);                 
136                         if (groups == "not found") { groups = "";  pickedGroups = false;  }
137                         else { 
138                                 pickedGroups = true;
139                                 m->splitAtDash(groups, Groups); 
140                         }                       
141                         globaldata->Groups = Groups;
142                         
143                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputFileName);  }
144                         
145                         label = validParameter.validFile(parameters, "label", false);                   
146                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
147                         
148                         if ((relabundfile == "") && (sharedfile == "")) { m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true;  }
149                         
150                         if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true;  }
151                         
152                         string temp;
153                         temp = validParameter.validFile(parameters, "numaxes", false);          if (temp == "not found"){       temp = "3";                             }
154                         convert(temp, numaxes); 
155                         
156                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "pearson";             }
157                         
158                         if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
159                         
160                 }
161         }
162         catch(exception& e) {
163                 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");           
164                 exit(1);
165         }
166 }
167 //**********************************************************************************************************************
168
169 void CorrAxesCommand::help(){
170         try {
171
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "CorrAxesCommand", "help");      
175                 exit(1);
176         }
177 }
178
179 //**********************************************************************************************************************
180
181 CorrAxesCommand::~CorrAxesCommand(){}
182
183 //**********************************************************************************************************************
184
185 int CorrAxesCommand::execute(){
186         try {
187                 
188                 if (abort == true) { return 0; }
189                 
190                 /*************************************************************************************/
191                 // use smart distancing to get right sharedRabund and convert to relabund if needed  //
192                 /************************************************************************************/
193                 if (sharedfile != "") {  
194                         getShared(); 
195                         if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
196                         if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; }
197                         
198                         //fills lookupFloat with relative abundance values from lookup
199                         convertToRelabund();
200                         
201                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
202                 }else { 
203                         getSharedFloat(); 
204                         if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
205                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
206                         
207                         if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
208                 }
209                 
210                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
211                 
212                 //this is for a sanity check to make sure the axes file and shared file match
213                 for (int i = 0; i < lookupFloat.size(); i++) { names.insert(lookupFloat[i]->getGroup()); }
214                 
215                 /*************************************************************************************/
216                 // read axes file and check for file mismatches with shared or relabund file         //
217                 /************************************************************************************/
218                 
219                 //read axes file
220                 map<string, vector<float> > axes = readAxes();
221                 
222                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
223                 
224                 //sanity check, the read only adds groups that are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone
225                 if (axes.size() != lookupFloat.size()) { 
226                         map<string, vector<float> >::iterator it;
227                         for (int i = 0; i < lookupFloat.size(); i++) {
228                                 it = axes.find(lookupFloat[i]->getGroup());
229                                 if (it == axes.end()) { m->mothurOut(lookupFloat[i]->getGroup() + " is in your shared of relabund file but not in your axes file, please correct."); m->mothurOutEndLine(); }
230                         }
231                         m->control_pressed = true;
232                 }
233                 
234                 if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
235                 
236                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + "corr.axes";
237                 outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);      
238                 ofstream out;
239                 m->openOutputFile(outputFileName, out);
240                 
241                 if (method == "pearson")                {  calcPearson(axes, out);      }
242                 //else if (method == "spearman")        {  calcSpearman(axes, out); }
243                 //else if (method == "kendall") {  calcKendal(axes, out);       }
244                 //else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
245                 
246                 out.close();
247                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
248                 
249                 if (m->control_pressed) {  return 0; }
250
251                 m->mothurOutEndLine();
252                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
253                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
254                 m->mothurOutEndLine();
255                 
256                 return 0;
257         }
258         catch(exception& e) {
259                 m->errorOut(e, "CorrAxesCommand", "execute");   
260                 exit(1);
261         }
262 }
263 //**********************************************************************************************************************
264 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
265    try {
266            
267            //find average of each axis - X
268            vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
269            for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
270                    vector<float> temp = it->second;
271                    for (int i = 0; i < temp.size(); i++) {
272                            averageAxes[i] += temp[i];  
273                    }
274            }
275            
276            for (int i = 0; i < averageAxes.size(); i++) {  averageAxes[i] = averageAxes[i] / (float) axes.size(); }
277            
278            //for each otu
279            for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
280                    
281                    //find the averages this otu - Y
282                    float sumOtu = 0.0;
283                    for (int j = 0; j < lookupFloat.size(); j++) {
284                            sumOtu += lookupFloat[j]->getAbundance(i);
285                    }
286                    float Ybar = sumOtu / (float) lookupFloat.size();
287                    
288                    //find the average
289            }
290                    
291            return 0;
292    }
293    catch(exception& e) {
294            m->errorOut(e, "CorrAxesCommand", "calcPearson");
295            exit(1);
296    }
297 }
298 //**********************************************************************************************************************
299 int CorrAxesCommand::getShared(){
300         try {
301                 InputData* input = new InputData(sharedfile, "sharedfile");
302                 lookup = input->getSharedRAbundVectors();
303                 string lastLabel = lookup[0]->getLabel();
304                 
305                 if (label == "") { label = lastLabel; delete input; return 0; }
306                 
307                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
308                 set<string> labels; labels.insert(label);
309                 set<string> processedLabels;
310                 set<string> userLabels = labels;
311                 
312                 //as long as you are not at the end of the file or done wih the lines you want
313                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
314                         if (m->control_pressed) {  delete input; return 0;  }
315                         
316                         if(labels.count(lookup[0]->getLabel()) == 1){
317                                 processedLabels.insert(lookup[0]->getLabel());
318                                 userLabels.erase(lookup[0]->getLabel());
319                                 break;
320                         }
321                         
322                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
323                                 string saveLabel = lookup[0]->getLabel();
324                                 
325                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
326                                 lookup = input->getSharedRAbundVectors(lastLabel);
327                                 
328                                 processedLabels.insert(lookup[0]->getLabel());
329                                 userLabels.erase(lookup[0]->getLabel());
330                                 
331                                 //restore real lastlabel to save below
332                                 lookup[0]->setLabel(saveLabel);
333                                 break;
334                         }
335                         
336                         lastLabel = lookup[0]->getLabel();                      
337                         
338                         //get next line to process
339                         //prevent memory leak
340                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
341                         lookup = input->getSharedRAbundVectors();
342                 }
343                 
344                 
345                 if (m->control_pressed) { delete input; return 0;  }
346                 
347                 //output error messages about any remaining user labels
348                 set<string>::iterator it;
349                 bool needToRun = false;
350                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
351                         m->mothurOut("Your file does not include the label " + *it); 
352                         if (processedLabels.count(lastLabel) != 1) {
353                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
354                                 needToRun = true;
355                         }else {
356                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
357                         }
358                 }
359                 
360                 //run last label if you need to
361                 if (needToRun == true)  {
362                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
363                         lookup = input->getSharedRAbundVectors(lastLabel);
364                 }       
365                 
366                 delete input;
367                 return 0;
368         }
369         catch(exception& e) {
370                 m->errorOut(e, "CorrAxesCommand", "getShared"); 
371                 exit(1);
372         }
373 }
374 //**********************************************************************************************************************
375 int CorrAxesCommand::getSharedFloat(){
376         try {
377                 InputData* input = new InputData(relabundfile, "relabund");
378                 lookupFloat = input->getSharedRAbundFloatVectors();
379                 string lastLabel = lookupFloat[0]->getLabel();
380                 
381                 if (label == "") { label = lastLabel; delete input; return 0; }
382                 
383                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
384                 set<string> labels; labels.insert(label);
385                 set<string> processedLabels;
386                 set<string> userLabels = labels;
387                 
388                 //as long as you are not at the end of the file or done wih the lines you want
389                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
390                         
391                         if (m->control_pressed) {  delete input; return 0;  }
392                         
393                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
394                                 processedLabels.insert(lookupFloat[0]->getLabel());
395                                 userLabels.erase(lookupFloat[0]->getLabel());
396                                 break;
397                         }
398                         
399                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
400                                 string saveLabel = lookupFloat[0]->getLabel();
401                                 
402                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
403                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
404                                 
405                                 processedLabels.insert(lookupFloat[0]->getLabel());
406                                 userLabels.erase(lookupFloat[0]->getLabel());
407                                 
408                                 //restore real lastlabel to save below
409                                 lookupFloat[0]->setLabel(saveLabel);
410                                 break;
411                         }
412                         
413                         lastLabel = lookupFloat[0]->getLabel();                 
414                         
415                         //get next line to process
416                         //prevent memory leak
417                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
418                         lookupFloat = input->getSharedRAbundFloatVectors();
419                 }
420                 
421                 
422                 if (m->control_pressed) { delete input; return 0;  }
423                 
424                 //output error messages about any remaining user labels
425                 set<string>::iterator it;
426                 bool needToRun = false;
427                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
428                         m->mothurOut("Your file does not include the label " + *it); 
429                         if (processedLabels.count(lastLabel) != 1) {
430                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
431                                 needToRun = true;
432                         }else {
433                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
434                         }
435                 }
436                 
437                 //run last label if you need to
438                 if (needToRun == true)  {
439                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
440                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
441                 }       
442                 
443                 delete input;
444                 return 0;
445         }
446         catch(exception& e) {
447                 m->errorOut(e, "CorrAxesCommand", "getSharedFloat");    
448                 exit(1);
449         }
450 }
451 /*****************************************************************/
452 int CorrAxesCommand::convertToRelabund(){
453         try {
454                 
455                 vector<SharedRAbundFloatVector*> newLookup;
456                 for (int i = 0; i < lookup.size(); i++) {
457                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
458                         temp->setLabel(lookup[i]->getLabel());
459                         temp->setGroup(lookup[i]->getGroup());
460                         newLookup.push_back(temp);
461                 }
462                 
463                 for (int i = 0; i < lookup.size(); i++) {
464                         
465                         for (int j = 0; j < lookup[i]->getNumBins(); j++) {
466                                 
467                                 if (m->control_pressed) { return 0; }
468                                 
469                                 int abund = lookup[i]->getAbundance(j);
470                                 
471                                 float relabund = abund / (float) lookup[i]->getNumSeqs();
472                                 
473                                 newLookup[i]->push_back(relabund, lookup[i]->getGroup());
474                         }
475                 }
476                 
477                 if (pickedGroups) { eliminateZeroOTUS(newLookup); }
478                 
479                 lookupFloat = newLookup;
480                 
481                 return 0;
482         }
483         catch(exception& e) {
484                 m->errorOut(e, "CorrAxesCommand", "convertToRelabund"); 
485                 exit(1);
486         }
487 }
488 //**********************************************************************************************************************
489 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
490         try {
491                 
492                 vector<SharedRAbundFloatVector*> newLookup;
493                 for (int i = 0; i < thislookup.size(); i++) {
494                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
495                         temp->setLabel(thislookup[i]->getLabel());
496                         temp->setGroup(thislookup[i]->getGroup());
497                         newLookup.push_back(temp);
498                 }
499                 
500                 //for each bin
501                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
502                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
503                         
504                         //look at each sharedRabund and make sure they are not all zero
505                         bool allZero = true;
506                         for (int j = 0; j < thislookup.size(); j++) {
507                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
508                         }
509                         
510                         //if they are not all zero add this bin
511                         if (!allZero) {
512                                 for (int j = 0; j < thislookup.size(); j++) {
513                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
514                                 }
515                         }
516                 }
517                 
518                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
519                 
520                 thislookup = newLookup;
521                 
522                 return 0;
523                 
524         }
525         catch(exception& e) {
526                 m->errorOut(e, "CorrAxesCommand", "eliminateZeroOTUS");
527                 exit(1);
528         }
529 }
530 /*****************************************************************/
531 map<string, vector<float> > CorrAxesCommand::readAxes(){
532         try {
533                 map<string, vector<float> > axes;
534                 
535                 ifstream in;
536                 m->openInputFile(axesfile, in);
537                 
538                 string headerLine = m->getline(in); m->gobble(in);
539                 
540                 //count the number of axis you are reading
541                 bool done = false;
542                 int count = 0;
543                 while (!done) {
544                         int pos = headerLine.find("axis");
545                         if (pos != string::npos) {
546                                 count++;
547                                 headerLine = headerLine.substr(pos+4);
548                         }else { done = true; }
549                 }
550                 cout << "count = " << count << endl;    
551                 
552                 while (!in.eof()) {
553                         
554                         if (m->control_pressed) { in.close(); return axes; }
555                         
556                         string group = "";
557                         in >> group; m->gobble(in);
558                         
559                         vector<float> thisGroupsAxes;
560                         for (int i = 0; i < count; i++) {
561                                 float temp = 0.0;
562                                 in >> temp; 
563                                 
564                                 //only save the axis we want
565                                 if (i < numaxes) {  thisGroupsAxes.push_back(temp); }
566                         }
567                         
568                         //save group if its one the user selected
569                         if (names.count(group) != 0) {
570                                 map<string, vector<float> >::iterator it = axes.find(group);
571                                 
572                                 if (it == axes.end()) {
573                                         axes[group] = thisGroupsAxes;
574                                 }else {
575                                         m->mothurOut(group + " is already in your axes file, using first definition."); m->mothurOutEndLine();
576                                 }
577                         }
578                         
579                         m->gobble(in);
580                 }
581                 in.close();
582                 
583                 return axes;
584         }
585         catch(exception& e) {
586                 m->errorOut(e, "CorrAxesCommand", "convertToRelabund"); 
587                 exit(1);
588         }
589 }
590 /*****************************************************************/
591
592
593
594
595
596