5 * Created by westcott on 12/22/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "corraxescommand.h"
12 //**********************************************************************************************************************
13 vector<string> CorrAxesCommand::getValidParameters(){
15 string Array[] = {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "CorrAxesCommand", "getValidParameters");
24 //**********************************************************************************************************************
25 vector<string> CorrAxesCommand::getRequiredParameters(){
27 string Array[] = {"axes"};
28 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
32 m->errorOut(e, "CorrAxesCommand", "getRequiredParameters");
36 //**********************************************************************************************************************
37 CorrAxesCommand::CorrAxesCommand(){
40 //initialize outputTypes
41 vector<string> tempOutNames;
42 outputTypes["corr.axes"] = tempOutNames;
45 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
50 //**********************************************************************************************************************
51 vector<string> CorrAxesCommand::getRequiredFiles(){
53 vector<string> myArray;
57 m->errorOut(e, "CorrAxesCommand", "getRequiredFiles");
61 //**********************************************************************************************************************
62 CorrAxesCommand::CorrAxesCommand(string option) {
65 globaldata = GlobalData::getInstance();
67 //allow user to run help
68 if(option == "help") { help(); abort = true; }
71 //valid paramters for this command
72 string Array[] = {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
73 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75 OptionParser parser(option);
76 map<string, string> parameters = parser.getParameters();
78 ValidParameters validParameter;
79 map<string, string>::iterator it;
81 //check to make sure all parameters are valid for command
82 for (it = parameters.begin(); it != parameters.end(); it++) {
83 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
86 vector<string> tempOutNames;
87 outputTypes["corr.axes"] = tempOutNames;
89 //if the user changes the input directory command factory will send this info to us in the output parameter
90 string inputDir = validParameter.validFile(parameters, "inputdir", false);
91 if (inputDir == "not found"){ inputDir = ""; }
94 it = parameters.find("axes");
95 //user has given a template file
96 if(it != parameters.end()){
97 path = m->hasPath(it->second);
98 //if the user has not given a path then, add inputdir. else leave path alone.
99 if (path == "") { parameters["axes"] = inputDir + it->second; }
102 it = parameters.find("shared");
103 //user has given a template file
104 if(it != parameters.end()){
105 path = m->hasPath(it->second);
106 //if the user has not given a path then, add inputdir. else leave path alone.
107 if (path == "") { parameters["shared"] = inputDir + it->second; }
110 it = parameters.find("relabund");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["relabund"] = inputDir + it->second; }
118 it = parameters.find("metadata");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["metadata"] = inputDir + it->second; }
128 //check for required parameters
129 axesfile = validParameter.validFile(parameters, "axes", true);
130 if (axesfile == "not open") { abort = true; }
131 else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command."); m->mothurOutEndLine(); abort = true; }
133 sharedfile = validParameter.validFile(parameters, "shared", true);
134 if (sharedfile == "not open") { abort = true; }
135 else if (sharedfile == "not found") { sharedfile = ""; }
136 else { inputFileName = sharedfile; }
138 relabundfile = validParameter.validFile(parameters, "relabund", true);
139 if (relabundfile == "not open") { abort = true; }
140 else if (relabundfile == "not found") { relabundfile = ""; }
141 else { inputFileName = relabundfile; }
143 metadatafile = validParameter.validFile(parameters, "metadata", true);
144 if (metadatafile == "not open") { abort = true; }
145 else if (metadatafile == "not found") { metadatafile = ""; }
147 groups = validParameter.validFile(parameters, "groups", false);
148 if (groups == "not found") { groups = ""; pickedGroups = false; }
151 m->splitAtDash(groups, Groups);
153 globaldata->Groups = Groups;
155 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputFileName); }
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
160 if ((relabundfile == "") && (sharedfile == "")) { m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true; }
162 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; }
165 temp = validParameter.validFile(parameters, "numaxes", false); if (temp == "not found"){ temp = "3"; }
166 convert(temp, numaxes);
168 method = validParameter.validFile(parameters, "method", false); if (method == "not found"){ method = "pearson"; }
170 if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
174 catch(exception& e) {
175 m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
179 //**********************************************************************************************************************
181 void CorrAxesCommand::help(){
185 catch(exception& e) {
186 m->errorOut(e, "CorrAxesCommand", "help");
191 //**********************************************************************************************************************
193 CorrAxesCommand::~CorrAxesCommand(){}
195 //**********************************************************************************************************************
197 int CorrAxesCommand::execute(){
200 if (abort == true) { return 0; }
202 /*************************************************************************************/
203 // use smart distancing to get right sharedRabund and convert to relabund if needed //
204 /************************************************************************************/
205 if (sharedfile != "") {
207 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
208 if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; }
210 //fills lookupFloat with relative abundance values from lookup
213 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
216 if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; }
217 if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
219 if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
222 if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; }
224 //this is for a sanity check to make sure the axes file and shared file match
225 for (int i = 0; i < lookupFloat.size(); i++) { names.insert(lookupFloat[i]->getGroup()); }
227 /*************************************************************************************/
228 // read axes file and check for file mismatches with shared or relabund file //
229 /************************************************************************************/
232 map<string, vector<float> > axes = readAxes();
234 if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; }
236 //sanity check, the read only adds groups that are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone
237 if (axes.size() != lookupFloat.size()) {
238 map<string, vector<float> >::iterator it;
239 for (int i = 0; i < lookupFloat.size(); i++) {
240 it = axes.find(lookupFloat[i]->getGroup());
241 if (it == axes.end()) { m->mothurOut(lookupFloat[i]->getGroup() + " is in your shared of relabund file but not in your axes file, please correct."); m->mothurOutEndLine(); }
243 m->control_pressed = true;
246 if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; }
248 /*************************************************************************************/
249 // calc the r values //
250 /************************************************************************************/
252 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + "corr.axes";
253 outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
255 m->openOutputFile(outputFileName, out);
256 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
260 for (int i = 0; i < numaxes; i++) { out << "axis" << (i+1) << '\t'; }
263 if (method == "pearson") { calcPearson(axes, out); }
264 else if (method == "spearman") { calcSpearman(axes, out); }
265 //else if (method == "kendall") { calcKendal(axes, out); }
266 //else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
269 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
271 if (m->control_pressed) { return 0; }
273 m->mothurOutEndLine();
274 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
275 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
276 m->mothurOutEndLine();
280 catch(exception& e) {
281 m->errorOut(e, "CorrAxesCommand", "execute");
285 //**********************************************************************************************************************
286 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
289 //find average of each axis - X
290 vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
291 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
292 vector<float> temp = it->second;
293 for (int i = 0; i < temp.size(); i++) {
294 averageAxes[i] += temp[i];
298 for (int i = 0; i < averageAxes.size(); i++) { averageAxes[i] = averageAxes[i] / (float) axes.size(); }
301 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
305 //find the averages this otu - Y
307 for (int j = 0; j < lookupFloat.size(); j++) {
308 sumOtu += lookupFloat[j]->getAbundance(i);
310 float Ybar = sumOtu / (float) lookupFloat.size();
312 //find r value for each axis
313 for (int k = 0; k < averageAxes.size(); k++) {
316 double numerator = 0.0;
317 double denomTerm1 = 0.0;
318 double denomTerm2 = 0.0;
319 for (int j = 0; j < lookupFloat.size(); j++) {
320 float Yi = lookupFloat[j]->getAbundance(i);
321 float Xi = axes[lookupFloat[j]->getGroup()][k];
323 numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
324 denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
325 denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
328 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
330 r = numerator / denom;
340 catch(exception& e) {
341 m->errorOut(e, "CorrAxesCommand", "calcPearson");
345 //**********************************************************************************************************************
346 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
349 //find average of each axis - X
350 vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
351 for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
352 vector<float> temp = it->second;
353 for (int i = 0; i < temp.size(); i++) {
354 averageAxes[i] += temp[i];
358 for (int i = 0; i < averageAxes.size(); i++) { averageAxes[i] = averageAxes[i] / (float) axes.size(); }
361 for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
365 //find the averages this otu - Y
367 for (int j = 0; j < lookupFloat.size(); j++) {
368 sumOtu += lookupFloat[j]->getAbundance(i);
370 float Ybar = sumOtu / (float) lookupFloat.size();
372 //find r value for each axis
373 for (int k = 0; k < averageAxes.size(); k++) {
376 double numerator = 0.0;
377 double denomTerm1 = 0.0;
378 double denomTerm2 = 0.0;
379 for (int j = 0; j < lookupFloat.size(); j++) {
380 float Yi = lookupFloat[j]->getAbundance(i);
381 float Xi = axes[lookupFloat[j]->getGroup()][k];
383 numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
384 denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
385 denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
388 double denom = (sqrt(denomTerm1 * denomTerm2));
390 r = numerator / denom;
400 catch(exception& e) {
401 m->errorOut(e, "CorrAxesCommand", "calcSpearman");
405 //**********************************************************************************************************************
406 int CorrAxesCommand::getShared(){
408 InputData* input = new InputData(sharedfile, "sharedfile");
409 lookup = input->getSharedRAbundVectors();
410 string lastLabel = lookup[0]->getLabel();
412 if (label == "") { label = lastLabel; delete input; return 0; }
414 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
415 set<string> labels; labels.insert(label);
416 set<string> processedLabels;
417 set<string> userLabels = labels;
419 //as long as you are not at the end of the file or done wih the lines you want
420 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
421 if (m->control_pressed) { delete input; return 0; }
423 if(labels.count(lookup[0]->getLabel()) == 1){
424 processedLabels.insert(lookup[0]->getLabel());
425 userLabels.erase(lookup[0]->getLabel());
429 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
430 string saveLabel = lookup[0]->getLabel();
432 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
433 lookup = input->getSharedRAbundVectors(lastLabel);
435 processedLabels.insert(lookup[0]->getLabel());
436 userLabels.erase(lookup[0]->getLabel());
438 //restore real lastlabel to save below
439 lookup[0]->setLabel(saveLabel);
443 lastLabel = lookup[0]->getLabel();
445 //get next line to process
446 //prevent memory leak
447 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
448 lookup = input->getSharedRAbundVectors();
452 if (m->control_pressed) { delete input; return 0; }
454 //output error messages about any remaining user labels
455 set<string>::iterator it;
456 bool needToRun = false;
457 for (it = userLabels.begin(); it != userLabels.end(); it++) {
458 m->mothurOut("Your file does not include the label " + *it);
459 if (processedLabels.count(lastLabel) != 1) {
460 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
463 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
467 //run last label if you need to
468 if (needToRun == true) {
469 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
470 lookup = input->getSharedRAbundVectors(lastLabel);
476 catch(exception& e) {
477 m->errorOut(e, "CorrAxesCommand", "getShared");
481 //**********************************************************************************************************************
482 int CorrAxesCommand::getSharedFloat(){
484 InputData* input = new InputData(relabundfile, "relabund");
485 lookupFloat = input->getSharedRAbundFloatVectors();
486 string lastLabel = lookupFloat[0]->getLabel();
488 if (label == "") { label = lastLabel; delete input; return 0; }
490 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
491 set<string> labels; labels.insert(label);
492 set<string> processedLabels;
493 set<string> userLabels = labels;
495 //as long as you are not at the end of the file or done wih the lines you want
496 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
498 if (m->control_pressed) { delete input; return 0; }
500 if(labels.count(lookupFloat[0]->getLabel()) == 1){
501 processedLabels.insert(lookupFloat[0]->getLabel());
502 userLabels.erase(lookupFloat[0]->getLabel());
506 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
507 string saveLabel = lookupFloat[0]->getLabel();
509 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
510 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
512 processedLabels.insert(lookupFloat[0]->getLabel());
513 userLabels.erase(lookupFloat[0]->getLabel());
515 //restore real lastlabel to save below
516 lookupFloat[0]->setLabel(saveLabel);
520 lastLabel = lookupFloat[0]->getLabel();
522 //get next line to process
523 //prevent memory leak
524 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
525 lookupFloat = input->getSharedRAbundFloatVectors();
529 if (m->control_pressed) { delete input; return 0; }
531 //output error messages about any remaining user labels
532 set<string>::iterator it;
533 bool needToRun = false;
534 for (it = userLabels.begin(); it != userLabels.end(); it++) {
535 m->mothurOut("Your file does not include the label " + *it);
536 if (processedLabels.count(lastLabel) != 1) {
537 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
540 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
544 //run last label if you need to
545 if (needToRun == true) {
546 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }
547 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
553 catch(exception& e) {
554 m->errorOut(e, "CorrAxesCommand", "getSharedFloat");
558 /*****************************************************************/
559 int CorrAxesCommand::convertToRelabund(){
562 vector<SharedRAbundFloatVector*> newLookup;
563 for (int i = 0; i < lookup.size(); i++) {
564 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
565 temp->setLabel(lookup[i]->getLabel());
566 temp->setGroup(lookup[i]->getGroup());
567 newLookup.push_back(temp);
570 for (int i = 0; i < lookup.size(); i++) {
572 for (int j = 0; j < lookup[i]->getNumBins(); j++) {
574 if (m->control_pressed) { return 0; }
576 int abund = lookup[i]->getAbundance(j);
578 float relabund = abund / (float) lookup[i]->getNumSeqs();
580 newLookup[i]->push_back(relabund, lookup[i]->getGroup());
584 if (pickedGroups) { eliminateZeroOTUS(newLookup); }
586 lookupFloat = newLookup;
590 catch(exception& e) {
591 m->errorOut(e, "CorrAxesCommand", "convertToRelabund");
595 //**********************************************************************************************************************
596 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
599 vector<SharedRAbundFloatVector*> newLookup;
600 for (int i = 0; i < thislookup.size(); i++) {
601 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
602 temp->setLabel(thislookup[i]->getLabel());
603 temp->setGroup(thislookup[i]->getGroup());
604 newLookup.push_back(temp);
608 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
609 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
611 //look at each sharedRabund and make sure they are not all zero
613 for (int j = 0; j < thislookup.size(); j++) {
614 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
617 //if they are not all zero add this bin
619 for (int j = 0; j < thislookup.size(); j++) {
620 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
625 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
627 thislookup = newLookup;
632 catch(exception& e) {
633 m->errorOut(e, "CorrAxesCommand", "eliminateZeroOTUS");
637 /*****************************************************************/
638 map<string, vector<float> > CorrAxesCommand::readAxes(){
640 map<string, vector<float> > axes;
643 m->openInputFile(axesfile, in);
645 string headerLine = m->getline(in); m->gobble(in);
647 //count the number of axis you are reading
651 int pos = headerLine.find("axis");
652 if (pos != string::npos) {
654 headerLine = headerLine.substr(pos+4);
655 }else { done = true; }
660 if (m->control_pressed) { in.close(); return axes; }
663 in >> group; m->gobble(in);
665 vector<float> thisGroupsAxes;
666 for (int i = 0; i < count; i++) {
670 //only save the axis we want
671 if (i < numaxes) { thisGroupsAxes.push_back(temp); }
674 //save group if its one the user selected
675 if (names.count(group) != 0) {
676 map<string, vector<float> >::iterator it = axes.find(group);
678 if (it == axes.end()) {
679 axes[group] = thisGroupsAxes;
681 m->mothurOut(group + " is already in your axes file, using first definition."); m->mothurOutEndLine();
691 catch(exception& e) {
692 m->errorOut(e, "CorrAxesCommand", "convertToRelabund");
696 /*****************************************************************/