2 * otuassociationcommand.cpp
5 * Created by westcott on 1/19/12.
6 * Copyright 2012 Schloss Lab. All rights reserved.
10 #include "otuassociationcommand.h"
11 #include "linearalgebra.h"
13 //**********************************************************************************************************************
14 vector<string> OTUAssociationCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
17 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
18 CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21 CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
30 m->errorOut(e, "OTUAssociationCommand", "setParameters");
34 //**********************************************************************************************************************
35 string OTUAssociationCommand::getHelpString(){
37 string helpString = "";
38 helpString += "The otu.association command reads a shared or relabund file and calculates the correlation coefficients between otus.\n";
39 helpString += "If you provide a metadata file, mothur will calculate te correlation bewteen the metadata and the otus.\n";
40 helpString += "The otu.association command parameters are shared, relabund, metadata, groups, method and label. The shared or relabund parameter is required.\n";
41 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
42 helpString += "The label parameter allows you to select what distances level you would like used, and are also separated by dashes.\n";
43 helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
44 helpString += "The otu.association command should be in the following format: otu.association(shared=yourSharedFile, method=yourMethod).\n";
45 helpString += "Example otu.association(shared=genus.pool.shared, method=kendall).\n";
46 helpString += "The otu.association command outputs a .otu.corr file.\n";
47 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
51 m->errorOut(e, "OTUAssociationCommand", "getHelpString");
55 //**********************************************************************************************************************
56 string OTUAssociationCommand::getOutputFileNameTag(string type, string inputName=""){
58 string outputFileName = "";
59 map<string, vector<string> >::iterator it;
61 //is this a type this command creates
62 it = outputTypes.find(type);
63 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
65 if (type == "otucorr") { outputFileName = "otu.corr"; }
66 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
68 return outputFileName;
71 m->errorOut(e, "OTUAssociationCommand", "getOutputFileNameTag");
75 //**********************************************************************************************************************
76 OTUAssociationCommand::OTUAssociationCommand(){
78 abort = true; calledHelp = true;
80 vector<string> tempOutNames;
81 outputTypes["otucorr"] = tempOutNames;
84 m->errorOut(e, "OTUAssociationCommand", "OTUAssociationCommand");
88 //**********************************************************************************************************************
89 OTUAssociationCommand::OTUAssociationCommand(string option) {
91 abort = false; calledHelp = false;
94 //allow user to run help
95 if(option == "help") { help(); abort = true; calledHelp = true; }
96 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
99 vector<string> myArray = setParameters();
101 OptionParser parser(option);
102 map<string, string> parameters = parser.getParameters();
104 ValidParameters validParameter;
105 map<string, string>::iterator it;
107 //check to make sure all parameters are valid for command
108 for (it = parameters.begin(); it != parameters.end(); it++) {
109 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
112 vector<string> tempOutNames;
113 outputTypes["otucorr"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
120 it = parameters.find("shared");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["shared"] = inputDir + it->second; }
128 it = parameters.find("relabund");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["relabund"] = inputDir + it->second; }
136 it = parameters.find("metadata");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["metadata"] = inputDir + it->second; }
146 //check for required parameters
147 sharedfile = validParameter.validFile(parameters, "shared", true);
148 if (sharedfile == "not open") { abort = true; }
149 else if (sharedfile == "not found") { sharedfile = ""; }
150 else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
152 relabundfile = validParameter.validFile(parameters, "relabund", true);
153 if (relabundfile == "not open") { abort = true; }
154 else if (relabundfile == "not found") { relabundfile = ""; }
155 else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
157 metadatafile = validParameter.validFile(parameters, "metadata", true);
158 if (metadatafile == "not open") { abort = true; metadatafile = ""; }
159 else if (metadatafile == "not found") { metadatafile = ""; }
161 groups = validParameter.validFile(parameters, "groups", false);
162 if (groups == "not found") { groups = ""; pickedGroups = false; }
165 m->splitAtDash(groups, Groups);
167 m->setGroups(Groups);
169 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputFileName); }
171 label = validParameter.validFile(parameters, "label", false);
172 if (label == "not found") { label = ""; }
174 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
175 else { allLines = 1; }
178 if ((relabundfile == "") && (sharedfile == "")) {
179 //is there are current file available for any of these?
180 //give priority to shared, then relabund
181 //if there is a current shared file, use it
182 sharedfile = m->getSharedFile();
183 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
185 relabundfile = m->getRelAbundFile();
186 if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
188 m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true;
194 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared or relabund file."); m->mothurOutEndLine(); abort = true; }
196 method = validParameter.validFile(parameters, "method", false); if (method == "not found"){ method = "pearson"; }
198 if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
202 catch(exception& e) {
203 m->errorOut(e, "OTUAssociationCommand", "OTUAssociationCommand");
207 //**********************************************************************************************************************
209 int OTUAssociationCommand::execute(){
212 if (abort == true) { if (calledHelp) { return 0; } return 2; }
214 if (metadatafile != "") { readMetadata(); }
216 //function are identical just different datatypes
217 if (sharedfile != "") { processShared(); }
218 else if (relabundfile != "") { processRelabund(); }
220 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
222 m->mothurOutEndLine();
223 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
224 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
225 m->mothurOutEndLine();
229 catch(exception& e) {
230 m->errorOut(e, "OTUAssociationCommand", "execute");
234 //**********************************************************************************************************************
235 int OTUAssociationCommand::processShared(){
237 InputData* input = new InputData(sharedfile, "sharedfile");
238 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
239 string lastLabel = lookup[0]->getLabel();
241 if (metadatafile != "") {
244 if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the shared file.\n"); m->control_pressed = true; error=true;}
246 //maybe add extra info here?? compare groups in each file??
250 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
251 set<string> processedLabels;
252 set<string> userLabels = labels;
254 //as long as you are not at the end of the file or done wih the lines you want
255 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
257 if (m->control_pressed) { delete input; return 0; }
259 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
260 processedLabels.insert(lookup[0]->getLabel());
261 userLabels.erase(lookup[0]->getLabel());
263 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
267 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
268 string saveLabel = lookup[0]->getLabel();
270 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
271 lookup = input->getSharedRAbundVectors(lastLabel);
273 processedLabels.insert(lookup[0]->getLabel());
274 userLabels.erase(lookup[0]->getLabel());
276 //restore real lastlabel to save below
277 lookup[0]->setLabel(saveLabel);
279 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
283 lastLabel = lookup[0]->getLabel();
285 //get next line to process
286 //prevent memory leak
287 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
288 lookup = input->getSharedRAbundVectors();
292 if (m->control_pressed) { delete input; return 0; }
294 //output error messages about any remaining user labels
295 set<string>::iterator it;
296 bool needToRun = false;
297 for (it = userLabels.begin(); it != userLabels.end(); it++) {
298 m->mothurOut("Your file does not include the label " + *it);
299 if (processedLabels.count(lastLabel) != 1) {
300 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
303 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
307 //run last label if you need to
308 if (needToRun == true) {
309 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
310 lookup = input->getSharedRAbundVectors(lastLabel);
312 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
320 catch(exception& e) {
321 m->errorOut(e, "OTUAssociationCommand", "processShared");
325 //**********************************************************************************************************************
326 int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
329 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + "." + getOutputFileNameTag("otucorr");
330 outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName);
333 m->openOutputFile(outputFileName, out);
334 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
337 if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
338 else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; }
341 vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
342 for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
344 LinearAlgebra linear;
345 if (metadatafile == "") {//compare otus
346 for (int i = 0; i < xy.size(); i++) {
348 for (int k = 0; k < i; k++) {
350 if (m->control_pressed) { out.close(); return 0; }
354 if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
355 else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
356 else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
357 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
359 out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
362 }else { //compare otus to metadata
363 for (int i = 0; i < xy.size(); i++) {
365 for (int k = 0; k < metadata.size(); k++) {
367 if (m->control_pressed) { out.close(); return 0; }
371 if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); }
372 else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); }
373 else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
374 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
376 out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
387 catch(exception& e) {
388 m->errorOut(e, "OTUAssociationCommand", "process");
392 //**********************************************************************************************************************
393 int OTUAssociationCommand::processRelabund(){
395 InputData* input = new InputData(relabundfile, "relabund");
396 vector<SharedRAbundFloatVector*> lookup = input->getSharedRAbundFloatVectors();
397 string lastLabel = lookup[0]->getLabel();
399 if (metadatafile != "") {
402 if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the relabund file.\n"); m->control_pressed = true; error=true;}
404 //maybe add extra info here?? compare groups in each file??
410 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
411 set<string> processedLabels;
412 set<string> userLabels = labels;
414 //as long as you are not at the end of the file or done wih the lines you want
415 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
417 if (m->control_pressed) { delete input; return 0; }
419 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
420 processedLabels.insert(lookup[0]->getLabel());
421 userLabels.erase(lookup[0]->getLabel());
423 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
427 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
428 string saveLabel = lookup[0]->getLabel();
430 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
431 lookup = input->getSharedRAbundFloatVectors(lastLabel);
433 processedLabels.insert(lookup[0]->getLabel());
434 userLabels.erase(lookup[0]->getLabel());
436 //restore real lastlabel to save below
437 lookup[0]->setLabel(saveLabel);
439 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
443 lastLabel = lookup[0]->getLabel();
445 //get next line to process
446 //prevent memory leak
447 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
448 lookup = input->getSharedRAbundFloatVectors();
452 if (m->control_pressed) { delete input; return 0; }
454 //output error messages about any remaining user labels
455 set<string>::iterator it;
456 bool needToRun = false;
457 for (it = userLabels.begin(); it != userLabels.end(); it++) {
458 m->mothurOut("Your file does not include the label " + *it);
459 if (processedLabels.count(lastLabel) != 1) {
460 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
463 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
467 //run last label if you need to
468 if (needToRun == true) {
469 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
470 lookup = input->getSharedRAbundFloatVectors(lastLabel);
472 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
480 catch(exception& e) {
481 m->errorOut(e, "OTUAssociationCommand", "processRelabund");
485 //**********************************************************************************************************************
486 int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
489 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + "." + getOutputFileNameTag("otucorr");
490 outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName);
493 m->openOutputFile(outputFileName, out);
494 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
497 if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
498 else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; }
500 vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
501 for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
503 LinearAlgebra linear;
504 if (metadatafile == "") {//compare otus
505 for (int i = 0; i < xy.size(); i++) {
507 for (int k = 0; k < i; k++) {
509 if (m->control_pressed) { out.close(); return 0; }
513 if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
514 else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
515 else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
516 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
518 out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
521 }else { //compare otus to metadata
522 for (int i = 0; i < xy.size(); i++) {
524 for (int k = 0; k < metadata.size(); k++) {
526 if (m->control_pressed) { out.close(); return 0; }
530 if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); }
531 else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); }
532 else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
533 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
535 out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
546 catch(exception& e) {
547 m->errorOut(e, "OTUAssociationCommand", "process");
551 /*****************************************************************/
552 int OTUAssociationCommand::readMetadata(){
555 m->openInputFile(metadatafile, in);
557 string headerLine = m->getline(in); m->gobble(in);
558 istringstream iss (headerLine,istringstream::in);
560 //read the first label, because it refers to the groups
562 iss >> columnLabel; m->gobble(iss);
564 //save names of columns you are reading
566 iss >> columnLabel; m->gobble(iss);
567 metadataLabels.push_back(columnLabel);
569 int count = metadataLabels.size();
574 if (m->control_pressed) { in.close(); return 0; }
577 in >> group; m->gobble(in);
579 SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
580 tempLookup->setGroup(group);
581 tempLookup->setLabel("1");
583 for (int i = 0; i < count; i++) {
586 tempLookup->push_back(temp, group);
589 metadataLookup.push_back(tempLookup);
597 catch(exception& e) {
598 m->errorOut(e, "OTUAssociationCommand", "readMetadata");
602 /*****************************************************************/
603 //eliminate groups user did not pick, remove zeroed out otus, fill metadata vector.
604 int OTUAssociationCommand::getMetadata(){
607 vector<string> mGroups = m->getGroups();
610 for (int i = 0; i < metadataLookup.size(); i++) {
611 //if this sharedrabund is not from a group the user wants then delete it.
612 if (!(m->inUsersGroups(metadataLookup[i]->getGroup(), mGroups))) {
613 delete metadataLookup[i]; metadataLookup[i] = NULL;
614 metadataLookup.erase(metadataLookup.begin()+i);
620 vector<SharedRAbundFloatVector*> newLookup;
621 for (int i = 0; i < metadataLookup.size(); i++) {
622 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
623 temp->setLabel(metadataLookup[i]->getLabel());
624 temp->setGroup(metadataLookup[i]->getGroup());
625 newLookup.push_back(temp);
629 vector<string> newBinLabels;
630 for (int i = 0; i < metadataLookup[0]->getNumBins(); i++) {
631 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
633 //look at each sharedRabund and make sure they are not all zero
635 for (int j = 0; j < metadataLookup.size(); j++) {
636 if (metadataLookup[j]->getAbundance(i) != 0) { allZero = false; break; }
639 //if they are not all zero add this bin
641 for (int j = 0; j < metadataLookup.size(); j++) {
642 newLookup[j]->push_back(metadataLookup[j]->getAbundance(i), metadataLookup[j]->getGroup());
644 newBinLabels.push_back(metadataLabels[i]);
648 metadataLabels = newBinLabels;
650 for (int j = 0; j < metadataLookup.size(); j++) { delete metadataLookup[j]; }
651 metadataLookup.clear();
653 metadata.resize(newLookup[0]->getNumBins());
654 for (int i = 0; i < newLookup[0]->getNumBins(); i++) { for (int j = 0; j < newLookup.size(); j++) { metadata[i].push_back(newLookup[j]->getAbundance(i)); } }
656 for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
660 catch(exception& e) {
661 m->errorOut(e, "OTUAssociationCommand", "getMetadata");
665 /*****************************************************************/