2 * otuassociationcommand.cpp
5 * Created by westcott on 1/19/12.
6 * Copyright 2012 Schloss Lab. All rights reserved.
10 #include "otuassociationcommand.h"
11 #include "linearalgebra.h"
13 //**********************************************************************************************************************
14 vector<string> OTUAssociationCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
17 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
18 CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21 CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
30 m->errorOut(e, "OTUAssociationCommand", "setParameters");
34 //**********************************************************************************************************************
35 string OTUAssociationCommand::getHelpString(){
37 string helpString = "";
38 helpString += "The otu.association command reads a shared or relabund file and calculates the correlation coefficients between otus.\n";
39 helpString += "If you provide a metadata file, mothur will calculate te correlation bewteen the metadata and the otus.\n";
40 helpString += "The otu.association command parameters are shared, relabund, metadata, groups, method and label. The shared or relabund parameter is required.\n";
41 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
42 helpString += "The label parameter allows you to select what distances level you would like used, and are also separated by dashes.\n";
43 helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
44 helpString += "The otu.association command should be in the following format: otu.association(shared=yourSharedFile, method=yourMethod).\n";
45 helpString += "Example otu.association(shared=genus.pool.shared, method=kendall).\n";
46 helpString += "The otu.association command outputs a .otu.corr file.\n";
47 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
51 m->errorOut(e, "OTUAssociationCommand", "getHelpString");
55 //**********************************************************************************************************************
56 OTUAssociationCommand::OTUAssociationCommand(){
58 abort = true; calledHelp = true;
60 vector<string> tempOutNames;
61 outputTypes["otu.corr"] = tempOutNames;
64 m->errorOut(e, "OTUAssociationCommand", "OTUAssociationCommand");
68 //**********************************************************************************************************************
69 OTUAssociationCommand::OTUAssociationCommand(string option) {
71 abort = false; calledHelp = false;
74 //allow user to run help
75 if(option == "help") { help(); abort = true; calledHelp = true; }
76 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
79 vector<string> myArray = setParameters();
81 OptionParser parser(option);
82 map<string, string> parameters = parser.getParameters();
84 ValidParameters validParameter;
85 map<string, string>::iterator it;
87 //check to make sure all parameters are valid for command
88 for (it = parameters.begin(); it != parameters.end(); it++) {
89 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
92 vector<string> tempOutNames;
93 outputTypes["otu.corr"] = tempOutNames;
95 //if the user changes the input directory command factory will send this info to us in the output parameter
96 string inputDir = validParameter.validFile(parameters, "inputdir", false);
97 if (inputDir == "not found"){ inputDir = ""; }
100 it = parameters.find("shared");
101 //user has given a template file
102 if(it != parameters.end()){
103 path = m->hasPath(it->second);
104 //if the user has not given a path then, add inputdir. else leave path alone.
105 if (path == "") { parameters["shared"] = inputDir + it->second; }
108 it = parameters.find("relabund");
109 //user has given a template file
110 if(it != parameters.end()){
111 path = m->hasPath(it->second);
112 //if the user has not given a path then, add inputdir. else leave path alone.
113 if (path == "") { parameters["relabund"] = inputDir + it->second; }
116 it = parameters.find("metadata");
117 //user has given a template file
118 if(it != parameters.end()){
119 path = m->hasPath(it->second);
120 //if the user has not given a path then, add inputdir. else leave path alone.
121 if (path == "") { parameters["metadata"] = inputDir + it->second; }
126 //check for required parameters
127 sharedfile = validParameter.validFile(parameters, "shared", true);
128 if (sharedfile == "not open") { abort = true; }
129 else if (sharedfile == "not found") { sharedfile = ""; }
130 else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
132 relabundfile = validParameter.validFile(parameters, "relabund", true);
133 if (relabundfile == "not open") { abort = true; }
134 else if (relabundfile == "not found") { relabundfile = ""; }
135 else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
137 metadatafile = validParameter.validFile(parameters, "metadata", true);
138 if (metadatafile == "not open") { abort = true; metadatafile = ""; }
139 else if (metadatafile == "not found") { metadatafile = ""; }
141 groups = validParameter.validFile(parameters, "groups", false);
142 if (groups == "not found") { groups = ""; pickedGroups = false; }
145 m->splitAtDash(groups, Groups);
147 m->setGroups(Groups);
149 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputFileName); }
151 label = validParameter.validFile(parameters, "label", false);
152 if (label == "not found") { label = ""; }
154 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
155 else { allLines = 1; }
158 if ((relabundfile == "") && (sharedfile == "")) {
159 //is there are current file available for any of these?
160 //give priority to shared, then relabund
161 //if there is a current shared file, use it
162 sharedfile = m->getSharedFile();
163 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
165 relabundfile = m->getRelAbundFile();
166 if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
168 m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true;
174 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared or relabund file."); m->mothurOutEndLine(); abort = true; }
176 method = validParameter.validFile(parameters, "method", false); if (method == "not found"){ method = "pearson"; }
178 if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
182 catch(exception& e) {
183 m->errorOut(e, "OTUAssociationCommand", "OTUAssociationCommand");
187 //**********************************************************************************************************************
189 int OTUAssociationCommand::execute(){
192 if (abort == true) { if (calledHelp) { return 0; } return 2; }
194 if (metadatafile != "") { readMetadata(); }
196 //function are identical just different datatypes
197 if (sharedfile != "") { processShared(); }
198 else if (relabundfile != "") { processRelabund(); }
200 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
202 m->mothurOutEndLine();
203 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
204 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
205 m->mothurOutEndLine();
209 catch(exception& e) {
210 m->errorOut(e, "OTUAssociationCommand", "execute");
214 //**********************************************************************************************************************
215 int OTUAssociationCommand::processShared(){
217 InputData* input = new InputData(sharedfile, "sharedfile");
218 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
219 string lastLabel = lookup[0]->getLabel();
221 if (metadatafile != "") {
224 if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the shared file.\n"); m->control_pressed = true; error=true;}
226 //maybe add extra info here?? compare groups in each file??
230 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
231 set<string> processedLabels;
232 set<string> userLabels = labels;
234 //as long as you are not at the end of the file or done wih the lines you want
235 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
237 if (m->control_pressed) { delete input; return 0; }
239 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
240 processedLabels.insert(lookup[0]->getLabel());
241 userLabels.erase(lookup[0]->getLabel());
243 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
247 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
248 string saveLabel = lookup[0]->getLabel();
250 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
251 lookup = input->getSharedRAbundVectors(lastLabel);
253 processedLabels.insert(lookup[0]->getLabel());
254 userLabels.erase(lookup[0]->getLabel());
256 //restore real lastlabel to save below
257 lookup[0]->setLabel(saveLabel);
259 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
263 lastLabel = lookup[0]->getLabel();
265 //get next line to process
266 //prevent memory leak
267 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
268 lookup = input->getSharedRAbundVectors();
272 if (m->control_pressed) { delete input; return 0; }
274 //output error messages about any remaining user labels
275 set<string>::iterator it;
276 bool needToRun = false;
277 for (it = userLabels.begin(); it != userLabels.end(); it++) {
278 m->mothurOut("Your file does not include the label " + *it);
279 if (processedLabels.count(lastLabel) != 1) {
280 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
283 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
287 //run last label if you need to
288 if (needToRun == true) {
289 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
290 lookup = input->getSharedRAbundVectors(lastLabel);
292 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
300 catch(exception& e) {
301 m->errorOut(e, "OTUAssociationCommand", "processShared");
305 //**********************************************************************************************************************
306 int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
309 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + ".otu.corr";
310 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
313 m->openOutputFile(outputFileName, out);
314 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
317 if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
318 else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; }
321 vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
322 for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
324 LinearAlgebra linear;
325 if (metadatafile == "") {//compare otus
326 for (int i = 0; i < xy.size(); i++) {
328 for (int k = 0; k < i; k++) {
330 if (m->control_pressed) { out.close(); return 0; }
334 if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
335 else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
336 else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
337 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
339 out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
342 }else { //compare otus to metadata
343 for (int i = 0; i < xy.size(); i++) {
345 for (int k = 0; k < metadata.size(); k++) {
347 if (m->control_pressed) { out.close(); return 0; }
351 if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); }
352 else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); }
353 else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
354 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
356 out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
367 catch(exception& e) {
368 m->errorOut(e, "OTUAssociationCommand", "process");
372 //**********************************************************************************************************************
373 int OTUAssociationCommand::processRelabund(){
375 InputData* input = new InputData(relabundfile, "relabund");
376 vector<SharedRAbundFloatVector*> lookup = input->getSharedRAbundFloatVectors();
377 string lastLabel = lookup[0]->getLabel();
379 if (metadatafile != "") {
382 if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the relabund file.\n"); m->control_pressed = true; error=true;}
384 //maybe add extra info here?? compare groups in each file??
390 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
391 set<string> processedLabels;
392 set<string> userLabels = labels;
394 //as long as you are not at the end of the file or done wih the lines you want
395 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
397 if (m->control_pressed) { delete input; return 0; }
399 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
400 processedLabels.insert(lookup[0]->getLabel());
401 userLabels.erase(lookup[0]->getLabel());
403 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
407 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
408 string saveLabel = lookup[0]->getLabel();
410 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
411 lookup = input->getSharedRAbundFloatVectors(lastLabel);
413 processedLabels.insert(lookup[0]->getLabel());
414 userLabels.erase(lookup[0]->getLabel());
416 //restore real lastlabel to save below
417 lookup[0]->setLabel(saveLabel);
419 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
423 lastLabel = lookup[0]->getLabel();
425 //get next line to process
426 //prevent memory leak
427 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
428 lookup = input->getSharedRAbundFloatVectors();
432 if (m->control_pressed) { delete input; return 0; }
434 //output error messages about any remaining user labels
435 set<string>::iterator it;
436 bool needToRun = false;
437 for (it = userLabels.begin(); it != userLabels.end(); it++) {
438 m->mothurOut("Your file does not include the label " + *it);
439 if (processedLabels.count(lastLabel) != 1) {
440 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
443 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
447 //run last label if you need to
448 if (needToRun == true) {
449 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
450 lookup = input->getSharedRAbundFloatVectors(lastLabel);
452 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
460 catch(exception& e) {
461 m->errorOut(e, "OTUAssociationCommand", "processRelabund");
465 //**********************************************************************************************************************
466 int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
469 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + ".otu.corr";
470 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
473 m->openOutputFile(outputFileName, out);
474 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
477 if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
478 else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; }
480 vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
481 for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
483 LinearAlgebra linear;
484 if (metadatafile == "") {//compare otus
485 for (int i = 0; i < xy.size(); i++) {
487 for (int k = 0; k < i; k++) {
489 if (m->control_pressed) { out.close(); return 0; }
493 if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
494 else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
495 else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
496 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
498 out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
501 }else { //compare otus to metadata
502 for (int i = 0; i < xy.size(); i++) {
504 for (int k = 0; k < metadata.size(); k++) {
506 if (m->control_pressed) { out.close(); return 0; }
510 if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); }
511 else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); }
512 else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
513 else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
515 out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
526 catch(exception& e) {
527 m->errorOut(e, "OTUAssociationCommand", "process");
531 /*****************************************************************/
532 int OTUAssociationCommand::readMetadata(){
535 m->openInputFile(metadatafile, in);
537 string headerLine = m->getline(in); m->gobble(in);
538 istringstream iss (headerLine,istringstream::in);
540 //read the first label, because it refers to the groups
542 iss >> columnLabel; m->gobble(iss);
544 //save names of columns you are reading
546 iss >> columnLabel; m->gobble(iss);
547 metadataLabels.push_back(columnLabel);
549 int count = metadataLabels.size();
554 if (m->control_pressed) { in.close(); return 0; }
557 in >> group; m->gobble(in);
559 SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
560 tempLookup->setGroup(group);
561 tempLookup->setLabel("1");
563 for (int i = 0; i < count; i++) {
566 tempLookup->push_back(temp, group);
569 metadataLookup.push_back(tempLookup);
577 catch(exception& e) {
578 m->errorOut(e, "OTUAssociationCommand", "readMetadata");
582 /*****************************************************************/
583 //eliminate groups user did not pick, remove zeroed out otus, fill metadata vector.
584 int OTUAssociationCommand::getMetadata(){
587 vector<string> mGroups = m->getGroups();
590 for (int i = 0; i < metadataLookup.size(); i++) {
591 //if this sharedrabund is not from a group the user wants then delete it.
592 if (!(m->inUsersGroups(metadataLookup[i]->getGroup(), mGroups))) {
593 delete metadataLookup[i]; metadataLookup[i] = NULL;
594 metadataLookup.erase(metadataLookup.begin()+i);
600 vector<SharedRAbundFloatVector*> newLookup;
601 for (int i = 0; i < metadataLookup.size(); i++) {
602 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
603 temp->setLabel(metadataLookup[i]->getLabel());
604 temp->setGroup(metadataLookup[i]->getGroup());
605 newLookup.push_back(temp);
609 vector<string> newBinLabels;
610 for (int i = 0; i < metadataLookup[0]->getNumBins(); i++) {
611 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
613 //look at each sharedRabund and make sure they are not all zero
615 for (int j = 0; j < metadataLookup.size(); j++) {
616 if (metadataLookup[j]->getAbundance(i) != 0) { allZero = false; break; }
619 //if they are not all zero add this bin
621 for (int j = 0; j < metadataLookup.size(); j++) {
622 newLookup[j]->push_back(metadataLookup[j]->getAbundance(i), metadataLookup[j]->getGroup());
624 newBinLabels.push_back(metadataLabels[i]);
628 metadataLabels = newBinLabels;
630 for (int j = 0; j < metadataLookup.size(); j++) { delete metadataLookup[j]; }
631 metadataLookup.clear();
633 metadata.resize(newLookup[0]->getNumBins());
634 for (int i = 0; i < newLookup[0]->getNumBins(); i++) { for (int j = 0; j < newLookup.size(); j++) { metadata[i].push_back(newLookup[j]->getAbundance(i)); } }
636 for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
640 catch(exception& e) {
641 m->errorOut(e, "OTUAssociationCommand", "getMetadata");
645 /*****************************************************************/