5 * Created by Sarah Westcott on 3/30/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "venncommand.h"
14 //#include "jackknife.h"
15 #include "sharedsobscollectsummary.h"
16 #include "sharedchao1.h"
17 #include "sharedace.h"
20 //**********************************************************************************************************************
21 vector<string> VennCommand::setParameters(){
23 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
24 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
27 CommandParameter pcalc("calc", "String", "", "", "", "", "",false,false); parameters.push_back(pcalc);
28 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
29 CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pnseqs);
30 CommandParameter ppermute("permute", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ppermute);
31 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
32 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
39 m->errorOut(e, "VennCommand", "setParameters");
43 //**********************************************************************************************************************
44 string VennCommand::getHelpString(){
46 string helpString = "";
47 helpString += "The venn command parameters are list, shared, groups, calc, abund, nseqs, permute and label. shared, relabund, list, rabund or sabund is required unless you have a valid current file.\n";
48 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n";
49 helpString += "The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n";
50 helpString += "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n";
51 helpString += "Example venn(groups=A-B-C, calc=sharedsobs-sharedchao, abund=20).\n";
52 helpString += "The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n";
53 helpString += "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n";
54 helpString += "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a shared file.\n";
55 helpString += "The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n";
56 helpString += "If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n";
57 helpString += "The only estimators available four 4 groups are sharedsobs and sharedchao.\n";
58 helpString += "The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "VennCommand", "getHelpString");
68 //**********************************************************************************************************************
69 VennCommand::VennCommand(){
71 abort = true; calledHelp = true;
73 vector<string> tempOutNames;
74 outputTypes["svg"] = tempOutNames;
77 m->errorOut(e, "VennCommand", "VennCommand");
81 //**********************************************************************************************************************
83 VennCommand::VennCommand(string option) {
85 abort = false; calledHelp = false;
88 //allow user to run help
89 if(option == "help") { help(); abort = true; calledHelp = true; }
92 vector<string> myArray = setParameters();
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
96 map<string,string>::iterator it;
98 ValidParameters validParameter;
100 //check to make sure all parameters are valid for command
101 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 //if the user changes the input directory command factory will send this info to us in the output parameter
106 string inputDir = validParameter.validFile(parameters, "inputdir", false);
107 if (inputDir == "not found"){ inputDir = ""; }
110 it = parameters.find("shared");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["shared"] = inputDir + it->second; }
118 it = parameters.find("list");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["list"] = inputDir + it->second; }
127 //check for required parameters
128 listfile = validParameter.validFile(parameters, "list", true);
129 if (listfile == "not open") { listfile = ""; abort = true; }
130 else if (listfile == "not found") { listfile = ""; }
131 else { format = "list"; inputfile = listfile; }
133 sharedfile = validParameter.validFile(parameters, "shared", true);
134 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
135 else if (sharedfile == "not found") { sharedfile = ""; }
136 else { format = "sharedfile"; inputfile = sharedfile; }
138 if ((sharedfile == "") && (listfile == "")) {
139 //is there are current file available for any of these?
140 //give priority to shared, then list, then rabund, then sabund
141 //if there is a current shared file, use it
142 sharedfile = m->getSharedFile();
143 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
145 listfile = m->getListFile();
146 if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
148 m->mothurOut("No valid current files. You must provide a list or shared file."); m->mothurOutEndLine();
154 //if the user changes the output directory command factory will send this info to us in the output parameter
155 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); }
157 //check for optional parameter and set defaults
158 // ...at some point should added some additional type checking...
159 label = validParameter.validFile(parameters, "label", false);
160 if (label == "not found") { label = ""; }
162 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
163 else { allLines = 1; }
166 groups = validParameter.validFile(parameters, "groups", false);
167 if (groups == "not found") { groups = ""; }
169 m->splitAtDash(groups, Groups);
173 calc = validParameter.validFile(parameters, "calc", false);
174 if (calc == "not found") {
175 if(format == "list") { calc = "sobs"; }
176 else { calc = "sharedsobs"; }
179 if (calc == "default") {
180 if(format == "list") { calc = "sobs"; }
181 else { calc = "sharedsobs"; }
184 m->splitAtDash(calc, Estimators);
187 temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; }
188 convert(temp, abund);
190 temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; }
191 nseqs = m->isTrue(temp);
193 temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; }
194 perm = m->isTrue(temp);
199 catch(exception& e) {
200 m->errorOut(e, "VennCommand", "VennCommand");
204 //**********************************************************************************************************************
206 int VennCommand::execute(){
209 if (abort == true) { if (calledHelp) { return 0; } return 2; }
211 ValidCalculators validCalculator;
213 if (format == "list") {
214 for (int i=0; i<Estimators.size(); i++) {
215 if (validCalculator.isValidCalculator("vennsingle", Estimators[i]) == true) {
216 if (Estimators[i] == "sobs") {
217 vennCalculators.push_back(new Sobs());
218 }else if (Estimators[i] == "chao") {
219 vennCalculators.push_back(new Chao1());
220 }else if (Estimators[i] == "ace") {
223 vennCalculators.push_back(new Ace(abund));
228 for (int i=0; i<Estimators.size(); i++) {
229 if (validCalculator.isValidCalculator("vennshared", Estimators[i]) == true) {
230 if (Estimators[i] == "sharedsobs") {
231 vennCalculators.push_back(new SharedSobsCS());
232 }else if (Estimators[i] == "sharedchao") {
233 vennCalculators.push_back(new SharedChao1());
234 }else if (Estimators[i] == "sharedace") {
235 vennCalculators.push_back(new SharedAce());
241 //if the users entered no valid calculators don't execute command
242 if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); return 0; }
244 venn = new Venn(outputDir, nseqs, inputfile);
245 input = new InputData(inputfile, format);
249 if (format == "sharedfile") {
250 lookup = input->getSharedRAbundVectors();
251 lastLabel = lookup[0]->getLabel();
253 if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); }
254 }else if (format == "list") {
255 sabund = input->getSAbundVector();
256 lastLabel = sabund->getLabel();
259 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
260 set<string> processedLabels;
261 set<string> userLabels = labels;
263 if (format != "list") {
265 //as long as you are not at the end of the file or done wih the lines you want
266 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
268 if (m->control_pressed) {
269 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
270 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
271 m->Groups.clear(); delete venn; delete input;
272 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
276 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
277 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
278 processedLabels.insert(lookup[0]->getLabel());
279 userLabels.erase(lookup[0]->getLabel());
281 if ((lookup.size() > 4) && (!perm)){
282 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
283 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
285 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
286 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
288 }else if ((lookup.size() > 4) && (perm)) {
289 set< set<int> >::iterator it3;
290 set<int>::iterator it2;
291 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
293 set<int> poss = *it3;
294 vector<SharedRAbundVector*> subset;
295 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
297 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
298 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
301 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
302 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
306 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
307 string saveLabel = lookup[0]->getLabel();
309 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
310 lookup = input->getSharedRAbundVectors(lastLabel);
312 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
313 processedLabels.insert(lookup[0]->getLabel());
314 userLabels.erase(lookup[0]->getLabel());
316 if ((lookup.size() > 4) && (!perm)){
317 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
318 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
320 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
321 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
323 }else if ((lookup.size() > 4) && (perm)) {
324 set< set<int> >::iterator it3;
325 set<int>::iterator it2;
326 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
328 set<int> poss = *it3;
329 vector<SharedRAbundVector*> subset;
330 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
332 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
333 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
336 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
337 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
340 //restore real lastlabel to save below
341 lookup[0]->setLabel(saveLabel);
345 lastLabel = lookup[0]->getLabel();
347 //get next line to process
348 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
349 lookup = input->getSharedRAbundVectors();
352 if (m->control_pressed) {
353 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
354 m->Groups.clear(); delete venn; delete input;
355 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
360 //output error messages about any remaining user labels
361 set<string>::iterator it;
362 bool needToRun = false;
363 for (it = userLabels.begin(); it != userLabels.end(); it++) {
364 m->mothurOut("Your file does not include the label " + *it);
365 if (processedLabels.count(lastLabel) != 1) {
366 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
369 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
373 //run last label if you need to
374 if (needToRun == true) {
375 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
376 lookup = input->getSharedRAbundVectors(lastLabel);
378 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
379 processedLabels.insert(lookup[0]->getLabel());
380 userLabels.erase(lookup[0]->getLabel());
382 if ((lookup.size() > 4) && (!perm)){
383 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
384 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
386 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
387 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
389 }else if ((lookup.size() > 4) && (perm)) {
390 set< set<int> >::iterator it3;
391 set<int>::iterator it2;
392 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
394 set<int> poss = *it3;
395 vector<SharedRAbundVector*> subset;
396 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
398 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
399 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
402 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
403 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
406 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
410 //reset groups parameter
413 if (m->control_pressed) {
414 m->Groups.clear(); delete venn; delete input;
415 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
416 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
423 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
425 if (m->control_pressed) {
426 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
427 delete sabund; delete venn; delete input;
428 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
432 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
434 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
435 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
436 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
439 processedLabels.insert(sabund->getLabel());
440 userLabels.erase(sabund->getLabel());
443 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
444 string saveLabel = sabund->getLabel();
447 sabund = input->getSAbundVector(lastLabel);
449 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
450 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
451 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
454 processedLabels.insert(sabund->getLabel());
455 userLabels.erase(sabund->getLabel());
457 //restore real lastlabel to save below
458 sabund->setLabel(saveLabel);
461 lastLabel = sabund->getLabel();
464 sabund = input->getSAbundVector();
467 if (m->control_pressed) {
468 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
469 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
470 delete venn; delete input;
474 //output error messages about any remaining user labels
475 set<string>::iterator it;
476 bool needToRun = false;
477 for (it = userLabels.begin(); it != userLabels.end(); it++) {
478 m->mothurOut("Your file does not include the label " + *it);
479 if (processedLabels.count(lastLabel) != 1) {
480 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
483 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
487 //run last label if you need to
488 if (needToRun == true) {
489 if (sabund != NULL) { delete sabund; }
490 sabund = input->getSAbundVector(lastLabel);
492 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
493 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
494 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
500 if (m->control_pressed) {
501 delete venn; delete input;
502 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
503 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
508 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
509 delete venn; delete input;
511 m->mothurOutEndLine();
512 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
513 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
514 m->mothurOutEndLine();
519 catch(exception& e) {
520 m->errorOut(e, "VennCommand", "execute");
524 //**********************************************************************************************************************
525 //returns a vector of sets containing the 4 group combinations
526 set< set<int> > VennCommand::findCombinations(int lookupSize){
528 set< set<int> > combos;
531 for (int i = 0; i < lookupSize; i++) { possibles.insert(i); }
533 getCombos(possibles, combos);
538 catch(exception& e) {
539 m->errorOut(e, "VennCommand", "findCombinations");
543 //**********************************************************************************************************************
544 //recusively finds combos of 4
545 int VennCommand::getCombos(set<int> possibles, set< set<int> >& combos){
548 if (possibles.size() == 4) { //done
549 if (combos.count(possibles) == 0) { //no dups
550 combos.insert(possibles);
552 }else { //we still have work to do
553 set<int>::iterator it;
554 set<int>::iterator it2;
555 for (it = possibles.begin(); it != possibles.end(); it++) {
557 set<int> newPossibles;
558 for (it2 = possibles.begin(); it2 != possibles.end(); it2++) { //all possible combos of one length smaller
560 newPossibles.insert(*it2);
563 getCombos(newPossibles, combos);
569 catch(exception& e) {
570 m->errorOut(e, "VennCommand", "getCombos");
575 //**********************************************************************************************************************