5 * Created by Sarah Westcott on 3/30/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "venncommand.h"
14 //#include "jackknife.h"
15 #include "sharedsobscollectsummary.h"
16 #include "sharedchao1.h"
17 #include "sharedace.h"
20 //**********************************************************************************************************************
// Registers every parameter the venn command accepts and builds the list of
// their names. Each CommandParameter below is appended to `parameters`; the
// names are then copied, in registration order, into myArray.
// NOTE(review): the embedded line numbers skip (21->23, 35->39), so the
// enclosing try/catch and the return statement are not visible in this listing.
21 vector<string> VennCommand::setParameters(){
// Input files: list and shared are the only parameters registered with the "InputTypes" type.
23 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
24 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
// Options. NOTE(review): the fourth argument looks like the default value
// ("10" for abund, "F" for nseqs and permute) - confirm against CommandParameter.
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
27 CommandParameter pcalc("calc", "String", "", "", "", "", "",false,false); parameters.push_back(pcalc);
28 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
29 CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pnseqs);
30 CommandParameter ppermute("permute", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ppermute);
// Input and output directory overrides.
31 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
32 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect the name of every registered parameter, in registration order.
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error report, tagged with this class and function name.
39 m->errorOut(e, "VennCommand", "setParameters");
43 //**********************************************************************************************************************
44 string VennCommand::getHelpString(){
46 string helpString = "";
47 helpString += "The venn command parameters are list, shared, groups, calc, abund, nseqs, permute and label. shared, relabund, list, rabund or sabund is required unless you have a valid current file.\n";
48 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n";
49 helpString += "The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n";
50 helpString += "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n";
51 helpString += "Example venn(groups=A-B-C, calc=sharedsobs-sharedchao, abund=20).\n";
52 helpString += "The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n";
53 helpString += "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n";
54 helpString += "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a shared file.\n";
55 helpString += "The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n";
56 helpString += "If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n";
57 helpString += "The only estimators available four 4 groups are sharedsobs and sharedchao.\n";
58 helpString += "The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "VennCommand", "getHelpString");
68 //**********************************************************************************************************************
// Default constructor: marks the command as aborted and as a help call, and
// registers an empty "svg" entry in outputTypes.
// With both flags set, execute() returns 0 immediately.
// NOTE(review): the embedded line numbers skip (71->73, 74->77), so at least
// one statement and the try/catch scaffold are not visible in this listing.
69 VennCommand::VennCommand(){
71 abort = true; calledHelp = true;
// Seed the "svg" output type with an empty list of file names.
73 vector<string> tempOutNames;
74 outputTypes["svg"] = tempOutNames;
// Error report, tagged with this class and function name.
77 m->errorOut(e, "VennCommand", "VennCommand");
81 //**********************************************************************************************************************
// Parsing constructor: reads the option string given to the venn command,
// validates the parameter names, resolves the input file, and stores the
// settings (labels, groups, estimators, abund, nseqs, perm) used by execute().
// Passing "help" or "citation" prints that text and marks the command aborted.
// NOTE(review): the embedded line numbers skip in several places, so some
// braces, else-branches and statements of this constructor are not visible in
// this listing; the comments below describe only the lines shown.
83 VennCommand::VennCommand(string option) {
85 abort = false; calledHelp = false;
88 //allow user to run help
89 if(option == "help") { help(); abort = true; calledHelp = true; }
90 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Fetch the valid parameter names and split the option string into name/value pairs.
93 vector<string> myArray = setParameters();
95 OptionParser parser(option);
96 map<string,string> parameters = parser.getParameters();
97 map<string,string>::iterator it;
99 ValidParameters validParameter;
101 //check to make sure all parameters are valid for command
// (the loop declares its own `it`, separate from the iterator declared above)
102 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
103 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("shared");
112 //user has given a shared file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["shared"] = inputDir + it->second; }
119 it = parameters.find("list");
120 //user has given a list file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["list"] = inputDir + it->second; }
128 //check for required parameters
// validFile answers "not open" or "not found" for an unusable or absent file;
// "not open" aborts the command, "not found" just leaves the name empty.
129 listfile = validParameter.validFile(parameters, "list", true);
130 if (listfile == "not open") { listfile = ""; abort = true; }
131 else if (listfile == "not found") { listfile = ""; }
132 else { format = "list"; inputfile = listfile; }
// The shared file is handled after the list file, so when both are supplied
// format and inputfile end up describing the shared file.
134 sharedfile = validParameter.validFile(parameters, "shared", true);
135 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
136 else if (sharedfile == "not found") { sharedfile = ""; }
137 else { format = "sharedfile"; inputfile = sharedfile; }
// Neither file was given: fall back to the current files remembered by m.
139 if ((sharedfile == "") && (listfile == "")) {
140 //is there are current file available for any of these?
141 //the current shared file is looked up before the current list file; no rabund or sabund lookup appears in the lines shown
142 //if there is a current shared file, use it
143 sharedfile = m->getSharedFile();
144 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
146 listfile = m->getListFile();
147 if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
149 m->mothurOut("No valid current files. You must provide a list or shared file."); m->mothurOutEndLine();
155 //if the user changes the output directory command factory will send this info to us in the output parameter
// When no outputdir is given, the value of m->hasPath(inputfile) is used instead.
156 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); }
158 //check for optional parameter and set defaults
159 // ...at some point should added some additional type checking...
// label: a dash-separated list is split into `labels` (allLines = 0); the
// literal value "all" selects every label (allLines = 1).
160 label = validParameter.validFile(parameters, "label", false);
161 if (label == "not found") { label = ""; }
163 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
164 else { allLines = 1; }
// groups: dash-separated group names, split into Groups.
167 groups = validParameter.validFile(parameters, "groups", false);
168 if (groups == "not found") { groups = ""; }
170 m->splitAtDash(groups, Groups);
// calc: when absent or given as "default", use sobs for list input and
// sharedsobs otherwise; the final value is split at dashes into Estimators.
174 calc = validParameter.validFile(parameters, "calc", false);
175 if (calc == "not found") {
176 if(format == "list") { calc = "sobs"; }
177 else { calc = "sharedsobs"; }
180 if (calc == "default") {
181 if(format == "list") { calc = "sobs"; }
182 else { calc = "sharedsobs"; }
185 m->splitAtDash(calc, Estimators);
// abund defaults to "10" and is converted into the numeric member abund.
188 temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; }
189 convert(temp, abund);
// nseqs and permute default to "f" and are turned into booleans by m->isTrue.
191 temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; }
192 nseqs = m->isTrue(temp);
194 temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; }
195 perm = m->isTrue(temp);
// Error report, tagged with this class and function name.
200 catch(exception& e) {
201 m->errorOut(e, "VennCommand", "VennCommand");
205 //**********************************************************************************************************************
// Runs the venn command: builds the requested calculators, reads the input
// file one label at a time, and asks the Venn object to draw a picture for each
// label that was requested (or for every label when allLines == 1).
// Every name returned by venn->getPic other than "control" is recorded in
// outputNames and outputTypes["svg"]; the recorded names are printed at the end.
// Returns 0 when the command was aborted by a help/citation request, 2 when it
// was aborted for another reason, and 0 when no valid calculator was given.
// NOTE(review): the embedded line numbers skip throughout this function, so
// several braces, else-branches, deletes and return statements are not visible
// in this listing; the comments below describe only the lines shown.
207 int VennCommand::execute(){
210 if (abort == true) { if (calledHelp) { return 0; } return 2; }
212 ValidCalculators validCalculator;
// List input: accept the "vennsingle" calculators sobs, chao and ace.
214 if (format == "list") {
215 for (int i=0; i<Estimators.size(); i++) {
216 if (validCalculator.isValidCalculator("vennsingle", Estimators[i]) == true) {
217 if (Estimators[i] == "sobs") {
218 vennCalculators.push_back(new Sobs());
219 }else if (Estimators[i] == "chao") {
220 vennCalculators.push_back(new Chao1());
221 }else if (Estimators[i] == "ace") {
224 vennCalculators.push_back(new Ace(abund));
// Accept the "vennshared" calculators sharedsobs, sharedchao and sharedace
// (presumably the branch for non-list input; the else line is not shown).
229 for (int i=0; i<Estimators.size(); i++) {
230 if (validCalculator.isValidCalculator("vennshared", Estimators[i]) == true) {
231 if (Estimators[i] == "sharedsobs") {
232 vennCalculators.push_back(new SharedSobsCS());
233 }else if (Estimators[i] == "sharedchao") {
234 vennCalculators.push_back(new SharedChao1());
235 }else if (Estimators[i] == "sharedace") {
236 vennCalculators.push_back(new SharedAce());
242 //if the users entered no valid calculators don't execute command
243 if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); return 0; }
// Create the drawing object and the input reader, then read the first record.
245 venn = new Venn(outputDir, nseqs, inputfile);
246 input = new InputData(inputfile, format);
250 if (format == "sharedfile") {
251 lookup = input->getSharedRAbundVectors();
252 lastLabel = lookup[0]->getLabel();
// The 4-group combinations are computed once, and only when there are more
// than 4 groups and perm is set.
254 if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); }
255 }else if (format == "list") {
256 sabund = input->getSAbundVector();
257 lastLabel = sabund->getLabel();
260 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels records the labels already drawn; userLabels starts as a
// copy of the requested labels and shrinks as they are handled.
261 set<string> processedLabels;
262 set<string> userLabels = labels;
// ---- input that is not in list format: one vector per group in `lookup` ----
264 if (format != "list") {
266 //as long as you are not at the end of the file or done wih the lines you want
267 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// Cancel requested: free the calculators, the current vectors, venn and input,
// and remove every output file written so far.
269 if (m->control_pressed) {
270 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
271 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
272 m->Groups.clear(); delete venn; delete input;
273 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
// Draw this label when every label is wanted or it is one of the requested labels.
277 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
278 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
279 processedLabels.insert(lookup[0]->getLabel());
280 userLabels.erase(lookup[0]->getLabel());
// More than 4 groups without perm: warn, keep only the first four, and draw them.
// NOTE(review): the message below advises "perm=t", but the parameter registered
// in setParameters is named "permute" - confirm and correct the text.
282 if ((lookup.size() > 4) && (!perm)){
283 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
284 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //NOTE(review): lookup holds raw pointers (they are released with delete lookup[i] elsewhere in this function), so pop_back drops the pointer without deleting the object it points to - looks like a leak; confirm
286 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
287 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
// More than 4 groups with perm: draw one picture set per 4-group combination.
// `subset` borrows the pointers stored in lookup; it does not own them.
289 }else if ((lookup.size() > 4) && (perm)) {
290 set< set<int> >::iterator it3;
291 set<int>::iterator it2;
292 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
294 set<int> poss = *it3;
295 vector<SharedRAbundVector*> subset;
296 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
298 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
299 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
// Remaining case (presumably 4 groups or fewer; the else line is not shown): draw all groups.
302 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
303 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
// Label fallback: when anyLabelsToProcess reports work to do for the current
// label and lastLabel has not been drawn yet, re-read lastLabel and draw it.
307 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
308 string saveLabel = lookup[0]->getLabel();
310 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
311 lookup = input->getSharedRAbundVectors(lastLabel);
313 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
314 processedLabels.insert(lookup[0]->getLabel());
315 userLabels.erase(lookup[0]->getLabel());
// Same three drawing cases as above (the perm=t and pop_back notes apply here too).
317 if ((lookup.size() > 4) && (!perm)){
318 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
319 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //NOTE(review): pop_back on a vector of raw pointers does not delete the pointed-to objects - possible leak; confirm
321 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
322 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
324 }else if ((lookup.size() > 4) && (perm)) {
325 set< set<int> >::iterator it3;
326 set<int>::iterator it2;
327 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
329 set<int> poss = *it3;
330 vector<SharedRAbundVector*> subset;
331 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
333 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
334 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
337 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
338 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
341 //restore real lastlabel to save below
342 lookup[0]->setLabel(saveLabel);
// Remember the label just seen, then free the current vectors and read the next record.
346 lastLabel = lookup[0]->getLabel();
348 //get next line to process
349 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
350 lookup = input->getSharedRAbundVectors();
// Cancel requested after the loop: free what is still held and remove the output files.
353 if (m->control_pressed) {
354 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
355 m->Groups.clear(); delete venn; delete input;
356 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
361 //output error messages about any remaining user labels
// NOTE(review): no assignment to needToRun other than its initialisation is
// visible in this listing; presumably it is set in the "I will use" branch - confirm.
362 set<string>::iterator it;
363 bool needToRun = false;
364 for (it = userLabels.begin(); it != userLabels.end(); it++) {
365 m->mothurOut("Your file does not include the label " + *it);
366 if (processedLabels.count(lastLabel) != 1) {
367 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
370 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
374 //run last label if you need to
375 if (needToRun == true) {
376 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
377 lookup = input->getSharedRAbundVectors(lastLabel);
379 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
380 processedLabels.insert(lookup[0]->getLabel());
381 userLabels.erase(lookup[0]->getLabel());
// Same three drawing cases as in the main loop (the perm=t and pop_back notes apply here too).
383 if ((lookup.size() > 4) && (!perm)){
384 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
385 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //NOTE(review): pop_back on a vector of raw pointers does not delete the pointed-to objects - possible leak; confirm
387 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
388 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
390 }else if ((lookup.size() > 4) && (perm)) {
391 set< set<int> >::iterator it3;
392 set<int>::iterator it2;
393 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
395 set<int> poss = *it3;
396 vector<SharedRAbundVector*> subset;
397 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
399 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
400 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
403 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
404 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
407 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
411 //reset groups parameter
414 if (m->control_pressed) {
415 m->Groups.clear(); delete venn; delete input;
416 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
417 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
// ---- list-format input: one SAbundVector (`sabund`) per label ----
// Same scheme as above: draw requested labels, fall back to lastLabel when a
// requested label is missing, and clean up when a cancel is requested.
424 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
426 if (m->control_pressed) {
427 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
428 delete sabund; delete venn; delete input;
429 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
433 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
435 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
436 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
437 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
440 processedLabels.insert(sabund->getLabel());
441 userLabels.erase(sabund->getLabel());
// Label fallback: re-read lastLabel and draw it when it has not been drawn yet.
444 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
445 string saveLabel = sabund->getLabel();
448 sabund = input->getSAbundVector(lastLabel);
450 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
451 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
452 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
455 processedLabels.insert(sabund->getLabel());
456 userLabels.erase(sabund->getLabel());
458 //restore real lastlabel to save below
459 sabund->setLabel(saveLabel);
// Remember the label just seen and read the next record.
462 lastLabel = sabund->getLabel();
465 sabund = input->getSAbundVector();
468 if (m->control_pressed) {
469 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
470 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
471 delete venn; delete input;
475 //output error messages about any remaining user labels
476 set<string>::iterator it;
477 bool needToRun = false;
478 for (it = userLabels.begin(); it != userLabels.end(); it++) {
479 m->mothurOut("Your file does not include the label " + *it);
480 if (processedLabels.count(lastLabel) != 1) {
481 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
484 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
488 //run last label if you need to
489 if (needToRun == true) {
490 if (sabund != NULL) { delete sabund; }
491 sabund = input->getSAbundVector(lastLabel);
493 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
494 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
495 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
501 if (m->control_pressed) {
502 delete venn; delete input;
503 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
504 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
// Normal completion: release the calculators, venn and input, then list the files written.
509 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
510 delete venn; delete input;
512 m->mothurOutEndLine();
513 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
514 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
515 m->mothurOutEndLine();
// Error report, tagged with this class and function name.
520 catch(exception& e) {
521 m->errorOut(e, "VennCommand", "execute");
525 //**********************************************************************************************************************
526 //returns a set of sets: each inner set holds one combination of 4 indexes taken from 0..lookupSize-1
// The indexes 0..lookupSize-1 are placed in `possibles` and handed to
// getCombos, which fills `combos`.
// NOTE(review): the declaration of `possibles` and the return statement fall on
// lines this listing does not show (the embedded numbers skip 529->532 and 534->539).
527 set< set<int> > VennCommand::findCombinations(int lookupSize){
529 set< set<int> > combos;
// Seed the candidate pool with every index.
532 for (int i = 0; i < lookupSize; i++) { possibles.insert(i); }
534 getCombos(possibles, combos);
// Error report, tagged with this class and function name.
539 catch(exception& e) {
540 m->errorOut(e, "VennCommand", "findCombinations");
544 //**********************************************************************************************************************
545 //recusively finds combos of 4
546 int VennCommand::getCombos(set<int> possibles, set< set<int> >& combos){
549 if (possibles.size() == 4) { //done
550 if (combos.count(possibles) == 0) { //no dups
551 combos.insert(possibles);
553 }else { //we still have work to do
554 set<int>::iterator it;
555 set<int>::iterator it2;
556 for (it = possibles.begin(); it != possibles.end(); it++) {
558 set<int> newPossibles;
559 for (it2 = possibles.begin(); it2 != possibles.end(); it2++) { //all possible combos of one length smaller
561 newPossibles.insert(*it2);
564 getCombos(newPossibles, combos);
570 catch(exception& e) {
571 m->errorOut(e, "VennCommand", "getCombos");
576 //**********************************************************************************************************************