5 * Created by Sarah Westcott on 3/30/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "venncommand.h"
14 //#include "jackknife.h"
15 #include "sharedsobscollectsummary.h"
16 #include "sharedchao1.h"
17 #include "sharedace.h"
20 //**********************************************************************************************************************
// Registers every parameter the venn command accepts and collects their names.
// Each CommandParameter built below is appended to `parameters` (declared outside this
// view); the names are then copied, in registration order, into myArray.
// NOTE(review): the embedded line numbering skips (22, 34, 37-39, 41-43), so the try/catch
// scaffolding and the return statement are not visible in this view.
21 vector<string> VennCommand::setParameters(){
// list / shared: the two parameters declared with type "InputTypes".
23 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false,true); parameters.push_back(plist);
24 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false,true); parameters.push_back(pshared);
// Remaining settings. The fourth argument appears to carry the default value
// (abund "10", nseqs "F", fontsize "24", permute "F") — TODO confirm against CommandParameter.
25 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
27 CommandParameter pcalc("calc", "String", "", "", "", "", "","",false,false); parameters.push_back(pcalc);
28 CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
29 CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pnseqs);
30 CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "","",false,false); parameters.push_back(pfontsize);
31 CommandParameter ppermute("permute", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppermute);
32 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
33 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect the parameter names in the order they were registered.
35 vector<string> myArray;
36 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exception path: report through m->errorOut with the class and method name.
40 m->errorOut(e, "VennCommand", "setParameters");
44 //**********************************************************************************************************************
// Builds the help text for the venn command by appending one sentence group per line
// to helpString.
// The text names only the input parameters this command actually registers and reads
// (list and shared), and the 4-group estimator sentence reads "for 4 groups".
// NOTE(review): the embedded line numbering skips (46, 62-64, 66-68), so the try/catch
// scaffolding and the return statement are not visible in this view.
45 string VennCommand::getHelpString(){
47 string helpString = "";
48 helpString += "The venn command parameters are list, shared, groups, calc, abund, nseqs, permute, fontsize and label. list or shared is required unless you have a valid current file.\n";
49 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n";
50 helpString += "The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n";
51 helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n";
52 helpString += "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n";
53 helpString += "Example venn(groups=A-B-C, calc=sharedsobs-sharedchao, abund=20).\n";
54 helpString += "The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n";
55 helpString += "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n";
56 helpString += "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a shared file.\n";
57 helpString += "The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n";
58 helpString += "If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n";
59 helpString += "The only estimators available for 4 groups are sharedsobs and sharedchao.\n";
60 helpString += "The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n";
61 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
// Exception path: report through m->errorOut with the class and method name.
65 m->errorOut(e, "VennCommand", "getHelpString");
69 //**********************************************************************************************************************
// Maps an output type name to its file-name pattern.
// "svg" selects the pattern "[filename],svg"; any other type logs an error through
// m->mothurOut and sets m->control_pressed.
// NOTE(review): the declaration of `pattern`, the return statement and the try/catch
// scaffolding are not visible in this view (embedded numbering skips 71-73, 76-79, 81-84).
70 string VennCommand::getOutputPattern(string type) {
74 if (type == "svg") { pattern = "[filename],svg"; }
75 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Exception path: report through m->errorOut with the class and method name.
80 m->errorOut(e, "VennCommand", "getOutputPattern");
85 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help already called, and
// registers an empty file list under the "svg" output type.
// NOTE(review): the try/catch scaffolding is not visible in this view.
86 VennCommand::VennCommand(){
88 abort = true; calledHelp = true;
// Make sure outputTypes has an (empty) entry for "svg".
90 vector<string> tempOutNames;
91 outputTypes["svg"] = tempOutNames;
// Exception path: report through m->errorOut with the class and method name.
94 m->errorOut(e, "VennCommand", "VennCommand");
98 //**********************************************************************************************************************
// Parses the option string for the venn command and initialises the command's settings.
// "help" and "citation" call help()/citation() and flag the command as aborted.
// Otherwise the parameters are validated, the list/shared input file is resolved (falling
// back to the current shared file, then the current list file, held by m), and label,
// groups, calc, abund, nseqs, permute and fontsize are read with their defaults.
// NOTE(review): many lines of this function are absent from this view (the embedded
// numbering skips), including the else/brace lines that nest the statements below, so the
// exact nesting cannot be read off this listing.
100 VennCommand::VennCommand(string option) {
102 abort = false; calledHelp = false;
105 //allow user to run help
106 if(option == "help") { help(); abort = true; calledHelp = true; }
107 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110 vector<string> myArray = setParameters();
112 OptionParser parser(option);
113 map<string,string> parameters = parser.getParameters();
114 map<string,string>::iterator it;
116 ValidParameters validParameter;
118 //check to make sure all parameters are valid for command
119 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
120 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
123 //if the user changes the input directory command factory will send this info to us in the output parameter
124 string inputDir = validParameter.validFile(parameters, "inputdir", false);
125 if (inputDir == "not found"){ inputDir = ""; }
128 it = parameters.find("shared");
129 //user has given a shared file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["shared"] = inputDir + it->second; }
136 it = parameters.find("list");
137 //user has given a list file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["list"] = inputDir + it->second; }
145 //check for required parameters
// validFile yields "not open" / "not found" as sentinel strings here; a usable file also
// becomes the current list/shared file via m->setListFile / m->setSharedFile.
146 listfile = validParameter.validFile(parameters, "list", true);
147 if (listfile == "not open") { listfile = ""; abort = true; }
148 else if (listfile == "not found") { listfile = ""; }
149 else { format = "list"; inputfile = listfile; m->setListFile(listfile); }
// shared is checked after list, so when both are supplied the shared file ends up as inputfile.
151 sharedfile = validParameter.validFile(parameters, "shared", true);
152 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
153 else if (sharedfile == "not found") { sharedfile = ""; }
154 else { format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
156 if ((sharedfile == "") && (listfile == "")) {
157 //is there a current file available for either of these?
158 //give priority to shared, then list (the only two consulted in the visible code)
159 //if there is a current shared file, use it
160 sharedfile = m->getSharedFile();
161 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
163 listfile = m->getListFile();
164 if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
166 m->mothurOut("No valid current files. You must provide a list or shared file."); m->mothurOutEndLine();
172 //if the user changes the output directory command factory will send this info to us in the output parameter
// With no outputdir given, outputDir is set to m->hasPath(inputfile).
173 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); }
175 //check for optional parameter and set defaults
176 // ...at some point should add some additional type checking...
177 label = validParameter.validFile(parameters, "label", false);
178 if (label == "not found") { label = ""; }
// Any label other than "all" is split at dashes into `labels`; "all" turns on allLines.
180 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
181 else { allLines = 1; }
184 groups = validParameter.validFile(parameters, "groups", false);
185 if (groups == "not found") { groups = ""; }
187 m->splitAtDash(groups, Groups);
188 m->setGroups(Groups);
// calc defaults to "sobs" for list input and "sharedsobs" otherwise, both when the
// parameter is absent and when it is given as "default".
191 calc = validParameter.validFile(parameters, "calc", false);
192 if (calc == "not found") {
193 if(format == "list") { calc = "sobs"; }
194 else { calc = "sharedsobs"; }
197 if (calc == "default") {
198 if(format == "list") { calc = "sobs"; }
199 else { calc = "sharedsobs"; }
202 m->splitAtDash(calc, Estimators);
203 if (m->inUsersGroups("citation", Estimators)) {
204 ValidCalculators validCalc; validCalc.printCitations(Estimators);
205 //remove citation from list of calcs
206 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
// Remaining settings and their fallbacks: abund "10", nseqs "f", permute "f", fontsize "24".
210 temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; }
211 m->mothurConvert(temp, abund);
213 temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; }
214 nseqs = m->isTrue(temp);
216 temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; }
217 perm = m->isTrue(temp);
219 temp = validParameter.validFile(parameters, "fontsize", false); if (temp == "not found") { temp = "24"; }
220 m->mothurConvert(temp, fontsize);
225 catch(exception& e) {
226 m->errorOut(e, "VennCommand", "VennCommand");
230 //**********************************************************************************************************************
// Runs the venn command: builds the requested calculators, reads the input file one label
// at a time and calls venn->getPic for each label that is processed, recording every
// returned file name (other than "control") in outputNames and outputTypes["svg"].
// Returns early when the constructor flagged abort (0 if help was called, 2 otherwise) and
// returns 0 when none of the requested calculators is valid.
// Ownership: lookup holds raw SharedRAbundVector pointers that this function deletes itself,
// so any element removed from lookup must be deleted before it is dropped.
// NOTE(review): many lines of this function are absent from this view (the embedded
// numbering skips), including else/brace/return lines; the visible lines are kept as they are.
232 int VennCommand::execute(){
235 if (abort == true) { if (calledHelp) { return 0; } return 2; }
237 ValidCalculators validCalculator;
// list input uses the single-sample calculators; otherwise the shared calculators are used.
239 if (format == "list") {
240 for (int i=0; i<Estimators.size(); i++) {
241 if (validCalculator.isValidCalculator("vennsingle", Estimators[i]) == true) {
242 if (Estimators[i] == "sobs") {
243 vennCalculators.push_back(new Sobs());
244 }else if (Estimators[i] == "chao") {
245 vennCalculators.push_back(new Chao1());
246 }else if (Estimators[i] == "ace") {
// abund values below 5 are replaced by 10 before constructing Ace.
247 if(abund < 5) { abund = 10; }
248 vennCalculators.push_back(new Ace(abund));
253 for (int i=0; i<Estimators.size(); i++) {
254 if (validCalculator.isValidCalculator("vennshared", Estimators[i]) == true) {
255 if (Estimators[i] == "sharedsobs") {
256 vennCalculators.push_back(new SharedSobsCS());
257 }else if (Estimators[i] == "sharedchao") {
258 vennCalculators.push_back(new SharedChao1());
259 }else if (Estimators[i] == "sharedace") {
260 vennCalculators.push_back(new SharedAce());
266 //if the users entered no valid calculators don't execute command
267 if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); return 0; }
269 venn = new Venn(outputDir, nseqs, inputfile, fontsize);
270 input = new InputData(inputfile, format);
// Read the first record; for shared input with more than 4 groups and permute on,
// precompute every 4-group index combination once.
274 if (format == "sharedfile") {
275 lookup = input->getSharedRAbundVectors();
276 lastLabel = lookup[0]->getLabel();
278 if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); }
279 }else if (format == "list") {
280 sabund = input->getSAbundVector();
281 lastLabel = sabund->getLabel();
284 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
285 set<string> processedLabels;
286 set<string> userLabels = labels;
288 if (format != "list") {
290 //as long as you are not at the end of the file or done with the lines you want
291 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// On interrupt: free everything owned here and remove the files written so far.
293 if (m->control_pressed) {
294 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
295 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
296 m->clearGroups(); delete venn; delete input;
297 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
301 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
302 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
303 processedLabels.insert(lookup[0]->getLabel());
304 userLabels.erase(lookup[0]->getLabel());
306 if ((lookup.size() > 4) && (!perm)){
307 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set permute=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
308 for (int i = lookup.size(); i > 4; i--) { delete lookup.back(); lookup.pop_back(); } //lookup holds raw pointers: pop_back alone would leak the surplus vectors
310 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
311 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
313 }else if ((lookup.size() > 4) && (perm)) {
// permute: one picture per precomputed 4-group combination; subset only borrows the pointers.
314 set< set<int> >::iterator it3;
315 set<int>::iterator it2;
316 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
318 set<int> poss = *it3;
319 vector<SharedRAbundVector*> subset;
320 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
322 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
323 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
326 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
327 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
// A requested label may have been passed over: if so, re-read the record for lastLabel
// (unless it was already processed) and draw that instead.
331 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
332 string saveLabel = lookup[0]->getLabel();
334 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
335 lookup = input->getSharedRAbundVectors(lastLabel);
337 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
338 processedLabels.insert(lookup[0]->getLabel());
339 userLabels.erase(lookup[0]->getLabel());
341 if ((lookup.size() > 4) && (!perm)){
342 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set permute=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
343 for (int i = lookup.size(); i > 4; i--) { delete lookup.back(); lookup.pop_back(); } //lookup holds raw pointers: pop_back alone would leak the surplus vectors
345 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
346 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
348 }else if ((lookup.size() > 4) && (perm)) {
349 set< set<int> >::iterator it3;
350 set<int>::iterator it2;
351 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
353 set<int> poss = *it3;
354 vector<SharedRAbundVector*> subset;
355 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
357 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
358 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
361 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
362 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
365 //restore real lastlabel to save below
366 lookup[0]->setLabel(saveLabel);
370 lastLabel = lookup[0]->getLabel();
372 //get next line to process
373 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
374 lookup = input->getSharedRAbundVectors();
377 if (m->control_pressed) {
378 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
379 m->clearGroups(); delete venn; delete input;
380 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
385 //output error messages about any remaining user labels
386 set<string>::iterator it;
387 bool needToRun = false;
388 for (it = userLabels.begin(); it != userLabels.end(); it++) {
389 m->mothurOut("Your file does not include the label " + *it);
390 if (processedLabels.count(lastLabel) != 1) {
391 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
394 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
398 //run last label if you need to
399 if (needToRun == true) {
400 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
401 lookup = input->getSharedRAbundVectors(lastLabel);
403 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
404 processedLabels.insert(lookup[0]->getLabel());
405 userLabels.erase(lookup[0]->getLabel());
407 if ((lookup.size() > 4) && (!perm)){
408 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set permute=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
409 for (int i = lookup.size(); i > 4; i--) { delete lookup.back(); lookup.pop_back(); } //lookup holds raw pointers: pop_back alone would leak the surplus vectors
411 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
412 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
414 }else if ((lookup.size() > 4) && (perm)) {
415 set< set<int> >::iterator it3;
416 set<int>::iterator it2;
417 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
419 set<int> poss = *it3;
420 vector<SharedRAbundVector*> subset;
421 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
423 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
424 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
427 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
428 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
431 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
435 //reset groups parameter
438 if (m->control_pressed) {
439 m->clearGroups(); delete venn; delete input;
440 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
441 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
// list input: same label-walking scheme as above, one SAbundVector per record.
448 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
450 if (m->control_pressed) {
451 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
452 delete sabund; delete venn; delete input;
453 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
457 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
459 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
460 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
461 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
464 processedLabels.insert(sabund->getLabel());
465 userLabels.erase(sabund->getLabel());
468 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
469 string saveLabel = sabund->getLabel();
472 sabund = input->getSAbundVector(lastLabel);
474 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
475 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
476 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
479 processedLabels.insert(sabund->getLabel());
480 userLabels.erase(sabund->getLabel());
482 //restore real lastlabel to save below
483 sabund->setLabel(saveLabel);
486 lastLabel = sabund->getLabel();
489 sabund = input->getSAbundVector();
492 if (m->control_pressed) {
493 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
494 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
495 delete venn; delete input;
499 //output error messages about any remaining user labels
500 set<string>::iterator it;
501 bool needToRun = false;
502 for (it = userLabels.begin(); it != userLabels.end(); it++) {
503 m->mothurOut("Your file does not include the label " + *it);
504 if (processedLabels.count(lastLabel) != 1) {
505 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
508 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
512 //run last label if you need to
513 if (needToRun == true) {
514 if (sabund != NULL) { delete sabund; }
515 sabund = input->getSAbundVector(lastLabel);
517 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
518 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
519 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
525 if (m->control_pressed) {
526 delete venn; delete input;
527 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
528 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
// Normal completion: release the calculators, venn and input, then list the files written.
533 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
534 delete venn; delete input;
536 m->mothurOutEndLine();
537 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
538 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
539 m->mothurOutEndLine();
544 catch(exception& e) {
545 m->errorOut(e, "VennCommand", "execute");
549 //**********************************************************************************************************************
550 //returns a set of sets, each holding the indices of one 4 group combination
// Seeds `possibles` with the indices 0..lookupSize-1 and passes it to getCombos, which
// fills `combos`. execute() calls this with lookup.size() and uses the stored values to
// index lookup.
// NOTE(review): the declaration of `possibles`, the return statement and the try line are
// not visible in this view (embedded numbering skips 552, 554-555, 557, 559-562).
551 set< set<int> > VennCommand::findCombinations(int lookupSize){
553 set< set<int> > combos;
556 for (int i = 0; i < lookupSize; i++) { possibles.insert(i); }
558 getCombos(possibles, combos);
563 catch(exception& e) {
564 m->errorOut(e, "VennCommand", "findCombinations");
568 //**********************************************************************************************************************
569 //recursively finds combos of 4
// possibles is taken by value; combos is the caller's result set and is added to in place.
// A set of exactly four elements is inserted into combos unless it is already present.
// For any other size, one candidate set is built per element of possibles and the
// function recurses on it.
// NOTE(review): as shown, the inner loop copies every element into newPossibles, yet the
// existing comment says the candidates are "one length smaller" — the line that leaves out
// *it (embedded line 584) is presumably missing from this view; confirm against the full file.
// NOTE(review): the return statement is not visible in this view.
570 int VennCommand::getCombos(set<int> possibles, set< set<int> >& combos){
573 if (possibles.size() == 4) { //done
574 if (combos.count(possibles) == 0) { //no dups
575 combos.insert(possibles);
577 }else { //we still have work to do
578 set<int>::iterator it;
579 set<int>::iterator it2;
580 for (it = possibles.begin(); it != possibles.end(); it++) {
582 set<int> newPossibles;
583 for (it2 = possibles.begin(); it2 != possibles.end(); it2++) { //all possible combos of one length smaller
585 newPossibles.insert(*it2);
588 getCombos(newPossibles, combos);
594 catch(exception& e) {
595 m->errorOut(e, "VennCommand", "getCombos");
600 //**********************************************************************************************************************