5 * Created by Sarah Westcott on 3/30/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "venncommand.h"
14 //#include "jackknife.h"
15 #include "sharedsobscollectsummary.h"
16 #include "sharedchao1.h"
17 #include "sharedace.h"
20 //**********************************************************************************************************************
// Registers every option the venn command accepts and collects the option names.
// Each CommandParameter built below is appended to `parameters`; the loop at the end
// copies parameters[i].name into myArray in registration order.
// NOTE(review): the embedded original numbering has gaps (21 -> 23, 35 -> 39), so the
// lines that open the try block, return the result and close the function are not
// visible in this excerpt. Presumably myArray is the value returned - confirm.
21 vector<string> VennCommand::setParameters(){
// Input files: list and shared.
23 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
24 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
// Free-form string options; the constructor splits groups, label and calc at dashes.
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
27 CommandParameter pcalc("calc", "String", "", "", "", "", "",false,false); parameters.push_back(pcalc);
// The fourth argument ("10", "F", "F") matches the fallbacks the constructor applies
// when these options are absent - TODO confirm it is CommandParameter's default-value slot.
28 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
29 CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pnseqs);
30 CommandParameter ppermute("permute", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ppermute);
31 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
32 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Flatten the registered parameters into a plain list of names.
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Reports the exception `e` tagged with class and function name.
// NOTE(review): the catch clause that declares `e` is not visible in this excerpt.
39 m->errorOut(e, "VennCommand", "setParameters");
43 //**********************************************************************************************************************
// Assembles the user-facing help text for the venn command by appending one
// sentence group per line to helpString.
// NOTE(review): the try/return/closing lines are not visible in this excerpt;
// presumably helpString is returned - confirm.
// NOTE(review): two wording issues in the runtime text below, left untouched here
// because they are program output:
//   - "available four 4 groups" looks like a typo for "available for 4 groups";
//   - the first sentence lists relabund, rabund and sabund as accepted inputs, but the
//     constructor in this file only reads the list and shared options.
44 string VennCommand::getHelpString(){
46 string helpString = "";
47 helpString += "The venn command parameters are list, shared, groups, calc, abund, nseqs, permute and label. shared, relabund, list, rabund or sabund is required unless you have a valid current file.\n";
48 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n";
49 helpString += "The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n";
50 helpString += "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n";
51 helpString += "Example venn(groups=A-B-C, calc=sharedsobs-sharedchao, abund=20).\n";
52 helpString += "The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n";
53 helpString += "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n";
54 helpString += "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a shared file.\n";
55 helpString += "The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n";
56 helpString += "If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n";
57 helpString += "The only estimators available four 4 groups are sharedsobs and sharedchao.\n";
58 helpString += "The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
// Reports the exception `e` tagged with class and function name.
// NOTE(review): the catch clause that declares `e` is not visible in this excerpt.
63 m->errorOut(e, "VennCommand", "getHelpString");
68 //**********************************************************************************************************************
// Default constructor: builds a command object that is not meant to be executed.
// Both abort and calledHelp are set, which makes execute() return 0 immediately.
// NOTE(review): original lines 70, 72 and 75-76 are not visible in this excerpt
// (try/catch scaffolding and possibly further setup) - confirm against the full file.
69 VennCommand::VennCommand(){
71 abort = true; calledHelp = true;
// Register "svg" as an output type with an empty file list.
73 vector<string> tempOutNames;
74 outputTypes["svg"] = tempOutNames;
// Reports the exception `e`; the catch clause that declares it is not visible here.
77 m->errorOut(e, "VennCommand", "VennCommand");
81 //**********************************************************************************************************************
// Parses the option string and fills in the command's state:
//   listfile / sharedfile / inputfile / format, outputDir, label(s), groups,
//   calc (Estimators), abund, nseqs and perm.
// On any invalid option or unopenable file, abort is set so execute() refuses to run.
// NOTE(review): many short lines (try, else, closing braces, the declarations of
// `path` and `temp`) are missing from this excerpt; the comments below describe only
// what the visible statements do.
83 VennCommand::VennCommand(string option) {
// Start from a runnable state; the branches below flip these flags.
85 abort = false; calledHelp = false;
88 //allow user to run help
// help and citation print their text and mark the command as aborted-by-help.
89 if(option == "help") { help(); abort = true; calledHelp = true; }
90 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the options this command accepts.
93 vector<string> myArray = setParameters();
95 OptionParser parser(option);
// NOTE(review): this local map has the same name as the `parameters` container that
// setParameters() fills; inside this constructor the name refers to the map.
96 map<string,string> parameters = parser.getParameters();
97 map<string,string>::iterator it;
99 ValidParameters validParameter;
101 //check to make sure all parameters are valid for command
// NOTE(review): the loop declares its own `it`, shadowing the iterator declared above;
// the find() calls further down assign to the outer one.
102 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
103 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("shared");
112 //user has given a shared file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["shared"] = inputDir + it->second; }
119 it = parameters.find("list");
120 //user has given a list file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["list"] = inputDir + it->second; }
128 //check for required parameters
// validFile() answers with the sentinels "not open" / "not found" or the usable file name.
129 listfile = validParameter.validFile(parameters, "list", true);
130 if (listfile == "not open") { listfile = ""; abort = true; }
131 else if (listfile == "not found") { listfile = ""; }
132 else { format = "list"; inputfile = listfile; m->setListFile(listfile); }
// shared is handled after list, so when both are supplied format and inputfile end up
// describing the shared file.
134 sharedfile = validParameter.validFile(parameters, "shared", true);
135 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
136 else if (sharedfile == "not found") { sharedfile = ""; }
137 else { format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
// Neither file was given: fall back to the session's current shared file, then list file.
// NOTE(review): the else keywords chaining the next statements, and whatever follows the
// final message (presumably abort = true), are on lines not visible here - confirm.
139 if ((sharedfile == "") && (listfile == "")) {
140 //is there are current file available for any of these?
141 //give priority to shared, then list (only these two are checked by the visible code)
142 //if there is a current shared file, use it
143 sharedfile = m->getSharedFile();
144 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
146 listfile = m->getListFile();
147 if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
149 m->mothurOut("No valid current files. You must provide a list or shared file."); m->mothurOutEndLine();
155 //if the user changes the output directory command factory will send this info to us in the output parameter
// Without an explicit outputdir, output goes next to the input file.
156 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); }
158 //check for optional parameter and set defaults
159 // ...at some point should added some additional type checking...
160 label = validParameter.validFile(parameters, "label", false);
161 if (label == "not found") { label = ""; }
// Any value other than "all" is split at dashes into the `labels` set; "all" selects every line.
163 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
164 else { allLines = 1; }
167 groups = validParameter.validFile(parameters, "groups", false);
168 if (groups == "not found") { groups = ""; }
// The dash-separated group names are split into Groups and handed to m->setGroups().
170 m->splitAtDash(groups, Groups);
171 m->setGroups(Groups);
// calc falls back to sobs for list input and sharedsobs otherwise, both when it is
// missing and when the user passes the literal value "default".
174 calc = validParameter.validFile(parameters, "calc", false);
175 if (calc == "not found") {
176 if(format == "list") { calc = "sobs"; }
177 else { calc = "sharedsobs"; }
180 if (calc == "default") {
181 if(format == "list") { calc = "sobs"; }
182 else { calc = "sharedsobs"; }
185 m->splitAtDash(calc, Estimators);
// "citation" is a pseudo-calculator: print the citations, then drop it from the list.
186 if (m->inUsersGroups("citation", Estimators)) {
187 ValidCalculators validCalc; validCalc.printCitations(Estimators);
188 //remove citation from list of calcs (only the first occurrence, because of the break)
189 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
// abund defaults to "10" and is passed through convert() into the abund member.
193 temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; }
194 convert(temp, abund);
// nseqs and permute are booleans defaulting to false.
196 temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; }
197 nseqs = m->isTrue(temp);
199 temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; }
200 perm = m->isTrue(temp);
// Any exception raised above is reported with class and function context.
205 catch(exception& e) {
206 m->errorOut(e, "VennCommand", "VennCommand");
210 //**********************************************************************************************************************
// Runs the venn command: builds the requested calculators, reads the input file label
// by label, and asks the Venn object to draw one picture set per selected label.
// File names returned by getPic() are recorded in outputNames and outputTypes["svg"].
// Visible return values: 0 when help/citation was requested or no valid calculator was
// given, 2 when the command was aborted for another reason.
// NOTE(review): many short lines (try, else, return, closing braces, single deletes)
// are missing from this excerpt; the comments below describe only the visible statements.
212 int VennCommand::execute(){
215 if (abort == true) { if (calledHelp) { return 0; } return 2; }
217 ValidCalculators validCalculator;
// List input uses the single-sample calculators (sobs, chao, ace) ...
219 if (format == "list") {
220 for (int i=0; i<Estimators.size(); i++) {
221 if (validCalculator.isValidCalculator("vennsingle", Estimators[i]) == true) {
222 if (Estimators[i] == "sobs") {
223 vennCalculators.push_back(new Sobs());
224 }else if (Estimators[i] == "chao") {
225 vennCalculators.push_back(new Chao1());
226 }else if (Estimators[i] == "ace") {
229 vennCalculators.push_back(new Ace(abund));
// ... and the other format uses the shared calculators (sharedsobs, sharedchao, sharedace).
234 for (int i=0; i<Estimators.size(); i++) {
235 if (validCalculator.isValidCalculator("vennshared", Estimators[i]) == true) {
236 if (Estimators[i] == "sharedsobs") {
237 vennCalculators.push_back(new SharedSobsCS());
238 }else if (Estimators[i] == "sharedchao") {
239 vennCalculators.push_back(new SharedChao1());
240 }else if (Estimators[i] == "sharedace") {
241 vennCalculators.push_back(new SharedAce());
247 //if the users entered no valid calculators don't execute command
248 if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); return 0; }
// venn and input are heap objects owned by this function; every exit path visible
// below deletes them.
250 venn = new Venn(outputDir, nseqs, inputfile);
251 input = new InputData(inputfile, format);
// Read the first record and remember its label as lastLabel.
255 if (format == "sharedfile") {
256 lookup = input->getSharedRAbundVectors();
257 lastLabel = lookup[0]->getLabel();
// With permute on and more than 4 groups, precompute every 4-group index combination once.
259 if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); }
260 }else if (format == "list") {
261 sabund = input->getSAbundVector();
262 lastLabel = sabund->getLabel();
265 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels: labels already drawn. userLabels: requested labels not yet matched.
266 set<string> processedLabels;
267 set<string> userLabels = labels;
// ---------- shared-file branch ----------
269 if (format != "list") {
271 //as long as you are not at the end of the file or done with the lines you want
272 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// User interrupt: free everything and remove files written so far.
274 if (m->control_pressed) {
275 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
276 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
277 m->clearGroups(); delete venn; delete input;
278 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
// The current label is wanted: draw it.
282 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
283 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
284 processedLabels.insert(lookup[0]->getLabel());
285 userLabels.erase(lookup[0]->getLabel());
// More than 4 groups without permute: warn and keep only the first four.
// NOTE(review): the message tells the user to set perm=t, while the option registered
// by this command is named permute.
287 if ((lookup.size() > 4) && (!perm)){
288 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
289 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //NOTE(review): lookup holds raw pointers (see the delete loops in this function), so pop_back() only drops the pointer; the truncated groups look leaked - confirm
// Entries equal to "control" are skipped; NOTE(review): presumably a sentinel getPic()
// emits when the run was interrupted - confirm.
291 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
292 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
// More than 4 groups with permute: draw one picture set per 4-group combination.
294 }else if ((lookup.size() > 4) && (perm)) {
295 set< set<int> >::iterator it3;
296 set<int>::iterator it2;
297 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
// poss holds indices into lookup; subset borrows the pointers (it does not own them).
299 set<int> poss = *it3;
300 vector<SharedRAbundVector*> subset;
301 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
303 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
304 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
// Remaining case (the else itself is not visible here): draw all groups as they are.
307 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
308 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
// Fallback to the previous label: taken when anyLabelsToProcess() returns true for the
// current label and lastLabel has not been drawn yet. The data for lastLabel is
// re-read and drawn in place of the current record.
312 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
313 string saveLabel = lookup[0]->getLabel();
315 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
316 lookup = input->getSharedRAbundVectors(lastLabel);
318 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
319 processedLabels.insert(lookup[0]->getLabel());
320 userLabels.erase(lookup[0]->getLabel());
// Same three-way drawing logic as above, applied to the re-read record.
322 if ((lookup.size() > 4) && (!perm)){
323 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
324 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //NOTE(review): same concern as above - pop_back() on a vector of raw pointers does not delete the pointee
326 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
327 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
329 }else if ((lookup.size() > 4) && (perm)) {
330 set< set<int> >::iterator it3;
331 set<int>::iterator it2;
332 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
334 set<int> poss = *it3;
335 vector<SharedRAbundVector*> subset;
336 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
338 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
339 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
342 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
343 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
346 //restore real lastlabel to save below
347 lookup[0]->setLabel(saveLabel);
// Remember the label just visited, then free the record and read the next one.
351 lastLabel = lookup[0]->getLabel();
353 //get next line to process
354 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
355 lookup = input->getSharedRAbundVectors();
// Interrupt check after the read loop.
358 if (m->control_pressed) {
359 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
360 m->clearGroups(); delete venn; delete input;
361 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
366 //output error messages about any remaining user labels
367 set<string>::iterator it;
// NOTE(review): the statement that sets needToRun to true (presumably inside the first
// branch below) is on a line not visible here - confirm.
368 bool needToRun = false;
369 for (it = userLabels.begin(); it != userLabels.end(); it++) {
370 m->mothurOut("Your file does not include the label " + *it);
371 if (processedLabels.count(lastLabel) != 1) {
372 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
375 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
379 //run last label if you need to
380 if (needToRun == true) {
381 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
382 lookup = input->getSharedRAbundVectors(lastLabel);
384 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
385 processedLabels.insert(lookup[0]->getLabel());
386 userLabels.erase(lookup[0]->getLabel());
// Same three-way drawing logic as in the main loop.
388 if ((lookup.size() > 4) && (!perm)){
389 m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
390 for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //NOTE(review): same concern as above - pop_back() on a vector of raw pointers does not delete the pointee
392 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
393 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
395 }else if ((lookup.size() > 4) && (perm)) {
396 set< set<int> >::iterator it3;
397 set<int>::iterator it2;
398 for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
400 set<int> poss = *it3;
401 vector<SharedRAbundVector*> subset;
402 for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); }
404 vector<string> outfilenames = venn->getPic(subset, vennCalculators);
405 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
408 vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
409 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
412 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
416 //reset groups parameter
// NOTE(review): the statement this comment refers to is on a line not visible here.
419 if (m->control_pressed) {
420 m->clearGroups(); delete venn; delete input;
421 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
422 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
// ---------- list-file branch: same label walk, using a single sabund record ----------
429 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
431 if (m->control_pressed) {
432 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
433 delete sabund; delete venn; delete input;
434 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
// The current label is wanted: draw it.
438 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
440 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
441 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
442 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
445 processedLabels.insert(sabund->getLabel());
446 userLabels.erase(sabund->getLabel());
// Fallback to the previous label, as in the shared branch.
// NOTE(review): no delete of the old sabund is visible before it is reassigned here or
// at the end of the loop body; it may sit on one of the missing lines - confirm.
449 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
450 string saveLabel = sabund->getLabel();
453 sabund = input->getSAbundVector(lastLabel);
455 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
456 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
457 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
460 processedLabels.insert(sabund->getLabel());
461 userLabels.erase(sabund->getLabel());
463 //restore real lastlabel to save below
464 sabund->setLabel(saveLabel);
// Remember the label just visited and read the next record.
467 lastLabel = sabund->getLabel();
470 sabund = input->getSAbundVector();
473 if (m->control_pressed) {
474 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
475 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
476 delete venn; delete input;
480 //output error messages about any remaining user labels
481 set<string>::iterator it;
// NOTE(review): as in the shared branch, the assignment needToRun = true is not visible here.
482 bool needToRun = false;
483 for (it = userLabels.begin(); it != userLabels.end(); it++) {
484 m->mothurOut("Your file does not include the label " + *it);
485 if (processedLabels.count(lastLabel) != 1) {
486 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
489 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
493 //run last label if you need to
494 if (needToRun == true) {
495 if (sabund != NULL) { delete sabund; }
496 sabund = input->getSAbundVector(lastLabel);
498 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
499 vector<string> outfilenames = venn->getPic(sabund, vennCalculators);
500 for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
506 if (m->control_pressed) {
507 delete venn; delete input;
508 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
509 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
// ---------- normal completion: release helpers and list the files produced ----------
514 for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; }
515 delete venn; delete input;
517 m->mothurOutEndLine();
518 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
519 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
520 m->mothurOutEndLine();
// Any exception raised above is reported with class and function context.
525 catch(exception& e) {
526 m->errorOut(e, "VennCommand", "execute");
530 //**********************************************************************************************************************
531 //returns a set of sets, each holding the lookup indices of one 4-group combination
// Seeds an index set with 0 .. lookupSize-1 and passes it to getCombos(), which
// fills `combos` with 4-element index sets.
// NOTE(review): the declaration of `possibles` and the return statement are on lines
// not visible in this excerpt; presumably possibles is a set<int> and combos is
// returned - confirm.
532 set< set<int> > VennCommand::findCombinations(int lookupSize){
534 set< set<int> > combos;
// One index per group position in lookup.
537 for (int i = 0; i < lookupSize; i++) { possibles.insert(i); }
539 getCombos(possibles, combos);
// Any exception raised above is reported with class and function context.
544 catch(exception& e) {
545 m->errorOut(e, "VennCommand", "findCombinations");
549 //**********************************************************************************************************************
550 //recursively finds combos of 4
// Recursive helper for findCombinations().
//   possibles - candidate index set, passed by value (copied on every call)
//   combos    - output: receives every 4-element set reached by the recursion
// Base case: a set of exactly 4 elements is stored in combos.
// Otherwise: for each element `it`, a new set is built from possibles and the function
// recurses on it.
// NOTE(review): original line 565, between the inner for and the insert, is not visible
// in this excerpt. As shown, every element is copied, so the set would never shrink;
// presumably the hidden line skips the element `it` points at - confirm.
// NOTE(review): the return statement is also not visible; the declared return type is int.
551 int VennCommand::getCombos(set<int> possibles, set< set<int> >& combos){
554 if (possibles.size() == 4) { //done
// The count() check guards against duplicates; set::insert would ignore them anyway.
555 if (combos.count(possibles) == 0) { //no dups
556 combos.insert(possibles);
558 }else { //we still have work to do
559 set<int>::iterator it;
560 set<int>::iterator it2;
561 for (it = possibles.begin(); it != possibles.end(); it++) {
563 set<int> newPossibles;
564 for (it2 = possibles.begin(); it2 != possibles.end(); it2++) { //all possible combos of one length smaller
566 newPossibles.insert(*it2);
569 getCombos(newPossibles, combos);
// Any exception raised above is reported with class and function context.
575 catch(exception& e) {
576 m->errorOut(e, "VennCommand", "getCombos");
581 //**********************************************************************************************************************