X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=venncommand.cpp;h=7601ed8e667b10e2b2cd6eaf48c8f5ce365cc6f0;hb=6d12ed0ba66fb35e9e2781fe3ca361e2293f2476;hp=57228ad4c789124126d71781357f4328bfd143a5;hpb=74844a60d80c6dd06e3fb02ee9b928424f9019b0;p=mothur.git diff --git a/venncommand.cpp b/venncommand.cpp index 57228ad..7601ed8 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -15,8 +15,57 @@ #include "sharedsobscollectsummary.h" #include "sharedchao1.h" #include "sharedace.h" +#include "nseqs.h" +//********************************************************************************************************************** +vector VennCommand::getValidParameters(){ + try { + string Array[] = {"groups","label","calc","permute", "abund","nseqs","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +VennCommand::VennCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["svg"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "VennCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector VennCommand::getRequiredParameters(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector VennCommand::getRequiredFiles(){ + try { + string Array[] = {"list","shared","or"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "getRequiredFiles"); + exit(1); + } +} //********************************************************************************************************************** VennCommand::VennCommand(string option) { @@ -31,7 +80,7 @@ VennCommand::VennCommand(string option) { else { //valid paramters for this command - string AlignArray[] = {"groups","label","calc", "abund","outputdir","inputdir"}; + string AlignArray[] = {"groups","label","calc","permute", "abund","nseqs","outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); OptionParser parser(option); @@ -52,7 +101,7 @@ VennCommand::VennCommand(string option) { //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; - outputDir += hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it + outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it } //check for optional parameter and set defaults @@ -60,7 +109,7 @@ VennCommand::VennCommand(string option) { label = validParameter.validFile(parameters, "label", false); if (label == "not found") { label = ""; } else { - if(label != "all") { splitAtDash(label, labels); allLines = 0; } + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } @@ -73,7 +122,7 @@ VennCommand::VennCommand(string option) { groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else { - splitAtDash(groups, Groups); + m->splitAtDash(groups, Groups); globaldata->Groups = Groups; } @@ -89,11 +138,17 @@ VennCommand::VennCommand(string option) { else { calc = "sharedsobs"; } } } - splitAtDash(calc, Estimators); + m->splitAtDash(calc, Estimators); string temp; temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } convert(temp, abund); + + temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; } + nseqs = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; } + perm = m->isTrue(temp); if (abort == false) { validCalculator = new ValidCalculators(); @@ -128,8 +183,11 @@ VennCommand::VennCommand(string option) { } } - venn = new Venn(outputDir); + //if the users entered no valid calculators don't execute command + if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); abort = true; } + else { venn = new Venn(outputDir, nseqs); } } + } @@ -146,7 +204,7 @@ VennCommand::VennCommand(string option) { void VennCommand::help(){ try { m->mothurOut("The venn command can only be executed after a successful read.otu command.\n"); - m->mothurOut("The venn command parameters are groups, calc, abund and label. No parameters are required.\n"); + m->mothurOut("The venn command parameters are groups, calc, abund, nseqs, permute and label. No parameters are required.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n"); m->mothurOut("The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n"); m->mothurOut("The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n"); @@ -154,7 +212,9 @@ void VennCommand::help(){ m->mothurOut("The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n"); m->mothurOut("The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n"); m->mothurOut("The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a list and group file or a shared file.\n"); - m->mothurOut("The only estmiator available four 4 groups is sharedsobs.\n"); + m->mothurOut("The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n"); + m->mothurOut("If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n"); + m->mothurOut("The only estimators available four 4 groups are sharedsobs and sharedchao.\n"); m->mothurOut("The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); } @@ -173,6 +233,7 @@ VennCommand::~VennCommand(){ delete read; delete venn; globaldata->sabund = NULL; + delete validCalculator; } } @@ -185,10 +246,6 @@ int VennCommand::execute(){ if (abort == true) { return 0; } string lastLabel; - vector outputNames; - - //if the users entered no valid calculators don't execute command - if (vennCalculators.size() == 0) { return 0; } if (format == "sharedfile") { //you have groups @@ -198,6 +255,8 @@ int VennCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); lastLabel = lookup[0]->getLabel(); + + if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); } }else if (format == "list") { //you are using just a list file and have only one group read = new ReadOTUFile(globaldata->inputFileName); @@ -211,27 +270,51 @@ int VennCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; set userLabels = labels; - + if (format != "list") { //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { + for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + globaldata->Groups.clear(); + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + return 0; + } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); - if (lookup.size() > 4) { - m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile."); m->mothurOutEndLine(); + if ((lookup.size() > 4) && (!perm)){ + m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine(); for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor - } - vector outfilenames = venn->getPic(lookup, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + + }else if ((lookup.size() > 4) && (perm)) { + set< set >::iterator it3; + set::iterator it2; + for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) { + + set poss = *it3; + vector subset; + for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); } + + vector outfilenames = venn->getPic(subset, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + } + }else { + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + } } - if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup[0]->getLabel(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } @@ -241,13 +324,30 @@ int VennCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); - if (lookup.size() > 4) { - m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile."); m->mothurOutEndLine(); + if ((lookup.size() > 4) && (!perm)){ + m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine(); for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor - } - vector outfilenames = venn->getPic(lookup, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + + }else if ((lookup.size() > 4) && (perm)) { + set< set >::iterator it3; + set::iterator it2; + for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) { + + set poss = *it3; + vector subset; + for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); } + + vector outfilenames = venn->getPic(subset, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + } + }else { + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + } + //restore real lastlabel to save below lookup[0]->setLabel(saveLabel); } @@ -260,6 +360,14 @@ int VennCommand::execute(){ lookup = input->getSharedRAbundVectors(); } + if (m->control_pressed) { + for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } + globaldata->Groups.clear(); + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + return 0; + } + + //output error messages about any remaining user labels set::iterator it; bool needToRun = false; @@ -282,13 +390,30 @@ int VennCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); - if (lookup.size() > 4) { - m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile."); m->mothurOutEndLine(); + if ((lookup.size() > 4) && (!perm)){ + m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine(); for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor - } - vector outfilenames = venn->getPic(lookup, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } + + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + }else if ((lookup.size() > 4) && (perm)) { + set< set >::iterator it3; + set::iterator it2; + for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) { + + set poss = *it3; + vector subset; + for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); } + + vector outfilenames = venn->getPic(subset, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + } + }else { + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } + } + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -296,22 +421,36 @@ int VennCommand::execute(){ //reset groups parameter globaldata->Groups.clear(); + if (m->control_pressed) { + for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + return 0; + } + + }else{ while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { + for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } + delete sabund; + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + return 0; + } if(allLines == 1 || labels.count(sabund->getLabel()) == 1){ m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); vector outfilenames = venn->getPic(sabund, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel()); } - if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = sabund->getLabel(); delete sabund; @@ -319,7 +458,7 @@ int VennCommand::execute(){ m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); vector outfilenames = venn->getPic(sabund, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } processedLabels.insert(sabund->getLabel()); @@ -335,6 +474,12 @@ int VennCommand::execute(){ sabund = input->getSAbundVector(); } + if (m->control_pressed) { + for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + return 0; + } + //output error messages about any remaining user labels set::iterator it; bool needToRun = false; @@ -355,12 +500,17 @@ int VennCommand::execute(){ m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); vector outfilenames = venn->getPic(sabund, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } delete sabund; } + if (m->control_pressed) { + for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + return 0; + } } for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } @@ -378,5 +528,55 @@ int VennCommand::execute(){ exit(1); } } +//********************************************************************************************************************** +//returns a vector of sets containing the 4 group combinations +set< set > VennCommand::findCombinations(int lookupSize){ + try { + set< set > combos; + + set possibles; + for (int i = 0; i < lookupSize; i++) { possibles.insert(i); } + + getCombos(possibles, combos); + + return combos; + + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "findCombinations"); + exit(1); + } +} +//********************************************************************************************************************** +//recusively finds combos of 4 +int VennCommand::getCombos(set possibles, set< set >& combos){ + try { + + if (possibles.size() == 4) { //done + if (combos.count(possibles) == 0) { //no dups + combos.insert(possibles); + } + }else { //we still have work to do + set::iterator it; + set::iterator it2; + for (it = possibles.begin(); it != possibles.end(); it++) { + + set newPossibles; + for (it2 = possibles.begin(); it2 != possibles.end(); it2++) { //all possible combos of one length smaller + if (*it != *it2) { + newPossibles.insert(*it2); + } + } + getCombos(newPossibles, combos); + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "getCombos"); + exit(1); + } +} //**********************************************************************************************************************