From: westcott Date: Thu, 30 Sep 2010 10:51:56 +0000 (+0000) Subject: added parse.fastaq command, added permute option to venn command, fixed bug with... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=2c5386dc0cb6a0e42e8d7256bd6fb18ab66e4480 added parse.fastaq command, added permute option to venn command, fixed bug with output redirect in chimera commands --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 9c147b8..e2673fc 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -37,6 +37,8 @@ A71D924511AEB42400D00CBC /* splitabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splitabundcommand.h; sourceTree = ""; }; A71D924611AEB42400D00CBC /* splitmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = splitmatrix.cpp; sourceTree = ""; }; A71D924711AEB42400D00CBC /* splitmatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splitmatrix.h; sourceTree = ""; }; + A724C2B61254961E006BB1C7 /* parsefastaqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsefastaqcommand.h; sourceTree = ""; }; + A724C2B71254961E006BB1C7 /* parsefastaqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsefastaqcommand.cpp; sourceTree = ""; }; A72B3A62118B37FD004B9F8D /* phylodiversitycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversitycommand.h; sourceTree = ""; }; A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = ""; }; A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = ""; }; @@ -820,10 +822,12 @@ A798626E1240D91B005FC847 /* normalizesharedcommand.cpp */, A7DA20B8113FECD400BF472F /* otuhierarchycommand.cpp */, A7DA20B9113FECD400BF472F /* otuhierarchycommand.h */, + A724C2B61254961E006BB1C7 /* parsefastaqcommand.h */, + A724C2B71254961E006BB1C7 /* parsefastaqcommand.cpp */, A7DA20BC113FECD400BF472F /* parselistscommand.cpp */, A7DA20BD113FECD400BF472F /* parselistscommand.h */, - A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */, A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */, + A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */, A7DA20C0113FECD400BF472F /* parsimonycommand.cpp */, A7DA20C1113FECD400BF472F /* parsimonycommand.h */, A7DA20C3113FECD400BF472F /* pcacommand.h */, diff --git a/chimerabellerophoncommand.cpp b/chimerabellerophoncommand.cpp index a504d0b..5972c62 100644 --- a/chimerabellerophoncommand.cpp +++ b/chimerabellerophoncommand.cpp @@ -82,10 +82,7 @@ ChimeraBellerophonCommand::ChimeraBellerophonCommand(string option) { } //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it - } + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } string temp; temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; } @@ -152,7 +149,8 @@ int ChimeraBellerophonCommand::execute(){ int start = time(NULL); chimera = new Bellerophon(fastaFileNames[i], filter, correction, window, increment, processors, outputDir); - + + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[i]); }//if user entered a file with a path then preserve it string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "bellerophon.chimeras"; string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "bellerophon.accnos"; diff --git a/chimeraccodecommand.cpp b/chimeraccodecommand.cpp index 753bc9a..ab10eb8 100644 --- a/chimeraccodecommand.cpp +++ b/chimeraccodecommand.cpp @@ -92,10 +92,7 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option) { } //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it - } + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } templatefile = validParameter.validFile(parameters, "template", true); if (templatefile == "not open") { abort = true; } @@ -192,6 +189,7 @@ int ChimeraCcodeCommand::execute(){ if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; } templateSeqsLength = chimera->getLength(); + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it string outputFileName, accnosFileName; if (maskfile != "") { outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.chimeras"; diff --git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp index 1855439..c8761b9 100644 --- a/chimeracheckcommand.cpp +++ b/chimeracheckcommand.cpp @@ -217,6 +217,7 @@ int ChimeraCheckCommand::execute(){ if (m->control_pressed) { delete chimera; return 0; } + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[i]); }//if user entered a file with a path then preserve it string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "chimeracheck.chimeras"; outputNames.push_back(outputFileName); diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index 7c3b24b..3013650 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -150,10 +150,7 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option) { //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it - } + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } templatefile = validParameter.validFile(parameters, "template", true); if (templatefile == "not open") { abort = true; } @@ -287,6 +284,7 @@ int ChimeraPintailCommand::execute(){ chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir); + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it string outputFileName, accnosFileName; if (maskfile != "") { outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras"; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 996843b..d2f3067 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -95,10 +95,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { } //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it - } + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } templatefile = validParameter.validFile(parameters, "template", true); if (templatefile == "not open") { abort = true; } @@ -218,7 +215,8 @@ int ChimeraSlayerCommand::execute(){ int start = time(NULL); chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); - + + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimeras"; string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos"; diff --git a/commandfactory.cpp b/commandfactory.cpp index 94da28f..5fbdfa8 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -91,6 +91,7 @@ #include "clusterfragmentscommand.h" #include "getlineagecommand.h" #include "removelineagecommand.h" +#include "parsefastaqcommand.h" /*******************************************************/ @@ -186,6 +187,7 @@ CommandFactory::CommandFactory(){ commands["cluster.fragments"] = "cluster.fragments"; commands["get.lineage"] = "get.lineage"; commands["remove.lineage"] = "remove.lineage"; + commands["parse.fastaq"] = "parse.fastaq"; commands["classify.seqs"] = "MPIEnabled"; commands["dist.seqs"] = "MPIEnabled"; commands["filter.seqs"] = "MPIEnabled"; @@ -324,6 +326,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "cluster.fragments") { command = new ClusterFragmentsCommand(optionString); } else if(commandName == "get.lineage") { command = new GetLineageCommand(optionString); } else if(commandName == "remove.lineage") { command = new RemoveLineageCommand(optionString); } + else if(commandName == "parse.fastaq") { command = new ParseFastaQCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/makefile b/makefile index 773b52f..0fd76c1 100644 --- a/makefile +++ b/makefile @@ -13,6 +13,21 @@ CXXFLAGS += -O3 +64BIT_VERSION ?= yes + +ifeq ($(strip $(64BIT_VERSION)),yes) + #if you are using centos uncomment the following lines + #CXX = g++44 + + #if you are a mac user use the following line + TARGET_ARCH += -arch x86_64 + + #if you are a linux user use the following line + #CXXFLAGS += -mtune=native -march=native -m64 + + CXXFLAGS += -DBIT_VERSION +endif + MOTHUR_FILES = "\"../Release\"" RELEASE_DATE = "\"9/17/2010\"" @@ -31,21 +46,6 @@ ifeq ($(strip $(CYGWIN_BUILD)),yes) LDFLAGS += -mno-cygwin endif -64BIT_VERSION ?= yes - -ifeq ($(strip $(64BIT_VERSION)),yes) - CXXFLAGS += -DBIT_VERSION - - #if you are using centos uncomment the following lines - #CXX = g++44 - - #if you are a mac user use the following line - TARGET_ARCH += -arch x86_64 - - #if you are a linux user use the following line - #CXXFLAGS += -mtune=native -march=native -m64 -endif - # if you do not want to use the readline library, set this to no. # make sure you have the library installed diff --git a/mothur b/mothur index d4e222c..ea9c520 100755 Binary files a/mothur and b/mothur differ diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp new file mode 100644 index 0000000..2dece31 --- /dev/null +++ b/parsefastaqcommand.cpp @@ -0,0 +1,180 @@ +/* + * parsefastaqcommand.cpp + * Mothur + * + * Created by westcott on 9/30/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "parsefastaqcommand.h" +#include "sequence.hpp" + +//********************************************************************************************************************** +ParseFastaQCommand::ParseFastaQCommand(string option){ + try { + abort = false; + + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"fastaq", "outputdir", "inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("fastaq"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fastaq"] = inputDir + it->second; } + } + } + + //check for required parameters + fastaQFile = validParameter.validFile(parameters, "fastaq", true); + if (fastaQFile == "not found") { m->mothurOut("fastaq is a required parameter for the parse.fastaq command."); m->mothurOutEndLine(); abort = true; } + else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastaQFile); } + + } + } + catch(exception& e) { + m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void ParseFastaQCommand::help(){ + try { + m->mothurOut("The parse.fastaq command reads a fastaQ file and creates a fasta and quality file.\n"); + m->mothurOut("The parse.fastaq command parameter is fastaq, and it is required.\n"); + m->mothurOut("The parse.fastaq command should be in the following format: parse.fastaq(fastaq=yourFastaQFile).\n"); + m->mothurOut("Example parse.fastaq(fastaq=test.fastaq).\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. fastaq), '=' and yourFastaQFile.\n"); + m->mothurOutEndLine(); + } + catch(exception& e) { + m->errorOut(e, "ParseFastaQCommand", "help"); + exit(1); + } +} +//********************************************************************************************************************** + +ParseFastaQCommand::~ParseFastaQCommand() { /* do nothing */ } + +//********************************************************************************************************************** + +int ParseFastaQCommand::execute(){ + try { + if (abort == true) { return 0; } + + //open Output Files + string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "fasta"; + string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "qual"; + ofstream outFasta, outQual; + m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); + m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); + + ifstream in; + m->openInputFile(fastaQFile, in); + + while (!in.eof()) { + + //read sequence name + string name = m->getline(in); m->gobble(in); + if (name == "") { m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; } + else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; } + else { name = name.substr(1); } + + //read sequence + string sequence = m->getline(in); m->gobble(in); + if (sequence == "") { m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; } + + //read sequence name + string name2 = m->getline(in); m->gobble(in); + if (name2 == "") { m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; } + else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; } + else { name2 = name2.substr(1); } + + //read quality scores + string qual = m->getline(in); m->gobble(in); + if (qual == "") { m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; } + + //sanity check sequence length and number of quality scores match + if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } + if (qual.length() != sequence.length()) { m->mothurOut("[ERROR]: lengths do not match. read " + toString(sequence.length()) + " characters for fasta and " + toString(qual.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; } + + //convert quality scores + vector qualScores = convertQual(qual); + + //print sequence info to files + outFasta << ">" << name << endl << sequence << endl; + + outQual << ">" << name << endl; + for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; } + outQual << endl; + } + + in.close(); + outFasta.close(); + outQual.close(); + + if (m->control_pressed) { remove(fastaFile.c_str()); remove(qualFile.c_str()); return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ParseFastaQCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +vector ParseFastaQCommand::convertQual(string qual) { + try { + vector qualScores; + + int controlChar = int('!'); + + for (int i = 0; i < qual.length(); i++) { + int temp = int(qual[i]); + temp -= controlChar; + + qualScores.push_back(temp); + } + + return qualScores; + } + catch(exception& e) { + m->errorOut(e, "ParseFastaQCommand", "convertQual"); + exit(1); + } +} +//********************************************************************************************************************** + + + diff --git a/parsefastaqcommand.h b/parsefastaqcommand.h new file mode 100644 index 0000000..0710775 --- /dev/null +++ b/parsefastaqcommand.h @@ -0,0 +1,35 @@ +#ifndef PARSEFASTAQCOMMAND_H +#define PARSEFASTAQCOMMAND_H + +/* + * parsefastaqcommand.h + * Mothur + * + * Created by westcott on 9/30/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + + +#include "command.hpp" + +class ParseFastaQCommand : public Command { + +public: + ParseFastaQCommand(string); + ~ParseFastaQCommand(); + int execute(); + void help(); + +private: + + vector outputNames; + string outputDir, fastaQFile; + bool abort; + + vector convertQual(string); +}; + +#endif + + diff --git a/parsesffcommand.h b/parsesffcommand.h index 409293c..5714e0d 100644 --- a/parsesffcommand.h +++ b/parsesffcommand.h @@ -10,7 +10,7 @@ * */ -#include "mothur.h" + #include "command.hpp" class ParseSFFCommand : public Command { diff --git a/venncommand.cpp b/venncommand.cpp index 5fa06aa..852ee8c 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -32,7 +32,7 @@ VennCommand::VennCommand(string option) { else { //valid paramters for this command - string AlignArray[] = {"groups","label","calc", "abund","nseqs","outputdir","inputdir"}; + string AlignArray[] = {"groups","label","calc","permute", "abund","nseqs","outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); OptionParser parser(option); @@ -96,9 +96,11 @@ VennCommand::VennCommand(string option) { temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } convert(temp, abund); - temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; } + temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; } nseqs = m->isTrue(temp); + temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; } + perm = m->isTrue(temp); if (abort == false) { validCalculator = new ValidCalculators(); @@ -154,7 +156,7 @@ VennCommand::VennCommand(string option) { void VennCommand::help(){ try { m->mothurOut("The venn command can only be executed after a successful read.otu command.\n"); - m->mothurOut("The venn command parameters are groups, calc, abund, nseqs and label. No parameters are required.\n"); + m->mothurOut("The venn command parameters are groups, calc, abund, nseqs, permute and label. No parameters are required.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n"); m->mothurOut("The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n"); m->mothurOut("The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n"); @@ -163,6 +165,7 @@ void VennCommand::help(){ m->mothurOut("The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n"); m->mothurOut("The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a list and group file or a shared file.\n"); m->mothurOut("The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n"); + m->mothurOut("If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n"); m->mothurOut("The only estimators available four 4 groups are sharedsobs and sharedchao.\n"); m->mothurOut("The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); @@ -205,6 +208,8 @@ int VennCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); lastLabel = lookup[0]->getLabel(); + + if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); } }else if (format == "list") { //you are using just a list file and have only one group read = new ReadOTUFile(globaldata->inputFileName); @@ -218,7 +223,7 @@ int VennCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; set userLabels = labels; - + if (format != "list") { //as long as you are not at the end of the file or done wih the lines you want @@ -237,13 +242,29 @@ int VennCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); - if (lookup.size() > 4) { - m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile."); m->mothurOutEndLine(); + if ((lookup.size() > 4) && (!perm)){ + m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine(); for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor - } - vector outfilenames = venn->getPic(lookup, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + + }else if ((lookup.size() > 4) && (perm)) { + set< set >::iterator it3; + set::iterator it2; + for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) { + + set poss = *it3; + vector subset; + for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); } + + vector outfilenames = venn->getPic(subset, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + } + }else { + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + } } if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { @@ -256,13 +277,30 @@ int VennCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); - if (lookup.size() > 4) { - m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile."); m->mothurOutEndLine(); + if ((lookup.size() > 4) && (!perm)){ + m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine(); for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor - } - vector outfilenames = venn->getPic(lookup, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + + }else if ((lookup.size() > 4) && (perm)) { + set< set >::iterator it3; + set::iterator it2; + for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) { + + set poss = *it3; + vector subset; + for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); } + + vector outfilenames = venn->getPic(subset, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + } + }else { + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + } + //restore real lastlabel to save below lookup[0]->setLabel(saveLabel); } @@ -305,13 +343,30 @@ int VennCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); - if (lookup.size() > 4) { - m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile."); m->mothurOutEndLine(); + if ((lookup.size() > 4) && (!perm)){ + m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine(); for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor - } - vector outfilenames = venn->getPic(lookup, vennCalculators); - for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + }else if ((lookup.size() > 4) && (perm)) { + set< set >::iterator it3; + set::iterator it2; + for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) { + + set poss = *it3; + vector subset; + for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(lookup[*it2]); } + + vector outfilenames = venn->getPic(subset, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + } + }else { + vector outfilenames = venn->getPic(lookup, vennCalculators); + for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); } } + } + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -426,5 +481,55 @@ int VennCommand::execute(){ exit(1); } } +//********************************************************************************************************************** +//returns a vector of sets containing the 4 group combinations +set< set > VennCommand::findCombinations(int lookupSize){ + try { + set< set > combos; + + set possibles; + for (int i = 0; i < lookupSize; i++) { possibles.insert(i); } + + getCombos(possibles, combos); + + return combos; + + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "findCombinations"); + exit(1); + } +} +//********************************************************************************************************************** +//recusively finds combos of 4 +int VennCommand::getCombos(set possibles, set< set >& combos){ + try { + + if (possibles.size() == 4) { //done + if (combos.count(possibles) == 0) { //no dups + combos.insert(possibles); + } + }else { //we still have work to do + set::iterator it; + set::iterator it2; + for (it = possibles.begin(); it != possibles.end(); it++) { + + set newPossibles; + for (it2 = possibles.begin(); it2 != possibles.end(); it2++) { //all possible combos of one length smaller + if (*it != *it2) { + newPossibles.insert(*it2); + } + } + getCombos(newPossibles, combos); + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "VennCommand", "getCombos"); + exit(1); + } +} //********************************************************************************************************************** diff --git a/venncommand.h b/venncommand.h index a159356..f6bdf39 100644 --- a/venncommand.h +++ b/venncommand.h @@ -37,13 +37,17 @@ private: vector vennCalculators; ValidCalculators* validCalculator; vector lookup; + set< set > combosOfFour; SAbundVector* sabund; int abund; - bool abort, allLines, nseqs; + bool abort, allLines, nseqs, perm; set labels; //holds labels to be used string format, groups, calc, label, outputDir; vector Estimators, Groups; + + set< set > findCombinations(int); + int getCombos(set, set< set >&); };