From: westcott Date: Mon, 24 Jan 2011 18:50:52 +0000 (+0000) Subject: added merge.groups command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=f598bd8389840cf030d61f5da7d0b2c3e37c06ba added merge.groups command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index ade49c9..24187d8 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -11,6 +11,7 @@ A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; }; A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; }; A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; }; + A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; }; A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; }; A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; }; @@ -307,6 +308,8 @@ A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = ""; }; A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = ""; }; A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nmdscommand.cpp; sourceTree = ""; }; + A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergegroupscommand.h; sourceTree = ""; }; + A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergegroupscommand.cpp; sourceTree = ""; }; A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = ""; }; A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = ""; }; A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; @@ -1109,6 +1112,8 @@ A7E9B74A12D37EC400DA6239 /* matrixoutputcommand.h */, A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */, A7E9B75412D37EC400DA6239 /* mergefilecommand.h */, + A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */, + A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */, A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */, A7E9B75812D37EC400DA6239 /* metastatscommand.h */, A7E9B75912D37EC400DA6239 /* mgclustercommand.cpp */, @@ -1889,6 +1894,7 @@ A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */, A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */, A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */, + A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index 26b8c86..250e4f6 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -108,6 +108,7 @@ #include "pcacommand.h" #include "nmdscommand.h" #include "removerarecommand.h" +#include "mergegroupscommand.h" /*******************************************************/ @@ -219,6 +220,7 @@ CommandFactory::CommandFactory(){ commands["pca"] = "pca"; commands["nmds"] = "nmds"; commands["remove.rare"] = "remove.rare"; + commands["merge.groups"] = "merge.groups"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; commands["classify.seqs"] = "MPIEnabled"; @@ -379,6 +381,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "consensus.seqs") { command = new ConsensusSeqsCommand(optionString); } else if(commandName == "corr.axes") { command = new CorrAxesCommand(optionString); } else if(commandName == "remove.rare") { command = new RemoveRareCommand(optionString); } + else if(commandName == "merge.groups") { command = new MergeGroupsCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -505,6 +508,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "consensus.seqs") { pipecommand = new ConsensusSeqsCommand(optionString); } else if(commandName == "corr.axes") { pipecommand = new CorrAxesCommand(optionString); } else if(commandName == "remove.rare") { pipecommand = new RemoveRareCommand(optionString); } + else if(commandName == "merge.groups") { pipecommand = new MergeGroupsCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -619,6 +623,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "consensus.seqs") { shellcommand = new ConsensusSeqsCommand(); } else if(commandName == "corr.axes") { shellcommand = new CorrAxesCommand(); } else if(commandName == "remove.rare") { shellcommand = new RemoveRareCommand(); } + else if(commandName == "merge.groups") { shellcommand = new MergeGroupsCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/mergegroupscommand.cpp b/mergegroupscommand.cpp new file mode 100644 index 0000000..ad0dc31 --- /dev/null +++ b/mergegroupscommand.cpp @@ -0,0 +1,341 @@ +/* + * mergegroupscommand.cpp + * mothur + * + * Created by westcott on 1/24/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "mergegroupscommand.h" +#include "sharedutilities.h" + +//********************************************************************************************************************** +vector MergeGroupsCommand::getValidParameters(){ + try { + string Array[] = {"shared","label","outputdir","design","groups","processors","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +MergeGroupsCommand::MergeGroupsCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["shared"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "MetaStatsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector MergeGroupsCommand::getRequiredParameters(){ + try { + string Array[] = {"design", "shared"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector MergeGroupsCommand::getRequiredFiles(){ + try { + string Array[] = {}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** + +MergeGroupsCommand::MergeGroupsCommand(string option) { + try { + globaldata = GlobalData::getInstance(); + abort = false; + allLines = 1; + labels.clear(); + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"groups","label","outputdir","shared","design","processors","inputdir"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + map::iterator it; + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["shared"] = tempOutNames; + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("design"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["design"] = inputDir + it->second; } + } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + //check for required parameters + designfile = validParameter.validFile(parameters, "design", true); + if (designfile == "not open") { abort = true; } + else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide a design file."); m->mothurOutEndLine(); abort = true; } + + //make sure the user has already run the read.otu command + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; m->mothurOut("You must provide a shared file."); m->mothurOutEndLine(); abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = "all"; } + m->splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "MergeGroupsCommand"); + exit(1); + } +} + +//********************************************************************************************************************** + +void MergeGroupsCommand::help(){ + try { + m->mothurOut("The merge.groups command reads a shared file and a design file and merges the groups in the shared file that are in the same grouping in the design file.\n"); + m->mothurOut("The metastats command outputs a .shared file. \n"); + m->mothurOut("The metastats command parameters are shared, groups, label and design. The design and shared parameter are required.\n"); + m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n"); + m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n"); + m->mothurOut("The groups parameter allows you to specify which of the groups in your shared you would like included. The group names are separated by dashes.\n"); + m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"); + m->mothurOut("The merge.groups command should be in the following format: metastats(design=yourDesignFile, shared=yourSharedFile).\n"); + m->mothurOut("Example metastats(design=temp.design, groups=A-B-C, shared=temp.shared).\n"); + m->mothurOut("The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); + + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +MergeGroupsCommand::~MergeGroupsCommand(){} + +//********************************************************************************************************************** + +int MergeGroupsCommand::execute(){ + try { + + if (abort == true) { return 0; } + + if (outputDir == "") { outputDir += m->hasPath(sharedfile); } + string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" + m->getExtension(sharedfile); + outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); + + ofstream out; + m->openOutputFile(outputFileName, out); + + designMap = new GroupMap(designfile); + designMap->readDesignMap(); + + InputData input(sharedfile, "sharedfile"); + lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { out.close(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + process(lookup, out); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup, out); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { out.close(); globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //get next line to process + lookup = input.getSharedRAbundVectors(); + } + + if (m->control_pressed) { out.close(); globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup, out); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + + out.close(); + //reset groups parameter + globaldata->Groups.clear(); + delete designMap; + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0;} + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** + +int MergeGroupsCommand::process(vector& thisLookUp, ofstream& out){ + try { + + map merged; + map::iterator it; + + for (int i = 0; i < thisLookUp.size(); i++) { + + if (m->control_pressed) { return 0; } + + //what grouping does this group belong to + string grouping = designMap->getGroup(thisLookUp[i]->getGroup()); + if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; } + + else { + //do we already have a member of this grouping? + it = merged.find(grouping); + + if (it == merged.end()) { //nope, so create it + merged[grouping] = *thisLookUp[i]; + merged[grouping].setGroup(grouping); + }else { //yes, merge it + + for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) { + int abund = (it->second).getAbundance(j); + abund += thisLookUp[i]->getAbundance(j); + + (it->second).set(j, abund, grouping); + } + } + } + } + + //print new file + for (it = merged.begin(); it != merged.end(); it++) { + out << (it->second).getLabel() << '\t' << it->first << '\t'; + (it->second).print(out); + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "MergeGroupsCommand", "process"); + exit(1); + } +} +//********************************************************************************************************************** + + + diff --git a/mergegroupscommand.h b/mergegroupscommand.h new file mode 100644 index 0000000..62839ec --- /dev/null +++ b/mergegroupscommand.h @@ -0,0 +1,47 @@ +#ifndef MERGEGROUPSCOMMAND_H +#define MERGEGROUPSCOMMAND_H + +/* + * mergegroupscommand.h + * mothur + * + * Created by westcott on 1/24/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "inputdata.h" +#include "sharedrabundvector.h" + +class GlobalData; + +class MergeGroupsCommand : public Command { + +public: + MergeGroupsCommand(string); + MergeGroupsCommand(); + ~MergeGroupsCommand(); + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + GlobalData* globaldata; + GroupMap* designMap; + vector lookup; + map > outputTypes; + + bool abort, allLines, pickedGroups; + set labels; //holds labels to be used + string groups, label, outputDir, inputDir, designfile, sharedfile; + vector Groups, outputNames; + + int process(vector&, ofstream&); +}; + +#endif +