From: westcott Date: Fri, 21 Jan 2011 19:16:14 +0000 (+0000) Subject: added calcs to tree.shared. working on remove.rare command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=259b6adf51ef0639cafd88cf862e4ffd5e0c7576 added calcs to tree.shared. working on remove.rare command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 1b87a4f..8aeed0a 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -11,6 +11,7 @@ A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; }; A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; }; A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; }; + A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; }; A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; }; A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; }; @@ -306,6 +307,8 @@ A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = ""; }; A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = ""; }; A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nmdscommand.cpp; sourceTree = ""; }; + A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = ""; }; + A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = ""; }; A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = ""; }; A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = ""; }; @@ -1156,6 +1159,8 @@ A7E9B7C612D37EC400DA6239 /* removelineagecommand.h */, A7E9B7C712D37EC400DA6239 /* removeotuscommand.cpp */, A7E9B7C812D37EC400DA6239 /* removeotuscommand.h */, + A727864212E9E28C00F86ABA /* removerarecommand.h */, + A727864312E9E28C00F86ABA /* removerarecommand.cpp */, A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */, A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */, A7E9B7CD12D37EC400DA6239 /* reversecommand.cpp */, @@ -1880,6 +1885,7 @@ A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */, A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */, A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */, + A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/clusterfragmentscommand.cpp b/clusterfragmentscommand.cpp index f345d3d..6eff3a7 100644 --- a/clusterfragmentscommand.cpp +++ b/clusterfragmentscommand.cpp @@ -166,7 +166,7 @@ void ClusterFragmentsCommand::help(){ m->mothurOut("The cluster.fragments command parameters are fasta, name, diffs and percent. The fasta parameter is required. \n"); m->mothurOut("The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n"); m->mothurOut("The diffs parameter allows you to set the number of differences allowed, default=0. \n"); - m->mothurOut("The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than two percent of the length of the fragment, then cluster.\n"); + m->mothurOut("The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than or equal to two percent of the length of the fragment, then cluster.\n"); m->mothurOut("You may use diffs and percent at the same time to say something like: If the number or differences is greater than 1 or more than 2% of the fragment length, don't merge. \n"); m->mothurOut("The cluster.fragments command should be in the following format: \n"); m->mothurOut("cluster.fragments(fasta=yourFastaFile, names=yourNamesFile) \n"); diff --git a/commandfactory.cpp b/commandfactory.cpp index ae59a8d..26b8c86 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -107,6 +107,7 @@ #include "shhhercommand.h" #include "pcacommand.h" #include "nmdscommand.h" +#include "removerarecommand.h" /*******************************************************/ @@ -217,6 +218,7 @@ CommandFactory::CommandFactory(){ commands["corr.axes"] = "corr.axes"; commands["pca"] = "pca"; commands["nmds"] = "nmds"; + commands["remove.rare"] = "remove.rare"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; commands["classify.seqs"] = "MPIEnabled"; @@ -376,6 +378,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "indicator") { command = new IndicatorCommand(optionString); } else if(commandName == "consensus.seqs") { command = new ConsensusSeqsCommand(optionString); } else if(commandName == "corr.axes") { command = new CorrAxesCommand(optionString); } + else if(commandName == "remove.rare") { command = new RemoveRareCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -501,6 +504,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "indicator") { pipecommand = new IndicatorCommand(optionString); } else if(commandName == "consensus.seqs") { pipecommand = new ConsensusSeqsCommand(optionString); } else if(commandName == "corr.axes") { pipecommand = new CorrAxesCommand(optionString); } + else if(commandName == "remove.rare") { pipecommand = new RemoveRareCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -614,6 +618,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "indicator") { shellcommand = new IndicatorCommand(); } else if(commandName == "consensus.seqs") { shellcommand = new ConsensusSeqsCommand(); } else if(commandName == "corr.axes") { shellcommand = new CorrAxesCommand(); } + else if(commandName == "remove.rare") { shellcommand = new RemoveRareCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/removeotuscommand.h b/removeotuscommand.h index ce1646b..872b2c8 100644 --- a/removeotuscommand.h +++ b/removeotuscommand.h @@ -10,8 +10,6 @@ * */ - - #include "command.hpp" #include "groupmap.h" #include "listvector.hpp" diff --git a/removerarecommand.cpp b/removerarecommand.cpp new file mode 100644 index 0000000..4ba493f --- /dev/null +++ b/removerarecommand.cpp @@ -0,0 +1,576 @@ +/* + * removerarecommand.cpp + * mothur + * + * Created by westcott on 1/21/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "removerarecommand.h" +#include "sequence.hpp" +#include "groupmap.h" +#include "sharedutilities.h" +#include "inputdata.h" + +//********************************************************************************************************************** +vector RemoveRareCommand::getValidParameters(){ + try { + string Array[] = {"rabund","sabund", "group", "list", "shared", "nseqs","groups","label","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +RemoveRareCommand::RemoveRareCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["rabund"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["shared"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "RemoveRareCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector RemoveRareCommand::getRequiredParameters(){ + try { + string Array[] = {"nseqs"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector RemoveRareCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** +RemoveRareCommand::RemoveRareCommand(string option) { + try { + abort = false; + allLines = 1; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"rabund","sabund", "group", "list", "shared", "nseqs","groups","label","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["rabund"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["shared"] = tempOutNames; + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("list"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["list"] = inputDir + it->second; } + } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } + + it = parameters.find("sabund"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["sabund"] = inputDir + it->second; } + } + + it = parameters.find("rabund"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["rabund"] = inputDir + it->second; } + } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + + //check for file parameters + listfile = validParameter.validFile(parameters, "list", true); + if (listfile == "not open") { abort = true; } + else if (listfile == "not found") { listfile = ""; } + + sabundfile = validParameter.validFile(parameters, "sabund", true); + if (sabundfile == "not open") { abort = true; } + else if (sabundfile == "not found") { sabundfile = ""; } + + rabundfile = validParameter.validFile(parameters, "rabund", true); + if (rabundfile == "not open") { abort = true; } + else if (rabundfile == "not found") { rabundfile = ""; } + + groupfile = validParameter.validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + + if ((sabundfile == "") && (rabundfile == "") && (sharedfile == "") && (listfile == "")) { m->mothurOut("You must provide at least one of the following: rabund, sabund, shared or list."); m->mothurOutEndLine(); abort = true; } + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = "all"; } + m->splitAtDash(groups, Groups); + + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + string temp = validParameter.validFile(parameters, "nseqs", false); + if (temp == "not found") { m->mothurOut("nseqs is a required parameter."); m->mothurOutEndLine(); abort = true; } + else { convert(temp, nseqs); } + } + + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "RemoveRareCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void RemoveRareCommand::help(){ + try { + //m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n"); + //m->mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n"); + //m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos and at least one of the file parameters.\n"); + //m->mothurOut("The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n"); + //m->mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n"); + //m->mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n"); + //m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +int RemoveRareCommand::execute(){ + try { + + if (abort == true) { return 0; } + + if (m->control_pressed) { return 0; } + + //read through the correct file and output lines you want to keep + if (sabundfile != "") { processSabund(); } + if (rabundfile != "") { processRabund(); } + if (listfile != "") { processList(); } + //if (sharedfile != "") { processShared(); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + if (outputNames.size() != 0) { + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + } + + return 0; + } + + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** +int RemoveRareCommand::processList(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile); + string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile); + + ofstream out, outGroup; + m->openOutputFile(outputFileName, out); + + bool wroteSomething = false; + + //you must provide a label because the names in the listfile need to be consistent + string thisLabel = ""; + if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); } + else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); } + else { thisLabel = *labels.begin(); } + + InputData input(listfile, "list"); + ListVector* list = input.getListVector(); + + //get first one or the one we want + if (thisLabel != "") { + //use smart distancing + set userLabels; userLabels.insert(thisLabel); + set processedLabels; + string lastLabel = list->getLabel(); + while((list != NULL) && (userLabels.size() != 0)) { + if(userLabels.count(list->getLabel()) == 1){ + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + break; + } + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + delete list; + list = input.getListVector(lastLabel); + break; + } + lastLabel = list->getLabel(); + delete list; + list = input.getListVector(); + } + if (userLabels.size() != 0) { + m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + "."); m->mothurOutEndLine(); + list = input.getListVector(lastLabel); + } + } + + //if groupfile is given then use it + GroupMap* groupMap; + if (groupfile != "") { + groupMap = new GroupMap(groupfile); groupMap->readMap(); + SharedUtil util; + util.setGroups(Groups, groupMap->namesOfGroups); + m->openOutputFile(outputGroupFileName, outGroup); + } + + + if (list != NULL) { + //make a new list vector + ListVector newList; + newList.setLabel(list->getLabel()); + + //for each bin + for (int i = 0; i < list->getNumBins(); i++) { + if (m->control_pressed) { if (groupfile != "") { delete groupMap; outGroup.close(); remove(outputGroupFileName.c_str()); } out.close(); remove(outputFileName.c_str()); return 0; } + + //parse out names that are in accnos file + string binnames = list->get(i); + vector names; + string saveBinNames = binnames; + m->splitAtComma(binnames, names); + + vector newGroupFile; + if (groupfile != "") { + vector newNames; + saveBinNames = ""; + for(int k = 0; k < names.size(); k++) { + string group = groupMap->getGroup(names[k]); + + if (m->inUsersGroups(group, Groups)) { + newGroupFile.push_back(names[k] + "\t" + group); + + newNames.push_back(names[k]); + saveBinNames += names[k] + ","; + } + } + names = newNames; + saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); + } + + if (names.size() > nseqs) { //keep bin + newList.push_back(saveBinNames); + for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } + } + } + + //print new listvector + if (newList.getNumBins() != 0) { + wroteSomething = true; + newList.print(out); + } + } + + out.close(); + if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); } + + if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); } + outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "processList"); + exit(1); + } +} +//********************************************************************************************************************** +int RemoveRareCommand::processSabund(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(sabundfile); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "pick" + m->getExtension(sabundfile); + outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName); + + ofstream out; + m->openOutputFile(outputFileName, out); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + InputData input(sabundfile, "sabund"); + SAbundVector* sabund = input.getSAbundVector(); + string lastLabel = sabund->getLabel(); + set processedLabels; + set userLabels = labels; + + while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { delete sabund; out.close(); return 0; } + + if(allLines == 1 || labels.count(sabund->getLabel()) == 1){ + + m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); + processedLabels.insert(sabund->getLabel()); + userLabels.erase(sabund->getLabel()); + + if (sabund->getMaxRank() > nseqs) { + for(int i = 1; i <=nseqs; i++) { sabund->set(i, 0); } + }else { sabund->clear(); } + + if (sabund->getNumBins() > 0) { sabund->print(out); } + } + + if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = sabund->getLabel(); + + delete sabund; + sabund = input.getSAbundVector(lastLabel); + + m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); + processedLabels.insert(sabund->getLabel()); + userLabels.erase(sabund->getLabel()); + + if (sabund->getMaxRank() > nseqs) { + for(int i = 1; i <=nseqs; i++) { sabund->set(i, 0); } + }else { sabund->clear(); } + + if (sabund->getNumBins() > 0) { sabund->print(out); } + + //restore real lastlabel to save below + sabund->setLabel(saveLabel); + } + + lastLabel = sabund->getLabel(); + + delete sabund; + sabund = input.getSAbundVector(); + } + + if (m->control_pressed) { out.close(); return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + if (sabund != NULL) { delete sabund; } + sabund = input.getSAbundVector(lastLabel); + + m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); + + if (sabund->getMaxRank() > nseqs) { + for(int i = 1; i <=nseqs; i++) { sabund->set(i, 0); } + }else { sabund->clear(); } + + if (sabund->getNumBins() > 0) { sabund->print(out); } + + delete sabund; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "processSabund"); + exit(1); + } +} +//********************************************************************************************************************** +int RemoveRareCommand::processRabund(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(rabundfile); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "pick" + m->getExtension(rabundfile); + outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName); + + ofstream out; + m->openOutputFile(outputFileName, out); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + InputData input(rabundfile, "rabund"); + RAbundVector* rabund = input.getRAbundVector(); + string lastLabel = rabund->getLabel(); + set processedLabels; + set userLabels = labels; + + while((rabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { delete rabund; out.close(); return 0; } + + if(allLines == 1 || labels.count(rabund->getLabel()) == 1){ + + m->mothurOut(rabund->getLabel()); m->mothurOutEndLine(); + processedLabels.insert(rabund->getLabel()); + userLabels.erase(rabund->getLabel()); + + RAbundVector newRabund; newRabund.setLabel(rabund->getLabel()); + for (int i = 0; i < rabund->getNumBins(); i++) { + if (rabund->get(i) > nseqs) { + newRabund.push_back(rabund->get(i)); + } + } + if (newRabund.getNumBins() > 0) { newRabund.print(out); } + } + + if ((m->anyLabelsToProcess(rabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = rabund->getLabel(); + + delete rabund; + rabund = input.getRAbundVector(lastLabel); + + m->mothurOut(rabund->getLabel()); m->mothurOutEndLine(); + processedLabels.insert(rabund->getLabel()); + userLabels.erase(rabund->getLabel()); + + RAbundVector newRabund; newRabund.setLabel(rabund->getLabel()); + for (int i = 0; i < rabund->getNumBins(); i++) { + if (rabund->get(i) > nseqs) { + newRabund.push_back(rabund->get(i)); + } + } + if (newRabund.getNumBins() > 0) { newRabund.print(out); } + + //restore real lastlabel to save below + rabund->setLabel(saveLabel); + } + + lastLabel = rabund->getLabel(); + + delete rabund; + rabund = input.getRAbundVector(); + } + + if (m->control_pressed) { out.close(); return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + if (rabund != NULL) { delete rabund; } + rabund = input.getRAbundVector(lastLabel); + + m->mothurOut(rabund->getLabel()); m->mothurOutEndLine(); + + RAbundVector newRabund; newRabund.setLabel(rabund->getLabel()); + for (int i = 0; i < rabund->getNumBins(); i++) { + if (rabund->get(i) > nseqs) { + newRabund.push_back(rabund->get(i)); + } + } + if (newRabund.getNumBins() > 0) { newRabund.print(out); } + + delete rabund; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveRareCommand", "processSabund"); + exit(1); + } +} +//********************************************************************************************************************** + + + + diff --git a/removerarecommand.h b/removerarecommand.h new file mode 100644 index 0000000..7db54b4 --- /dev/null +++ b/removerarecommand.h @@ -0,0 +1,50 @@ +#ifndef REMOVERARECOMMAND_H +#define REMOVERARECOMMAND_H + +/* + * removerarecommand.h + * mothur + * + * Created by westcott on 1/21/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + + +#include "command.hpp" +#include "listvector.hpp" + +class RemoveRareCommand : public Command { + +public: + + RemoveRareCommand(string); + RemoveRareCommand(); + ~RemoveRareCommand(){} + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + string sabundfile, rabundfile, sharedfile, groupfile, listfile, outputDir, groups, label; + int nseqs, allLines; + bool abort, byGroup; + vector outputNames, Groups; + set labels; + map > outputTypes; + + int processSabund(); + int processRabund(); + int processList(); + int processShared(); + +}; + +#endif + + + + diff --git a/shhhercommand.cpp b/shhhercommand.cpp index d1732cd..63df681 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -838,10 +838,10 @@ void ShhherCommand::getJointLookUp(){ for(int i=0;i TreeGroupCommand::getValidParameters(){ @@ -210,7 +241,13 @@ TreeGroupCommand::TreeGroupCommand(string option) { int i; for (i=0; iisValidCalculator("treegroup", Estimators[i]) == true) { - if (Estimators[i] == "jabund") { + if (Estimators[i] == "sharedsobs") { + treeCalculators.push_back(new SharedSobsCS()); + }else if (Estimators[i] == "sharedchao") { + treeCalculators.push_back(new SharedChao1()); + }else if (Estimators[i] == "sharedace") { + treeCalculators.push_back(new SharedAce()); + }else if (Estimators[i] == "jabund") { treeCalculators.push_back(new JAbund()); }else if (Estimators[i] == "sorabund") { treeCalculators.push_back(new SorAbund()); @@ -226,10 +263,62 @@ TreeGroupCommand::TreeGroupCommand(string option) { treeCalculators.push_back(new ThetaYC()); }else if (Estimators[i] == "thetan") { treeCalculators.push_back(new ThetaN()); + }else if (Estimators[i] == "kstest") { + treeCalculators.push_back(new KSTest()); + }else if (Estimators[i] == "sharednseqs") { + treeCalculators.push_back(new SharedNSeqs()); + }else if (Estimators[i] == "ochiai") { + treeCalculators.push_back(new Ochiai()); + }else if (Estimators[i] == "anderberg") { + treeCalculators.push_back(new Anderberg()); + }else if (Estimators[i] == "kulczynski") { + treeCalculators.push_back(new Kulczynski()); + }else if (Estimators[i] == "kulczynskicody") { + treeCalculators.push_back(new KulczynskiCody()); + }else if (Estimators[i] == "lennon") { + treeCalculators.push_back(new Lennon()); }else if (Estimators[i] == "morisitahorn") { treeCalculators.push_back(new MorHorn()); }else if (Estimators[i] == "braycurtis") { treeCalculators.push_back(new BrayCurtis()); + }else if (Estimators[i] == "whittaker") { + treeCalculators.push_back(new Whittaker()); + }else if (Estimators[i] == "odum") { + treeCalculators.push_back(new Odum()); + }else if (Estimators[i] == "canberra") { + treeCalculators.push_back(new Canberra()); + }else if (Estimators[i] == "structeuclidean") { + treeCalculators.push_back(new StructEuclidean()); + }else if (Estimators[i] == "structchord") { + treeCalculators.push_back(new StructChord()); + }else if (Estimators[i] == "hellinger") { + treeCalculators.push_back(new Hellinger()); + }else if (Estimators[i] == "manhattan") { + treeCalculators.push_back(new Manhattan()); + }else if (Estimators[i] == "structpearson") { + treeCalculators.push_back(new StructPearson()); + }else if (Estimators[i] == "soergel") { + treeCalculators.push_back(new Soergel()); + }else if (Estimators[i] == "spearman") { + treeCalculators.push_back(new Spearman()); + }else if (Estimators[i] == "structkulczynski") { + treeCalculators.push_back(new StructKulczynski()); + }else if (Estimators[i] == "speciesprofile") { + treeCalculators.push_back(new SpeciesProfile()); + }else if (Estimators[i] == "hamming") { + treeCalculators.push_back(new Hamming()); + }else if (Estimators[i] == "structchi2") { + treeCalculators.push_back(new StructChi2()); + }else if (Estimators[i] == "gower") { + treeCalculators.push_back(new Gower()); + }else if (Estimators[i] == "memchi2") { + treeCalculators.push_back(new MemChi2()); + }else if (Estimators[i] == "memchord") { + treeCalculators.push_back(new MemChord()); + }else if (Estimators[i] == "memeuclidean") { + treeCalculators.push_back(new MemEuclidean()); + }else if (Estimators[i] == "mempearson") { + treeCalculators.push_back(new MemPearson()); } } } diff --git a/validcalculator.cpp b/validcalculator.cpp index 6242d5a..6583bbe 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -447,6 +447,9 @@ void ValidCalculators::initialVennShared() { /********************************************************************/ void ValidCalculators::initialTreeGroups() { try { + treegroup["sharedsobs"] = "sharedsobs"; + treegroup["sharedchao"] = "sharedchao"; + treegroup["sharedace"] = "sharedace"; treegroup["jabund"] = "jabund"; treegroup["sorabund"] = "sorabund"; treegroup["jclass"] = "jclass"; @@ -455,8 +458,35 @@ void ValidCalculators::initialTreeGroups() { treegroup["sorest"] = "sorest"; treegroup["thetayc"] = "thetayc"; treegroup["thetan"] = "thetan"; + treegroup["kstest"] = "kstest"; + treegroup["whittaker"] = "whittaker"; + treegroup["sharednseqs"] = "sharednseqs"; + treegroup["ochiai"] = "ochiai"; + treegroup["anderberg"] = "anderberg"; + treegroup["kulczynski"] = "kulczynski"; + treegroup["kulczynskicody"] = "kulczynskicody"; + treegroup["lennon"] = "lennon"; treegroup["morisitahorn"] = "morisitahorn"; treegroup["braycurtis"] = "braycurtis"; + treegroup["odum"] = "odum"; + treegroup["canberra"] = "canberra"; + treegroup["structeuclidean"] = "structeuclidean"; + treegroup["structchord"] = "structchord"; + treegroup["hellinger"] = "hellinger"; + treegroup["manhattan"] = "manhattan"; + treegroup["structpearson"] = "structpearson"; + treegroup["structkulczynski"] = "structkulczynski"; + treegroup["structchi2"] = "structchi2"; + treegroup["soergel"] = "soergel"; + treegroup["spearman"] = "spearman"; + treegroup["speciesprofile"] = "speciesprofile"; + treegroup["hamming"] = "hamming"; + treegroup["gower"] = "gower"; + treegroup["memchi2"] = "memchi2"; + treegroup["memchord"] = "memchord"; + treegroup["memeuclidean"] = "memeuclidean"; + treegroup["mempearson"] = "mempearson"; + } catch(exception& e) { m->errorOut(e, "ValidCalculator", "initialTreeGroups");