From: westcott Date: Thu, 25 Feb 2010 12:05:21 +0000 (+0000) Subject: added parse.list command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=a218321731df14d231bbc08e79906f757cf1540d added parse.list command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 51a592b..5900f0a 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -164,6 +164,7 @@ A70B53AA0F4CD7AD0064797E /* getgroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */; }; A70B53AB0F4CD7AD0064797E /* getlabelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */; }; A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; }; + A71CB8A311354B9F00848EF7 /* parselistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB8A211354B9F00848EF7 /* parselistscommand.cpp */; }; A727E84A10D14568001A8432 /* readblast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727E84910D14568001A8432 /* readblast.cpp */; }; A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; }; A729ACD010848E6100139801 /* hclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A729ACCF10848E6100139801 /* hclustercommand.cpp */; }; @@ -545,6 +546,8 @@ A70B53A70F4CD7AD0064797E /* getlabelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlabelcommand.h; sourceTree = SOURCE_ROOT; }; A70DECD71063D8B40057C03C /* secondarystructurecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = secondarystructurecommand.h; sourceTree = SOURCE_ROOT; }; A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; }; + A71CB8A111354B9F00848EF7 /* parselistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parselistscommand.h; sourceTree = SOURCE_ROOT; }; + A71CB8A211354B9F00848EF7 /* parselistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parselistscommand.cpp; sourceTree = SOURCE_ROOT; }; A727E84810D14568001A8432 /* readblast.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readblast.h; sourceTree = SOURCE_ROOT; }; A727E84910D14568001A8432 /* readblast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readblast.cpp; sourceTree = SOURCE_ROOT; }; A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; }; @@ -956,6 +959,8 @@ 375873F60F7D649C0040F377 /* nocommands.cpp */, A704E8A11106045D00870157 /* otuhierarchycommand.h */, A704E8A21106045D00870157 /* otuhierarchycommand.cpp */, + A71CB8A111354B9F00848EF7 /* parselistscommand.h */, + A71CB8A211354B9F00848EF7 /* parselistscommand.cpp */, 3792946E0F2E191800B9034A /* parsimonycommand.h */, 3792946F0F2E191800B9034A /* parsimonycommand.cpp */, A7D176CD10F2558500159497 /* pcacommand.h */, @@ -1343,6 +1348,7 @@ A70B00C8110885EB002F453A /* setdircommand.cpp in Sources */, A794201111107897003AECCD /* distancedb.cpp in Sources */, A7E8A22F1125939F0011D39C /* chimerarealigner.cpp in Sources */, + A71CB8A311354B9F00848EF7 /* parselistscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/cluster.cpp b/cluster.cpp index e360624..c6a9ca4 100644 --- a/cluster.cpp +++ b/cluster.cpp @@ -213,8 +213,11 @@ void Cluster::update(double& cutOFF){ } //if not merged it you need it for warning if ((!merged) && (method == "average")) { - mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine(); - if (cutOFF > rowCells[i]->dist) { cutOFF = rowCells[i]->dist; mothurOut("changing cutoff to " + toString(cutOFF)); mothurOutEndLine(); } + //mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine(); + if (cutOFF > rowCells[i]->dist) { + cutOFF = rowCells[i]->dist; + //mothurOut("changing cutoff to " + toString(cutOFF)); mothurOutEndLine(); + } } removeCell(rowCells[i], i , -1); @@ -230,8 +233,11 @@ void Cluster::update(double& cutOFF){ if (foundCol[i] == 0) { if (method == "average") { if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) { - mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine(); - if (cutOFF > colCells[i]->dist) { cutOFF = colCells[i]->dist; mothurOut("changing cutoff to " + toString(cutOFF)); mothurOutEndLine(); } + //mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine(); + if (cutOFF > colCells[i]->dist) { + cutOFF = colCells[i]->dist; + //mothurOut("changing cutoff to " + toString(cutOFF)); mothurOutEndLine(); + } } } removeCell(colCells[i], -1, i); diff --git a/clustercommand.cpp b/clustercommand.cpp index d14af10..e4e2d21 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -146,6 +146,7 @@ int ClusterCommand::execute(){ print_start = true; start = time(NULL); loops = 0; + double saveCutoff = cutoff; while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){ if (print_start && isTrue(timing)) { @@ -204,9 +205,14 @@ int ClusterCommand::execute(){ sabundFile.close(); rabundFile.close(); listFile.close(); + + if (saveCutoff != cutoff) { mothurOut("changed cutoff to " + toString(cutoff)); mothurOutEndLine(); } + //if (isTrue(timing)) { mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); mothurOutEndLine(); //} + + return 0; } catch(exception& e) { diff --git a/commandfactory.cpp b/commandfactory.cpp index 9648474..baef216 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -64,6 +64,7 @@ #include "pcacommand.h" #include "otuhierarchycommand.h" #include "setdircommand.h" +#include "parselistscommand.h" /*******************************************************/ @@ -138,6 +139,7 @@ CommandFactory::CommandFactory(){ commands["otu.hierarchy"] = "otu.hierarchy"; commands["set.dir"] = "set.dir"; commands["merge.files"] = "merge.files"; + commands["parse.list"] = "parse.list"; } /***********************************************************/ @@ -220,7 +222,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "pre.cluster") { command = new PreClusterCommand(optionString); } else if(commandName == "pcoa") { command = new PCACommand(optionString); } else if(commandName == "otu.hierarchy") { command = new OtuHierarchyCommand(optionString); } - else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); } + else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); } + else if(commandName == "parse.list") { command = new ParseListCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/parselistscommand.cpp b/parselistscommand.cpp new file mode 100644 index 0000000..ce014fc --- /dev/null +++ b/parselistscommand.cpp @@ -0,0 +1,277 @@ +/* + * parselistcommand.cpp + * Mothur + * + * Created by westcott on 2/24/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "parselistscommand.h" + +//********************************************************************************************************************** +ParseListCommand::ParseListCommand(string option) { + try { + abort = false; + allLines = 1; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"list","group", "label", "outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("list"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["list"] = inputDir + it->second; } + } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } + } + + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //check for required parameters + listfile = validParameter.validFile(parameters, "list", true); + if (listfile == "not open") { abort = true; } + else if (listfile == "not found") { listfile = ""; } + + groupfile = validParameter.validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { + groupMap = new GroupMap(groupfile); + + int error = groupMap->readMap(); + if (error == 1) { abort = true; } + } + + //do you have all files needed + if ((listfile == "") || (groupfile == "")) { mothurOut("You must enter both a listfile and groupfile for the parse.list command. "); mothurOutEndLine(); abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; allLines = 1; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + } + + } + catch(exception& e) { + errorOut(e, "ParseListCommand", "ParseListCommand"); + exit(1); + } +} +//********************************************************************************************************************** +void ParseListCommand::help(){ + try { + mothurOut("The parse.list command reads a list and group file and generates a list file for each group in the groupfile \n"); + mothurOut("The parse.list command parameters are list, group and label.\n"); + mothurOut("The list and group parameters are required.\n"); + mothurOut("The label parameter is used to read specific labels in your input you want to use.\n"); + mothurOut("The parse.list command should be used in the following format: parse.list(list=yourListFile, group=yourGroupFile, label=yourLabels).\n"); + mothurOut("Example: parse.list(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03).\n"); + mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n"); + + } + catch(exception& e) { + errorOut(e, "ParseListCommand", "help"); + exit(1); + } +} +//********************************************************************************************************************** +ParseListCommand::~ParseListCommand(){} +//********************************************************************************************************************** +int ParseListCommand::execute(){ + try { + + if (abort == true) { return 0; } + + //set fileroot + string fileroot = outputDir + getRootName(getSimpleName(listfile)); + + //fill filehandles with neccessary ofstreams + int i; + ofstream* temp; + for (i=0; inamesOfGroups.size(); i++) { + temp = new ofstream; + filehandles[groupMap->namesOfGroups[i]] = temp; + + string filename = fileroot + groupMap->namesOfGroups[i] + ".list"; + openOutputFile(filename, *temp); + } + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + input = new InputData(listfile, "list"); + list = input->getListVector(); + string lastLabel = list->getLabel(); + + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ + + parse(list); + mothurOut(list->getLabel()); mothurOutEndLine(); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + } + + if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input->getListVector(lastLabel); //get new list vector to process + + parse(list); + mothurOut(list->getLabel()); mothurOutEndLine(); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } + + + lastLabel = list->getLabel(); + + delete list; + list = input->getListVector(); //get new list vector to process + } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine(); + needToRun = true; + }else { + mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine(); + } + + } + + //run last label if you need to + if (needToRun == true) { + if (list != NULL) { delete list; } + list = input->getListVector(lastLabel); //get new list vector to process + + parse(list); + mothurOut(list->getLabel()); mothurOutEndLine(); + + delete list; + } + + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(filehandles[it3->first])).close(); + delete it3->second; + } + + delete groupMap; + + return 0; + } + catch(exception& e) { + errorOut(e, "ParseListCommand", "execute"); + exit(1); + } +} +/**********************************************************************************************************************/ +void ParseListCommand::parse(ListVector* thisList) { + try { + + map groupVector; + map::iterator itGroup; + map groupNumBins; + + //print label + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + groupNumBins[it3->first] = 0; + groupVector[it3->first] = ""; + } + + + for (int i = 0; i < thisList->getNumBins(); i++) { + + map groupBins; + string bin = list->get(i); + + vector names; + splitAtComma(bin, names); //parses bin into individual sequence names + + //parse bin into list of sequences in each group + for (int j = 0; j < names.size(); j++) { + string group = groupMap->getGroup(names[j]); + + if (group == "not found") { mothurOut(names[j] + " is not in your groupfile. please correct."); mothurOutEndLine(); exit(1); } + + itGroup = groupBins.find(group); + if(itGroup == groupBins.end()) { + groupBins[group] = names[j]; //add first name + groupNumBins[group]++; + }else{ //add another name + groupBins[group] = groupBins[group] + "," + names[j]; + } + } + + //print parsed bin info to files + for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) { + groupVector[itGroup->first] += itGroup->second + '\t'; + } + + } + + //end list vector + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group + } + + + } + catch(exception& e) { + errorOut(e, "ParseListCommand", "parse"); + exit(1); + } +} + +/**********************************************************************************************************************/ + + diff --git a/parselistscommand.h b/parselistscommand.h new file mode 100644 index 0000000..8f91017 --- /dev/null +++ b/parselistscommand.h @@ -0,0 +1,47 @@ +#ifndef PARSELISTCOMMAND_H +#define PARSELISTCOMMAND_H +/* + * parselistcommand.h + * Mothur + * + * Created by westcott on 2/24/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "groupmap.h" +#include "inputdata.h" +#include "listvector.hpp" + +/***************************************************************************************/ + +class ParseListCommand : public Command { + +public: + ParseListCommand(string); + ~ParseListCommand(); + int execute(); + void help(); + +private: + void parse(ListVector*); + + ListVector* list; + GroupMap* groupMap; + InputData* input; + + ofstream out; + string outputDir, listfile, groupfile, label; + set labels; + bool abort, allLines; + + map filehandles; + map::iterator it3; + +}; + +/***************************************************************************************/ + +#endif +