From 5b72d1cf3fa48730e5bb70d59cced1e43e1fe424 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 11 Sep 2012 15:02:46 -0400 Subject: [PATCH] added count file to get.groups and remove.groups. added shortcut parameter to classify.seqs. --- bayesian.cpp | 40 +++++---- bayesian.h | 2 +- classify.cpp | 6 +- classify.h | 2 +- classifyseqscommand.cpp | 10 ++- classifyseqscommand.h | 11 +-- counttable.cpp | 1 + getgroupscommand.cpp | 169 ++++++++++++++++++++++++++++++++----- getgroupscommand.h | 3 +- knn.cpp | 1 + mgclustercommand.cpp | 12 ++- mothurout.cpp | 26 ++++++ mothurout.h | 1 + phylotree.cpp | 2 +- removegroupscommand.cpp | 181 +++++++++++++++++++++++++++++++++++----- removegroupscommand.h | 3 +- sharedcommand.cpp | 8 +- sharedutilities.cpp | 2 +- 18 files changed, 401 insertions(+), 79 deletions(-) diff --git a/bayesian.cpp b/bayesian.cpp index 1dc3833..bccf0ce 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -12,13 +12,14 @@ #include "phylosummary.h" #include "referencedb.h" /**************************************************************************************************/ -Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f) : +Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f, bool sh) : Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { try { ReferenceDB* rdb = ReferenceDB::getInstance(); threadID = tid; flip = f; + shortcuts = sh; string baseName = tempFile; if (baseName == "saved") { baseName = rdb->getSavedReference(); } @@ -63,7 +64,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { } saveIn.close(); } - +FilesGood = false; if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){ if (tempFile == "saved") { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); } @@ -113,7 +114,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { WordPairDiffArr.resize(numKmers); for (int j = 0; j < wordGenusProb.size(); j++) { wordGenusProb[j].resize(genusNodes.size()); } - ofstream out; + ofstream out; ofstream out2; #ifdef USE_MPI @@ -124,17 +125,19 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { #endif - m->openOutputFile(probFileName, out); + if (shortcuts) { + m->openOutputFile(probFileName, out); - //output mothur version - out << "#" << m->getVersion() << endl; + //output mothur version + out << "#" << m->getVersion() << endl; - out << numKmers << endl; + out << numKmers << endl; - m->openOutputFile(probFileName2, out2); + m->openOutputFile(probFileName2, out2); - //output mothur version - out2 << "#" << m->getVersion() << endl; + //output mothur version + out2 << "#" << m->getVersion() << endl; + } #ifdef USE_MPI } @@ -151,7 +154,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { if (pid == 0) { #endif - out << i << '\t'; + if (shortcuts) { out << i << '\t'; } #ifdef USE_MPI } @@ -188,7 +191,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { if (pid == 0) { #endif - out << k << '\t' << wordGenusProb[i][k] << '\t' ; + if (shortcuts) { out << k << '\t' << wordGenusProb[i][k] << '\t' ; } #ifdef USE_MPI } @@ -204,8 +207,10 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { if (pid == 0) { #endif - out << endl; - out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl; + if (shortcuts) { + out << endl; + out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl; + } #ifdef USE_MPI } @@ -218,9 +223,10 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { if (pid == 0) { #endif - out.close(); - out2.close(); - + if (shortcuts) { + out.close(); + out2.close(); + } #ifdef USE_MPI } #endif diff --git a/bayesian.h b/bayesian.h index 7c88433..405fee3 100644 --- a/bayesian.h +++ b/bayesian.h @@ -18,7 +18,7 @@ class Bayesian : public Classify { public: - Bayesian(string, string, string, int, int, int, int, bool); + Bayesian(string, string, string, int, int, int, int, bool, bool); ~Bayesian(); string getTaxonomy(Sequence*); diff --git a/classify.cpp b/classify.cpp index 212e563..f44e66c 100644 --- a/classify.cpp +++ b/classify.cpp @@ -200,7 +200,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } fastaFile.close(); - database->generateDB(); + if ((method == "kmer") && (!shortcuts)) {;} //don't print + else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); @@ -260,9 +261,6 @@ int Classify::readTaxonomy(string file) { MPI_File inMPI; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); - - //char* inFileName = new char[file.length()]; - //memcpy(inFileName, file.c_str(), file.length()); char inFileName[1024]; strcpy(inFileName, file.c_str()); diff --git a/classify.h b/classify.h index 4e03547..6582be4 100644 --- a/classify.h +++ b/classify.h @@ -46,7 +46,7 @@ protected: string taxFile, templateFile, simpleTax; vector names; int threadID; - bool flip, flipped; + bool flip, flipped, shortcuts; int readTaxonomy(string); vector parseTax(string); diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 43a021e..bab7740 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -34,6 +34,7 @@ vector ClassifySeqsCommand::setParameters(){ CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs); CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters); CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave); + CommandParameter pshortcuts("shortcuts", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshortcuts); CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -557,6 +558,9 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; } probs = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "shortcuts", false); if (temp == "not found"){ temp = "true"; } + writeShortcuts = m->isTrue(temp); //temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "T"; } //flip = m->isTrue(temp); @@ -601,12 +605,12 @@ int ClassifySeqsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } - if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip); } + if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts); } else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); } else { m->mothurOut(search + " is not a valid method option. I will run the command using bayesian."); m->mothurOutEndLine(); - classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip); + classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts); } if (m->control_pressed) { delete classify; return 0; } @@ -1004,7 +1008,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string extension = ""; if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); } - classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flip); + classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flip, writeShortcuts); pDataArray.push_back(tempclass); //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. diff --git a/classifyseqscommand.h b/classifyseqscommand.h index 6d43dcb..6d11d92 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -74,7 +74,7 @@ private: string fastaFileName, templateFileName, countfile, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile; int processors, kmerSize, numWanted, cutoff, iters; float match, misMatch, gapOpen, gapExtend; - bool abort, probs, save, flip, hasName, hasCount; + bool abort, probs, save, flip, hasName, hasCount, writeShortcuts; int driver(linePair*, string, string, string, string); int createProcesses(string, string, string, string); @@ -100,10 +100,10 @@ struct classifyData { MothurOut* m; float match, misMatch, gapOpen, gapExtend; int count, kmerSize, threadID, cutoff, iters, numWanted; - bool probs, flip; + bool probs, flip, writeShortcuts; classifyData(){} - classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli) { + classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli, bool wsh) { accnos = acc; taxonomyFileName = tx; templateFileName = te; @@ -127,6 +127,7 @@ struct classifyData { probs = p; count = 0; flip = fli; + writeShortcuts = wsh; } }; @@ -163,12 +164,12 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){ //make classify Classify* myclassify; - if(pDataArray->method == "bayesian"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); } + if(pDataArray->method == "bayesian"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts); } else if(pDataArray->method == "knn"){ myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID); } else { pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian."); pDataArray->m->mothurOutEndLine(); - myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); + myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts); } if (pDataArray->m->control_pressed) { delete myclassify; return 0; } diff --git a/counttable.cpp b/counttable.cpp index 5307bee..bc9d4da 100644 --- a/counttable.cpp +++ b/counttable.cpp @@ -447,6 +447,7 @@ int CountTable::addGroup(string groupName) { counts[i] = newCounts; } hasGroups = true; + m->setAllGroups(groups); return 0; } diff --git a/getgroupscommand.cpp b/getgroupscommand.cpp index fe6f571..7585c12 100644 --- a/getgroupscommand.cpp +++ b/getgroupscommand.cpp @@ -18,8 +18,9 @@ vector GetGroupsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta); CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup); CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy); @@ -43,7 +44,7 @@ string GetGroupsCommand::getHelpString(){ string helpString = ""; helpString += "The get.groups command selects sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or shared file.\n"; helpString += "It outputs a file containing the sequences in the those specified groups, or a sharedfile containing only those groups.\n"; - helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group parameter is required, unless you have a current group file, or are using a shared file.\n"; + helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file, or are using a shared file.\n"; helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like. You can separate group names with dashes.\n"; helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n"; @@ -71,6 +72,7 @@ string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){ else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); } + else if (type == "count") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); } @@ -97,6 +99,7 @@ GetGroupsCommand::GetGroupsCommand(){ outputTypes["list"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["design"] = tempOutNames; + outputTypes["count"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand"); @@ -135,6 +138,7 @@ GetGroupsCommand::GetGroupsCommand(string option) { outputTypes["list"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["design"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the output directory command factory will send this info to us in the output parameter @@ -208,6 +212,14 @@ GetGroupsCommand::GetGroupsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["design"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } @@ -227,11 +239,6 @@ GetGroupsCommand::GetGroupsCommand(string option) { else if (namefile == "not found") { namefile = ""; } else { m->setNameFile(namefile); } - groupfile = validParameter.validFile(parameters, "group", true); - if (groupfile == "not open") { groupfile = ""; abort = true; } - else if (groupfile == "not found") { groupfile = ""; } - else { m->setGroupFile(groupfile); } - listfile = validParameter.validFile(parameters, "list", true); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } @@ -263,8 +270,22 @@ GetGroupsCommand::GetGroupsCommand(string option) { if (designfile == "not open") { designfile = ""; abort = true; } else if (designfile == "not found") { designfile = ""; } else { m->setDesignFile(designfile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { countfile = ""; abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((namefile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true; + } + + if ((groupfile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; + } + - if ((sharedfile == "") && (groupfile == "") && (designfile == "")) { + if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { //is there are current file available for any of these? if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) { //give priority to group, then shared @@ -274,7 +295,11 @@ GetGroupsCommand::GetGroupsCommand(string option) { sharedfile = m->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } else { - m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true; + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true; + } } } }else { @@ -288,7 +313,12 @@ GetGroupsCommand::GetGroupsCommand(string option) { designfile = m->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); } else { - m->mothurOut("You have no current groupfile or sharedfile or designfile and one is required."); m->mothurOutEndLine(); abort = true; + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true; + } + } } } @@ -297,13 +327,15 @@ GetGroupsCommand::GetGroupsCommand(string option) { if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; } - if ((fastafile == "") && (namefile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design or list."); m->mothurOutEndLine(); abort = true; } - if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; } - - if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){ - vector files; files.push_back(fastafile); files.push_back(taxfile); - parser.getNameFile(files); - } + if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; } + if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; } + + if (countfile == "") { + if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){ + vector files; files.push_back(fastafile); files.push_back(taxfile); + parser.getNameFile(files); + } + } } } @@ -331,6 +363,7 @@ int GetGroupsCommand::execute(){ SharedUtil* util = new SharedUtil(); vector gNamesOfGroups = groupMap->getNamesOfGroups(); util->setGroups(Groups, gNamesOfGroups); + m->setGroups(Groups); groupMap->setNamesOfGroups(gNamesOfGroups); delete util; @@ -338,7 +371,23 @@ int GetGroupsCommand::execute(){ fillNames(); delete groupMap; - } + }else if (countfile != ""){ + if ((fastafile != "") || (listfile != "") || (taxfile != "")) { + m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); + } + CountTable ct; + ct.readTable(countfile); + if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; } + + vector gNamesOfGroups = ct.getNamesOfGroups(); + SharedUtil util; + util.setGroups(Groups, gNamesOfGroups); + m->setGroups(Groups); + for (int i = 0; i < Groups.size(); i++) { + vector thisGroupsSeqs = ct.getNamesOfSeqs(Groups[i]); + for (int j = 0; j < thisGroupsSeqs.size(); j++) { names.insert(thisGroupsSeqs[j]); } + } + } if (m->control_pressed) { return 0; } @@ -346,6 +395,7 @@ int GetGroupsCommand::execute(){ if (namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (groupfile != "") { readGroup(); } + if (countfile != "") { readCount(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (sharedfile != "") { readShared(); } @@ -396,6 +446,11 @@ int GetGroupsCommand::execute(){ if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); } } + + itTypes = outputTypes.find("count"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } + } } return 0; @@ -742,6 +797,82 @@ int GetGroupsCommand::readGroup(){ } } //********************************************************************************************************************** +int GetGroupsCommand::readCount(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile); + + ofstream out; + m->openOutputFile(outputFileName, out); + + ifstream in; + m->openInputFile(countfile, in); + + bool wroteSomething = false; + int selectedCount = 0; + + string headers = m->getline(in); m->gobble(in); + vector columnHeaders = m->splitWhiteSpace(headers); + + vector groups; + map originalGroupIndexes; + map GroupIndexes; + set indexOfGroupsChosen; + for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; } + //sort groups to keep consistent with how we store the groups in groupmap + sort(groups.begin(), groups.end()); + for (int i = 0; i < groups.size(); i++) { GroupIndexes[groups[i]] = i; } + sort(Groups.begin(), Groups.end()); + out << "Representative_Sequence\ttotal\t"; + for (int i = 0; i < Groups.size(); i++) { out << Groups[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[Groups[i]]); } + out << endl; + + string name; int oldTotal; + while (!in.eof()) { + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } + + in >> name; m->gobble(in); in >> oldTotal; m->gobble(in); + if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); } + + if (names.count(name) != 0) { + //if group info, then read it + vector selectedCounts; int thisTotal = 0; int temp; + for (int i = 0; i < groups.size(); i++) { + int thisIndex = GroupIndexes[originalGroupIndexes[i]]; + in >> temp; m->gobble(in); + if (indexOfGroupsChosen.count(thisIndex) != 0) { //we want this group + selectedCounts.push_back(temp); thisTotal += temp; + } + } + + out << name << '\t' << thisTotal << '\t'; + for (int i = 0; i < selectedCounts.size(); i++) { out << selectedCounts[i] << '\t'; } + out << endl; + + wroteSomething = true; + selectedCount+= thisTotal; + }else { m->getline(in); } + + m->gobble(in); + } + in.close(); + out.close(); + + if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); } + outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); + + m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetGroupsCommand", "readCount"); + exit(1); + } +} +//********************************************************************************************************************** int GetGroupsCommand::readDesign(){ try { string thisOutputDir = outputDir; diff --git a/getgroupscommand.h b/getgroupscommand.h index 80230b4..6bb6088 100644 --- a/getgroupscommand.h +++ b/getgroupscommand.h @@ -40,7 +40,7 @@ private: map uniqueToRedundant; //if a namefile is given and the first column name is not selected //then the other files need to change the unique name in their file to match. //only add the names that need to be changed to keep the map search quick - string accnosfile, fastafile, namefile, groupfile, listfile, designfile, taxfile, outputDir, groups, sharedfile; + string accnosfile, countfile, fastafile, namefile, groupfile, listfile, designfile, taxfile, outputDir, groups, sharedfile; bool abort; vector outputNames, Groups; GroupMap* groupMap; @@ -48,6 +48,7 @@ private: int readFasta(); int readName(); int readGroup(); + int readCount(); int readList(); int readTax(); int fillNames(); diff --git a/knn.cpp b/knn.cpp index 837fa6d..81b21b2 100644 --- a/knn.cpp +++ b/knn.cpp @@ -14,6 +14,7 @@ Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOp : Classify(), num(n), search(method) { try { threadID = tid; + shortcuts = true; //create search database and names vector generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch); diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index 4774504..1861aa5 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -13,8 +13,8 @@ vector MGClusterCommand::setParameters(){ try { CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount); CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength); CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty); CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff); @@ -147,6 +147,14 @@ MGClusterCommand::MGClusterCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } diff --git a/mothurout.cpp b/mothurout.cpp index 2debf84..d9df5a0 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -2114,6 +2114,32 @@ int MothurOut::getNumChar(string line, char c){ exit(1); } } +//********************************************************************************************************************** +bool MothurOut::isSubset(vector bigset, vector subset) { + try { + + + if (subset.size() > bigset.size()) { return false; } + + //check if each guy in suset is also in bigset + for (int i = 0; i < subset.size(); i++) { + bool match = false; + for (int j = 0; j < bigset.size(); j++) { + if (subset[i] == bigset[j]) { match = true; break; } + } + + //you have a guy in subset that had no match in bigset + if (match == false) { return false; } + } + + return true; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "isSubset"); + exit(1); + } +} /***********************************************************************/ int MothurOut::mothurRemove(string filename){ try { diff --git a/mothurout.h b/mothurout.h index 77c5a80..3338403 100644 --- a/mothurout.h +++ b/mothurout.h @@ -141,6 +141,7 @@ class MothurOut { void splitAtChar(string&, string&, char); int removeConfidences(string&); string makeList(vector&); + bool isSubset(vector, vector); //bigSet, subset //math operation int factorial(int num); diff --git a/phylotree.cpp b/phylotree.cpp index 3dde186..73cb461 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -598,7 +598,7 @@ int PhyloTree::getIndex(string seqName){ try { map::iterator itFind = name2Taxonomy.find(seqName); - if (itFind != name2Taxonomy.end()) { return name2Taxonomy[seqName]; } + if (itFind != name2Taxonomy.end()) { return itFind->second; } else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);} } catch(exception& e) { diff --git a/removegroupscommand.cpp b/removegroupscommand.cpp index 05b1170..86ddf94 100644 --- a/removegroupscommand.cpp +++ b/removegroupscommand.cpp @@ -18,9 +18,9 @@ vector RemoveGroupsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta); CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup); - CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup); CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos); @@ -41,9 +41,9 @@ vector RemoveGroupsCommand::setParameters(){ string RemoveGroupsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or sharedfile.\n"; + helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design or sharedfile.\n"; helpString += "It outputs a file containing the sequences NOT in the those specified groups, or with a sharedfile eliminates the groups you selected.\n"; - helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group parameter is required, unless you have a current group file or are using a sharedfile.\n"; + helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file or are using a sharedfile.\n"; helpString += "You must also provide an accnos containing the list of groups to remove or set the groups parameter to the groups you wish to remove.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed. You can separate group names with dashes.\n"; helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n"; @@ -71,6 +71,7 @@ string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName=" else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); } + else if (type == "count") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); } else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); } @@ -96,6 +97,7 @@ RemoveGroupsCommand::RemoveGroupsCommand(){ outputTypes["list"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["design"] = tempOutNames; + outputTypes["count"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand"); @@ -134,6 +136,7 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option) { outputTypes["list"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["design"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the output directory command factory will send this info to us in the output parameter @@ -207,6 +210,14 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["design"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } @@ -258,12 +269,22 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option) { else if (sharedfile == "not found") { sharedfile = ""; } else { m->setSharedFile(sharedfile); } - groupfile = validParameter.validFile(parameters, "group", true); - if (groupfile == "not open") { groupfile = ""; abort = true; } - else if (groupfile == "not found") { groupfile = ""; } - else { m->setGroupFile(groupfile); } - if ((sharedfile == "") && (groupfile == "") && (designfile == "")) { + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { countfile = ""; abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((namefile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true; + } + + if ((groupfile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; + } + + + if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { //is there are current file available for any of these? if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) { //give priority to group, then shared @@ -273,7 +294,11 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option) { sharedfile = m->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } else { - m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true; + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true; + } } } }else { @@ -287,7 +312,12 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option) { designfile = m->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); } else { - m->mothurOut("You have no current groupfile or sharedfile or designfile and one is required."); m->mothurOutEndLine(); abort = true; + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true; + } + } } } @@ -296,14 +326,15 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option) { if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file containing group names or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; } - if ((fastafile == "") && (namefile == "") && (groupfile == "") && (sharedfile == "") && (designfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design or list."); m->mothurOutEndLine(); abort = true; } - if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; } - - if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){ - vector files; files.push_back(fastafile); files.push_back(taxfile); - parser.getNameFile(files); - } - + if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; } + if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; } + + if (countfile == "") { + if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){ + vector files; files.push_back(fastafile); files.push_back(taxfile); + parser.getNameFile(files); + } + } } } @@ -337,7 +368,28 @@ int RemoveGroupsCommand::execute(){ fillNames(); delete groupMap; - } + }else if (countfile != ""){ + if ((fastafile != "") || (listfile != "") || (taxfile != "")) { + m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); + } + CountTable ct; + ct.readTable(countfile); + if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; } + + vector gNamesOfGroups = ct.getNamesOfGroups(); + SharedUtil util; + util.setGroups(Groups, gNamesOfGroups); + vector namesOfSeqs = ct.getNamesOfSeqs(); + sort(Groups.begin(), Groups.end()); + + for (int i = 0; i < namesOfSeqs.size(); i++) { + vector thisSeqsGroups = ct.getGroups(namesOfSeqs[i]); + if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you + names.insert(namesOfSeqs[i]); + } + } + } + if (m->control_pressed) { return 0; } @@ -345,6 +397,7 @@ int RemoveGroupsCommand::execute(){ if (namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (groupfile != "") { readGroup(); } + if (countfile != "") { readCount(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (sharedfile != "") { readShared(); } @@ -394,6 +447,11 @@ int RemoveGroupsCommand::execute(){ if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); } } + + itTypes = outputTypes.find("count"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } + } } return 0; @@ -762,6 +820,87 @@ int RemoveGroupsCommand::readGroup(){ } } //********************************************************************************************************************** +int RemoveGroupsCommand::readCount(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile); + + ofstream out; + m->openOutputFile(outputFileName, out); + + ifstream in; + m->openInputFile(countfile, in); + + bool wroteSomething = false; + int removedCount = 0; + + string headers = m->getline(in); m->gobble(in); + vector columnHeaders = m->splitWhiteSpace(headers); + + vector groups; + map originalGroupIndexes; + map GroupIndexes; + set indexOfGroupsChosen; + for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; } + //sort groups to keep consistent with how we store the groups in groupmap + sort(groups.begin(), groups.end()); + for (int i = 0; i < groups.size(); i++) { GroupIndexes[groups[i]] = i; } + + vector groupsToKeep; + for (int i = 0; i < groups.size(); i++) { + if (!m->inUsersGroups(groups[i], Groups)) { groupsToKeep.push_back(groups[i]); } + } + sort(groupsToKeep.begin(), groupsToKeep.end()); + out << "Representative_Sequence\ttotal\t"; + for (int i = 0; i < groupsToKeep.size(); i++) { out << groupsToKeep[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[groupsToKeep[i]]); } + out << endl; + + string name; int oldTotal; + while (!in.eof()) { + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } + + in >> name; m->gobble(in); in >> oldTotal; m->gobble(in); + if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); } + + if (names.count(name) == 0) { + //if group info, then read it + vector selectedCounts; int thisTotal = 0; int temp; + for (int i = 0; i < groups.size(); i++) { + int thisIndex = GroupIndexes[originalGroupIndexes[i]]; + in >> temp; m->gobble(in); + if (indexOfGroupsChosen.count(thisIndex) != 0) { //we want this group + selectedCounts.push_back(temp); thisTotal += temp; + } + } + + out << name << '\t' << thisTotal << '\t'; + for (int i = 0; i < selectedCounts.size(); i++) { out << selectedCounts[i] << '\t'; } + out << endl; + + wroteSomething = true; + removedCount+= (oldTotal - thisTotal); + }else { m->getline(in); removedCount += oldTotal; } + + m->gobble(in); + } + in.close(); + out.close(); + + if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); } + outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); + + m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveGroupsCommand", "readCount"); + exit(1); + } +} +//********************************************************************************************************************** int RemoveGroupsCommand::readDesign(){ try { string thisOutputDir = outputDir; diff --git a/removegroupscommand.h b/removegroupscommand.h index c6db380..c36998a 100644 --- a/removegroupscommand.h +++ b/removegroupscommand.h @@ -36,7 +36,7 @@ public: private: set names; - string accnosfile, fastafile, namefile, groupfile, designfile, listfile, taxfile, outputDir, groups, sharedfile; + string accnosfile, fastafile, namefile, groupfile, countfile, designfile, listfile, taxfile, outputDir, groups, sharedfile; bool abort; vector outputNames, Groups; GroupMap* groupMap; @@ -49,6 +49,7 @@ private: int readShared(); int readName(); int readGroup(); + int readCount(); int readList(); int readTax(); int fillNames(); diff --git a/sharedcommand.cpp b/sharedcommand.cpp index e6e790d..3980106 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -1083,8 +1083,12 @@ int SharedCommand::ListGroupSameSeqs(vector& groupMapsSeqs, SharedListVe for (int j = 0; j < listNames.size(); j++) { int num = groupNamesSeqs.count(listNames[j]); - if (num == 0) { error = 1; m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); } - else { groupNamesSeqs.erase(listNames[j]); } + if (num == 0) { + error = 1; + if (groupfile != "") { + m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); } + else{ m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your count file. Please correct."); m->mothurOutEndLine(); } + }else { groupNamesSeqs.erase(listNames[j]); } } } diff --git a/sharedutilities.cpp b/sharedutilities.cpp index 151b254..71d7782 100644 --- a/sharedutilities.cpp +++ b/sharedutilities.cpp @@ -120,7 +120,7 @@ void SharedUtil::setGroups(vector& userGroups, vector& allGroups //if the user only entered invalid groups if (userGroups.size() == 0) { - m->mothurOut("You provided no valid groups. I will run the command using all the groups in your groupfile."); m->mothurOutEndLine(); + m->mothurOut("You provided no valid groups. I will run the command using all the groups in your file."); m->mothurOutEndLine(); for (int i = 0; i < allGroups.size(); i++) { userGroups.push_back(allGroups[i]); } -- 2.39.2