From 81a77fdce5e9b060c71b162305ddd9fa4308f78a Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 21 May 2009 16:45:22 +0000 Subject: [PATCH] modified bin.seqs and get.oturep commands to include use of a groupfile if provided and removed get.repseqs command. --- Mothur.xcodeproj/project.pbxproj | 6 - binsequencecommand.cpp | 50 +++++- binsequencecommand.h | 4 +- commandfactory.cpp | 2 - errorchecking.cpp | 6 - getoturepcommand.cpp | 50 +++++- getoturepcommand.h | 8 +- getrepseqscommand.cpp | 263 ------------------------------- getrepseqscommand.h | 50 ------ helpcommand.cpp | 25 +-- validcommands.cpp | 1 - validparameter.cpp | 7 +- 12 files changed, 107 insertions(+), 365 deletions(-) delete mode 100644 getrepseqscommand.cpp delete mode 100644 getrepseqscommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index c49b637..1b52eee 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -7,7 +7,6 @@ objects = { /* Begin PBXBuildFile section */ - 211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */; }; 211C38320F961DD400FEE541 /* sharedutilities.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38300F961DD400FEE541 /* sharedutilities.cpp */; }; 211C38380F961E1F00FEE541 /* treegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38360F961E1F00FEE541 /* treegroupscommand.cpp */; }; 21DDC01B0F97A8FE0060691C /* bootstrapsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 21DDC01A0F97A8FE0060691C /* bootstrapsharedcommand.cpp */; }; @@ -175,8 +174,6 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ - 211A24E90FC306BC00769A33 /* getrepseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getrepseqscommand.h; sourceTree = ""; }; - 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getrepseqscommand.cpp; sourceTree = ""; }; 211C38300F961DD400FEE541 /* sharedutilities.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedutilities.cpp; sourceTree = ""; }; 211C38310F961DD400FEE541 /* sharedutilities.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedutilities.h; sourceTree = ""; }; 211C38360F961E1F00FEE541 /* treegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treegroupscommand.cpp; sourceTree = ""; }; @@ -776,8 +773,6 @@ A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */, 370B88050F8A4EE4005AB382 /* getoturepcommand.h */, 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */, - 211A24E90FC306BC00769A33 /* getrepseqscommand.h */, - 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */, 375873F10F7D64800040F377 /* heatmapcommand.h */, 375873F00F7D64800040F377 /* heatmapcommand.cpp */, 37D927E40F21331F001D4494 /* helpcommand.h */, @@ -1097,7 +1092,6 @@ 373C699B0FC1E63600137ACD /* efron.cpp in Sources */, 373C699C0FC1E63600137ACD /* solow.cpp in Sources */, EB72FE260FC1F5CA0051AC11 /* shen.cpp in Sources */, - 211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */, 21E859D80FC4632E005E1A48 /* matrixoutputcommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp index 7810ea9..f1ba8b5 100644 --- a/binsequencecommand.cpp +++ b/binsequencecommand.cpp @@ -15,8 +15,15 @@ BinSeqCommand::BinSeqCommand(){ globaldata = GlobalData::getInstance(); fastafile = globaldata->getFastaFile(); namesfile = globaldata->getNameFile(); + groupfile = globaldata->getGroupFile(); openInputFile(fastafile, in); + if (groupfile != "") { + //read in group map info. + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + } + fasta = new FastaMap(); } catch(exception& e) { @@ -36,6 +43,9 @@ BinSeqCommand::~BinSeqCommand(){ delete read; delete fasta; delete list; + if (groupfile != "") { + delete groupMap; + } } //********************************************************************************************************************** @@ -83,9 +93,23 @@ int BinSeqCommand::execute(){ //do work for that name sequence = fasta->getSequence(name); if (sequence != "not found") { - name = name + "|" + toString(i+1); - out << ">" << name << endl; - out << sequence << endl; + //if you don't have groups + if (groupfile == "") { + name = name + "|" + toString(i+1); + out << ">" << name << endl; + out << sequence << endl; + }else {//if you do have groups + string group = groupMap->getGroup(name); + if (group == "not found") { + cout << name << " is missing from your group file. Please correct. " << endl; + remove(outputFileName.c_str()); + return 0; + }else{ + name = name + "|" + group + "|" + toString(i+1); + out << ">" << name << endl; + out << sequence << endl; + } + } }else { cout << name << " is missing from your fasta or name file. Please correct. " << endl; remove(outputFileName.c_str()); @@ -97,9 +121,23 @@ int BinSeqCommand::execute(){ //get last name sequence = fasta->getSequence(binnames); if (sequence != "not found") { - name = binnames + '|' + toString(i+1); - out << ">" << name << endl; - out << sequence << endl; + //if you don't have groups + if (groupfile == "") { + binnames = binnames + "|" + toString(i+1); + out << ">" << binnames << endl; + out << sequence << endl; + }else {//if you do have groups + string group = groupMap->getGroup(binnames); + if (group == "not found") { + cout << binnames << " is missing from your group file. Please correct. " << endl; + remove(outputFileName.c_str()); + return 0; + }else{ + binnames = binnames + "|" + group + "|" + toString(i+1); + out << ">" << binnames << endl; + out << sequence << endl; + } + } }else { cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; remove(outputFileName.c_str()); diff --git a/binsequencecommand.h b/binsequencecommand.h index 617f5ec..e2ff648 100644 --- a/binsequencecommand.h +++ b/binsequencecommand.h @@ -16,6 +16,7 @@ #include "listvector.hpp" #include "readotu.h" #include "fastamap.h" +#include "groupmap.h" class GlobalData; @@ -33,7 +34,8 @@ private: ReadOTUFile* read; InputData* input; FastaMap* fasta; - string filename, fastafile, namesfile; + GroupMap* groupMap; + string filename, fastafile, namesfile, groupfile; ofstream out; ifstream in, inNames; diff --git a/commandfactory.cpp b/commandfactory.cpp index 21e7b54..06e7e84 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -42,7 +42,6 @@ #include "concensuscommand.h" #include "distancecommand.h" #include "aligncommand.h" -#include "getrepseqscommand.h" #include "matrixoutputcommand.h" @@ -91,7 +90,6 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(); } else if(commandName == "venn") { command = new VennCommand(); } else if(commandName == "bin.seqs") { command = new BinSeqCommand(); } - else if(commandName == "get.repseqs") { command = new GetRepSeqsCommand(); } else if(commandName == "get.oturep") { command = new GetOTURepCommand(); } else if(commandName == "tree.shared") { command = new TreeGroupCommand(); } else if(commandName == "dist.shared") { command = new MatrixOutputCommand(); } diff --git a/errorchecking.cpp b/errorchecking.cpp index f82e806..f8b34bb 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -277,12 +277,6 @@ bool ErrorCheck::checkInput(string input) { validateBinFiles(); } - if ((commandName == "get.repseqs")) { - if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the get.repseqs command." << endl; return false; } - else if (groupfile == "") { cout << "You must provide a groupfile before you can use the get.repseqs command." << endl; return false; } - validateBinFiles(); - } - if ((commandName == "get.oturep")) { if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) { diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index 45f34fe..e5f87c2 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -37,6 +37,14 @@ GetOTURepCommand::GetOTURepCommand(){ fastafile = globaldata->getFastaFile(); namesfile = globaldata->getNameFile(); + groupfile = globaldata->getGroupFile(); + + if (groupfile != "") { + //read in group map info. + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + } + openInputFile(fastafile, in); fasta = new FastaMap(); @@ -59,6 +67,9 @@ GetOTURepCommand::~GetOTURepCommand(){ delete input; delete read; delete fasta; + if (groupfile != "") { + delete groupMap; + } } //********************************************************************************************************************** @@ -98,15 +109,22 @@ int GetOTURepCommand::execute(){ //for each bin in the list vector for (int i = 0; i < list->size(); i++) { - nameRep = FindRep(i); + string groups; + nameRep = FindRep(i, groups); //print out name and sequence for that bin sequence = fasta->getSequence(nameRep); if (sequence != "not found") { - nameRep = nameRep + "|" + toString(i+1); - out << ">" << nameRep << endl; - out << sequence << endl; + if (groupfile == "") { + nameRep = nameRep + "|" + toString(i+1); + out << ">" << nameRep << endl; + out << sequence << endl; + }else { + nameRep = nameRep + "|" + groups + "|" + toString(i+1); + out << ">" << nameRep << endl; + out << sequence << endl; + } }else { cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl; remove(outputFileName.c_str()); @@ -173,7 +191,7 @@ void GetOTURepCommand::readNamesFile() { } } //********************************************************************************************************************** -string GetOTURepCommand::FindRep(int bin) { +string GetOTURepCommand::FindRep(int bin, string& group) { try{ vector names; map sums; @@ -182,12 +200,34 @@ string GetOTURepCommand::FindRep(int bin) { string binnames; float min = 10000; string minName; + map groups; + map::iterator groupIt; binnames = list->get(bin); //parse names into vector splitAtComma(binnames, names); + //if you have a groupfile + if(groupfile != "") { + //find the groups that are in this bin + for (int i = 0; i < names.size(); i++) { + string groupName = groupMap->getGroup(names[i]); + if (groupName == "not found") { + cout << names[i] << " is missing from your group file. Please correct. " << endl; + groupError = true; + }else{ + groups[groupName] = groupName; + } + } + + //turn the groups into a string + for(groupIt = groups.begin(); groupIt != groups.end(); groupIt++) { group += groupIt->first + "-"; } + + //rip off last dash + group = group.substr(0, group.length()-1); + } + //if only 1 sequence in bin then that's the rep if (names.size() == 1) { return names[0]; } else { diff --git a/getoturepcommand.h b/getoturepcommand.h index 706b219..deafc78 100644 --- a/getoturepcommand.h +++ b/getoturepcommand.h @@ -18,7 +18,7 @@ #include "inputdata.h" #include "readotu.h" #include "fastamap.h" - +#include "groupmap.h" class GlobalData; @@ -39,9 +39,11 @@ private: ReadOTUFile* read; InputData* input; FastaMap* fasta; - string filename, fastafile, namesfile; + GroupMap* groupMap; + string filename, fastafile, namesfile, groupfile; ofstream out; ifstream in, inNames; + bool groupError; map nameToIndex; //maps sequence name to index in sparsematrix @@ -50,7 +52,7 @@ private: map::iterator it3; void readNamesFile(); - string FindRep(int); // returns name of "representative" sequence of given bin. + string FindRep(int, string&); // returns name of "representative" sequence of given bin. //and fill a string containing the groups in that bin if a groupfile is given }; diff --git a/getrepseqscommand.cpp b/getrepseqscommand.cpp deleted file mode 100644 index ee9bf5a..0000000 --- a/getrepseqscommand.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - * getrepseqscommand.cpp - * Mothur - * - * Created by Sarah Westcott on 5/19/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "getrepseqscommand.h" - -//********************************************************************************************************************** -GetRepSeqsCommand::GetRepSeqsCommand(){ - try { - globaldata = GlobalData::getInstance(); - fastafile = globaldata->getFastaFile(); - namesfile = globaldata->getNameFile(); - openInputFile(fastafile, in); - - fasta = new FastaMap(); - - //read in group map info. - groupMap = new GroupMap(globaldata->getGroupFile()); - groupMap->readMap(); - - //fill filehandles with neccessary ofstreams - int i; - ofstream* temp; - //one for each group - for (i=0; igetNumGroups(); i++) { - temp = new ofstream; - filehandles[groupMap->namesOfGroups[i]] = temp; - } - - //one for shared - temp = new ofstream; - string s = "shared"; - filehandles[s] = temp; - - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the GetRepSeqsCommand class function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - -//********************************************************************************************************************** - -GetRepSeqsCommand::~GetRepSeqsCommand(){ - delete input; - delete read; - delete fasta; - delete list; -} - -//********************************************************************************************************************** - -int GetRepSeqsCommand::execute(){ - try { - int count = 1; - string binnames, name, sequence; - - //read fastafile - fasta->readFastaFile(in); - - //set format to list so input can get listvector - globaldata->setFormat("list"); - - //if user gave a namesfile then use it - if (namesfile != "") { - readNamesFile(); - } - - //read list file - read = new ReadOTUFile(globaldata->getListFile()); - read->read(&*globaldata); - - input = globaldata->ginput; - list = globaldata->gListVector; - - while(list != NULL){ - - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ - - cout << list->getLabel() << '\t' << count << endl; - - //open output list files - for (int i=0; igetNumGroups(); i++) {//opens an output file for each group - openOutputFile(fastafile + groupMap->namesOfGroups[i] + list->getLabel() + ".fasta", *(filehandles[groupMap->namesOfGroups[i]])); - used[groupMap->namesOfGroups[i]] = false; - } - string s = "shared"; - openOutputFile(fastafile + s + list->getLabel() + ".fasta", *(filehandles[s])); - used[s] = false; - - - //for each bin in the list vector - for (int i = 0; i < list->size(); i++) { - seq.clear(); - //uses this to determine if the bin is unique to one group or if it is shared - map groups; - - //determine if this otu is unique to one group or not - binnames = list->get(i); - while (binnames.find_first_of(',') != -1) { - //parse out each name in bin - name = binnames.substr(0,binnames.find_first_of(',')); - binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); - - //do work for that name - sequence = fasta->getSequence(name); - if (sequence != "not found") { - string group = groupMap->getGroup(name); - if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin - else { - cout << "error sequence " << name << " is not assigned a group in your groupfile. Please correct." << endl; - removeFiles(list->getLabel()); - return 0; - } - name = ">" + name + "|" + toString(i+1); - seq[name] = sequence; - }else { - cout << name << " is missing from your fasta or name file. Please correct. " << endl; - removeFiles(list->getLabel()); - return 0; - } - - } - - //get last name - sequence = fasta->getSequence(binnames); - if (sequence != "not found") { - string group = groupMap->getGroup(binnames); - if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin - else { - cout << "error sequence " << binnames << " is not assigned a group in your groupfile. Please correct." << endl; - removeFiles(list->getLabel()); - return 0; - } - binnames = ">" + binnames + "|" + toString(i+1); //attach bin number to name - seq[binnames] = sequence; - }else { - cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; - removeFiles(list->getLabel()); - return 0; - } - - //output each bin to files - //what file does this bin need to be outputted to - if (groups.size() == 1) { //this bin is unique to one group - it3 = groups.begin(); - string uniqueGroup = it3->first; - used[uniqueGroup] = true; - //print out sequences from that bin to shared file - for (it3 = seq.begin(); it3 != seq.end(); it3++){ - *(filehandles[uniqueGroup]) << it3->first << endl; - *(filehandles[uniqueGroup]) << it3->second << endl; - } - }else {//this bin has sequences from multiple groups in it - used[s] = true; - //print out sequences from that bin to shared file - for (it3 = seq.begin(); it3 != seq.end(); it3++){ - *(filehandles[s]) << it3->first << endl; - *(filehandles[s]) << it3->second << endl; - } - } - } - - //close ostreams and remove unused files - for (it = filehandles.begin(); it != filehandles.end(); it++) { - it->second->close(); - if (used[it->first] == false) { string filename = fastafile + it->first + list->getLabel() + ".fasta"; remove(filename.c_str()); } - } - - } - - delete list; - list = input->getListVector(); - count++; - } - - return 0; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the GetRepSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - -//********************************************************************************************************************** -void GetRepSeqsCommand::readNamesFile() { - try { - vector dupNames; - openInputFile(namesfile, inNames); - - string name, names, sequence; - - while(inNames){ - inNames >> name; //read from first column A - inNames >> names; //read from second column A,B,C,D - - dupNames.clear(); - - //parse names into vector - splitAtComma(names, dupNames); - - //store names in fasta map - sequence = fasta->getSequence(name); - for (int i = 0; i < dupNames.size(); i++) { - fasta->push_back(dupNames[i], sequence); - } - - gobble(inNames); - } - inNames.close(); - - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the GetRepSeqsCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} -//********************************************************************************************************************** -void GetRepSeqsCommand::removeFiles(string label) { - try { - //close ostreams - for (it = filehandles.begin(); it != filehandles.end(); it++) { - it->second->close(); - } - - //remove output files because there was an error - for (int i=0; igetNumGroups(); i++) { - string outputFileName = fastafile + groupMap->namesOfGroups[i] + label + ".fasta"; - remove(outputFileName.c_str()); - } - string outputFileName = fastafile + "shared"+ label + ".fasta"; - remove(outputFileName.c_str()); - - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the GetRepSeqsCommand class function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - -//********************************************************************************************************************** - diff --git a/getrepseqscommand.h b/getrepseqscommand.h deleted file mode 100644 index 970b65c..0000000 --- a/getrepseqscommand.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef GETREPSEQSCOMMAND_H -#define GETREPSEQSCOMMAND_H -/* - * getrepseqscommand.h - * Mothur - * - * Created by Sarah Westcott on 5/19/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - - -#include "command.hpp" -#include "inputdata.h" -#include "listvector.hpp" -#include "readotu.h" -#include "fastamap.h" -#include "groupmap.h" - - -class GlobalData; - -class GetRepSeqsCommand : public Command { - -public: - GetRepSeqsCommand(); - ~GetRepSeqsCommand(); - int execute(); - -private: - GlobalData* globaldata; - ListVector* list; - ReadOTUFile* read; - GroupMap* groupMap; - InputData* input; - FastaMap* fasta; - string filename, fastafile, namesfile; - map filehandles; - map::iterator it; - map used; //group, if it had any unique otus - map::iterator it2; - map seq; - map::iterator it3; - ifstream in, inNames; - - void readNamesFile(); - void removeFiles(string); -}; - -#endif diff --git a/helpcommand.cpp b/helpcommand.cpp index 72ab80d..6abe7af 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -301,32 +301,23 @@ int HelpCommand::execute(){ cout << "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages." << "\n" << "\n"; }else if (globaldata->helpRequest == "bin.seqs") { cout << "The bin.seqs command can only be executed after a successful read.otu command of a list file." << "\n"; - cout << "The bin.seqs command parameters are fasta, name, line and label. The fasta parameter is required, and you may not use line and label at the same time." << "\n"; + cout << "The bin.seqs command parameters are fasta, name, line, label and group. The fasta parameter is required, and you may not use line and label at the same time." << "\n"; cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; - cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example bin.seqs(fasta=amazon.fasta, line=1-3-5, name=amazon.names)." << "\n"; + cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; cout << "The default value for line and label are all lines in your inputfile." << "\n"; cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "get.repseqs") { - cout << "The get.repseqs command can only be executed after a successful read.otu command of a list file." << "\n"; - cout << "The get.repseqs command parameters are fasta, name, group, line and label. The fasta and group parameters are required, and you may not use line and label at the same time." << "\n"; - cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; - cout << "The get.repseqss command should be in the following format: get.repseqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupfile, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example get.repseqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; - cout << "The default value for line and label are all lines in your inputfile." << "\n"; - cout << "The get.repseqs command outputs several .fasta files for each distance you specify. " << "\n"; - cout << "If the distance level you choose has bins that contain only sequences unique to a specific group those sequences are outputted to a file for that group." << "\n"; - cout << "If the bin contains sequences from multiple groups then the bin is outputted to the shared fasta file." << "\n"; + cout << "If you provide a groupfile, then it also appends the sequences group to the name." << "\n"; cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; }else if (globaldata->helpRequest == "get.oturep") { cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n"; - cout << "The get.oturep command parameters are list, fasta, name, line and label. The fasta and list parameters are required, and you may not use line and label at the same time." << "\n"; + cout << "The get.oturep command parameters are list, fasta, name, group, line and label. The fasta and list parameters are required, and you may not use line and label at the same time." << "\n"; cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; - cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, line=1-3-5, name=amazon.names)." << "\n"; + cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; cout << "The default value for line and label are all lines in your inputfile." << "\n"; cout << "The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin." << "\n"; + cout << "If you provide a groupfile, then it also appends the names of the groups present in that bin." << "\n"; cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; }else if (globaldata->helpRequest == "quit") { cout << "The quit command will terminate mothur and should be in the following format: " << "\n"; diff --git a/validcommands.cpp b/validcommands.cpp index 787cc80..7f7acc2 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -19,7 +19,6 @@ ValidCommands::ValidCommands() { commands["read.tree"] = "read.tree"; commands["read.seqs"] = "read.seqs"; commands["bin.seqs"] = "bin.seqs"; - commands["get.repseqs"] = "get.repseqs"; commands["get.oturep"] = "get.oturep"; commands["cluster"] = "cluster"; commands["deconvolute"] = "deconvolute"; diff --git a/validparameter.cpp b/validparameter.cpp index 5da87d1..6719e2c 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -273,16 +273,13 @@ void ValidParameters::initCommandParameters() { string vennArray[] = {"groups","line","label","calc"}; commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string)); - string binseqsArray[] = {"fasta","line","label","name"}; + string binseqsArray[] = {"fasta","line","label","name", "group"}; commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string)); string distsharedArray[] = {"line","label","calc","groups"}; commandParameters["dist.shared"] = addParameters(distsharedArray, sizeof(distsharedArray)/sizeof(string)); - string getrepseqsArray[] = {"fasta","line","label","name", "group"}; - commandParameters["get.repseqs"] = addParameters(getrepseqsArray, sizeof(getrepseqsArray)/sizeof(string)); - - string getOTURepArray[] = {"fasta","list","line","label","name"}; + string getOTURepArray[] = {"fasta","list","line","label","name", "group"}; commandParameters["get.oturep"] = addParameters(getOTURepArray, sizeof(getOTURepArray)/sizeof(string)); string treeGroupsArray[] = {"line","label","calc","groups"}; -- 2.39.2