From 96f25c3c0f3ba2a1634094de146630dbc9ee0d03 Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 7 Oct 2011 11:07:20 +0000 Subject: [PATCH] testing 1.22.0 --- clustersplitcommand.cpp | 12 ++++++------ commandfactory.cpp | 8 ++++---- listvector.hpp | 3 ++- preclustercommand.cpp | 9 ++++----- qualityscores.h | 2 ++ rabundvector.hpp | 3 ++- sabundvector.hpp | 3 ++- sequence.hpp | 3 +++ sequenceparser.cpp | 10 ++++++++++ sharedrabundvector.h | 3 ++- shhhercommand.cpp | 2 +- shhhercommand.h | 8 ++++---- trimflowscommand.h | 4 ++-- 13 files changed, 44 insertions(+), 26 deletions(-) diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index dc913d8..e9b3077 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -24,13 +24,13 @@ vector ClusterSplitCommand::setParameters(){ CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn); - CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel); + CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel); CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod); CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge); CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund); CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff); + CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod); CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard); @@ -60,12 +60,12 @@ string ClusterSplitCommand::getHelpString(){ helpString += "The phylip and column parameter allow you to enter your distance file. \n"; helpString += "The fasta parameter allows you to enter your aligned fasta file. \n"; helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n"; - helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n"; + helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n"; helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n"; helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n"; helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n"; helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n"; - helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n"; + helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n"; helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n"; #ifdef USE_MPI helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n"; @@ -282,11 +282,11 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { else { splitmethod = temp; } } - temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.25"; } convert(temp, cutoff); cutoff += (5 / (precision * 10.0)); - temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "1"; } + temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "3"; } convert(temp, taxLevelCutoff); method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "average"; } diff --git a/commandfactory.cpp b/commandfactory.cpp index b826167..2fc6875 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -263,7 +263,7 @@ CommandFactory::CommandFactory(){ commands["screen.seqs"] = "MPIEnabled"; commands["summary.seqs"] = "MPIEnabled"; commands["cluster.split"] = "MPIEnabled"; - commands["shhh.seqs"] = "MPIEnabled"; + commands["shhh.flows"] = "MPIEnabled"; commands["sens.spec"] = "sens.spec"; commands["seq.error"] = "seq.error"; commands["seq.error"] = "summary.tax"; @@ -349,7 +349,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "reverse.seqs") { command = new ReverseSeqsCommand(optionString); } else if(commandName == "trim.seqs") { command = new TrimSeqsCommand(optionString); } else if(commandName == "trim.flows") { command = new TrimFlowsCommand(optionString); } - else if(commandName == "shhh.seqs") { command = new ShhherCommand(optionString); } + else if(commandName == "shhh.flows") { command = new ShhherCommand(optionString); } else if(commandName == "list.seqs") { command = new ListSeqsCommand(optionString); } else if(commandName == "get.seqs") { command = new GetSeqsCommand(optionString); } else if(commandName == "remove.seqs") { command = new RemoveSeqsCommand(optionString); } @@ -491,7 +491,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "reverse.seqs") { pipecommand = new ReverseSeqsCommand(optionString); } else if(commandName == "trim.seqs") { pipecommand = new TrimSeqsCommand(optionString); } else if(commandName == "trim.flows") { pipecommand = new TrimFlowsCommand(optionString); } - else if(commandName == "shhh.seqs") { pipecommand = new ShhherCommand(optionString); } + else if(commandName == "shhh.flows") { pipecommand = new ShhherCommand(optionString); } else if(commandName == "list.seqs") { pipecommand = new ListSeqsCommand(optionString); } else if(commandName == "get.seqs") { pipecommand = new GetSeqsCommand(optionString); } else if(commandName == "remove.seqs") { pipecommand = new RemoveSeqsCommand(optionString); } @@ -620,7 +620,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "reverse.seqs") { shellcommand = new ReverseSeqsCommand(); } else if(commandName == "trim.seqs") { shellcommand = new TrimSeqsCommand(); } else if(commandName == "trim.flows") { shellcommand = new TrimFlowsCommand(); } - else if(commandName == "shhh.seqs") { shellcommand = new ShhherCommand(); } + else if(commandName == "shhh.flows") { shellcommand = new ShhherCommand(); } else if(commandName == "list.seqs") { shellcommand = new ListSeqsCommand(); } else if(commandName == "get.seqs") { shellcommand = new GetSeqsCommand(); } else if(commandName == "remove.seqs") { shellcommand = new RemoveSeqsCommand(); } diff --git a/listvector.hpp b/listvector.hpp index 801537d..dcf01a0 100644 --- a/listvector.hpp +++ b/listvector.hpp @@ -3,7 +3,8 @@ #include "datavector.hpp" -/* This class is a child to datavector. It represents OTU information at a certain distance. +/* DataStructure for a list file. + This class is a child to datavector. It represents OTU information at a certain distance. A list vector can be converted into and ordervector, rabundvector or sabundvector. Each member of the internal container "data" represents an individual OTU. So data[0] = "a,b,c,d,e,f". diff --git a/preclustercommand.cpp b/preclustercommand.cpp index 74eddbe..4b04529 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -227,7 +227,7 @@ int PreClusterCommand::execute(){ if (m->control_pressed) { delete parser; m->mothurRemove(newFastaFile); m->mothurRemove(newNamesFile); return 0; } - m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine(); + m->mothurOut("Total number of sequences before pre.cluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine(); m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); printData(newFastaFile, newNamesFile); @@ -253,9 +253,8 @@ int PreClusterCommand::execute(){ m->mothurOut("/******************************************/"); m->mothurOutEndLine(); - newNamesFile = filenames["name"][0]; - newFastaFile = filenames["fasta"][0]; - + m->renameFile(filenames["fasta"][0], newFastaFile); + }else { if (namefile != "") { readNameFile(); } @@ -436,7 +435,7 @@ int PreClusterCommand::loadSeqs(map& thisName, vector& if (it == thisName.end()) { m->mothurOut(thisSeqs[i].getName() + " is not in your names file, please correct."); m->mothurOutEndLine(); error = true; } else{ //get number of reps - int numReps = 0; + int numReps = 1; for(int j=0;j<(it->second).length();j++){ if((it->second)[j] == ','){ numReps++; } } diff --git a/qualityscores.h b/qualityscores.h index e522938..8183d51 100644 --- a/qualityscores.h +++ b/qualityscores.h @@ -10,6 +10,8 @@ * */ +//DataStructure for a quality file. + #include "mothur.h" #include "mothurout.h" diff --git a/rabundvector.hpp b/rabundvector.hpp index 9516564..c722229 100644 --- a/rabundvector.hpp +++ b/rabundvector.hpp @@ -3,7 +3,8 @@ #include "datavector.hpp" -/* This class is a child to datavector. It represents OTU information at a certain distance. +/* Data Structure for a rabund file. + This class is a child to datavector. It represents OTU information at a certain distance. A rabundvector can be converted into and ordervector, listvector or sabundvector. Each member of the internal container "data" represents an individual OTU. So data[0] = 6, because there are six member in that OTU. diff --git a/sabundvector.hpp b/sabundvector.hpp index 561294c..666e94f 100644 --- a/sabundvector.hpp +++ b/sabundvector.hpp @@ -7,7 +7,8 @@ #include "calculator.h" -/* This class is a child to datavector. It represents OTU information at a certain distance. +/* Data Structure for a sabund file. + This class is a child to datavector. It represents OTU information at a certain distance. A sabundvector can be converted into and ordervector, listvector or rabundvector. Each member of the internal container "data" represents the number of OTU's with that many members, but staring at 1. So data[1] = 2, because there are two OTUs with 1 member. diff --git a/sequence.hpp b/sequence.hpp index b433740..224ae5d 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -14,6 +14,9 @@ * */ + +//Data Structure for a fasta file. + #include "mothur.h" #include "mothurout.h" diff --git a/sequenceparser.cpp b/sequenceparser.cpp index e60f19b..76c90c3 100644 --- a/sequenceparser.cpp +++ b/sequenceparser.cpp @@ -63,6 +63,7 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi string first, second; int countName = 0; + set thisnames1; while(!inName.eof()) { if (m->control_pressed) { break; } @@ -94,10 +95,12 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi it = splitMap.find(group); if (it != splitMap.end()) { //adding seqs to this group (it->second) += "," + names[i]; + thisnames1.insert(names[i]); countName++; }else { //first sighting of this group splitMap[group] = names[i]; countName++; + thisnames1.insert(names[i]); //is this seq in the fasta file? if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match @@ -131,6 +134,13 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi if (error == 1) { m->control_pressed = true; } if (countName != (groupMap->getNumSeqs())) { + vector groupseqsnames = groupMap->getNamesSeqs(); + for (int i = 0; i < groupseqsnames.size(); i++) { + set::iterator itnamesfile = thisnames1.find(groupseqsnames[i]); + if (itnamesfile == thisnames1.end()){ + cout << "missing name " + groupseqsnames[i] << '\t' << allSeqsMap[groupseqsnames[i]] << endl; + } + } m->mothurOutEndLine(); m->mothurOut("[ERROR]: Your name file contains " + toString(countName) + " valid sequences, and your groupfile contains " + toString(groupMap->getNumSeqs()) + ", please correct."); m->mothurOutEndLine(); diff --git a/sharedrabundvector.h b/sharedrabundvector.h index b86c884..792543e 100644 --- a/sharedrabundvector.h +++ b/sharedrabundvector.h @@ -17,7 +17,8 @@ #include "rabundvector.hpp" //#include "groupmap.h" -/* This class is a child to datavector. It represents OTU information at a certain distance. +/* DataStructure for a shared file. + This class is a child to datavector. It represents OTU information at a certain distance. It is similiar to an rabundvector except each member of data knows which group it belongs to. Each member of the internal container "data" is a struct of type individual. An individual which knows the OTU from which it came, diff --git a/shhhercommand.cpp b/shhhercommand.cpp index a40d774..3c46793 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -46,7 +46,7 @@ vector ShhherCommand::setParameters(){ string ShhherCommand::getHelpString(){ try { string helpString = ""; - helpString += "The shhh.seqs command reads a file containing flowgrams and creates a file of corrected sequences.\n"; + helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n"; return helpString; } catch(exception& e) { diff --git a/shhhercommand.h b/shhhercommand.h index 00bd41a..4d9bcbb 100644 --- a/shhhercommand.h +++ b/shhhercommand.h @@ -31,11 +31,11 @@ public: ~ShhherCommand() {} vector setParameters(); - string getCommandName() { return "shhh.seqs"; } - string getCommandCategory() { return "Hidden"; } + string getCommandName() { return "shhh.flows"; } + string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); - string getCitation() { return "no citation"; } - string getDescription() { return "shhh.seqs"; } + string getCitation() { return "http://www.mothur.org/wiki/Shhh.flows"; } + string getDescription() { return "shhh.flows"; } int execute(); diff --git a/trimflowscommand.h b/trimflowscommand.h index ab8ca91..e3a2814 100644 --- a/trimflowscommand.h +++ b/trimflowscommand.h @@ -25,9 +25,9 @@ public: vector setParameters(); string getCommandName() { return "trim.flows"; } - string getCommandCategory() { return "Hidden"; } + string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); - string getCitation() { return "no citation"; } + string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; } string getDescription() { return "trim.flows"; } -- 2.39.2