From 0470f6d037aacb3563c3f7010708120a4a67d4e6 Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 12 Jun 2009 19:41:27 +0000 Subject: [PATCH] broke up globaldata and moved error checking and help into commands --- Mothur.xcodeproj/project.pbxproj | 16 +- aligncommand.cpp | 126 ++++-- aligncommand.h | 15 +- binsequencecommand.cpp | 138 ++++++- binsequencecommand.h | 13 +- bootstrapsharedcommand.cpp | 206 +++++++--- bootstrapsharedcommand.h | 12 +- clustercommand.cpp | 125 ++++-- clustercommand.h | 9 +- collectcommand.cpp | 212 +++++++--- collectcommand.h | 12 +- collectdisplay.h | 2 +- collectsharedcommand.cpp | 221 ++++++++--- collectsharedcommand.h | 18 +- command.hpp | 3 + commandfactory.cpp | 83 ++-- commandfactory.hpp | 2 +- commandoptionparser.cpp | 16 +- commandoptionparser.hpp | 3 +- concensuscommand.cpp | 41 +- concensuscommand.h | 6 +- deconvolutecommand.cpp | 70 +++- deconvolutecommand.h | 10 +- distancecommand.cpp | 146 +++++-- distancecommand.h | 11 +- engine.cpp | 67 ++-- engine.hpp | 1 - errorchecking.cpp | 654 ------------------------------- errorchecking.h | 44 --- fileoutput.cpp | 4 +- fileoutput.h | 3 +- filterseqscommand.cpp | 127 ++++-- filterseqscommand.h | 10 +- getgroupcommand.cpp | 53 ++- getgroupcommand.h | 4 +- getlabelcommand.cpp | 37 +- getlabelcommand.h | 4 +- getlinecommand.cpp | 37 +- getlinecommand.h | 4 +- getoturepcommand.cpp | 177 +++++++-- getoturepcommand.h | 16 +- getrabundcommand.cpp | 95 ++++- getrabundcommand.h | 12 +- getsabundcommand.cpp | 95 ++++- getsabundcommand.h | 11 +- globaldata.cpp | 569 ++------------------------- globaldata.hpp | 69 +--- heatmap.cpp | 11 +- heatmap.h | 2 +- heatmapcommand.cpp | 123 +++++- heatmapcommand.h | 13 +- heatmapsimcommand.cpp | 172 ++++++-- heatmapsimcommand.h | 12 +- helpcommand.cpp | 339 +--------------- helpcommand.h | 6 +- libshuffcommand.cpp | 112 +++++- libshuffcommand.h | 11 +- matrixoutputcommand.cpp | 175 +++++++-- matrixoutputcommand.h | 13 +- nocommands.cpp | 9 +- nocommands.h 
| 3 +- optionparser.cpp | 40 ++ optionparser.h | 30 ++ parselistcommand.h | 3 +- parsimonycommand.cpp | 135 +++++-- parsimonycommand.h | 10 +- quitcommand.cpp | 27 +- quitcommand.h | 5 +- rarefactcommand.cpp | 176 +++++++-- rarefactcommand.h | 14 +- rarefactsharedcommand.cpp | 151 +++++-- rarefactsharedcommand.h | 18 +- readdistcommand.cpp | 141 +++++-- readdistcommand.h | 9 +- readotu.cpp | 2 + readotucommand.cpp | 140 ++++++- readotucommand.h | 12 +- readtreecommand.cpp | 79 +++- readtreecommand.h | 10 +- reversecommand.cpp | 55 ++- reversecommand.h | 9 +- screenseqscommand.cpp | 117 +++++- screenseqscommand.h | 10 +- seqsummarycommand.cpp | 58 ++- seqsummarycommand.h | 9 +- sharedcommand.h | 1 + sharedrabundvector.cpp | 2 +- sharedutilities.cpp | 1 + shen.cpp | 3 - shen.h | 3 +- summarycommand.cpp | 212 +++++++--- summarycommand.h | 12 +- summarysharedcommand.cpp | 222 ++++++++--- summarysharedcommand.h | 21 +- tree.cpp | 140 +++++++ tree.h | 6 +- treegroupscommand.cpp | 225 ++++++++--- treegroupscommand.h | 14 +- trimseqscommand.cpp | 109 ++++-- trimseqscommand.h | 9 +- unifracunweightedcommand.cpp | 108 ++++- unifracunweightedcommand.h | 12 +- unifracweightedcommand.cpp | 103 ++++- unifracweightedcommand.h | 11 +- validparameter.cpp | 149 ++----- validparameter.h | 7 +- venn.cpp | 37 +- venncommand.cpp | 197 +++++++--- venncommand.h | 14 +- 109 files changed, 4619 insertions(+), 2869 deletions(-) delete mode 100644 errorchecking.cpp delete mode 100644 errorchecking.h create mode 100644 optionparser.cpp create mode 100644 optionparser.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 3f1cd99..32077f3 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -58,6 +58,7 @@ 377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 377326630FAF16E0007ABB8B /* concensuscommand.cpp */; }; 378598660FDD497000EF9D03 /* heatmapsimcommand.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 378598650FDD497000EF9D03 /* heatmapsimcommand.cpp */; }; 378598740FDD4C1500EF9D03 /* heatmapsim.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378598730FDD4C1500EF9D03 /* heatmapsim.cpp */; }; + 378599100FDD7E8E00EF9D03 /* optionparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3785990F0FDD7E8E00EF9D03 /* optionparser.cpp */; }; 378C1B030FB0644E004D63F5 /* filterseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */; }; 378C1B040FB0644E004D63F5 /* goodscoverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */; }; 378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */; }; @@ -90,7 +91,6 @@ 37D928610F21331F001D4494 /* completelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927D20F21331F001D4494 /* completelinkage.cpp */; }; 37D928620F21331F001D4494 /* database.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927D30F21331F001D4494 /* database.cpp */; }; 37D928630F21331F001D4494 /* engine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927D70F21331F001D4494 /* engine.cpp */; }; - 37D928640F21331F001D4494 /* errorchecking.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927D90F21331F001D4494 /* errorchecking.cpp */; }; 37D928650F21331F001D4494 /* fastamap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927DB0F21331F001D4494 /* fastamap.cpp */; }; 37D928660F21331F001D4494 /* fileoutput.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927DD0F21331F001D4494 /* fileoutput.cpp */; }; 37D928670F21331F001D4494 /* globaldata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927DF0F21331F001D4494 /* globaldata.cpp */; }; @@ -281,6 +281,8 @@ 378598650FDD497000EF9D03 /* heatmapsimcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = heatmapsimcommand.cpp; 
sourceTree = ""; }; 378598720FDD4C1500EF9D03 /* heatmapsim.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = heatmapsim.h; sourceTree = ""; }; 378598730FDD4C1500EF9D03 /* heatmapsim.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = heatmapsim.cpp; sourceTree = ""; }; + 3785990E0FDD7E8E00EF9D03 /* optionparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = optionparser.h; sourceTree = ""; }; + 3785990F0FDD7E8E00EF9D03 /* optionparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = optionparser.cpp; sourceTree = ""; }; 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filterseqscommand.cpp; sourceTree = SOURCE_ROOT; }; 378C1AEF0FB0644D004D63F5 /* filterseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filterseqscommand.h; sourceTree = SOURCE_ROOT; }; 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = goodscoverage.cpp; sourceTree = SOURCE_ROOT; }; @@ -356,8 +358,6 @@ 37D927D60F21331F001D4494 /* display.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = display.h; sourceTree = SOURCE_ROOT; }; 37D927D70F21331F001D4494 /* engine.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = engine.cpp; sourceTree = SOURCE_ROOT; }; 37D927D80F21331F001D4494 /* engine.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = engine.hpp; sourceTree = SOURCE_ROOT; }; - 37D927D90F21331F001D4494 /* errorchecking.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = errorchecking.cpp; sourceTree = 
SOURCE_ROOT; }; - 37D927DA0F21331F001D4494 /* errorchecking.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = errorchecking.h; sourceTree = SOURCE_ROOT; }; 37D927DB0F21331F001D4494 /* fastamap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fastamap.cpp; sourceTree = SOURCE_ROOT; }; 37D927DC0F21331F001D4494 /* fastamap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fastamap.h; sourceTree = SOURCE_ROOT; }; 37D927DD0F21331F001D4494 /* fileoutput.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fileoutput.cpp; sourceTree = SOURCE_ROOT; }; @@ -582,6 +582,8 @@ 373C68E40FC1C4A500137ACD /* noalign.hpp */, 373C68E30FC1C4A500137ACD /* noalign.cpp */, 37D927F60F21331F001D4494 /* observable.h */, + 3785990E0FDD7E8E00EF9D03 /* optionparser.h */, + 3785990F0FDD7E8E00EF9D03 /* optionparser.cpp */, 373C68C40FC1C25F00137ACD /* overlap.hpp */, 373C68C30FC1C25F00137ACD /* overlap.cpp */, 37D927FC0F21331F001D4494 /* progress.hpp */, @@ -789,8 +791,8 @@ 375873F30F7D648F0040F377 /* libshuffcommand.cpp */, 21E859D60FC4632E005E1A48 /* matrixoutputcommand.h */, 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */, - 375873F60F7D649C0040F377 /* nocommands.cpp */, 375873F70F7D649C0040F377 /* nocommands.h */, + 375873F60F7D649C0040F377 /* nocommands.cpp */, 37D927FA0F21331F001D4494 /* parselistcommand.h */, 37D927F90F21331F001D4494 /* parselistcommand.cpp */, 3792946E0F2E191800B9034A /* parsimonycommand.h */, @@ -811,8 +813,8 @@ 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */, 371B30B30FD7EE67000414CA /* screenseqscommand.h */, 371B30B20FD7EE67000414CA /* screenseqscommand.cpp */, - 3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */, 3799A94F0FD6A58C00E33EDE /* seqsummarycommand.h */, + 3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */, 37D928270F21331F001D4494 /* sharedcommand.h */, 
37D928260F21331F001D4494 /* sharedcommand.cpp */, 37D928470F21331F001D4494 /* summarycommand.h */, @@ -898,8 +900,6 @@ 37D928B10F213472001D4494 /* errorcheckor */ = { isa = PBXGroup; children = ( - 37D927DA0F21331F001D4494 /* errorchecking.h */, - 37D927D90F21331F001D4494 /* errorchecking.cpp */, 37D928500F21331F001D4494 /* validcalculator.h */, 37D9284F0F21331F001D4494 /* validcalculator.cpp */, 37D928520F21331F001D4494 /* validcommands.h */, @@ -975,7 +975,6 @@ 37D928610F21331F001D4494 /* completelinkage.cpp in Sources */, 37D928620F21331F001D4494 /* database.cpp in Sources */, 37D928630F21331F001D4494 /* engine.cpp in Sources */, - 37D928640F21331F001D4494 /* errorchecking.cpp in Sources */, 37D928650F21331F001D4494 /* fastamap.cpp in Sources */, 37D928660F21331F001D4494 /* fileoutput.cpp in Sources */, 37D928670F21331F001D4494 /* globaldata.cpp in Sources */, @@ -1115,6 +1114,7 @@ 7E09C5360FDA7F65002ECAE5 /* trimseqscommand.cpp in Sources */, 378598660FDD497000EF9D03 /* heatmapsimcommand.cpp in Sources */, 378598740FDD4C1500EF9D03 /* heatmapsim.cpp in Sources */, + 378599100FDD7E8E00EF9D03 /* optionparser.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/aligncommand.cpp b/aligncommand.cpp index 33c06d4..4b9dedc 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -38,19 +38,69 @@ //********************************************************************************************************************** -AlignCommand::AlignCommand(){ +AlignCommand::AlignCommand(string option){ try { globaldata = GlobalData::getInstance(); - if(globaldata->getFastaFile() == ""){ - cout << "you forgot a template file" << endl; - } - openInputFile(globaldata->getCandidateFile(), in); + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"fasta","candidate","search","ksize","align","match","mismatch","gapopen","gapextend"}; + vector myArray 
(AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + parser = new OptionParser(); + + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + templateFileName = validParameter->validFile(parameters, "fasta", true); + if (templateFileName == "not found") { cout << "fasta is a required parameter for the align.seqs command." << endl; abort = true; } + else if (templateFileName == "not open") { abort = true; } + else { globaldata->setFastaFile(templateFileName); } + + candidateFileName = validParameter->validFile(parameters, "candidate", true); + if (candidateFileName == "not found") { cout << "candidate is a required parameter for the align.seqs command." << endl; abort = true; } + else if (candidateFileName == "not open") { abort = true; } + else { + globaldata->setCandidateFile(candidateFileName); + openInputFile(candidateFileName, in); + } + + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ string temp; + temp = validParameter->validFile(parameters, "ksize", false); if (temp == "not found") { temp = "8"; } + convert(temp, kmerSize); + + temp = validParameter->validFile(parameters, "match", false); if (temp == "not found") { temp = "1.0"; } + convert(temp, match); + + temp = validParameter->validFile(parameters, "mismatch", false); if (temp == "not found") { temp = "-1.0"; } + convert(temp, misMatch); + + temp = validParameter->validFile(parameters, "gapopen", false); if (temp == "not found") { temp = "-1.0"; } + convert(temp, gapOpen); + + temp = validParameter->validFile(parameters, "gapextend", false); if (temp == "not found") { temp = "-2.0"; } + convert(temp, gapExtend); - convert(globaldata->getKSize(), kmerSize); - convert(globaldata->getMatch(), match); - convert(globaldata->getMismatch(), misMatch); - convert(globaldata->getGapopen(), gapOpen); - convert(globaldata->getGapextend(), gapExtend); + search = validParameter->validFile(parameters, "search", false); if (search == "not found") { search = "kmer"; } + align = validParameter->validFile(parameters, "align", false); if (align == "not found") { align = "needleman"; } + + delete validParameter; + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function AlignCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -67,36 +117,66 @@ AlignCommand::AlignCommand(){ AlignCommand::~AlignCommand(){ } +//********************************************************************************************************************** + +void AlignCommand::help(){ + try { + cout << "The align.seqs command reads a file containing sequences and creates an alignment file and a report file." << "\n"; + cout << "The align.seqs command parameters are fasta, candidate, search, ksize, align, match, mismatch, gapopen and gapextend. " << "\n"; + cout << "The fasta and candidate parameters are required." 
<< "\n"; + cout << "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer." << "\n"; + cout << "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman." << "\n"; + cout << "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8." << "\n"; + cout << "The match parameter allows you to specify the bonus for having the same base. The default is 1.0." << "\n"; + cout << "The mismatch parameter allows you to specify the penalty for having different bases. The default is -1.0." << "\n"; + cout << "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0." << "\n"; + cout << "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -2.0." << "\n"; + cout << "The align.seqs command should be in the following format: " << "\n"; + cout << "align.seqs(fasta=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) " << "\n"; + cout << "Example align.seqs(candidate=candidate.fasta, fasta=core.filtered, align=gotoh, search=kmer, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)" << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the AlignCommand class function help. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + //********************************************************************************************************************** int AlignCommand::execute(){ try { + if (abort == true) { return 0; } + srand( (unsigned)time( NULL ) ); //needed to assign names to temporary files Database* templateDB; - if(globaldata->getSearch() == "kmer") { templateDB = new KmerDB(globaldata->getFastaFile() , kmerSize); } - else if(globaldata->getSearch() == "suffix") { templateDB = new SuffixDB(globaldata->getFastaFile()); } - else if(globaldata->getSearch() == "blast") { templateDB = new BlastDB(globaldata->getFastaFile(), gapOpen, gapExtend, match, misMatch); } + if(search == "kmer") { templateDB = new KmerDB(templateFileName, kmerSize); } + else if(search == "suffix") { templateDB = new SuffixDB(templateFileName); } + else if(search == "blast") { templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch); } else { - cout << globaldata->getSearch() << " is not a valid search option. I will run the command using kmer, ksize=8." << endl; - templateDB = new KmerDB(globaldata->getFastaFile(), kmerSize); + cout << search << " is not a valid search option. I will run the command using kmer, ksize=8." 
<< endl; kmerSize = 8; + templateDB = new KmerDB(templateFileName, kmerSize); } Alignment* alignment; - if(globaldata->getAlign() == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 3000); } - else if(globaldata->getAlign() == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000); } - else if(globaldata->getAlign() == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } - else if(globaldata->getAlign() == "noalign") { alignment = new NoAlign(); } + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 3000); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } else { - cout << globaldata->getAlign() << " is not a valid alignment option. I will run the command using needleman." << endl; + cout << align << " is not a valid alignment option. I will run the command using needleman." 
<< endl; alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000); } int numFastaSeqs=count(istreambuf_iterator(in),istreambuf_iterator(), '>'); in.seekg(0); - candidateFileName = globaldata->getCandidateFile(); string candidateAligngmentFName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align"; ofstream candidateAlignmentFile; openOutputFile(candidateAligngmentFName, candidateAlignmentFile); @@ -116,11 +196,11 @@ int AlignCommand::execute(){ Sequence* templateSeq = templateDB->findClosestSequence(candidateSeq); report.setTemplate(templateSeq); - report.setSearchParameters(globaldata->getSearch(), templateDB->getSearchScore()); + report.setSearchParameters(search, templateDB->getSearchScore()); Nast nast(alignment, candidateSeq, templateSeq); - report.setAlignmentParameters(globaldata->getAlign(), alignment); + report.setAlignmentParameters(align, alignment); report.setNastParameters(nast); candidateAlignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl; diff --git a/aligncommand.h b/aligncommand.h index b5ff782..a3ec827 100644 --- a/aligncommand.h +++ b/aligncommand.h @@ -13,20 +13,29 @@ #include "command.hpp" #include "globaldata.hpp" + + class AlignCommand : public Command { public: - AlignCommand(); + AlignCommand(string); ~AlignCommand(); - int execute(); + int execute(); + void help(); private: GlobalData* globaldata; - string candidateFileName, templateFileName, distanceFileName; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + string candidateFileName, templateFileName, distanceFileName, search, align; int kmerSize; float match, misMatch, gapOpen, gapExtend; ofstream out; ifstream in; + int ableToOpen; + }; diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp index 8f0bb9f..c2fd6fa 100644 --- a/binsequencecommand.cpp +++ b/binsequencecommand.cpp @@ -10,21 +10,87 @@ #include "binsequencecommand.h" 
//********************************************************************************************************************** -BinSeqCommand::BinSeqCommand(){ +BinSeqCommand::BinSeqCommand(string option){ try { globaldata = GlobalData::getInstance(); - fastafile = globaldata->getFastaFile(); - namesfile = globaldata->getNameFile(); - groupfile = globaldata->getGroupFile(); - openInputFile(fastafile, in); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); - if (groupfile != "") { - //read in group map info. - groupMap = new GroupMap(groupfile); - groupMap->readMap(); - } + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"fasta","line","label","name", "group"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getListFile() == "") { cout << "You must read a listfile before running the bin.seqs command." << endl; abort = true; } + + + //check for required parameters + fastafile = validParameter->validFile(parameters, "fasta", true); + if (fastafile == "not found") { cout << "fasta is a required parameter for the bin.seqs command." << endl; abort = true; } + else if (fastafile == "not open") { abort = true; } + else { + globaldata->setFastaFile(fastafile); + openInputFile(fastafile, in); + fasta = new FastaMap(); + } - fasta = new FastaMap(); + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if ((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + namesfile = validParameter->validFile(parameters, "name", true); + if (namesfile == "not open") { abort = true; } + else if (namesfile == "not found") { namesfile = ""; } + + groupfile = validParameter->validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { + //read in group map info. + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + } + + delete validParameter; + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -35,23 +101,57 @@ BinSeqCommand::BinSeqCommand(){ exit(1); } } +//********************************************************************************************************************** + +void BinSeqCommand::help(){ + try { + cout << "The bin.seqs command can only be executed after a successful read.otu command of a listfile." << "\n"; + cout << "The bin.seqs command parameters are fasta, name, line, label and group. 
The fasta parameter is required, and you may not use line and label at the same time." << "\n"; + cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; + cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; + cout << "The default value for line and label are all lines in your inputfile." << "\n"; + cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n"; + cout << "If you provide a groupfile, then it also appends the sequences group to the name." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BinSeqCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + + //********************************************************************************************************************** BinSeqCommand::~BinSeqCommand(){ - delete input; - delete read; + //made new in execute + if (abort == false) { + delete input; + delete read; + delete list; + } + + //made new in constructor delete fasta; - delete list; if (groupfile != "") { - delete groupMap; + delete groupMap; } + } //********************************************************************************************************************** int BinSeqCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; int error = 0; @@ -76,13 +176,13 @@ int BinSeqCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; - while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(list->getLabel()) == 1){ error = process(list, count); if (error == 1) { return 0; } diff --git a/binsequencecommand.h b/binsequencecommand.h index bc2f883..aceba65 100644 --- a/binsequencecommand.h +++ b/binsequencecommand.h @@ -24,9 +24,10 @@ class GlobalData; class BinSeqCommand : public Command { public: - BinSeqCommand(); + BinSeqCommand(string); ~BinSeqCommand(); - int execute(); + int execute(); + void help(); private: GlobalData* globaldata; @@ -35,7 +36,13 @@ private: InputData* input; FastaMap* fasta; GroupMap* groupMap; - string filename, fastafile, namesfile, groupfile; + OptionParser* parser; + map parameters; + 
map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string filename, fastafile, namesfile, groupfile, line, label; ofstream out; ifstream in, inNames; diff --git a/bootstrapsharedcommand.cpp b/bootstrapsharedcommand.cpp index 9ffd2b7..9e3a82d 100644 --- a/bootstrapsharedcommand.cpp +++ b/bootstrapsharedcommand.cpp @@ -22,50 +22,125 @@ //********************************************************************************************************************** -BootSharedCommand::BootSharedCommand(){ +BootSharedCommand::BootSharedCommand(string option){ try { globaldata = GlobalData::getInstance(); - format = globaldata->getFormat(); - convert(globaldata->getIters(), iters); - validCalculator = new ValidCalculators(); - util = new SharedUtil(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Groups.clear(); + Estimators.clear(); + //allow user to run help + if(option == "help") { help(); abort = true; } - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("boot", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "jabund") { - treeCalculators.push_back(new JAbund()); - }else if (globaldata->Estimators[i] == "sorabund") { - treeCalculators.push_back(new SorAbund()); - }else if (globaldata->Estimators[i] == "jclass") { - treeCalculators.push_back(new Jclass()); - }else if (globaldata->Estimators[i] == "sorclass") { - treeCalculators.push_back(new SorClass()); - }else if (globaldata->Estimators[i] == "jest") { - treeCalculators.push_back(new Jest()); - }else if (globaldata->Estimators[i] == "sorest") { - treeCalculators.push_back(new SorEst()); - }else if (globaldata->Estimators[i] == "thetayc") { - treeCalculators.push_back(new ThetaYC()); - }else if (globaldata->Estimators[i] == "thetan") { - treeCalculators.push_back(new ThetaN()); - }else if (globaldata->Estimators[i] == "morisitahorn") { - treeCalculators.push_back(new 
MorHorn()); - }else if (globaldata->Estimators[i] == "braycurtis") { - treeCalculators.push_back(new BrayCurtis()); + else { + //valid paramters for this command + string Array[] = {"line","label","calc","groups","iters"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getSharedFile() == "") { + if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the bootstrap.shared command." << endl; abort = true; } + else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the bootstrap.shared command." << endl; abort = true; } + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "jclass-thetayc"; } + else { + if (calc == "default") { calc = "jclass-thetayc"; } + } + splitAtDash(calc, Estimators); + + string temp; + temp = validParameter->validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, iters); + + delete validParameter; + + if (abort == false) { + + validCalculator = new ValidCalculators(); + + int i; + for (i=0; iisValidCalculator("boot", Estimators[i]) == true) { + if (Estimators[i] == "jabund") { + treeCalculators.push_back(new JAbund()); + }else if (Estimators[i] == "sorabund") { + treeCalculators.push_back(new SorAbund()); + }else if (Estimators[i] == "jclass") { + treeCalculators.push_back(new Jclass()); + }else if (Estimators[i] == "sorclass") { + treeCalculators.push_back(new SorClass()); + }else if (Estimators[i] == "jest") { + treeCalculators.push_back(new Jest()); + }else if (Estimators[i] == "sorest") { + treeCalculators.push_back(new SorEst()); + }else if (Estimators[i] == "thetayc") { + treeCalculators.push_back(new ThetaYC()); + }else if (Estimators[i] == "thetan") { + treeCalculators.push_back(new ThetaN()); + }else if (Estimators[i] == "morisitahorn") { + treeCalculators.push_back(new MorHorn()); + }else if (Estimators[i] == "braycurtis") { + treeCalculators.push_back(new BrayCurtis()); + } + } } + + delete validCalculator; + + ofstream* tempo; + for (int i=0; i < treeCalculators.size(); i++) { + tempo = new ofstream; + out.push_back(tempo); + 
} } } - - ofstream* temp; - for (int i=0; i < treeCalculators.size(); i++) { - temp = new ofstream; - out.push_back(temp); - } - - //reset calc for next command - globaldata->setCalc(""); } catch(exception& e) { @@ -77,23 +152,51 @@ BootSharedCommand::BootSharedCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void BootSharedCommand::help(){ + try { + cout << "The bootstrap.shared command can only be executed after a successful read.otu command." << "\n"; + cout << "The bootstrap.shared command parameters are groups, calc, iters, line and label. You may not use line and label at the same time." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n"; + cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes." << "\n"; + cout << "The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, iters=yourIters)." << "\n"; + cout << "Example bootstrap.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund, iters=100)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; + cout << "The default value for calc is jclass-thetayc. The default for iters is 1000." << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BootSharedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + //********************************************************************************************************************** BootSharedCommand::~BootSharedCommand(){ - delete input; - delete read; - delete util; + //made new in execute + if (abort == false) { + delete input; + delete read; + delete util; + } } //********************************************************************************************************************** int BootSharedCommand::execute(){ try { - int count = 1; - //if the users entered no valid calculators don't execute command - if (treeCalculators.size() == 0) { return 0; } - + if (abort == true) { return 0; } + + int count = 1; + util = new SharedUtil(); + //read first line read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); @@ -101,10 +204,13 @@ int BootSharedCommand::execute(){ order = input->getSharedOrderVector(); SharedOrderVector* lastOrder = order; + //if the users entered no valid calculators don't execute command + if (treeCalculators.size() == 0) { return 0; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; //set users groups util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup"); @@ -121,9 +227,9 @@ int BootSharedCommand::execute(){ tmap->makeSim(globaldata->gGroupmap); globaldata->gTreemap = tmap; - while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){ cout << order->getLabel() << '\t' << count << endl; process(order); @@ -173,7 +279,7 @@ int BootSharedCommand::execute(){ delete lastOrder; //reset groups parameter - globaldata->Groups.clear(); globaldata->setGroups(""); + globaldata->Groups.clear(); return 0; } @@ -300,7 +406,7 @@ void BootSharedCommand::process(SharedOrderVector* order) { //create a file for each calculator with the 1000 trees in it. for (int p = 0; p < iters; p++) { - util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order); //fills group vectors from order vector. + util->getSharedVectorswithReplacement(Groups, lookup, order); //fills group vectors from order vector. 
//for each calculator for(int i = 0 ; i < treeCalculators.size(); i++) { diff --git a/bootstrapsharedcommand.h b/bootstrapsharedcommand.h index b632ace..9ff44b3 100644 --- a/bootstrapsharedcommand.h +++ b/bootstrapsharedcommand.h @@ -25,9 +25,10 @@ class GlobalData; class BootSharedCommand : public Command { public: - BootSharedCommand(); + BootSharedCommand(string); ~BootSharedCommand(); int execute(); + void help(); private: void createTree(ostream*); @@ -48,8 +49,15 @@ private: ValidCalculators* validCalculator; SharedOrderVector* order; vector lookup; - string format, outputFile; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string outputFile, calc, groups, line, label; int numGroups, iters; + vector Estimators, Groups; //holds estimators to be used }; diff --git a/clustercommand.cpp b/clustercommand.cpp index 307b774..36fff22 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -11,45 +11,82 @@ //********************************************************************************************************************** //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen. -ClusterCommand::ClusterCommand(){ +ClusterCommand::ClusterCommand(string option){ try{ globaldata = GlobalData::getInstance(); - - if(globaldata->gSparseMatrix != NULL) { matrix = new SparseMatrix(*globaldata->gSparseMatrix); } - // Not sure if we want the address or an entire new memory allocation. Might be nice to have new memory so data - // doesn't need to be re-read, but then again, it could suck up a ton of memory. Dunno. 
- // if(globaldata->getSparseMatrix() != NULL) { matrix = globaldata->getSparseMatrix(); } - - if(globaldata->gListVector != NULL){ - list = new ListVector(*globaldata->gListVector); - rabund = new RAbundVector(list->getRAbundVector()); - //rabund->print(cout); - } - - if(globaldata->getMethod() != "") { method = globaldata->getMethod(); } - //if no method given use furthest, initialized in globaldata - if(method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix); tag = "fn"; } - else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix); tag = "nn"; } - else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix); tag = "an"; } - else { cout << "error - not recognized method" << endl; } - - if(globaldata->getPrecision() != ""){ - convert(globaldata->getPrecision(), precision); - } + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } - //saves precision legnth for formatting below - length = globaldata->getPrecision().length(); + else { + //valid paramters for this command + string Array[] = {"cutoff","precision","method"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); - if(globaldata->getCutOff() != ""){ - convert(globaldata->getCutOff(), cutoff); + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //error checking to make sure they read a distance file + if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) { + cout << "Before you use the cluster command, you first need to read in a distance matrix." 
<< endl; abort = true; + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + //get user cutoff and precision or use defaults + string temp; + temp = validParameter->validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } + //saves precision legnth for formatting below + length = temp.length(); + convert(temp, precision); + + temp = validParameter->validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } + convert(temp, cutoff); cutoff += (5 / (precision * 10.0)); + + method = validParameter->validFile(parameters, "method", false); if (method == "not found") { method = "furthest"; } + + delete validParameter; + + if ((method == "furthest") || (method == "nearest") || (method == "average")) { } + else {cout << "Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average." << endl; abort = true; } + + + if (abort == false) { + + //get matrix, list and rabund for execute + if(globaldata->gSparseMatrix != NULL) { matrix = new SparseMatrix(*globaldata->gSparseMatrix); } + + if(globaldata->gListVector != NULL){ + list = new ListVector(*globaldata->gListVector); + rabund = new RAbundVector(list->getRAbundVector()); + } + + //create cluster + if(method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix); tag = "fn"; } + else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix); tag = "nn"; } + else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix); tag = "an"; } + else { cout << "error - not recognized method" << endl; abort = true; } + + fileroot = getRootName(globaldata->inputFileName); + + openOutputFile(fileroot+ tag + ".sabund", sabundFile); + openOutputFile(fileroot+ tag + ".rabund", rabundFile); + openOutputFile(fileroot+ tag + ".list", listFile); + + + } + } - - fileroot = getRootName(globaldata->getFileRoot()); - openOutputFile(fileroot+ tag + 
".sabund", sabundFile); - openOutputFile(fileroot+ tag + ".rabund", rabundFile); - openOutputFile(fileroot+ tag + ".list", listFile); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function ClusterCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -60,6 +97,27 @@ ClusterCommand::ClusterCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void ClusterCommand::help(){ + try { + cout << "The cluster command can only be executed after a successful read.dist command." << "\n"; + cout << "The cluster command parameter options are method, cuttoff and precision. No parameters are required." << "\n"; + cout << "The cluster command should be in the following format: " << "\n"; + cout << "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) " << "\n"; + cout << "The acceptable cluster methods are furthest, nearest and average. If no method is provided then furthest is assumed." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ClusterCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + //********************************************************************************************************************** ClusterCommand::~ClusterCommand(){ @@ -73,6 +131,9 @@ ClusterCommand::~ClusterCommand(){ int ClusterCommand::execute(){ try { + + if (abort == true) { return 0; } + float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldRAbund = *rabund; diff --git a/clustercommand.h b/clustercommand.h index ad41a45..37b9edb 100644 --- a/clustercommand.h +++ b/clustercommand.h @@ -31,9 +31,10 @@ class GlobalData; class ClusterCommand : public Command { public: - ClusterCommand(); + ClusterCommand(string); ~ClusterCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -43,7 +44,11 @@ private: RAbundVector* rabund; RAbundVector oldRAbund; ListVector oldList; - + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + string method, fileroot, tag; double cutoff; int precision, length; diff --git a/collectcommand.cpp b/collectcommand.cpp index 3d7f1b0..a904870 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -31,68 +31,134 @@ //********************************************************************************************************************** -CollectCommand::CollectCommand(){ +CollectCommand::CollectCommand(string option){ try { globaldata = GlobalData::getInstance(); - string fileNameRoot; - fileNameRoot = getRootName(globaldata->inputFileName); - convert(globaldata->getFreq(), freq); - int i; - validCalculator = new ValidCalculators(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Estimators.clear(); - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("single", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "sobs") { - cDisplays.push_back(new CollectDisplay(new Sobs(), new OneColumnFile(fileNameRoot+"sobs"))); - }else if (globaldata->Estimators[i] == "chao") { - cDisplays.push_back(new 
CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao"))); - }else if (globaldata->Estimators[i] == "nseqs") { - cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs"))); - }else if (globaldata->Estimators[i] == "coverage") { - cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage"))); - }else if (globaldata->Estimators[i] == "ace") { - convert(globaldata->getAbund(), abund); - cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace"))); - }else if (globaldata->Estimators[i] == "jack") { - cDisplays.push_back(new CollectDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"jack"))); - }else if (globaldata->Estimators[i] == "shannon") { - cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"shannon"))); - }else if (globaldata->Estimators[i] == "npshannon") { - cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+"np_shannon"))); - }else if (globaldata->Estimators[i] == "simpson") { - cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson"))); - }else if (globaldata->Estimators[i] == "bootstrap") { - cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap"))); - }else if (globaldata->Estimators[i] == "geometric") { - cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric"))); - }else if (globaldata->Estimators[i] == "qstat") { - cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat"))); - }else if (globaldata->Estimators[i] == "logseries") { - cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries"))); - }else if (globaldata->Estimators[i] == "bergerparker") { - cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker"))); - 
}else if (globaldata->Estimators[i] == "bstick") { - cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick"))); - }else if (globaldata->Estimators[i] == "goodscoverage") { - cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage"))); - }else if (globaldata->Estimators[i] == "efron") { - convert(globaldata->getSize(), size); - cDisplays.push_back(new CollectDisplay(new Efron(size), new OneColumnFile(fileNameRoot+"efron"))); - }else if (globaldata->Estimators[i] == "boneh") { - convert(globaldata->getSize(), size); - cDisplays.push_back(new CollectDisplay(new Boneh(size), new OneColumnFile(fileNameRoot+"boneh"))); - }else if (globaldata->Estimators[i] == "solow") { - convert(globaldata->getSize(), size); - cDisplays.push_back(new CollectDisplay(new Solow(size), new OneColumnFile(fileNameRoot+"solow"))); - }else if (globaldata->Estimators[i] == "shen") { - convert(globaldata->getSize(), size); - cDisplays.push_back(new CollectDisplay(new Shen(size), new OneColumnFile(fileNameRoot+"shen"))); + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"freq","line","label","calc","abund","size"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { cout << "You must read a list, sabund or rabund 
before you can use the collect.single command." << endl; abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + else { + if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + } + splitAtDash(calc, Estimators); + + string temp; + temp = validParameter->validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } + convert(temp, freq); + + temp = validParameter->validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } + convert(temp, abund); + + temp = validParameter->validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; } + convert(temp, size); + + delete validParameter; + + + if (abort == false) { + string fileNameRoot = getRootName(globaldata->inputFileName); + int i; + validCalculator = new ValidCalculators(); + + for (i=0; iisValidCalculator("single", Estimators[i]) == true) { + if (Estimators[i] 
== "sobs") { + cDisplays.push_back(new CollectDisplay(new Sobs(), new OneColumnFile(fileNameRoot+"sobs"))); + }else if (Estimators[i] == "chao") { + cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao"))); + }else if (Estimators[i] == "nseqs") { + cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs"))); + }else if (Estimators[i] == "coverage") { + cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage"))); + }else if (Estimators[i] == "ace") { + cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace"))); + }else if (Estimators[i] == "jack") { + cDisplays.push_back(new CollectDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"jack"))); + }else if (Estimators[i] == "shannon") { + cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"shannon"))); + }else if (Estimators[i] == "npshannon") { + cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+"np_shannon"))); + }else if (Estimators[i] == "simpson") { + cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson"))); + }else if (Estimators[i] == "bootstrap") { + cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap"))); + }else if (Estimators[i] == "geometric") { + cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric"))); + }else if (Estimators[i] == "qstat") { + cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat"))); + }else if (Estimators[i] == "logseries") { + cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries"))); + }else if (Estimators[i] == "bergerparker") { + cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker"))); + }else if 
(Estimators[i] == "bstick") { + cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick"))); + }else if (Estimators[i] == "goodscoverage") { + cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage"))); + }else if (Estimators[i] == "efron") { + cDisplays.push_back(new CollectDisplay(new Efron(size), new OneColumnFile(fileNameRoot+"efron"))); + }else if (Estimators[i] == "boneh") { + cDisplays.push_back(new CollectDisplay(new Boneh(size), new OneColumnFile(fileNameRoot+"boneh"))); + }else if (Estimators[i] == "solow") { + cDisplays.push_back(new CollectDisplay(new Solow(size), new OneColumnFile(fileNameRoot+"solow"))); + }else if (Estimators[i] == "shen") { + cDisplays.push_back(new CollectDisplay(new Shen(size, abund), new OneColumnFile(fileNameRoot+"shen"))); + } + } } } } - //reset calc for next command - globaldata->setCalc(""); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the CollectCommand class Function CollectCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -104,6 +170,30 @@ CollectCommand::CollectCommand(){ } } +//********************************************************************************************************************** + +void CollectCommand::help(){ + try { + cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n"; + cout << "The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; + cout << "The collect.single command parameters are label, line, freq, calc and abund. No parameters are required, but you may not use " << "\n"; + cout << "both the line and label parameters at the same time. 
The collect.single command should be in the following format: " << "\n"; + cout << "collect.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n"; + cout << "Example collect(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-chao-ace-jack)." << "\n"; + cout << "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson." << "\n"; + validCalculator->printCalc("single", cout); + cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the CollectCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the CollectCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -112,12 +202,16 @@ CollectCommand::~CollectCommand(){ delete input; delete cCurve; delete read; + delete validCalculator; } //********************************************************************************************************************** int CollectCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -132,12 +226,12 @@ int CollectCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; - while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){ cCurve = new Collect(order, cDisplays); cCurve->getCurve(freq); diff --git a/collectcommand.h b/collectcommand.h index 7c5ec42..3b79d7f 100644 --- a/collectcommand.h +++ b/collectcommand.h @@ -37,9 +37,10 @@ class GlobalData; class CollectCommand : public Command { public: - CollectCommand(); + CollectCommand(string); ~CollectCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -51,6 +52,15 @@ private: ValidCalculators* validCalculator; vector cDisplays; int freq, abund, size; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string line, label, calc; + vector Estimators; + }; diff --git a/collectdisplay.h b/collectdisplay.h index 110d816..4fe3588 100644 --- a/collectdisplay.h +++ b/collectdisplay.h @@ -26,7 +26,7 @@ public: data = estimate->getValues(shared); //passes estimators a shared vector from each group to be compared //figure out what groups are being compared in getValues - //because the jumble parameter randomizes the order we need to put the results in the correct column in the output file + //because we randomizes the order we need to put the results in the correct column in the output file int group1Index, group2Index, pos; group1Index = shared[0]->getGroupIndex(); group2Index = shared[1]->getGroupIndex(); diff --git a/collectsharedcommand.cpp 
b/collectsharedcommand.cpp index d2333d7..2b6cbac 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -36,68 +36,143 @@ //********************************************************************************************************************** -CollectSharedCommand::CollectSharedCommand(){ +CollectSharedCommand::CollectSharedCommand(string option){ try { globaldata = GlobalData::getInstance(); - string fileNameRoot; - fileNameRoot = getRootName(globaldata->inputFileName); - format = globaldata->getFormat(); - convert(globaldata->getFreq(), freq); - validCalculator = new ValidCalculators(); - util = new SharedUtil(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Estimators.clear(); - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("shared", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "sharedchao") { - cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao"))); - }else if (globaldata->Estimators[i] == "sharedsobs") { - cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs"))); - }else if (globaldata->Estimators[i] == "sharedace") { - cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace"))); - }else if (globaldata->Estimators[i] == "jabund") { - cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund"))); - }else if (globaldata->Estimators[i] == "sorabund") { - cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund"))); - }else if (globaldata->Estimators[i] == "jclass") { - cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass"))); - }else if (globaldata->Estimators[i] == "sorclass") { - cDisplays.push_back(new CollectDisplay(new SorClass(), new 
SharedOneColumnFile(fileNameRoot+"sorclass"))); - }else if (globaldata->Estimators[i] == "jest") { - cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest"))); - }else if (globaldata->Estimators[i] == "sorest") { - cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest"))); - }else if (globaldata->Estimators[i] == "thetayc") { - cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc"))); - }else if (globaldata->Estimators[i] == "thetan") { - cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan"))); - }else if (globaldata->Estimators[i] == "kstest") { - cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest"))); - }else if (globaldata->Estimators[i] == "whittaker") { - cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker"))); - }else if (globaldata->Estimators[i] == "sharednseqs") { - cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs"))); - - }else if (globaldata->Estimators[i] == "ochiai") { - cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai"))); - }else if (globaldata->Estimators[i] == "anderberg") { - cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg"))); - }else if (globaldata->Estimators[i] == "skulczynski") { - cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski"))); - }else if (globaldata->Estimators[i] == "kulczynskicody") { - cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody"))); - }else if (globaldata->Estimators[i] == "lennon") { - cDisplays.push_back(new CollectDisplay(new Lennon(), new 
SharedOneColumnFile(fileNameRoot+"lennon"))); - }else if (globaldata->Estimators[i] == "morisitahorn") { - cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn"))); - }else if (globaldata->Estimators[i] == "braycurtis") { - cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis"))); - } + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"freq","line","label","calc","groups"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getSharedFile() == "") { + if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared command." << endl; abort = true; } + else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared command." << endl; abort = true; } + } + + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } + else { + if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } + } + splitAtDash(calc, Estimators); + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + string temp; + temp = validParameter->validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } + convert(temp, freq); + + delete validParameter; + + if (abort == false) { + + string fileNameRoot = getRootName(globaldata->inputFileName); + format = globaldata->getFormat(); + int i; + + validCalculator = new ValidCalculators(); + util = new SharedUtil(); + + for (i=0; iisValidCalculator("shared", Estimators[i]) == true) { + if (Estimators[i] == "sharedchao") { + cDisplays.push_back(new CollectDisplay(new SharedChao1(), new 
SharedOneColumnFile(fileNameRoot+"shared.chao"))); + }else if (Estimators[i] == "sharedsobs") { + cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs"))); + }else if (Estimators[i] == "sharedace") { + cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace"))); + }else if (Estimators[i] == "jabund") { + cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund"))); + }else if (Estimators[i] == "sorabund") { + cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund"))); + }else if (Estimators[i] == "jclass") { + cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass"))); + }else if (Estimators[i] == "sorclass") { + cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass"))); + }else if (Estimators[i] == "jest") { + cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest"))); + }else if (Estimators[i] == "sorest") { + cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest"))); + }else if (Estimators[i] == "thetayc") { + cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc"))); + }else if (Estimators[i] == "thetan") { + cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan"))); + }else if (Estimators[i] == "kstest") { + cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest"))); + }else if (Estimators[i] == "whittaker") { + cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker"))); + }else if (Estimators[i] == "sharednseqs") { + cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new 
SharedOneColumnFile(fileNameRoot+"shared.nseqs"))); + }else if (Estimators[i] == "ochiai") { + cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai"))); + }else if (Estimators[i] == "anderberg") { + cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg"))); + }else if (Estimators[i] == "skulczynski") { + cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski"))); + }else if (Estimators[i] == "kulczynskicody") { + cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody"))); + }else if (Estimators[i] == "lennon") { + cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon"))); + }else if (Estimators[i] == "morisitahorn") { + cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn"))); + }else if (Estimators[i] == "braycurtis") { + cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis"))); + } + } + } } } - - //reset calc for next command - globaldata->setCalc(""); } catch(exception& e) { @@ -110,6 +185,32 @@ CollectSharedCommand::CollectSharedCommand(){ } } +//********************************************************************************************************************** + +void CollectSharedCommand::help(){ + try { + cout << "The collect.shared command can only be executed after a successful read.otu command." << "\n"; + cout << "The collect.shared command parameters are label, line, freq, calc and groups. No parameters are required, but you may not use " << "\n"; + cout << "both the line and label parameters at the same time. The collect.shared command should be in the following format: " << "\n"; + cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, calc=yourEstimators, groups=yourGroups)." 
<< "\n"; + cout << "Example collect.shared(label=unique-.01-.03, line=0-5-10, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; + cout << "The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan." << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; + validCalculator->printCalc("shared", cout); + cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the CollectSharedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the CollectSharedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -119,12 +220,16 @@ CollectSharedCommand::~CollectSharedCommand(){ delete cCurve; delete read; delete util; + delete validCalculator; } //********************************************************************************************************************** int CollectSharedCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -208,7 +313,7 @@ int CollectSharedCommand::execute(){ for(int i=0;iGroups.clear(); globaldata->setGroups(""); + globaldata->Groups.clear(); return 0; } diff --git a/collectsharedcommand.h b/collectsharedcommand.h index aac31c1..7024d0b 100644 --- a/collectsharedcommand.h +++ b/collectsharedcommand.h @@ -24,11 +24,11 @@ /* The collect.shared() command: The collect command generates a collector's curve from the given file representing several groups. The collect.shared command can only be executed after a successful read.shared command. - It outputs a file for each estimator you choose to use. The collect.shared command parameters are label, line, freq, jumble and shared. + It outputs a file for each estimator you choose to use. The collect.shared command parameters are label, line, freq and shared. No parameters are required, but you may not use both the line and label parameters at the same time. The collect.shared command should be in the following format: collect.shared(label=yourLabel, line=yourLines, - freq=yourFreq, jumble=yourJumble, shared=yourEstimators). Example collect.shared(label=unique-.01-.03, line=0,5,10, freq=10, jumble=1, - shared=sharedChao-sharedAce-sharedJabund). The default values for jumble is 0 (meaning don’t jumble, if it’s set to 1 then it will jumble), + freq=yourFreq, shared=yourEstimators). 
Example collect.shared(label=unique-.01-.03, line=0,5,10, freq=10, + shared=sharedChao-sharedAce-sharedJabund). The default value for freq is 100 and shared are sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN. The valid shared estimators are: sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN. The label and line parameters are used to analyze specific lines in your input. */ @@ -39,9 +39,10 @@ class GlobalData; class CollectSharedCommand : public Command { public: - CollectSharedCommand(); + CollectSharedCommand(string); ~CollectSharedCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -56,6 +57,15 @@ private: vector cDisplays; int freq; string format; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string line, label, calc, groups; + vector Estimators, Groups; + }; diff --git a/command.hpp b/command.hpp index c8dae40..3e655ab 100644 --- a/command.hpp +++ b/command.hpp @@ -14,10 +14,13 @@ #include "mothur.h" +#include "optionparser.h" +#include "validparameter.h" class Command { public: virtual int execute() = 0; + virtual void help() = 0; }; #endif diff --git a/commandfactory.cpp b/commandfactory.cpp index 8488e65..48a3633 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -55,7 +55,8 @@ /***********************************************************/ CommandFactory::CommandFactory(){ - command = new NoCommand(); + string s = ""; + command = new NoCommand(s); } /***********************************************************/ @@ -68,49 +69,49 @@ CommandFactory::~CommandFactory(){ /***********************************************************/ //This function calls the appropriate command fucntions based on user input. 
-Command* CommandFactory::getCommand(string commandName){ +Command* CommandFactory::getCommand(string commandName, string optionString){ try { delete command; //delete the old command - if(commandName == "read.dist") { command = new ReadDistCommand(); } - else if(commandName == "read.otu") { command = new ReadOtuCommand(); } - else if(commandName == "read.tree") { command = new ReadTreeCommand(); } - else if(commandName == "cluster") { command = new ClusterCommand(); } - else if(commandName == "unique.seqs") { command = new DeconvoluteCommand(); } - else if(commandName == "parsimony") { command = new ParsimonyCommand(); } - else if(commandName == "help") { command = new HelpCommand(); } - else if(commandName == "quit") { command = new QuitCommand(); } - else if(commandName == "collect.single") { command = new CollectCommand(); } - else if(commandName == "collect.shared") { command = new CollectSharedCommand(); } - else if(commandName == "rarefaction.single") { command = new RareFactCommand(); } - else if(commandName == "rarefaction.shared") { command = new RareFactSharedCommand(); } - else if(commandName == "summary.single") { command = new SummaryCommand(); } - else if(commandName == "summary.shared") { command = new SummarySharedCommand(); } - else if(commandName == "unifrac.weighted") { command = new UnifracWeightedCommand(); } - else if(commandName == "unifrac.unweighted") { command = new UnifracUnweightedCommand(); } - else if(commandName == "get.group") { command = new GetgroupCommand(); } - else if(commandName == "get.label") { command = new GetlabelCommand(); } - else if(commandName == "get.line") { command = new GetlineCommand(); } - else if(commandName == "get.sabund") { command = new GetSAbundCommand(); } - else if(commandName == "get.rabund") { command = new GetRAbundCommand(); } - else if(commandName == "libshuff") { command = new LibShuffCommand(); } - else if(commandName == "heatmap.bin") { command = new HeatMapCommand(); } - else if(commandName == 
"heatmap.sim") { command = new HeatMapSimCommand(); } - else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(); } - else if(commandName == "venn") { command = new VennCommand(); } - else if(commandName == "bin.seqs") { command = new BinSeqCommand(); } - else if(commandName == "get.oturep") { command = new GetOTURepCommand(); } - else if(commandName == "tree.shared") { command = new TreeGroupCommand(); } - else if(commandName == "dist.shared") { command = new MatrixOutputCommand(); } - else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(); } - else if(commandName == "concensus") { command = new ConcensusCommand(); } - else if(commandName == "dist.seqs") { command = new DistanceCommand(); } - else if(commandName == "align.seqs") { command = new AlignCommand(); } - else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(); } - else if(commandName == "screen.seqs") { command = new ScreenSeqsCommand(); } - else if(commandName == "reverse.seqs") { command = new ReverseSeqsCommand(); } - else if(commandName == "trim.seqs") { command = new TrimSeqsCommand(); } - else { command = new NoCommand(); } + if(commandName == "read.dist") { command = new ReadDistCommand(optionString); } + else if(commandName == "read.otu") { command = new ReadOtuCommand(optionString); } + else if(commandName == "read.tree") { command = new ReadTreeCommand(optionString); } + else if(commandName == "cluster") { command = new ClusterCommand(optionString); } + else if(commandName == "unique.seqs") { command = new DeconvoluteCommand(optionString); } + else if(commandName == "parsimony") { command = new ParsimonyCommand(optionString); } + else if(commandName == "help") { command = new HelpCommand(optionString); } + else if(commandName == "quit") { command = new QuitCommand(optionString); } + else if(commandName == "collect.single") { command = new CollectCommand(optionString); } + else if(commandName == "collect.shared") { command = new 
CollectSharedCommand(optionString); } + else if(commandName == "rarefaction.single") { command = new RareFactCommand(optionString); } + else if(commandName == "rarefaction.shared") { command = new RareFactSharedCommand(optionString); } + else if(commandName == "summary.single") { command = new SummaryCommand(optionString); } + else if(commandName == "summary.shared") { command = new SummarySharedCommand(optionString); } + else if(commandName == "unifrac.weighted") { command = new UnifracWeightedCommand(optionString); } + else if(commandName == "unifrac.unweighted") { command = new UnifracUnweightedCommand(optionString); } + else if(commandName == "get.group") { command = new GetgroupCommand(optionString); } + else if(commandName == "get.label") { command = new GetlabelCommand(optionString); } + else if(commandName == "get.line") { command = new GetlineCommand(optionString); } + else if(commandName == "get.sabund") { command = new GetSAbundCommand(optionString); } + else if(commandName == "get.rabund") { command = new GetRAbundCommand(optionString); } + else if(commandName == "libshuff") { command = new LibShuffCommand(optionString); } + else if(commandName == "heatmap.bin") { command = new HeatMapCommand(optionString); } + else if(commandName == "heatmap.sim") { command = new HeatMapSimCommand(optionString); } + else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(optionString); } + else if(commandName == "venn") { command = new VennCommand(optionString); } + else if(commandName == "bin.seqs") { command = new BinSeqCommand(optionString); } + else if(commandName == "get.oturep") { command = new GetOTURepCommand(optionString); } + else if(commandName == "tree.shared") { command = new TreeGroupCommand(optionString); } + else if(commandName == "dist.shared") { command = new MatrixOutputCommand(optionString); } + else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(optionString); } + else if(commandName == "concensus") { 
command = new ConcensusCommand(optionString); } + else if(commandName == "dist.seqs") { command = new DistanceCommand(optionString); } + else if(commandName == "align.seqs") { command = new AlignCommand(optionString); } + else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(optionString); } + else if(commandName == "screen.seqs") { command = new ScreenSeqsCommand(optionString); } + else if(commandName == "reverse.seqs") { command = new ReverseSeqsCommand(optionString); } + else if(commandName == "trim.seqs") { command = new TrimSeqsCommand(optionString); } + else { command = new NoCommand(optionString); } return command; } diff --git a/commandfactory.hpp b/commandfactory.hpp index f715b84..e3eea41 100644 --- a/commandfactory.hpp +++ b/commandfactory.hpp @@ -18,7 +18,7 @@ class CommandFactory { public: CommandFactory(); ~CommandFactory(); - Command* getCommand(string); + Command* getCommand(string, string); private: Command* command; diff --git a/commandoptionparser.cpp b/commandoptionparser.cpp index 6fc43ab..e4db11d 100644 --- a/commandoptionparser.cpp +++ b/commandoptionparser.cpp @@ -18,16 +18,18 @@ CommandOptionParser::CommandOptionParser(string input){ try { int openParen = input.find_first_of('('); int closeParen = input.find_last_of(')'); - string optionString = ""; + optionString = ""; commandString = ""; - + if(openParen != -1 && closeParen != -1){ commandString = input.substr(0, openParen); //commandString contains everything before "(" - optionString = input.substr(openParen+1, closeParen-openParen-1); //optionString contains everything between "(" and ")". + optionString = input.substr((openParen+1), (closeParen-openParen-1)); //optionString contains everything between "(" and ")". 
} + else if (openParen == -1) { cout << "You are missing (" << endl; } + else if (closeParen == -1) { cout << "You are missing )" << endl; } - GlobalData* globaldata = GlobalData::getInstance(); - globaldata->parseGlobalData(commandString, optionString); //parser to separate and check options + //GlobalData* globaldata = GlobalData::getInstance(); + //globaldata->parseGlobalData(commandString, optionString); //parser to separate and check options } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the CommandOptionParser class Function CommandOptionParser. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -45,3 +47,7 @@ CommandOptionParser::CommandOptionParser(string input){ string CommandOptionParser::getCommandString() { return commandString; } //********************************************************************************************************************** + +string CommandOptionParser::getOptionString() { return optionString; } + +//********************************************************************************************************************** diff --git a/commandoptionparser.hpp b/commandoptionparser.hpp index f83f60e..44df8f7 100644 --- a/commandoptionparser.hpp +++ b/commandoptionparser.hpp @@ -9,9 +9,10 @@ class CommandOptionParser { public: CommandOptionParser(string); string getCommandString(); + string getOptionString(); private: - string commandString; + string commandString, optionString; }; //********************************************************************************************************************** diff --git a/concensuscommand.cpp b/concensuscommand.cpp index 01df253..09e0f32 100644 --- a/concensuscommand.cpp +++ b/concensuscommand.cpp @@ -11,10 +11,21 @@ //********************************************************************************************************************** -ConcensusCommand::ConcensusCommand(){ +ConcensusCommand::ConcensusCommand(string option){ try { 
globaldata = GlobalData::getInstance(); - t = globaldata->gTree; + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + if (option != "") { cout << "There are no valid parameters for the concensus command." << endl; abort = true; } + + //no trees were read + if (globaldata->gTree.size() == 0) { cout << "You must execute the read.tree command, before you may use the concensus command." << endl; abort = true; } + else { t = globaldata->gTree; } + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function ConcensusCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -25,6 +36,30 @@ ConcensusCommand::ConcensusCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void ConcensusCommand::help(){ + try { + cout << "The concensus command can only be executed after a successful read.tree command." << "\n"; + cout << "The concensus command has no parameters." << "\n"; + cout << "The concensus command should be in the following format: concensus()." << "\n"; + cout << "The concensus command output two files: .concensus.tre and .concensuspairs." << "\n"; + cout << "The .concensus.tre file contains the concensus tree of the trees in your input file." << "\n"; + cout << "The branch lengths are the percentage of trees in your input file that had the given pair." << "\n"; + cout << "The .concensuspairs file contains a list of the internal nodes in your tree. For each node, the pair that was used in the concensus tree " << "\n"; + cout << "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function help. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + //********************************************************************************************************************** ConcensusCommand::~ConcensusCommand(){} @@ -34,7 +69,7 @@ ConcensusCommand::~ConcensusCommand(){} int ConcensusCommand::execute(){ try { - if (t.size() == 0) { return 0; } + if (abort == true) { return 0; } else { numNodes = t[0]->getNumNodes(); numLeaves = t[0]->getNumLeaves(); diff --git a/concensuscommand.h b/concensuscommand.h index cc923dd..970102a 100644 --- a/concensuscommand.h +++ b/concensuscommand.h @@ -19,15 +19,17 @@ class GlobalData; class ConcensusCommand : public Command { public: - ConcensusCommand(); + ConcensusCommand(string); ~ConcensusCommand(); - int execute(); + int execute(); + void help(); private: GlobalData* globaldata; SharedUtil* util; vector t; Tree* concensusTree; + bool abort; vector treeSet; //set containing all members of the tree to start recursion. filled in getSets(). map< vector, int > nodePairs; // myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + filename = validParameter->validFile(parameters, "fasta", true); + if (filename == "not open") { abort = true; } + else if (filename == "not found") { filename = ""; cout << "fasta is a required parameter for the unique.seqs command." 
<< endl; abort = true; } + else { globaldata->setFastaFile(filename); globaldata->setFormat("fasta"); } + + delete validParameter; + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function DeconvoluteCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the DeconvoluteCommand class function DeconvoluteCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** + +void DeconvoluteCommand::help(){ + try { + cout << "The unique.seqs command reads a fastafile and creates a namesfile." << "\n"; + cout << "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. " << "\n"; + cout << "If the sequence is unique the second column will just contain its name. " << "\n"; + cout << "The unique.seqs command parameter is fasta and it is required." << "\n"; + cout << "The unique.seqs command should be in the following format: " << "\n"; + cout << "unique.seqs(fasta=yourFastaFile) " << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the DeconvoluteCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + +/**************************************************************************************/ +int DeconvoluteCommand::execute() { + try { + + if (abort == true) { return 0; } //prepare filenames and open files - filename = globaldata->getFastaFile(); outputFileName = (getRootName(filename) + "names"); outFastafile = (getRootName(filename) + "unique.fasta"); openInputFile(filename, in); openOutputFile(outputFileName, out); openOutputFile(outFastafile, outFasta); - + //constructor reads in file and store internally fastamap = new FastaMap(); diff --git a/deconvolutecommand.h b/deconvolutecommand.h index aaed841..0b6eb5e 100644 --- a/deconvolutecommand.h +++ b/deconvolutecommand.h @@ -20,9 +20,10 @@ class DeconvoluteCommand : public Command { public: - DeconvoluteCommand() {}; + DeconvoluteCommand(string); ~DeconvoluteCommand() { delete fastamap; }; - int execute(); + int execute(); + void help(); private: GlobalData* globaldata; @@ -30,6 +31,11 @@ private: ifstream in; ofstream out, outFasta; string filename, outputFileName, outFastafile; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + }; diff --git a/distancecommand.cpp b/distancecommand.cpp index 67b87f1..7890cb9 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -16,49 +16,93 @@ //********************************************************************************************************************** -DistanceCommand::DistanceCommand(){ +DistanceCommand::DistanceCommand(string option){ try { globaldata = GlobalData::getInstance(); - validCalculator = new ValidCalculators(); - countends = globaldata->getCountEnds(); - convert(globaldata->getProcessors(), processors); - convert(globaldata->getCutOff(), cutoff); - phylip = globaldata->getPhylipFile(); + abort = false; + Estimators.clear(); - //open file - string filename = globaldata->getFastaFile(); - openInputFile(filename, in); + //allow user to run help + if(option == "help") { help(); abort = true; 
} - + else { + //valid paramters for this command + string Array[] = {"fasta", "phylip", "calc", "countends", "cutoff", "processors"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); - int i; - if (isTrue(countends) == true) { - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "nogaps") { - distCalculator = new ignoreGaps(); - }else if (globaldata->Estimators[i] == "eachgap") { - distCalculator = new eachGapDist(); - }else if (globaldata->Estimators[i] == "onegap") { - distCalculator = new oneGapDist(); } - } + //check to make sure all parameters are valid for command + for (it2 = parameters.begin(); it2 != parameters.end(); it2++) { + if (validParameter->isValidParameter(it2->first, myArray, it2->second) != true) { abort = true; } } - }else { - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "nogaps") { - distCalculator = new ignoreGaps(); - }else if (globaldata->Estimators[i] == "eachgap") { - distCalculator = new eachGapIgnoreTermGapDist(); - }else if (globaldata->Estimators[i] == "onegap") { - distCalculator = new oneGapIgnoreTermGapDist(); + + //check for required parameters + fastafile = validParameter->validFile(parameters, "fasta", true); + if (fastafile == "not found") { cout << "fasta is a required parameter for the dist.seqs command." << endl; abort = true; } + else if (fastafile == "not open") { abort = true; } + else { + globaldata->setFastaFile(fastafile); + openInputFile(fastafile, in); + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "onegap"; } + else { + if (calc == "default") { calc = "onegap"; } + } + splitAtDash(calc, Estimators); + + string temp; + temp = validParameter->validFile(parameters, "countends", false); if (temp == "not found") { temp = "T"; } + convert(temp, countends); + + temp = validParameter->validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "1.0"; } + convert(temp, cutoff); + + temp = validParameter->validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } + convert(temp, processors); + + phylip = validParameter->validFile(parameters, "phylip", false); if (phylip == "not found") { phylip = "F"; } + + delete validParameter; + + validCalculator = new ValidCalculators(); + + int i; + if (isTrue(countends) == true) { + for (i=0; iisValidCalculator("distance", Estimators[i]) == true) { + if (Estimators[i] == "nogaps") { + distCalculator = new ignoreGaps(); + }else if (Estimators[i] == "eachgap") { + distCalculator = new eachGapDist(); + }else if (Estimators[i] == "onegap") { + distCalculator = new oneGapDist(); } + } + } + }else { + for (i=0; iisValidCalculator("distance", Estimators[i]) == true) { + if (Estimators[i] == "nogaps") { + distCalculator = new ignoreGaps(); + }else if (Estimators[i] == "eachgap") { + distCalculator = new eachGapIgnoreTermGapDist(); + }else if (Estimators[i] == "onegap") { + distCalculator = new oneGapIgnoreTermGapDist(); + } } } } + + delete validCalculator; } - - //reset calc for next command - globaldata->setCalc(""); + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; @@ -71,12 +115,40 @@ DistanceCommand::DistanceCommand(){ } //********************************************************************************************************************** +void DistanceCommand::help(){ + try { + cout << "The dist.seqs command reads a file containing sequences and creates a distance file." << "\n"; + cout << "The dist.seqs command parameters are fasta, calc, countends, cutoff and processors. " << "\n"; + cout << "The fasta parameter is required." << "\n"; + cout << "The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap." << "\n"; + cout << "The countends parameter allows you to specify whether to include terminal gaps in distance. Your options are: T or F. The default is T." << "\n"; + cout << "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0." << "\n"; + cout << "The processors parameter allows you to specify number of processors to use. The default is 1." << "\n"; + cout << "The dist.seqs command should be in the following format: " << "\n"; + cout << "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) " << "\n"; + cout << "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3)." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the DistanceCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + + +//********************************************************************************************************************** + int DistanceCommand::execute(){ try { + if (abort == true) { return 0; } + //reads fasta file and fills sequenceDB - if(globaldata->getFastaFile() != "") { seqDB = new SequenceDB(in); } - else { cout << "Error no fasta file." << endl; return 0; } + seqDB = new SequenceDB(in); int numSeqs = seqDB->getNumSeqs(); cutoff += 0.005; @@ -85,7 +157,7 @@ int DistanceCommand::execute(){ //doses the user want the phylip formatted file as well if (isTrue(phylip) == true) { - outputFile = getRootName(globaldata->getFastaFile()) + "phylip.dist"; + outputFile = getRootName(fastafile) + "phylip.dist"; remove(outputFile.c_str()); //output numSeqs to phylip formatted dist file @@ -93,7 +165,7 @@ int DistanceCommand::execute(){ outFile << numSeqs << endl; outFile.close(); }else { //user wants column format - outputFile = getRootName(globaldata->getFastaFile()) + "dist"; + outputFile = getRootName(fastafile) + "dist"; remove(outputFile.c_str()); } diff --git a/distancecommand.h b/distancecommand.h index 2cb653f..446be04 100644 --- a/distancecommand.h +++ b/distancecommand.h @@ -25,9 +25,10 @@ struct linePair { class DistanceCommand : public Command { public: - DistanceCommand(); + DistanceCommand(string); ~DistanceCommand() {}; int execute(); + void help(); private: GlobalData* globaldata; @@ -36,13 +37,19 @@ private: SequenceDB* seqDB; ofstream out, outFile; ifstream in; - string countends, phylip; + string countends, phylip, fastafile, calc; int processors; float cutoff; map processIDS; //end line, processid map::iterator it; vector lines; + OptionParser* parser; + map parameters; + map::iterator it2; + bool abort; + vector Estimators; //holds estimators to be used + void appendFiles(string, string); void createProcesses(string); int driver(Dist*, SequenceDB*, int, int, string, float); diff --git a/engine.cpp b/engine.cpp index 
d5192bf..b037137 100644 --- a/engine.cpp +++ b/engine.cpp @@ -42,9 +42,10 @@ bool InteractEngine::getInput(){ try { string input = ""; string commandName = ""; + string options = ""; int quitCommandCalled = 0; - bool errorFree; - ErrorCheck* errorCheckor = new ErrorCheck(); + //bool errorFree; + //ErrorCheck* errorCheckor = new ErrorCheck(); cout << "mothur v.1.3.0" << endl; cout << "Last updated: 5/29/2009" << endl << endl; @@ -67,18 +68,21 @@ bool InteractEngine::getInput(){ //allow user to omit the () on the quit command if (input == "quit") { input = "quit()"; } - errorFree = errorCheckor->checkInput(input); - if (errorFree == true) { - CommandOptionParser parser(input); - commandName = parser.getCommandString(); + //errorFree = errorCheckor->checkInput(input); + //if (errorFree == true) { + CommandOptionParser parser(input); + commandName = parser.getCommandString(); + options = parser.getOptionString(); + + if (commandName != "") { //executes valid command CommandFactory cFactory; - Command* command = cFactory.getCommand(commandName); + Command* command = cFactory.getCommand(commandName, options); quitCommandCalled = command->execute(); - + }else { - cout << "Your input contains errors. Please try again." << endl; + cout << "Your input contains errors. Please try again." 
<< endl; } } return 1; @@ -135,43 +139,36 @@ bool BatchEngine::getInput(){ string input = ""; string commandName = ""; - bool errorFree; - ErrorCheck* errorCheckor = new ErrorCheck(); - - CommandFactory cFactory; + string options = ""; + + //CommandFactory cFactory; int quitCommandCalled = 0; while(quitCommandCalled == 0){ - getline(inputBatchFile, input); + if (inputBatchFile.eof()) { input = "quit()"; } + else { getline(inputBatchFile, input); } + if (input[0] != '#') { - if (inputBatchFile.eof()) { input = "quit()"; } cout << endl << "mothur > " << input << endl; //allow user to omit the () on the quit command if (input == "quit") { input = "quit()"; } - errorFree = errorCheckor->checkInput(input); - if (errorFree == true) { - CommandOptionParser parser(input); - commandName = parser.getCommandString(); - ifstream filehandle; - - if (openedBatch == 0) { //able to open batchfile - //executes valid command - CommandFactory cFactory; - Command* command = cFactory.getCommand(commandName); - quitCommandCalled = command->execute(); - } - else { - cout << "Invalid." << endl; - } - } - else { - cout << "Unable to open batchfile." << endl; - } - }else { if (inputBatchFile.eof()) { input = "quit()"; } } + CommandOptionParser parser(input); + commandName = parser.getCommandString(); + options = parser.getOptionString(); + + if (commandName != "") { + + //executes valid command + CommandFactory cFactory; + Command* command = cFactory.getCommand(commandName, options); + quitCommandCalled = command->execute(); + }else { cout << "Invalid." 
<< endl; } + + } } return 1; } diff --git a/engine.hpp b/engine.hpp index d42406e..d169905 100644 --- a/engine.hpp +++ b/engine.hpp @@ -17,7 +17,6 @@ #include "commandoptionparser.hpp" #include "command.hpp" #include "commandfactory.hpp" -#include "errorchecking.h" class GlobalData; diff --git a/errorchecking.cpp b/errorchecking.cpp deleted file mode 100644 index e77c166..0000000 --- a/errorchecking.cpp +++ /dev/null @@ -1,654 +0,0 @@ -/* - * errorchecking.cpp - * Dotur - * - * Created by Sarah Westcott on 1/2/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "errorchecking.h" - -/*******************************************************/ - -/******************************************************/ - -ErrorCheck::ErrorCheck() { - globaldata = GlobalData::getInstance(); - validCommand = new ValidCommands(); - validParameter = new ValidParameters(); -} -/*******************************************************/ - -/******************************************************/ - -void ErrorCheck::refresh() { - - //columnfile = globaldata->getColumnFile(); - //phylipfile = globaldata->getPhylipFile(); - //listfile = globaldata->getListFile(); - //rabundfile = globaldata->getRabundFile(); - //sabundfile = globaldata->getSabundFile(); - //namefile = globaldata->getNameFile(); - //groupfile = globaldata->getGroupFile(); - //orderfile = globaldata->getOrderFile(); - //fastafile = globaldata->getFastaFile(); - //treefile = globaldata->getTreeFile(); - //cutoff = globaldata->getCutOff(); - //format = globaldata->getFormat(); - //method = globaldata->getMethod(); - //randomtree = globaldata->getRandomTree(); - //sharedfile = globaldata->getSharedFile(); - -} - -/*******************************************************/ - -/******************************************************/ - -ErrorCheck::~ErrorCheck() { - delete validCommand; - delete validParameter; -} - -/*******************************************************/ - 
-/******************************************************/ - -bool ErrorCheck::checkInput(string input) { - errorFree = true; - clear(); - - //refresh variable - refresh(); - - //get command name and parameters - int openParen = input.find_first_of('('); - int closeParen = input.find_last_of(')'); - - if(openParen != -1 && closeParen != -1){ - commandName = input.substr(0, openParen); //commandName contains everything before "(" - optionText = input.substr(openParen+1, closeParen-openParen-1); //optionString contains everything between "(" and ")". - }else if (openParen == -1) { //there is no parenthesis - cout << input << " is not a valid command. You are missing the ()." << endl; - return false; - } - - //is it a valid command - if (validCommand->isValidCommand(commandName) != true) { return false; } - string parameter, value; - - //reads in parameters and values - if((optionText != "") && (commandName != "help")){ - while((optionText.find_first_of(',') != -1) && (errorFree)) { //while there are parameters - splitAtComma(value, optionText); - splitAtEquals(parameter, value); - - //is it a valid parameter - if (validParameter->isValidParameter(parameter, commandName, value) != true) { return false; } - - if (parameter == "phylip" ) { phylipfile = value; } - if (parameter == "column" ) { columnfile = value; } - if (parameter == "list" ) { listfile = value; } - if (parameter == "rabund" ) { rabundfile = value; } - if (parameter == "sabund" ) { sabundfile = value; } - if (parameter == "name" ) { namefile = value; } - if (parameter == "order" ) { orderfile = value; } - if (parameter == "fasta" ) { fastafile = value; } - if (parameter == "tree" ) { treefile = value; } - if (parameter == "group" ) { groupfile = value; } - if (parameter == "shared" ) { sharedfile = value; } - if (parameter == "cutoff" ) { cutoff = value; } - if (parameter == "precision" ) { precision = value; } - if (parameter == "iters" ) { iters = value; } - if (parameter == "jumble" ) { jumble = value; 
} - if (parameter == "freq" ) { freq = value; } - if (parameter == "method" ) { method = value; } - if (parameter == "fileroot" ) { fileroot = value; } - if (parameter == "line" ) { line = value; } - if (parameter == "label" ) { label = value; } - if (parameter == "abund" ) { abund = value; } - if (parameter == "random" ) { randomtree = value; } - if (parameter == "sorted" ) { sorted = value; } - if (parameter == "trump" ) { trump = value; } - if (parameter == "soft" ) { soft = value; } - if (parameter == "filter" ) { filter = value; } - if (parameter == "scale" ) { scale = value; } - if (parameter == "countends" ) { countends = value; } - if (parameter == "processors" ) { processors = value; } - if (parameter == "size" ) { size = value; } - if (parameter == "candidate") { candidatefile = value; } - if (parameter == "search") { search = value; } - if (parameter == "ksize") { ksize = value; } - if (parameter == "align") { align = value; } - if (parameter == "match") { match = value; } - if (parameter == "mismatch") { mismatch = value; } - if (parameter == "gapopen") { gapopen = value; } - if (parameter == "gapextend" ) { gapextend = value; } - } - - //gets the last parameter and value - if (errorFree) { //gets the last parameter and value - value = optionText; - splitAtEquals(parameter, value); - //is it a valid parameter - if (validParameter->isValidParameter(parameter, commandName, value) != true) { return false; } - - - if (parameter == "phylip" ) { phylipfile = value; } - if (parameter == "column" ) { columnfile = value; } - if (parameter == "list" ) { listfile = value; } - if (parameter == "rabund" ) { rabundfile = value; } - if (parameter == "sabund" ) { sabundfile = value; } - if (parameter == "name" ) { namefile = value; } - if (parameter == "order" ) { orderfile = value; } - if (parameter == "group" ) { groupfile = value; } - if (parameter == "shared" ) { sharedfile = value; } - if (parameter == "fasta" ) { fastafile = value; } - if (parameter == "tree" ) { 
treefile = value; } - if (parameter == "cutoff" ) { cutoff = value; } - if (parameter == "precision" ) { precision = value; } - if (parameter == "iters" ) { iters = value; } - if (parameter == "jumble" ) { jumble = value; } - if (parameter == "freq" ) { freq = value; } - if (parameter == "method" ) { method = value; } - if (parameter == "fileroot" ) { fileroot = value; } - if (parameter == "line" ) { line = value; } - if (parameter == "label" ) { label = value; } - if (parameter == "random" ) { randomtree = value; } - if (parameter == "abund" ) { abund = value; } - if (parameter == "sorted" ) { sorted = value; } - if (parameter == "trump" ) { trump = value; } - if (parameter == "soft" ) { soft = value; } - if (parameter == "filter" ) { filter = value; } - if (parameter == "scale" ) { scale = value; } - if (parameter == "countends" ) { countends = value; } - if (parameter == "processors" ) { processors = value; } - if (parameter == "size" ) { size = value; } - if (parameter == "candidate") { candidatefile = value; } - if (parameter == "search") { search = value; } - if (parameter == "ksize") { ksize = value; } - if (parameter == "align") { align = value; } - if (parameter == "match") { match = value; } - if (parameter == "mismatch") { mismatch = value; } - if (parameter == "gapopen") { gapopen = value; } - if (parameter == "gapextend" ) { gapextend = value; } - - } - } - - //make sure the user does not use both the line and label parameters - if ((line != "") && (label != "")) { cout << "You may use either the line or label parameters, but not both." 
<< endl; return false; } - - //check for valid files - if (commandName == "read.dist") { - validateReadFiles(); - validateReadDist(); - }else if (commandName == "read.otu") { - //you want to do shared commands - if ((listfile != "") && (groupfile != "")) { - validateParseFiles(); //checks the listfile and groupfile parameters - //you want to do single commands - }else if ((listfile != "") || (rabundfile != "") || (sabundfile != "")){ - validateReadFiles(); - validateReadPhil(); - //you have not given a file - }else if ((listfile == "") && (sharedfile == "") && (rabundfile == "") && (sabundfile == "")) { - cout << "You must enter either a listfile, rabundfile, sabundfile or a sharedfile with the read.otu command. " << endl; return false; - //you want to do shared commands with a shared file - }else if (sharedfile != "") {//you are reading a shared file - validateReadFiles(); - } - }else if (commandName == "read.tree") { - validateTreeFiles(); //checks the treefile and groupfile parameters - }else if (commandName == "unique.seqs") { - if (fastafile == "") { cout << "You must enter a fastafile with the unique.seqs() command." << endl; return false; } - validateReadFiles(); - } - - //are you trying to cluster before you have read something - if (((commandName == "cluster") && (globaldata->gSparseMatrix == NULL)) || - ((commandName == "cluster") && (globaldata->gListVector == NULL))) { - cout << "Before you use the cluster command, you first need to read in a distance matrix." << endl; - errorFree = false; - } - - if ((commandName == "libshuff") && ((globaldata->gMatrix == NULL) || (globaldata->gGroupmap == NULL))) { - cout << "You must read in a matrix and groupfile using the read.dist command, before you use the libshuff command. 
" << endl; return false; - } - - if (commandName == "parsimony") { - //are you trying to use parsimony without reading a tree or saying you want random distribution - if (randomtree == "") { - if (globaldata->gTree.size() == 0) { - cout << "You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command." << endl; return false; } - } - } - - if ((commandName == "unifrac.weighted") || (commandName == "unifrac.unweighted") || (commandName == "concensus")) { - if (globaldata->gTree.size() == 0) {//no trees were read - cout << "You must execute the read.tree command, before you may execute the unifrac.weighted, unifrac.unweighted or concensus command." << endl; return false; } - } - - //check for valid method - if(commandName == "get.group") { - if ((globaldata->getSharedFile() == "")) { cout << "You must read a groupfile or a sharedfile before you can use the get.group command." << endl; return false; } - } - if (commandName == "get.label" || commandName == "get.line") { - if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { cout << "You must read a list, sabund or rabund before you can use the get.label or get.line command." << endl; return false; } - } - if (commandName == "cluster") { - if ((method == "furthest") || (method == "nearest") || (method == "average")) { } - else {cout << "Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average." << endl; return false; } - } - - if ((commandName == "collect.single") || (commandName == "rarefaction.single") || (commandName == "summary.single")){ - if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { cout << "You must read a list, sabund or rabund before you can use the collect.single, rarefaction.single or summary.single commands." 
<< endl; return false; } - } - - if (commandName == "get.rabund") { - if (globaldata->getListFile() == "") { cout << "You must read a listfile before you can use the get.rabund command." << endl; return false; } - } - - if (commandName == "get.sabund") { - if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "")) { cout << "You must read a list or rabund before you can use the get.sabund command." << endl; return false; } - } - - if ((commandName == "collect.shared") || (commandName == "rarefaction.shared") || (commandName == "summary.shared") || (commandName == "bootstrap.shared") || (commandName == "dist.shared")){ - if (globaldata->getSharedFile() == "") { - if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared, rarefaction.shared, summary.shared, tree.shared, bootstrap.shared or dist.shared commands." << endl; return false; } - else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared, rarefaction.shared, summary.shared, tree.shared, bootstrap.shared or dist.shared commands." << endl; return false; } - } - } - - if (commandName == "tree.shared") { - //given no files - if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == ""))) { cout << "You must run the read.otu command or provide a distance file before running the tree.shared command." << endl; return false; } - //you want to do single commands - else if ((globaldata->getSharedFile() == "") && ((phylipfile != "") || (columnfile != ""))) { - validateReadDist(); - } - } - - if ((commandName == "heatmap.bin") || (commandName == "venn") || (commandName == "heatmap.sim")) { - if ((globaldata->getListFile() == "") && (globaldata->getSharedFile() == "")) { - cout << "You must read a list, or a list and a group, or a shared before you can use the heatmap.bin, heatmap.sim or venn commands." 
<< endl; return false; - } - } - - if (commandName == "filter.seqs") { - if (fastafile == "") { - cout << "You must enter either a fasta file before you can use the filter.seqs command." << endl; return false; - } - validateReadFiles(); - } - - if (commandName == "dist.seqs") { - if (fastafile == "") { - cout << "You must enter either a fasta file before you can use the dist.seqs command." << endl; return false; - }else { - ifstream filehandle; - int ableToOpen = openInputFile(fastafile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { return false; } - } - } - - if (commandName == "align.seqs") { - if ((fastafile == "") || (candidatefile == "")) { - cout << "You must enter fasta and a candidate file to use the align.seqs command." << endl; return false; - } - validateReadFiles(); - - ifstream filehandle; - int ableToOpen = openInputFile(candidatefile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { return false; } - } - - if ((commandName == "bin.seqs")) { - if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the bin.seqs commands." << endl; return false; } - validateBinFiles(); - } - - - if ((commandName == "get.oturep")) { - if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) { - cout << "Before you use the get.oturep command, you first need to read in a distance matrix." << endl; - errorFree = false; - } - if (listfile == "") { cout << "list is a required parameter for the get.oturep command." << endl; errorFree = false; } - if (fastafile == "") { cout << "fasta is a required parameter for the get.oturep command." << endl; errorFree = false; } - validateBinFiles(); - } - - - return errorFree; -} - -/*******************************************************/ - -/******************************************************/ -//This function checks to make sure the user entered a file to -// read and that the file exists and can be opened. 
-void ErrorCheck::validateReadFiles() { - try { - //Validating files for read - ifstream filehandle; - int ableToOpen; - - //are we reading a phylipfile - if (phylipfile != "") { - ableToOpen = openInputFile(phylipfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - else { globaldata->inputFileName = phylipfile; } - //are we reading a columnfile - }else if (columnfile != "") { - ableToOpen = openInputFile(columnfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - else { globaldata->inputFileName = columnfile; } - //are we reading a listfile - }else if (listfile!= "") { - ableToOpen = openInputFile(listfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - else { globaldata->inputFileName = listfile; } - //are we reading a rabundfile - }else if (rabundfile != "") { - ableToOpen = openInputFile(rabundfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - else { globaldata->inputFileName = rabundfile; } - //are we reading a sabundfile - }else if (sabundfile != "") { - ableToOpen = openInputFile(sabundfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - else { globaldata->inputFileName = sabundfile; } - }else if (fastafile != "") { - ableToOpen = openInputFile(fastafile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - else { globaldata->inputFileName = fastafile; } - }else if (sharedfile != "") { - ableToOpen = openInputFile(sharedfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - else { globaldata->inputFileName = sharedfile; } - }else if (groupfile != "") { - ableToOpen = openInputFile(groupfile, filehandle); - filehandle.close(); - if (ableToOpen == 1) { //unable to open - errorFree = false; 
- } - }else{ //no file given - errorFree = false; - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateReadFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ErrorCheck class function validateReadFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - -} -/*******************************************************/ - -/******************************************************/ -//This function checks to make sure the user entered appropriate -// format parameters on a distfile read -void ErrorCheck::validateReadDist() { - try { - ifstream filehandle; - int ableToOpen; - - if (groupfile != "") { - ableToOpen = openInputFile(groupfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - } - - if ((phylipfile == "") && (columnfile == "")) { cout << "When executing a read.dist or a tree.shared command with a distance file you must enter a phylip or a column." << endl; errorFree = false; } - else if ((phylipfile != "") && (columnfile != "")) { cout << "When executing a read.dist or a tree.shared command with a distance file you must enter ONLY ONE of the following: phylip or column." << endl; errorFree = false; } - - if (columnfile != "") { - if (namefile == "") { - cout << "You need to provide a namefile if you are going to use the column format." << endl; - errorFree = false; - }else { - ableToOpen = openInputFile(namefile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateReadDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) 
{ - cout << "An unknown error has occurred in the ErrorCheck class function validateReadDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} -/*******************************************************/ - -/******************************************************/ -//This function checks to make sure the user entered appropriate -// format parameters on a parselistcommand -void ErrorCheck::validateParseFiles() { - try { - ifstream filehandle; - int ableToOpen; - - //checks for valid files - - if (listfile == "") { cout << "When executing a read.otu for groups you must enter a list and a group." << endl; errorFree = false; } - else if (groupfile == "") { cout << "When executing a read.otu for groups you must enter a list and a group." << endl; errorFree = false; } - - //checks parameters on the read command - if (listfile != "") { - ableToOpen = openInputFile(listfile, filehandle); - filehandle.close(); - if (ableToOpen == 1) { //unable to open - errorFree = false; - } - if (groupfile != "") { - ableToOpen = openInputFile(groupfile, filehandle); - filehandle.close(); - if (ableToOpen == 1) { //unable to open - errorFree = false; - } - } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateReadPhil. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ErrorCheck class function validateReadPhil. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; - exit(1); - } -} -/*******************************************************/ - -/******************************************************/ -//This function checks to make sure the user entered appropriate -// format parameters on a parselistcommand -void ErrorCheck::validateTreeFiles() { - try { - ifstream filehandle; - int ableToOpen; - - //checks for valid files - - if (treefile == "") { cout << "When executing a read.tree you must enter a treefile and a groupfile." << endl; errorFree = false; } - else if (groupfile == "") { cout << "When executing a read.tree you must enter a treefile and a groupfile." << endl; errorFree = false; } - - //checks parameters on the read command - if (treefile != "") { - ableToOpen = openInputFile(treefile, filehandle); - filehandle.close(); - if (ableToOpen == 1) { //unable to open - errorFree = false; - } - if (groupfile != "") { - ableToOpen = openInputFile(groupfile, filehandle); - filehandle.close(); - if (ableToOpen == 1) { //unable to open - errorFree = false;; - } - } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateTreeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ErrorCheck class function validateTreeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; - exit(1); - } -} - -/*******************************************************/ - -/******************************************************/ -//This function checks to make sure the user entered appropriate -// format parameters on a distfile read -void ErrorCheck::validateReadPhil() { - try { - ifstream filehandle; - int ableToOpen; - - //checks to make sure only one file type is given - if (listfile != "") { - if ((rabundfile != "") || (sabundfile != "")) { - cout << "When executing a read.otu you must enter ONLY ONE of the following: list, rabund or sabund." << endl; errorFree = false; } - }else if (rabundfile != "") { - if ((listfile != "") || (sabundfile != "")) { - cout << "When executing a read.otu you must enter ONLY ONE of the following: list, rabund or sabund." << endl; errorFree = false; } - }else if (sabundfile != "") { - if ((listfile != "") || (rabundfile != "")) { - cout << "When executing a read.otu you must enter ONLY ONE of the following: list, rabund or sabund." << endl; errorFree = false; } - }else if ((listfile == "") && (rabundfile == "") && (sabundfile == "") && (sharedfile == "")) { - cout << "When executing a read.otu you must enter one of the following: list, rabund or sabund." << endl; errorFree = false; - } - - //checks parameters on the read command - if (orderfile != "") { - ableToOpen = openInputFile(orderfile, filehandle); - filehandle.close(); - if (ableToOpen == 1) { //unable to open - errorFree = false; - } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateReadPhil. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ErrorCheck class function validateReadPhil. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; - exit(1); - } -} -/*******************************************************/ - -/******************************************************/ -//This function checks to make sure the user entered appropriate -// format parameters on a bin.seq command -void ErrorCheck::validateBinFiles() { - try { - ifstream filehandle; - int ableToOpen; - - if (fastafile == "") { - cout << "fasta is a required parameter for bin.seqs, get.oturep and get.repseqs commands." << endl; errorFree = false; - }else if (fastafile != "") { - //is it a valid filename' - ableToOpen = openInputFile(fastafile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - }else if (listfile != "") { - //is it a valid filename' - ableToOpen = openInputFile(listfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - }else if (globaldata->getNameFile() != "") { - //is it a valid filename' - ifstream filehandle; - int ableToOpen = openInputFile(globaldata->getNameFile(), filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - }else if (namefile != "") { - //is it a valid filename' - ifstream filehandle; - int ableToOpen = openInputFile(namefile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - }else if (groupfile != "") { - //is it a valid filename' - ifstream filehandle; - int ableToOpen = openInputFile(groupfile, filehandle); - filehandle.close(); - //unable to open - if (ableToOpen == 1) { errorFree = false; } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ErrorCheck class function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; - exit(1); - } -} - -/*******************************************************/ - -/******************************************************/ - -void ErrorCheck::clear() { - //option definitions should go here... - phylipfile = ""; - columnfile = ""; - listfile = ""; - rabundfile = ""; - sabundfile = ""; - namefile = ""; - groupfile = ""; - orderfile = ""; - sharedfile = ""; - fastafile = ""; - candidatefile = ""; - line = ""; - label = ""; - method = "furthest"; -} -/*******************************************************/ - -/******************************************************/ - diff --git a/errorchecking.h b/errorchecking.h deleted file mode 100644 index 7f9e458..0000000 --- a/errorchecking.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef ERRORCHECKING_H -#define ERRORCHECKING_H -/* - * errorchecking.h - * Dotur - * - * Created by Sarah Westcott on 1/2/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "mothur.h" -#include "globaldata.hpp" -#include "validcommands.h" -#include "validparameter.h" - - -class ErrorCheck { - public: - ErrorCheck(); - ~ErrorCheck(); - bool checkInput(string); - - private: - GlobalData* globaldata; - ValidCommands* validCommand; - ValidParameters* validParameter; - void validateReadFiles(); - void validateReadDist(); - void validateReadPhil(); - void validateParseFiles(); - void validateTreeFiles(); - void validateBinFiles(); - void clear(); - void refresh(); - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, cutoff, format; - string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, trump, soft, filter, scale, countends, processors, size; - string candidatefile, search, ksize, align, match, mismatch, gapopen, gapextend; - string commandName, optionText; - bool errorFree; - - vector sharedGroups; -}; -#endif diff --git a/fileoutput.cpp 
b/fileoutput.cpp index d4db546..4347c12 100644 --- a/fileoutput.cpp +++ b/fileoutput.cpp @@ -160,14 +160,14 @@ void ColumnFile::output(vector data){ string inputBuffer; getline(inFile, inputBuffer); - outFile << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()); + outFile << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(iters.length()); for (int i = 1; i< data.size(); i++) { outFile << '\t' << data[i]; } outFile << endl; } else{ - outFile << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()); + outFile << setprecision(6) << data[0] << setprecision(iters.length()); for (int i = 1; i< data.size(); i++) { outFile << '\t' << data[i]; } diff --git a/fileoutput.h b/fileoutput.h index 359946c..209dd1b 100644 --- a/fileoutput.h +++ b/fileoutput.h @@ -127,7 +127,7 @@ private: class ColumnFile : public FileOutput { public: - ColumnFile(string n) : FileOutput(), inName(n), counter(0), outName(getPathName(n) + ".temp") { globaldata = GlobalData::getInstance(); }; + ColumnFile(string n, string i) : FileOutput(), iters(i), inName(n), counter(0), outName(getPathName(n) + ".temp") { globaldata = GlobalData::getInstance(); }; ~ColumnFile(); //to make compatible with parent class @@ -144,6 +144,7 @@ private: ifstream inFile; ofstream outFile; int counter; + string iters; }; /***********************************************************************/ diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index e024a24..1363216 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -11,25 +11,102 @@ /**************************************************************************************/ -FilterSeqsCommand::FilterSeqsCommand(){ - - globaldata = GlobalData::getInstance(); +FilterSeqsCommand::FilterSeqsCommand(string option){ + try { + globaldata = GlobalData::getInstance(); + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + 
//valid paramters for this command + string Array[] = {"fasta", "trump", "soft", "hard", "vertical"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + fastafile = validParameter->validFile(parameters, "fasta", true); + if (fastafile == "not found") { cout << "fasta is a required parameter for the filter.seqs command." << endl; abort = true; } + else if (fastafile == "not open") { abort = true; } + else { + globaldata->setFastaFile(fastafile); + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + + string temp; + temp = validParameter->validFile(parameters, "trump", false); if (temp == "not found") { temp = "."; } + trump = temp[0]; + + temp = validParameter->validFile(parameters, "soft", false); if (temp == "not found") { soft = 0; } + else { soft = (float)atoi(temp.c_str()) / 100.0; } + + hard = validParameter->validFile(parameters, "hard", true); if (hard == "not found") { hard = ""; } + else if (hard == "not open") { abort = true; } + + vertical = validParameter->validFile(parameters, "vertical", false); if (vertical == "not found") { vertical = "F"; } - if(globaldata->getFastaFile() == "") { cout << "You must enter a fasta formatted file" << endl; } - trump = globaldata->getTrump()[0]; - numSeqs = 0; + delete validParameter; + + numSeqs = 0; + + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function FilterSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FilterSeqsCommand class function FilterSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** +void FilterSeqsCommand::help(){ + try { + cout << "The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file." << "\n"; + cout << "The filter.seqs command parameters are fasta, trump, soft, hard and vertical. " << "\n"; + cout << "The fasta parameter is required." << "\n"; + cout << "The trump parameter .... The default is '.'" << "\n"; + cout << "The soft parameter .... The default is ...." << "\n"; + cout << "The hard parameter .... The default is ...." << "\n"; + cout << "The vertical parameter .... The default is F." << "\n"; + cout << "The filter.seqs command should be in the following format: " << "\n"; + cout << "filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) " << "\n"; + cout << "Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T)." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FilterSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } } /**************************************************************************************/ void FilterSeqsCommand::doHard() { - string hardName = globaldata->getHard(); - string hardFilter = ""; - ifstream fileHandle; - openInputFile(hardName, fileHandle); + openInputFile(hard, fileHandle); fileHandle >> filter; @@ -96,33 +173,33 @@ void FilterSeqsCommand::getFreqs(Sequence seq) { int FilterSeqsCommand::execute() { try { + + if (abort == true) { return 0; } + ifstream inFASTA; - openInputFile(globaldata->getFastaFile(), inFASTA); + openInputFile(fastafile, inFASTA); Sequence testSeq(inFASTA); alignmentLength = testSeq.getAlignLength(); inFASTA.seekg(0); - if(globaldata->getSoft() != "" || isTrue(globaldata->getVertical())){ + if(soft != 0 || isTrue(vertical)){ a.assign(alignmentLength, 0); t.assign(alignmentLength, 0); g.assign(alignmentLength, 0); c.assign(alignmentLength, 0); gap.assign(alignmentLength, 0); } - if(globaldata->getSoft() != ""){ - soft = (float)atoi(globaldata->getSoft().c_str()) / 100.0; - } - if(globaldata->getHard().compare("") != 0) { doHard(); } - else { filter = string(alignmentLength, '1'); } + if(hard.compare("") != 0) { doHard(); } + else { filter = string(alignmentLength, '1'); } - if(globaldata->getTrump().compare("") != 0 || isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){ + if(isTrue(vertical) || soft != 0){ while(!inFASTA.eof()){ Sequence seq(inFASTA); - if(globaldata->getTrump().compare("") != 0) { doTrump(seq); } - if(isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){ getFreqs(seq); } + doTrump(seq); + if(isTrue(vertical) || soft != 0){ getFreqs(seq); } numSeqs++; cout.flush(); } @@ -130,18 +207,18 @@ int FilterSeqsCommand::execute() { } inFASTA.close(); - if(isTrue(globaldata->getVertical()) == 1) { doVertical(); } - if(globaldata->getSoft().compare("") != 0) { doSoft(); } + if(isTrue(vertical) == 1) { doVertical(); } + if(soft != 0) { doSoft(); } 
ofstream outFilter; - string filterFile = getRootName(globaldata->inputFileName) + "filter"; + string filterFile = getRootName(fastafile) + "filter"; openOutputFile(filterFile, outFilter); outFilter << filter << endl; outFilter.close(); - openInputFile(globaldata->getFastaFile(), inFASTA); - string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta"; + openInputFile(fastafile, inFASTA); + string filteredFasta = getRootName(fastafile) + "filter.fasta"; ofstream outFASTA; openOutputFile(filteredFasta, outFASTA); diff --git a/filterseqscommand.h b/filterseqscommand.h index 17f1b33..3f1e4c4 100644 --- a/filterseqscommand.h +++ b/filterseqscommand.h @@ -17,9 +17,10 @@ class FilterSeqsCommand : public Command { public: - FilterSeqsCommand(); + FilterSeqsCommand(string); ~FilterSeqsCommand() {}; int execute(); + void help(); private: void doHard(); @@ -27,13 +28,16 @@ private: void doVertical(); void doSoft(); void getFreqs(Sequence); - string filter; + string vertical, filter, fastafile, hard; int alignmentLength; char trump; - bool vertical; + bool abort; float soft; int numSeqs; + OptionParser* parser; + map parameters; + map::iterator it; GlobalData* globaldata; vector a, t, g, c, gap; diff --git a/getgroupcommand.cpp b/getgroupcommand.cpp index 4d43959..ea6ef17 100644 --- a/getgroupcommand.cpp +++ b/getgroupcommand.cpp @@ -9,18 +9,31 @@ #include "getgroupcommand.h" - -GetgroupCommand::GetgroupCommand(){ +//********************************************************************************************************************** +GetgroupCommand::GetgroupCommand(string option){ try { globaldata = GlobalData::getInstance(); + abort = false; - //open shared file - sharedfile = globaldata->getSharedFile(); - openInputFile(sharedfile, in); + //allow user to run help + if(option == "help") { help(); abort = true; } - //open output file - outputFile = getRootName(globaldata->inputFileName) + "bootGroups"; - openOutputFile(outputFile, out); + else { + if 
(option != "") { cout << "There are no valid parameters for the get.group command." << endl; abort = true; } + + if ((globaldata->getSharedFile() == "")) { cout << "You must use the read.otu command to read a groupfile or a sharedfile before you can use the get.group command." << endl; abort = true; } + + if (abort == false) { + //open shared file + sharedfile = globaldata->getSharedFile(); + openInputFile(sharedfile, in); + + //open output file + outputFile = getRootName(sharedfile) + "bootGroups"; + openOutputFile(outputFile, out); + + } + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the GetgroupCommand class Function GetgroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -32,6 +45,27 @@ GetgroupCommand::GetgroupCommand(){ } } +//********************************************************************************************************************** + +void GetgroupCommand::help(){ + try { + cout << "The get.group command can only be executed after a successful read.otu command." << "\n"; + //cout << "The get.group command outputs a .bootGroups file to you can use in addition to the tree file generated by the bootstrap.shared command to run the concensus command." << "\n"; + cout << "You may not use any parameters with the get.group command." << "\n"; + cout << "The get.group command should be in the following format: " << "\n"; + cout << "get.group()" << "\n"; + cout << "Example get.group()." << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetgroupCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetgroupCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -42,6 +76,9 @@ GetgroupCommand::~GetgroupCommand(){ int GetgroupCommand::execute(){ try { + + if (abort == true) { return 0; } + int num, inputData, count; count = 0; string holdLabel, nextLabel, groupN, label; diff --git a/getgroupcommand.h b/getgroupcommand.h index 496d0c9..bc805bd 100644 --- a/getgroupcommand.h +++ b/getgroupcommand.h @@ -15,9 +15,10 @@ class GetgroupCommand : public Command { public: - GetgroupCommand(); + GetgroupCommand(string); ~GetgroupCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -25,6 +26,7 @@ private: string outputFile, sharedfile; ofstream out; ifstream in; + bool abort; }; diff --git a/getlabelcommand.cpp b/getlabelcommand.cpp index 6d92b8e..e0c1720 100644 --- a/getlabelcommand.cpp +++ b/getlabelcommand.cpp @@ -9,11 +9,22 @@ #include "getlabelcommand.h" +//********************************************************************************************************************** - -GetlabelCommand::GetlabelCommand(){ +GetlabelCommand::GetlabelCommand(string option){ try { globaldata = GlobalData::getInstance(); + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + if (option != "") { cout << "There are no valid parameters for the get.label command." << endl; abort = true; } + + if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { cout << "You must read a list, sabund or rabund before you can use the get.label command." << endl; abort = true; } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the GetlabelCommand class Function GetlabelCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; @@ -25,6 +36,25 @@ GetlabelCommand::GetlabelCommand(){ } } +//********************************************************************************************************************** + +void GetlabelCommand::help(){ + try { + cout << "The get.label command can only be executed after a successful read.otu command." << "\n"; + cout << "You may not use any parameters with the get.label command." << "\n"; + cout << "The get.label command should be in the following format: " << "\n"; + cout << "get.label()" << "\n"; + cout << "Example get.label()." << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetlabelCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetlabelCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -35,6 +65,9 @@ GetlabelCommand::~GetlabelCommand(){ int GetlabelCommand::execute(){ try { + + if (abort == true) { return 0; } + filename = globaldata->inputFileName; ifstream in; openInputFile(filename, in); diff --git a/getlabelcommand.h b/getlabelcommand.h index ec0692c..326d894 100644 --- a/getlabelcommand.h +++ b/getlabelcommand.h @@ -17,13 +17,15 @@ class GlobalData; class GetlabelCommand : public Command { public: - GetlabelCommand(); + GetlabelCommand(string); ~GetlabelCommand(); int execute(); + void help(); private: GlobalData* globaldata; string filename; + bool abort; }; #endif \ No newline at end of file diff --git a/getlinecommand.cpp b/getlinecommand.cpp index df14f51..5f459bf 100644 --- a/getlinecommand.cpp +++ b/getlinecommand.cpp @@ -9,10 +9,21 @@ #include "getlinecommand.h" - -GetlineCommand::GetlineCommand(){ 
+//********************************************************************************************************************** +GetlineCommand::GetlineCommand(string option){ try { globaldata = GlobalData::getInstance(); + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + if (option != "") { cout << "There are no valid parameters for the get.line command." << endl; abort = true; } + + if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { cout << "You must read a list, sabund or rabund before you can use the get.line command." << endl; abort = true; } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the GetlineCommand class Function GetlineCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -24,6 +35,25 @@ GetlineCommand::GetlineCommand(){ } } +//********************************************************************************************************************** + +void GetlineCommand::help(){ + try { + cout << "The get.line command can only be executed after a successful read.otu command." << "\n"; + cout << "You may not use any parameters with the get.line command." << "\n"; + cout << "The get.line command should be in the following format: " << "\n"; + cout << "get.line()" << "\n"; + cout << "Example get.line()." << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetlineCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetlineCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -34,6 +64,9 @@ GetlineCommand::~GetlineCommand(){ int GetlineCommand::execute(){ try { + + if (abort == true) { return 0; } + filename = globaldata->inputFileName; ifstream in; openInputFile(filename, in); diff --git a/getlinecommand.h b/getlinecommand.h index 2eda1cc..915a09a 100644 --- a/getlinecommand.h +++ b/getlinecommand.h @@ -17,13 +17,15 @@ class GlobalData; class GetlineCommand : public Command { public: - GetlineCommand(); + GetlineCommand(string); ~GetlineCommand(); int execute(); + void help(); private: GlobalData* globaldata; string filename; + bool abort; }; #endif diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index eb88fbf..92aab92 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -10,45 +10,122 @@ #include "getoturepcommand.h" //********************************************************************************************************************** -GetOTURepCommand::GetOTURepCommand(){ +GetOTURepCommand::GetOTURepCommand(string option){ try{ globaldata = GlobalData::getInstance(); - - if(globaldata->gSparseMatrix != NULL) { matrix = new SparseMatrix(*globaldata->gSparseMatrix); } + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + + //allow user to run help + if(option == "help") { help(); abort = true; } - //listOfNames bin 0 = first name read in distance matrix, listOfNames bin 1 = second name read in distance matrix - if(globaldata->gListVector != NULL) { - listOfNames = new ListVector(*globaldata->gListVector); + else { + //valid paramters for this command + string Array[] = {"fasta","list","line","label","name", "group"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - vector names; - string binnames; - //map names to rows in sparsematrix - for (int i = 0; i < listOfNames->size(); i++) { - names.clear(); - binnames = listOfNames->get(i); 
- splitAtComma(binnames, names); - - for (int j = 0; j < names.size(); j++) { - nameToIndex[names[j]] = i; - } + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it4 = parameters.begin(); it4 != parameters.end(); it4++) { + if (validParameter->isValidParameter(it4->first, myArray, it4->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) { + cout << "Before you use the get.oturep command, you first need to read in a distance matrix." << endl; + abort = true; + } + + //check for required parameters + fastafile = validParameter->validFile(parameters, "fasta", true); + if (fastafile == "not found") { cout << "fasta is a required parameter for the get.oturep command." << endl; abort = true; } + else if (fastafile == "not open") { abort = true; } + else { + globaldata->setFastaFile(fastafile); + } + + listfile = validParameter->validFile(parameters, "list", true); + if (listfile == "not found") { cout << "list is a required parameter for the get.oturep command." << endl; abort = true; } + else if (listfile == "not open") { abort = true; } + else { + globaldata->setListFile(listfile); } - }else { cout << "error, no listvector." << endl; } + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if ((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + namesfile = validParameter->validFile(parameters, "name", true); + if (namesfile == "not open") { abort = true; } + else if (namesfile == "not found") { namesfile = ""; } + + groupfile = validParameter->validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { + //read in group map info. 
+ groupMap = new GroupMap(groupfile); + groupMap->readMap(); + } + + delete validParameter; + + if (abort == false) { + + if(globaldata->gSparseMatrix != NULL) { matrix = new SparseMatrix(*globaldata->gSparseMatrix); } + + //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix + if(globaldata->gListVector != NULL) { - fastafile = globaldata->getFastaFile(); - namesfile = globaldata->getNameFile(); - groupfile = globaldata->getGroupFile(); + vector names; + string binnames; + //map names to rows in sparsematrix + for (int i = 0; i < globaldata->gListVector->size(); i++) { + names.clear(); + binnames = globaldata->gListVector->get(i); + + splitAtComma(binnames, names); + + for (int j = 0; j < names.size(); j++) { + nameToIndex[names[j]] = i; + } + + } + }else { cout << "error, no listvector." << endl; } + + openInputFile(fastafile, in); + fasta = new FastaMap(); + } - if (groupfile != "") { - //read in group map info. - groupMap = new GroupMap(groupfile); - groupMap->readMap(); } - - openInputFile(fastafile, in); - - fasta = new FastaMap(); - + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -59,6 +136,31 @@ GetOTURepCommand::GetOTURepCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void GetOTURepCommand::help(){ + try { + cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n"; + cout << "The get.oturep command parameters are list, fasta, name, group, line and label. The fasta and list parameters are required, and you may not use line and label at the same time." 
<< "\n"; + cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; + cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; + cout << "The default value for line and label are all lines in your inputfile." << "\n"; + cout << "The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin." << "\n"; + cout << "If you provide a groupfile, then it also appends the names of the groups present in that bin." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetOTURepCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + //********************************************************************************************************************** GetOTURepCommand::~GetOTURepCommand(){ @@ -74,6 +176,9 @@ GetOTURepCommand::~GetOTURepCommand(){ int GetOTURepCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; int error; @@ -89,7 +194,7 @@ int GetOTURepCommand::execute(){ } //read list file - read = new ReadOTUFile(globaldata->getListFile()); + read = new ReadOTUFile(listfile); read->read(&*globaldata); input = globaldata->ginput; @@ -98,13 +203,13 @@ int GetOTURepCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; - while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(list->getLabel()) == 1){ cout << list->getLabel() << '\t' << count << endl; error = process(list); if (error == 1) { return 0; } //there is an error in hte input files, abort command @@ -304,7 +409,7 @@ int GetOTURepCommand::process(ListVector* processList) { string nameRep, name, sequence; //create output file - string outputFileName = getRootName(globaldata->getListFile()) + processList->getLabel() + ".rep.fasta"; + string outputFileName = getRootName(listfile) + processList->getLabel() + ".rep.fasta"; openOutputFile(outputFileName, out); //for each bin in the list vector diff --git a/getoturepcommand.h b/getoturepcommand.h index 8153c36..f413d4f 100644 --- a/getoturepcommand.h +++ b/getoturepcommand.h @@ -27,25 +27,29 
@@ typedef list::iterator MatData; class GetOTURepCommand : public Command { public: - GetOTURepCommand(); + GetOTURepCommand(string); ~GetOTURepCommand(); - int execute(); + int execute(); + void help(); private: GlobalData* globaldata; SparseMatrix* matrix; ListVector* list; - ListVector* listOfNames; ReadOTUFile* read; InputData* input; FastaMap* fasta; GroupMap* groupMap; - string filename, fastafile, namesfile, groupfile; + string filename, fastafile, listfile, namesfile, groupfile, line, label; ofstream out; ifstream in, inNames; bool groupError; - - + OptionParser* parser; + map parameters; + map::iterator it4; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used map nameToIndex; //maps sequence name to index in sparsematrix map::iterator it; map::iterator it2; diff --git a/getrabundcommand.cpp b/getrabundcommand.cpp index b0f7a92..9236ba0 100644 --- a/getrabundcommand.cpp +++ b/getrabundcommand.cpp @@ -11,12 +11,68 @@ //********************************************************************************************************************** -GetRAbundCommand::GetRAbundCommand(){ +GetRAbundCommand::GetRAbundCommand(string option){ try { globaldata = GlobalData::getInstance(); - filename = getRootName(globaldata->inputFileName) + "rabund"; + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); - openOutputFile(filename, out); + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"line","label"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = 
true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getListFile() == "") { cout << "You must read a listfile before you can use the get.rabund command." << endl; abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + delete validParameter; + + if (abort == false) { + filename = getRootName(globaldata->inputFileName) + "rabund"; + openOutputFile(filename, out); + } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the GetRAbundCommand class Function GetRAbundCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -28,6 +84,28 @@ GetRAbundCommand::GetRAbundCommand(){ } } +//********************************************************************************************************************** + +void GetRAbundCommand::help(){ + try { + cout << "The get.rabund command can only be executed after a successful read.otu of a listfile." << "\n"; + cout << "The get.rabund command parameters are line and label. 
No parameters are required, and you may not use line and label at the same time." << "\n"; + cout << "The line and label allow you to select what distance levels you would like included in your .rabund file, and are separated by dashes." << "\n"; + cout << "The get.rabund command should be in the following format: get.rabund(line=yourLines, label=yourLabels)." << "\n"; + cout << "Example get.rabund(line=1-3-5)." << "\n"; + cout << "The default value for line and label are all lines in your inputfile." << "\n"; + cout << "The get.rabund command outputs a .rabund file containing the lines you selected." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetRAbundCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetRAbundCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -38,6 +116,9 @@ GetRAbundCommand::~GetRAbundCommand(){ int GetRAbundCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //read first line @@ -50,13 +131,13 @@ int GetRAbundCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; - while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(list->getLabel()) == 1){ cout << list->getLabel() << '\t' << count << endl; rabund = new RAbundVector(); *rabund = (list->getRAbundVector()); diff --git a/getrabundcommand.h b/getrabundcommand.h index 66a3211..4b2c37f 100644 --- a/getrabundcommand.h +++ b/getrabundcommand.h @@ -20,9 +20,10 @@ class GlobalData; class GetRAbundCommand : public Command { public: - GetRAbundCommand(); + GetRAbundCommand(string); ~GetRAbundCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -32,6 +33,15 @@ private: InputData* input; ListVector* list; RAbundVector* rabund; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string line, label; + + }; #endif diff --git a/getsabundcommand.cpp b/getsabundcommand.cpp index 8c7f07c..b3aa54f 100644 --- a/getsabundcommand.cpp +++ b/getsabundcommand.cpp @@ -11,12 +11,68 @@ //********************************************************************************************************************** -GetSAbundCommand::GetSAbundCommand(){ +GetSAbundCommand::GetSAbundCommand(string option){ try { globaldata = GlobalData::getInstance(); - filename = getRootName(globaldata->inputFileName) + "sabund"; + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); - openOutputFile(filename, out); + //allow user to run help + if(option == "help") { help(); abort = true; } + + else 
{ + //valid paramters for this command + string Array[] = {"line","label"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "")) { cout << "You must read a list or rabund before you can use the get.sabund command." << endl; abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + delete validParameter; + + if (abort == false) { + filename = getRootName(globaldata->inputFileName) + "sabund"; + openOutputFile(filename, out); + } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the GetSAbundCommand class Function GetSAbundCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -28,6 +84,28 @@ GetSAbundCommand::GetSAbundCommand(){ } } +//********************************************************************************************************************** + +void GetSAbundCommand::help(){ + try { + cout << "The get.sabund command can only be executed after a successful read.otu of a listfile." << "\n"; + cout << "The get.sabund command parameters are line and label. No parameters are required, and you may not use line and label at the same time." << "\n"; + cout << "The line and label allow you to select what distance levels you would like included in your .sabund file, and are separated by dashes." << "\n"; + cout << "The get.sabund command should be in the following format: get.sabund(line=yourLines, label=yourLabels)." << "\n"; + cout << "Example get.sabund(line=1-3-5)." << "\n"; + cout << "The default value for line and label are all lines in your inputfile." << "\n"; + cout << "The get.sabund command outputs a .sabund file containing the lines you selected." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetSAbundCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) 
{ + cout << "An unknown error has occurred in the GetSAbundCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -38,6 +116,9 @@ GetSAbundCommand::~GetSAbundCommand(){ int GetSAbundCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //using order vector so you don't have to distinguish between the list and rabund files @@ -50,13 +131,13 @@ int GetSAbundCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; - while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){ cout << order->getLabel() << '\t' << count << endl; sabund = new SAbundVector(); *sabund = (order->getSAbundVector()); diff --git a/getsabundcommand.h b/getsabundcommand.h index 27d25c2..cd95d94 100644 --- a/getsabundcommand.h +++ b/getsabundcommand.h @@ -20,9 +20,10 @@ class GlobalData; class GetSAbundCommand : public Command { public: - GetSAbundCommand(); + GetSAbundCommand(string); ~GetSAbundCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -33,6 +34,14 @@ private: OrderVector* lastOrder; InputData* input; SAbundVector* sabund; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + 
string line, label; + }; #endif diff --git a/globaldata.cpp b/globaldata.cpp index d24f4b9..68f7fc3 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -13,286 +13,6 @@ GlobalData* GlobalData::getInstance() { } /*******************************************************/ -/******************************************************/ -//This function parses through the option string of the command to remove its parameters -void GlobalData::parseGlobalData(string commandString, string optionText){ - try { - commandName = commandString; //save command name to be used by other classes - - //set all non filename paramters to default - reset(); - - //clears out data from previous read - if((commandName == "read.dist") || (commandName == "read.otu") || (commandName == "read.tree")) { - clear(); - gGroupmap = NULL; - gListVector = NULL; - gSparseMatrix = NULL; - gTree.clear(); - Treenames.clear(); - labels.clear(); lines.clear(); Groups.clear(); - allLines = 1; - } - - //saves help request - if(commandName =="help") { - helpRequest = optionText; - } - - if(commandName == "libshuff") { - iters = "10000"; - cutoff = "1.0"; - } - - //set default value for cutoff and phylip - if (commandName == "dist.seqs") { cutoff = "1.0"; phylipfile = "F"; } - - string key, value; - //reads in parameters and values - if((optionText != "") && (commandName != "help")){ - while((optionText.find_first_of(',') != -1)) { //while there are parameters - splitAtComma(value, optionText); - splitAtEquals(key, value); - - if(key == "phylip") { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } - if(key == "column") { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } - if(key == "list") { listfile = value; inputFileName = value; fileroot = value; format = "list"; } - if(key == "rabund") { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } - if(key == "sabund") { sabundfile = value; inputFileName = value; fileroot = 
value; format = "sabund"; } - if(key == "fasta") { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } - if(key == "tree") { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } - if(key == "shared") { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } - if(key == "name") { namefile = value; } - if(key == "order") { orderfile = value; } - if(key == "group") { groupfile = value; } - if(key == "cutoff") { cutoff = value; } - if(key == "precision") { precision = value; } - if(key == "iters") { iters = value; } - if(key == "jumble") { jumble = value; } - if(key == "freq") { freq = value; } - if(key == "method") { method = value; } - if(key == "fileroot") { fileroot = value; } - if(key == "abund") { abund = value; } - if(key == "random") { randomtree = value; } - if(key == "calc") { calc = value; } - if(key == "step") { step = value; } - if(key == "form") { form = value; } - if(key == "sorted") { sorted = value; } - if(key == "vertical") { vertical = value; } - if(key == "trump") { trump = value; } - if(key == "hard") { hard = value; } - if(key == "soft") { soft = value; } - if(key == "scale") { scale = value; } - if(key == "countends") { countends = value; } - if(key == "processors") { processors = value; } - if(key == "size") { size = value; } - if(key == "candidate") { candidatefile = value; } - if(key == "search") { search = value; } - if(key == "ksize") { ksize = value; } - if(key == "align") { align = value; } - if(key == "match") { match = value; } - if(key == "mismatch") { mismatch = value; } - if(key == "gapopen") { gapopen = value; } - if(key == "gapextend") { gapextend = value; } - if(key == "start") { startPos = value; } - if(key == "end") { endPos = value; } - if(key == "maxambig") { maxAmbig = value; } - if(key == "maxhomop") { maxHomoPolymer = value; } - if(key == "minlength") { minLength = value; } - if(key == "maxlength") { maxLength = value; } - if(key == "flip" ) { 
flip = value; } - if(key == "oligos" ) { oligoFile = value; } - - if(key == "line") {//stores lines to be used in a vector - lines.clear(); - labels.clear(); - line = value; - label = ""; - if(line != "all") { splitAtDash(value, lines); allLines = 0; } - else { allLines = 1; } - } - - if(key == "label") {//stores lines to be used in a vector - labels.clear(); - lines.clear(); - label = value; - line = ""; - if(label != "all") { splitAtDash(value, labels); allLines = 0; } - else { allLines = 1; } - } - - if(key == "groups") {//stores groups to be used in a vector - Groups.clear(); - groups = value; - splitAtDash(value, Groups); - } - - } - - //saves the last parameter ==> this seems silly... - value = optionText; - splitAtEquals(key, value); - if(key == "phylip") { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } - if(key == "column") { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } - if(key == "list") { listfile = value; inputFileName = value; fileroot = value; format = "list"; } - if(key == "rabund") { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } - if(key == "sabund") { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } - if(key == "fasta") { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } - if(key == "tree") { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } - if(key == "shared") { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } - if(key == "name") { namefile = value; } - if(key == "order") { orderfile = value; } - if(key == "group") { groupfile = value; } - if(key == "cutoff") { cutoff = value; } - if(key == "precision") { precision = value; } - if(key == "iters") { iters = value; } - if(key == "jumble") { jumble = value; } - if(key == "freq") { freq = value; } - if(key == "method") { method = value; } - if(key == "fileroot") { 
fileroot = value; } - if(key == "abund") { abund = value; } - if(key == "random") { randomtree = value; } - if(key == "calc") { calc = value; } - if(key == "step") { step = value; } - if(key == "form") { form = value; } - if(key == "sorted") { sorted = value; } - if(key == "vertical") { vertical = value; } - if(key == "trump") { trump = value; } - if(key == "hard") { hard = value; } - if(key == "soft") { soft = value; } - if(key == "scale") { scale = value; } - if(key == "countends") { countends = value; } - if(key == "processors") { processors = value; } - if(key == "size") { size = value; } - if(key == "candidate") { candidatefile = value; } - if(key == "search") { search = value; } - if(key == "ksize") { ksize = value; } - if(key == "align") { align = value; } - if(key == "match") { match = value; } - if(key == "mismatch") { mismatch = value; } - if(key == "gapopen") { gapopen = value; } - if(key == "gapextend") { gapextend = value; } - if(key == "start") { startPos = value; } - if(key == "end") { endPos = value; } - if(key == "maxambig") { maxAmbig = value; } - if(key == "maxhomop") { maxHomoPolymer = value; } - if(key == "minlength") { minLength = value; } - if(key == "maxlength") { maxLength = value; } - if(key == "flip" ) { flip = value; } - if(key == "oligos" ) { oligoFile = value; } - - - if(key == "line") {//stores lines to be used in a vector - lines.clear(); - labels.clear(); - line = value; - label = ""; - if(line != "all") { splitAtDash(value, lines); allLines = 0; } - else { allLines = 1; } - } - - if(key == "label") {//stores lines to be used in a vector - labels.clear(); - lines.clear(); - label = value; - line = ""; - if(label != "all") { splitAtDash(value, labels); allLines = 0; } - else { allLines = 1; } - } - - if(key == "groups") {//stores groups to be used in a vector - Groups.clear(); - groups = value; - splitAtDash(value, Groups); - } - } - - //set format for shared - if ((listfile != "") && (groupfile != "")) { format = "shared"; } - if 
((phylipfile != "") && (groupfile != "")) { format = "matrix"; } - - //set default value for cutoff - if (commandName == "dist.seqs") { format = "fasta"; inputFileName = fastafile; fileroot = fastafile; } - - //input defaults for calculators - if(commandName == "collect.single") { - - if((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if(commandName == "rarefaction.single") { - if((calc == "default") || (calc == "")) { calc = "sobs"; } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if(commandName == "collect.shared") { - if((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if(commandName == "summary.single") { - if((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if(commandName == "summary.shared") { - if((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if(commandName == "rarefaction.shared") { - if((calc == "default") || (calc == "")) { calc = "sharedobserved"; } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if(commandName == "dist.seqs") { - if((calc == "default") || (calc == "")) { calc = "onegap"; } - if(countends == "") { countends = "T"; } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if(commandName == "venn") { - if((calc == "default") || (calc == "")) { - if(format == "list") { calc = "sobs"; } - else { calc = "sharedsobs"; } - } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - if((commandName == "tree.shared") || (commandName == "bootstrap.shared") || (commandName == "dist.shared") || 
(commandName == "heatmap.sim")) { - if((calc == "default") || (calc == "")) { - calc = "jclass-thetayc"; - } - Estimators.clear(); - splitAtDash(calc, Estimators); - } - - if(commandName == "filter.seqs"){ - if(trump == "" && vertical == "" && hard == "" && soft == ""){ - trump = '.'; - } - - } - - //ifyou have done a read.otu with a groupfile but don't want to use it anymore because you want to do single commands - if((commandName == "collect.single") || (commandName == "rarefaction.single") || (commandName == "summary.single")) { - if(listfile != "") { format = "list"; } - else if(sabundfile != "") { format = "sabund"; } - else if(rabundfile != "") { format = "rabund"; } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function parseGlobalData. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the GlobalData class function parseGlobalData. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; - exit(1); - } -} -/*******************************************************/ - /******************************************************/ // These functions give you the option parameters of the commands string GlobalData::getPhylipFile() { return phylipfile; } @@ -306,60 +26,23 @@ string GlobalData::getOrderFile() { return orderfile; } string GlobalData::getTreeFile() { return treefile; } string GlobalData::getSharedFile() { return sharedfile; } string GlobalData::getFastaFile() { return fastafile; } -string GlobalData::getCutOff() { return cutoff; } string GlobalData::getFormat() { return format; } -string GlobalData::getPrecision() { return precision; } -string GlobalData::getMethod() { return method; } -string GlobalData::getFileRoot() { return fileroot; } -string GlobalData::getIters() { return iters; } -string GlobalData::getJumble() { return jumble; } -string GlobalData::getFreq() { return freq; } -string GlobalData::getAbund() { return abund; } -string GlobalData::getRandomTree() { return randomtree; } -string GlobalData::getGroups() { return groups; } -string GlobalData::getStep() { return step; } -string GlobalData::getForm() { return form; } -string GlobalData::getSorted() { return sorted; } -string GlobalData::getVertical() { return vertical; } -string GlobalData::getTrump() { return trump; } -string GlobalData::getSoft() { return soft; } -string GlobalData::getHard() { return hard; } -string GlobalData::getScale() { return scale; } -string GlobalData::getCountEnds() { return countends; } -string GlobalData::getProcessors() { return processors; } -string GlobalData::getSize() { return size; } string GlobalData::getCandidateFile() { return candidatefile; } -string GlobalData::getSearch() { return search; } -string GlobalData::getKSize() { return ksize; } -string GlobalData::getAlign() { return align; } -string GlobalData::getMatch() { return match; } -string GlobalData::getMismatch() { return mismatch; } -string GlobalData::getGapopen() { return 
gapopen; } -string GlobalData::getGapextend() { return gapextend; } -string GlobalData::getStartPos() { return startPos; } -string GlobalData::getEndPos() { return endPos; } -string GlobalData::getMaxAmbig() { return maxAmbig; } -string GlobalData::getMaxHomoPolymer() { return maxHomoPolymer; } -string GlobalData::getMinLength() { return minLength; } -string GlobalData::getMaxLength() { return maxLength; } -string GlobalData::getFlip() { return flip; } -string GlobalData::getOligosFile() { return oligoFile; } void GlobalData::setListFile(string file) { listfile = file; inputFileName = file; } +void GlobalData::setFastaFile(string file) { fastafile = file; inputFileName = file; } +void GlobalData::setTreeFile(string file) { treefile = file; inputFileName = file; } +void GlobalData::setCandidateFile(string file) { candidatefile = file; } void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file; } void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file; } void GlobalData::setPhylipFile(string file) { phylipfile = file; inputFileName = file; } void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file; } void GlobalData::setGroupFile(string file) { groupfile = file; } -void GlobalData::setSharedFile(string file) { sharedfile = file; inputFileName = file; fileroot = file; } +void GlobalData::setSharedFile(string file) { sharedfile = file; inputFileName = file; } void GlobalData::setNameFile(string file) { namefile = file; } +void GlobalData::setOrderFile(string file) { orderfile = file; } void GlobalData::setFormat(string Format) { format = Format; } -void GlobalData::setRandomTree(string Random) { randomtree = Random; } -void GlobalData::setGroups(string g) { groups = g; } -void GlobalData::setCalc(string Calc) { calc = Calc; } -void GlobalData::setCountEnds(string e) { countends = e; } -void GlobalData::setProcessors(string p) { processors = p; } 
/*******************************************************/ @@ -367,7 +50,6 @@ void GlobalData::setProcessors(string p) { processors = p; } /******************************************************/ GlobalData::GlobalData() { //option definitions should go here... - helpRequest = ""; clear(); gListVector = NULL; gSparseMatrix = NULL; @@ -389,238 +71,41 @@ void GlobalData::clear() { treefile = ""; sharedfile = ""; candidatefile = ""; - cutoff = "10.00"; - format = ""; - precision = "100"; - iters = "1000"; - line = ""; - label = ""; - groups = ""; - jumble = "1"; //0 means don't jumble, 1 means jumble. - randomtree = ""; //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile. - freq = "100"; - method = "furthest"; - fileroot = ""; - abund = "10"; - step = "0.01"; - form = "integral"; - sorted = "T"; //F means don't sort, T means sort. - vertical = "F"; - trump = ""; - hard = ""; - soft = ""; - scale = "log10"; - countends = "T"; //yes - processors = "1"; - size = "0"; - search = "kmer"; - ksize = "8"; - align = "needleman"; - match = "1.0"; - mismatch = "-1.0"; - gapopen = "-1.0"; - gapextend = "-2.0"; - startPos = "-1"; - endPos = "-1"; - maxAmbig = "-1"; - maxHomoPolymer = "-1"; - minLength = "-1"; - maxLength = "-1"; - flip = "0"; - oligoFile = ""; -} - -//*******************************************************/ - -/******************************************************/ -void GlobalData::reset() { - label = ""; - line = ""; - cutoff = "10.00"; - precision = "100"; - iters = "1000"; - groups = ""; - jumble = "1"; //0 means don't jumble, 1 means jumble. - sorted = "T"; //F means don't sort, T means sort. - randomtree = ""; //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile. 
- freq = "100"; - method = "furthest"; - calc = ""; - abund = "10"; - step = "0.01"; - form = "integral"; - countends = "T"; - processors = "1"; - size = "0"; - search = "kmer"; - ksize = "8"; - align = "needleman"; - match = "1.0"; - mismatch = "-1.0"; - gapopen = "-1.0"; - gapextend = "-2.0"; - vertical = ""; - trump = ""; - hard = ""; - soft = ""; - startPos = "-1"; - endPos = "-1"; - maxAmbig = "-1"; - maxHomoPolymer = "-1"; - minLength = "-1"; - maxLength = "-1"; - flip = "0"; - oligoFile = ""; - } /*******************************************************/ /******************************************************/ -GlobalData::~GlobalData() { - _uniqueInstance = 0; - if(gListVector != NULL) { delete gListVector; } - if(gSparseMatrix != NULL) { delete gSparseMatrix; } - if(gorder != NULL) { delete gorder; } -} -/*******************************************************/ - -/*******************************************************/ -void GlobalData::parseTreeFile() { - //Why is THIS in GlobalData??? - PDS - - //only takes names from the first tree and assumes that all trees use the same names. 
- try { - string filename = treefile; - ifstream filehandle; - openInputFile(filename, filehandle); - int c, comment; - comment = 0; - - //ifyou are not a nexus file - if((c = filehandle.peek()) != '#') { - while((c = filehandle.peek()) != ';') { - while ((c = filehandle.peek()) != ';') { - // get past comments - if(c == '[') { - comment = 1; - } - if(c == ']'){ - comment = 0; - } - if((c == '(') && (comment != 1)){ break; } - filehandle.get(); - } - - readTreeString(filehandle); - } - //ifyou are a nexus file - }else if((c = filehandle.peek()) == '#') { - string holder = ""; - - // get past comments - while(holder != "translate" && holder != "Translate"){ - if(holder == "[" || holder == "[!"){ - comment = 1; - } - if(holder == "]"){ - comment = 0; - } - filehandle >> holder; - - //ifthere is no translate then you must read tree string otherwise use translate to get names - if(holder == "tree" && comment != 1){ - //pass over the "tree rep.6878900 = " - while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;} - - if(c == EOF) { break; } - filehandle.putback(c); //put back first ( of tree. 
- readTreeString(filehandle); - break; - } - } - - //use nexus translation rather than parsing tree to save time - if((holder == "translate") || (holder == "Translate")) { - - string number, name, h; - h = ""; // so it enters the loop the first time - while((h != ";") && (number != ";")) { - filehandle >> number; - filehandle >> name; - - //c = , until done with translation then c = ; - h = name.substr(name.length()-1, name.length()); - name.erase(name.end()-1); //erase the comma - Treenames.push_back(number); - } - if(number == ";") { Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name - } - } - +void GlobalData::newRead() { + try{ + clear(); + gGroupmap = NULL; + gListVector = NULL; + gSparseMatrix = NULL; + gTree.clear(); + Treenames.clear(); + labels.clear(); lines.clear(); Groups.clear(); + allLines = 1; + runParse = true; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function newRead. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the GlobalData class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the GlobalData class function newRead. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; exit(1); - } + } } -/*******************************************************/ - -/*******************************************************/ -void GlobalData::readTreeString(ifstream& filehandle) { - try { - int c; - string name; //k - - while((c = filehandle.peek()) != ';') { - //ifyou are a name - if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space - name = ""; - c = filehandle.get(); - // k = c; -//cout << k << endl; - while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { - name += c; - c = filehandle.get(); - // k = c; -//cout << " in name while " << k << endl; - } - -//cout << "name = " << name << endl; - Treenames.push_back(name); - filehandle.putback(c); -//k = c; -//cout << " after putback" << k << endl; - } - - if(c == ':') { //read until you reach the end of the branch length - while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { - c = filehandle.get(); - // k = c; - //cout << " in branch while " << k << endl; - } - filehandle.putback(c); - } - c = filehandle.get(); - if(c == ';') { break; } - // k = c; -//cout << k << endl; - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the GlobalData class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; - exit(1); - } -} +//******************************************************/ +/******************************************************/ +GlobalData::~GlobalData() { + _uniqueInstance = 0; + if(gListVector != NULL) { delete gListVector; } + if(gSparseMatrix != NULL) { delete gSparseMatrix; } + if(gorder != NULL) { delete gorder; } +} /*******************************************************/ /*******************************************************/ diff --git a/globaldata.hpp b/globaldata.hpp index 89089ad..1d38d63 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -37,8 +37,8 @@ public: FullMatrix* gMatrix; TreeMap* gTreemap; SequenceDB* gSequenceDB; - string inputFileName, helpRequest, commandName, vertical, argv; - bool allLines; + string inputFileName, argv; + bool allLines, runParse; vector Estimators, Groups; //holds estimators to be used set lines; //hold lines to be used set labels; //holds labels to be used @@ -55,85 +55,38 @@ public: string getFastaFile(); string getTreeFile(); string getSharedFile(); - string getCutOff(); string getFormat(); - string getPrecision(); - string getMethod(); - string getFileRoot(); - string getIters(); - string getJumble(); - string getFreq(); - string getAbund(); - string getRandomTree(); - string getGroups(); - string getStep(); - string getForm(); - string getSorted(); - string getCountEnds(); - string getProcessors(); - string getSize(); string getCandidateFile(); - string getSearch(); - string getKSize(); - string getAlign(); - string getMatch(); - string getMismatch(); - string getGapopen(); - string getGapextend(); - string getVertical(); - string getTrump(); - string getSoft(); - string getHard(); - string getScale(); - string getStartPos(); - string getEndPos(); - string getMaxAmbig(); - string getMaxHomoPolymer(); - string getMinLength(); - string getMaxLength(); - string getFlip(); - string getOligosFile(); - + void setListFile(string); - void setGroupFile(string file); + void setFastaFile(string); + void 
setTreeFile(string); + void setCandidateFile(string); + void setGroupFile(string); void setPhylipFile(string); void setColumnFile(string); void setNameFile(string); void setRabundFile(string); void setSabundFile(string); void setSharedFile(string); + void setOrderFile(string file); void setFormat(string); - void setRandomTree(string); - void setGroups(string); - void setCalc(string); - void setCountEnds(string); - void setProcessors(string); - + void clear(); void clearLabels(); void clearAbund(); - void parseGlobalData(string, string); + void newRead(); - void parseTreeFile(); //parses through tree file to find names of nodes and number of them - //this is required in case user has sequences in the names file that are - //not included in the tree. - //only takes names from the first tree in the tree file and assumes that all trees use the same names. - - private: - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups, cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, hard, scale, countends, processors, candidatefile, search, ksize, align, match, size, mismatch, gapopen, gapextend, minLength, maxLength, startPos, endPos, maxAmbig, maxHomoPolymer, flip, oligoFile; - + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, format, candidatefile; static GlobalData* _uniqueInstance; GlobalData( const GlobalData& ); // Disable copy constructor void operator=( const GlobalData& ); // Disable assignment operator GlobalData(); ~GlobalData(); - void reset(); //clears all non filename parameters - void readTreeString(ifstream&); - }; diff --git a/heatmap.cpp b/heatmap.cpp index 8a1837a..e707b60 100644 --- a/heatmap.cpp +++ b/heatmap.cpp @@ -10,11 +10,12 @@ #include "heatmap.h" 
//********************************************************************************************************************** -HeatMap::HeatMap(){ +HeatMap::HeatMap(string sort, string scale){ try { globaldata = GlobalData::getInstance(); format = globaldata->getFormat(); - sorted = globaldata->getSorted(); + sorted = sort; + scaler = scale; } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the HeatMap class Function HeatMap. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -30,8 +31,7 @@ HeatMap::HeatMap(){ void HeatMap::getPic(RAbundVector* rabund) { try { - //get users scaling method - scaler = globaldata->getScale(); + float maxRelAbund = 0.0; @@ -40,7 +40,6 @@ void HeatMap::getPic(RAbundVector* rabund) { if(relAbund > maxRelAbund){ maxRelAbund = relAbund; } } - scaler = globaldata->getScale(); vector scaleRelAbund(rabund->size(), ""); @@ -121,8 +120,6 @@ void HeatMap::getPic(vector lookup) { if(maxRelAbund[i] > superMaxRelAbund){ superMaxRelAbund = maxRelAbund[i]; } } - scaler = globaldata->getScale(); - scaleRelAbund.resize(lookup.size()); for(int i=0;isize(), ""); diff --git a/heatmap.h b/heatmap.h index 7c5b561..56a56e4 100644 --- a/heatmap.h +++ b/heatmap.h @@ -19,7 +19,7 @@ class HeatMap { public: - HeatMap(); + HeatMap(string, string); ~HeatMap(){}; void getPic(RAbundVector*); diff --git a/heatmapcommand.cpp b/heatmapcommand.cpp index e99537d..b095041 100644 --- a/heatmapcommand.cpp +++ b/heatmapcommand.cpp @@ -12,11 +12,82 @@ //********************************************************************************************************************** -HeatMapCommand::HeatMapCommand(){ +HeatMapCommand::HeatMapCommand(string option){ try { globaldata = GlobalData::getInstance(); - heatmap = new HeatMap(); - format = globaldata->getFormat(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid 
paramters for this command + string AlignArray[] = {"groups","line","label","sorted","scale"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if ((globaldata->getListFile() == "") && (globaldata->getSharedFile() == "")) { + cout << "You must read a list, or a list and a group, or a shared before you can use the heatmap.bin command." << endl; abort = true; + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if ((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + sorted = validParameter->validFile(parameters, "sorted", false); if (sorted == "not found") { sorted = "T"; } + + scale = validParameter->validFile(parameters, "scale", false); if (scale == "not found") { scale = "log10"; } + + + delete validParameter; + + if (abort == false) { + heatmap = new HeatMap(sorted, scale); + format = globaldata->getFormat(); + } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the HeatMapCommand class Function HeatMapCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -27,6 +98,36 @@ HeatMapCommand::HeatMapCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void HeatMapCommand::help(){ + try { + cout << "The heatmap.bin command can only be executed after a successful read.otu command." << "\n"; + cout << "The heatmap.bin command parameters are groups, sorted, scale, line and label. No parameters are required, but you may not use line and label at the same time." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap." << "\n"; + cout << "The sorted parameter allows you to choose to see the file with the shared otus at the top or the otus in the order they appear in your input file. " << "\n"; + cout << "The scale parameter allows you to choose the range of color your bin information will be displayed with." 
<< "\n"; + cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like a heatmap created for, and are also separated by dashes." << "\n"; + cout << "The heatmap.bin command should be in the following format: heatmap.bin(groups=yourGroups, sorted=yourSorted, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example heatmap.bin(groups=A-B-C, line=1-3-5, sorted=F, scale=log10)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile, and all lines in your inputfile will be used." << "\n"; + cout << "The default value for sorted is T meaning you want the shared otus on top, you may change it to F meaning the exact representation of your input file." << "\n"; + cout << "The default value for scale is log10; your other options are log2 and linear." << "\n"; + cout << "The heatmap.bin command outputs a .svg file for each line or label you specify." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the HeatMapCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the HeatMapCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + //********************************************************************************************************************** HeatMapCommand::~HeatMapCommand(){ @@ -39,6 +140,9 @@ HeatMapCommand::~HeatMapCommand(){ int HeatMapCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; RAbundVector* lastRAbund; vector lastLookup; @@ -63,15 +167,15 @@ int HeatMapCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; if (format != "list") { //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ cout << lookup[0]->getLabel() << '\t' << count << endl; heatmap->getPic(lookup); @@ -124,9 +228,9 @@ int HeatMapCommand::execute(){ }else{ - while((rabund != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((rabund != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(rabund->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(rabund->getLabel()) == 1){ cout << rabund->getLabel() << '\t' << count << endl; heatmap->getPic(rabund); @@ -175,7 +279,6 @@ int HeatMapCommand::execute(){ } - globaldata->setGroups(""); return 0; } catch(exception& e) { diff --git a/heatmapcommand.h b/heatmapcommand.h index fc50554..2ae9882 100644 --- a/heatmapcommand.h +++ b/heatmapcommand.h @@ -23,9 +23,10 @@ class GlobalData; class HeatMapCommand : public Command { public: - HeatMapCommand(); + HeatMapCommand(string); ~HeatMapCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -35,7 +36,15 @@ private: RAbundVector* rabund; vector lookup; HeatMap* heatmap; - string format; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set 
lines; //hold lines to be used + set labels; //holds labels to be used + string format, groups, sorted, scale, line, label; + vector Groups; + }; diff --git a/heatmapsimcommand.cpp b/heatmapsimcommand.cpp index dc3db89..0c9502f 100644 --- a/heatmapsimcommand.cpp +++ b/heatmapsimcommand.cpp @@ -22,42 +22,115 @@ //********************************************************************************************************************** -HeatMapSimCommand::HeatMapSimCommand(){ +HeatMapSimCommand::HeatMapSimCommand(string option){ try { globaldata = GlobalData::getInstance(); - validCalculator = new ValidCalculators(); - heatmap = new HeatMapSim(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Groups.clear(); + Estimators.clear(); + + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"groups","line","label", "calc"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getSharedFile() == "") { + cout << "You must read a list and a group, or a shared before you can use the heatmap.sim command." << endl; abort = true; + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if ((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "jclass-thetayc"; } + else { + if (calc == "default") { calc = "jclass-thetayc"; } + } + splitAtDash(calc, Estimators); + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + delete validParameter; + + if (abort == false) { + validCalculator = new ValidCalculators(); + heatmap = new HeatMapSim(); - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("heat", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "jabund") { - heatCalculators.push_back(new JAbund()); - }else if (globaldata->Estimators[i] == "sorabund") { - heatCalculators.push_back(new SorAbund()); - }else if (globaldata->Estimators[i] == "jclass") { - heatCalculators.push_back(new Jclass()); - }else if (globaldata->Estimators[i] == "sorclass") { - heatCalculators.push_back(new SorClass()); - }else if (globaldata->Estimators[i] == "jest") { - heatCalculators.push_back(new 
Jest()); - }else if (globaldata->Estimators[i] == "sorest") { - heatCalculators.push_back(new SorEst()); - }else if (globaldata->Estimators[i] == "thetayc") { - heatCalculators.push_back(new ThetaYC()); - }else if (globaldata->Estimators[i] == "thetan") { - heatCalculators.push_back(new ThetaN()); - }else if (globaldata->Estimators[i] == "morisitahorn") { - heatCalculators.push_back(new MorHorn()); - }else if (globaldata->Estimators[i] == "braycurtis") { - heatCalculators.push_back(new BrayCurtis()); + int i; + for (i=0; iisValidCalculator("heat", Estimators[i]) == true) { + if (Estimators[i] == "jabund") { + heatCalculators.push_back(new JAbund()); + }else if (Estimators[i] == "sorabund") { + heatCalculators.push_back(new SorAbund()); + }else if (Estimators[i] == "jclass") { + heatCalculators.push_back(new Jclass()); + }else if (Estimators[i] == "sorclass") { + heatCalculators.push_back(new SorClass()); + }else if (Estimators[i] == "jest") { + heatCalculators.push_back(new Jest()); + }else if (Estimators[i] == "sorest") { + heatCalculators.push_back(new SorEst()); + }else if (Estimators[i] == "thetayc") { + heatCalculators.push_back(new ThetaYC()); + }else if (Estimators[i] == "thetan") { + heatCalculators.push_back(new ThetaN()); + }else if (Estimators[i] == "morisitahorn") { + heatCalculators.push_back(new MorHorn()); + }else if (Estimators[i] == "braycurtis") { + heatCalculators.push_back(new BrayCurtis()); + } + } } + } } - - //reset calc for next command - globaldata->setCalc(""); + } catch(exception& e) { @@ -69,18 +142,50 @@ HeatMapSimCommand::HeatMapSimCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void HeatMapSimCommand::help(){ + try { + cout << "The heatmap.sim command can only be executed after a successful read.otu command." << "\n"; + cout << "The heatmap.sim command parameters are groups, calc, line and label. 
No parameters are required, but you may not use line and label at the same time." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap." << "\n"; + cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like a heatmap created for, and are also separated by dashes." << "\n"; + cout << "The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example heatmap.sim(groups=A-B-C, line=1-3-5, calc=jabund)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile, and all lines in your inputfile will be used." << "\n"; + validCalculator->printCalc("heat", cout); + cout << "The default value for calc is jclass-thetayc." << "\n"; + cout << "The heatmap.sim command outputs a .svg file for each calculator you choose at each line or label you specify." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the HeatMapSimCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the HeatMapSimCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + //********************************************************************************************************************** HeatMapSimCommand::~HeatMapSimCommand(){ delete input; delete read; delete heatmap; + delete validCalculator; } //********************************************************************************************************************** int HeatMapSimCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -98,14 +203,14 @@ int HeatMapSimCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ cout << lookup[0]->getLabel() << '\t' << count << endl; heatmap->getPic(lookup, heatCalculators); @@ -155,7 +260,6 @@ int HeatMapSimCommand::execute(){ //reset groups parameter globaldata->Groups.clear(); - globaldata->setGroups(""); return 0; } diff --git a/heatmapsimcommand.h b/heatmapsimcommand.h index 33696a3..091da26 100644 --- a/heatmapsimcommand.h +++ b/heatmapsimcommand.h @@ -21,9 +21,10 @@ class GlobalData; class HeatMapSimCommand : public Command { public: - HeatMapSimCommand(); + HeatMapSimCommand(string); ~HeatMapSimCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -33,6 +34,15 @@ 
private: vector heatCalculators; ValidCalculators* validCalculator; HeatMapSim* heatmap; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string format, groups, line, label, calc; + vector Estimators, Groups; + }; diff --git a/helpcommand.cpp b/helpcommand.cpp index acc5353..0acce70 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -11,11 +11,13 @@ //********************************************************************************************************************** -HelpCommand::HelpCommand(){ +HelpCommand::HelpCommand(string option){ globaldata = GlobalData::getInstance(); + + if (option != "") { cout << "There are no valid parameters for the help() command." << endl; } + validCommands = new ValidCommands(); - validCalcs = new ValidCalculators(); -} + } //********************************************************************************************************************** @@ -25,333 +27,10 @@ HelpCommand::~HelpCommand(){} int HelpCommand::execute(){ - if (globaldata->helpRequest == "read.dist") { - cout << "The read.dist command parameter options are phylip or column, group, name, cutoff and precision" << "\n"; - cout << "The read.dist command must be run before using the cluster or libshuff commands" << "\n"; - cout << "The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command" << "\n"; - cout << "For this use the read.dist command should be in the following format: " << "\n"; - cout << "read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) " << "\n"; - cout << "The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. " << "\n"; - cout << "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed." 
<< "\n"; - cout << "The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command." << "\n"; - cout << "For this use the read.dist command should be in the following format: " << "\n"; - cout << "read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. " << "\n"; - cout << "Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "read.otu") { - cout << "The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, " << "\n"; - cout << "collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command " << "\n"; - cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order, line and label." << "\n"; - cout << "The read.otu command can be used in two ways. The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single." << "\n"; - cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels)." << "\n"; - cout << "The list, rabund or sabund parameter is required, but you may only use one of them." << "\n"; - cout << "The line and label parameters are optional but you may not use both the line and label parameters at the same time." << "\n"; - cout << "The label and line parameters are used to read specific lines in your input." << "\n"; - cout << "The second way to use the read.otu command is to read a list and a group, or a shared so you can use the collect.shared, rarefaction.shared or summary.shared commands." 
<< "\n"; - cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile, line=yourLines) or read.otu(shared=yourSharedFile). " << "\n"; - cout << "The list parameter and group paramaters or the shared paremeter is required. When using the command the second way with a list and group file read.otu command parses the .list file" << "\n"; - cout << "and separates it into groups. It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. " << "\n"; - cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "read.tree") { - cout << "The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. " << "\n"; - cout << "It also must be run before using the parsimony command, unless you are using the randomtree parameter." << "\n"; - cout << "The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile)." << "\n"; - cout << "The tree and group parameters are both required." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "cluster") { - cout << "The cluster command can only be executed after a successful read.dist command." << "\n"; - cout << "The cluster command parameter options are method, cuttoff and precision. No parameters are required." << "\n"; - cout << "The cluster command should be in the following format: " << "\n"; - cout << "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) " << "\n"; - cout << "The acceptable cluster methods are furthest, nearest and average. If no method is provided then furthest is assumed." 
<< "\n" << "\n"; - }else if (globaldata->helpRequest == "unique.seqs") { - cout << "The unique.seqs command reads a fastafile and creates a namesfile." << "\n"; - cout << "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. " << "\n"; - cout << "If the sequence is unique the second column will just contain its name. " << "\n"; - cout << "The unique.seqs command parameter is fasta and it is required." << "\n"; - cout << "The unique.seqs command should be in the following format: " << "\n"; - cout << "unique.seqs(fasta=yourFastaFile) " << "\n"; - }else if (globaldata->helpRequest == "dist.seqs") { - cout << "The dist.seqs command reads a file containing sequences and creates a distance file." << "\n"; - cout << "The dist.seqs command parameters are fasta, calc, countends, cutoff and processors. " << "\n"; - cout << "The fasta parameter is required." << "\n"; - cout << "The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap." << "\n"; - cout << "The countends parameter allows you to specify whether to include terminal gaps in distance. Your options are: T or F. The default is T." << "\n"; - cout << "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0." << "\n"; - cout << "The processors parameter allows you to specify number of processors to use. The default is 1, but you can use up to 4 processors." << "\n"; - cout << "The dist.seqs command should be in the following format: " << "\n"; - cout << "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) " << "\n"; - cout << "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3)." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc)." 
<< "\n" << "\n"; - }else if (globaldata->helpRequest == "align.seqs") { - cout << "The align.seqs command reads a file containing sequences and creates an alignment file and a report file." << "\n"; - cout << "The align.seqs command parameters are fasta, candidate, search, ksize, align, match, mismatch, gapopen and gapextend. " << "\n"; - cout << "The fasta and candidate parameters are required." << "\n"; - cout << "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer." << "\n"; - cout << "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman." << "\n"; - cout << "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 7." << "\n"; - cout << "The match parameter allows you to specify the bonus for having the same base. The default is 1.0." << "\n"; - cout << "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0." << "\n"; - cout << "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0." << "\n"; - cout << "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -2.0." << "\n"; - cout << "The align.seqs command should be in the following format: " << "\n"; - cout << "align.seqs(fasta=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) " << "\n"; - cout << "Example align.seqs(candidate=candidate.fasta, fasta=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)" << "\n"; - cout << "Note: No spaces between parameter labels (i.e. 
fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "collect.single") { - cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n"; - cout << "The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; - cout << "The collect.single command parameters are label, line, freq, calc and abund. No parameters are required, but you may not use " << "\n"; - cout << "both the line and label parameters at the same time. The collect.single command should be in the following format: " << "\n"; - cout << "collect.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n"; - cout << "Example collect(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-chao-ace-jack)." << "\n"; - cout << "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson." << "\n"; - validCalcs->printCalc("single", cout); - cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "collect.shared") { - cout << "The collect.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The collect.shared command parameters are label, line, freq, calc and groups. No parameters are required, but you may not use " << "\n"; - cout << "both the line and label parameters at the same time. The collect.shared command should be in the following format: " << "\n"; - cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, calc=yourEstimators, groups=yourGroups)." 
<< "\n"; - cout << "Example collect.shared(label=unique-.01-.03, line=0-5-10, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; - cout << "The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan." << "\n"; - cout << "The default value for groups is all the groups in your groupfile." << "\n"; - validCalcs->printCalc("shared", cout); - cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "get.group") { - cout << "The get.group command can only be executed after a successful read.otu command." << "\n"; -//cout << "The get.group command outputs a .bootGroups file to you can use in addition to the tree file generated by the bootstrap.shared command to run the concensus command." << "\n"; - cout << "You may not use any parameters with the get.group command." << "\n"; - cout << "The get.group command should be in the following format: " << "\n"; - cout << "get.group()" << "\n"; - cout << "Example get.group()." << "\n"; - }else if (globaldata->helpRequest == "get.label") { - cout << "The get.label command can only be executed after a successful read.otu command." << "\n"; - cout << "You may not use any parameters with the get.label command." << "\n"; - cout << "The get.label command should be in the following format: " << "\n"; - cout << "get.label()" << "\n"; - cout << "Example get.label()." << "\n"; - }else if (globaldata->helpRequest == "get.line") { - cout << "The get.line command can only be executed after a successful read.otu command." 
<< "\n"; - cout << "You may not use any parameters with the get.line command." << "\n"; - cout << "The get.line command should be in the following format: " << "\n"; - cout << "get.line()" << "\n"; - cout << "Example get.line()." << "\n"; - }else if (globaldata->helpRequest == "rarefaction.single") { - cout << "The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n"; - cout << "The rarefaction.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; - cout << "The rarefaction.single command parameters are label, line, iters, freq, calc and abund. No parameters are required, but you may not use " << "\n"; - cout << "both the line and label parameters at the same time. The rarefaction.single command should be in the following format: " << "\n"; - cout << "rarefaction.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n"; - cout << "Example rarefaction.single(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson)." << "\n"; - cout << "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness." << "\n"; - validCalcs->printCalc("rarefaction", cout); - cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "rarefaction.shared") { - cout << "The rarefaction.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The rarefaction.shared command parameters are label, line, iters, jumble, groups and calc. 
No parameters are required, but you may not use " << "\n"; - cout << "both the line and label parameters at the same time. The rarefaction command should be in the following format: " << "\n"; - cout << "rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n"; - cout << "Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, jumble=1, groups=B-C, calc=sharedobserved)." << "\n"; - cout << "The default values for iters is 1000, jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble), freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness." << "\n"; - cout << "The default value for groups is all the groups in your groupfile." << "\n"; - validCalcs->printCalc("sharedrarefaction", cout); - cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "summary.single") { - cout << "The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n"; - cout << "The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; - cout << "The summary.single command parameters are label, line, calc, abund. No parameters are required, but you may not use " << "\n"; - cout << "both the line and label parameters at the same time. The summary.single command should be in the following format: " << "\n"; - cout << "summary.single(label=yourLabel, line=yourLines, calc=yourEstimators)." 
<< "\n"; - cout << "Example summary.single(label=unique-.01-.03, line=0,5,10, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson)." << "\n"; - validCalcs->printCalc("summary", cout); - cout << "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson" << "\n"; - cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "summary.shared") { - cout << "The summary.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The summary.shared command parameters are label, line and calc. No parameters are required, but you may not use " << "\n"; - cout << "both the line and label parameters at the same time. The summary.shared command should be in the following format: " << "\n"; - cout << "summary.shared(label=yourLabel, line=yourLines, calc=yourEstimators, groups=yourGroups)." << "\n"; - cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; - validCalcs->printCalc("sharedsummary", cout); - cout << "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan" << "\n"; - cout << "The default value for groups is all the groups in your groupfile." << "\n"; - cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." 
<< "\n" << "\n"; - }else if (globaldata->helpRequest == "parsimony") { - cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n"; - cout << "The parsimony command parameters are random, groups and iters. No parameters are required." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group." << "\n"; - cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n"; - cout << "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters)." << "\n"; - cout << "Example parsimony(random=out, iters=500)." << "\n"; - cout << "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony)," << "\n"; - cout << "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "unifrac.weighted") { - cout << "The unifrac.weighted command can only be executed after a successful read.tree command." << "\n"; - cout << "The unifrac.weighted command parameters are groups and iters. No parameters are required." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; - cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." 
<< "\n"; - cout << "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters)." << "\n"; - cout << "Example unifrac.weighted(groups=A-B-C, iters=500)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile, and iters is 1000." << "\n"; - cout << "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "unifrac.unweighted") { - cout << "The unifrac.unweighted command can only be executed after a successful read.tree command." << "\n"; - cout << "The unifrac.unweighted command parameters are groups and iters. No parameters are required." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group." << "\n"; - cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n"; - cout << "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters)." << "\n"; - cout << "Example unifrac.unweighted(groups=A-B-C, iters=500)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile, and iters is 1000." << "\n"; - cout << "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "libshuff") { - cout << "The libshuff command can only be executed after a successful read.dist command." 
<< "\n"; - cout << "The libshuff command parameters are groups, iters, step, form and cutoff. No parameters are required." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; - cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random matrices you would like compared to your matrix." << "\n"; - cout << "The step parameter allows you to specify change in distance you would like between each output if you are using the discrete form." << "\n"; - cout << "The form parameter allows you to specify if you would like to analyze your matrix using the discrete or integral form. Your options are integral or discrete." << "\n"; - cout << "The libshuff command should be in the following format: libshuff(groups=yourGroups, iters=yourIters, cutOff=yourCutOff, form=yourForm, step=yourStep)." << "\n"; - cout << "Example libshuff(groups=A-B-C, iters=500, form=discrete, step=0.01, cutOff=2.0)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile, iters is 10000, cutoff is 1.0, form is integral and step is 0.01." << "\n"; - cout << "The libshuff command output two files: .coverage and .slsummary their descriptions are in the manual." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e.yourIters)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "heatmap.bin") { - cout << "The heatmap.bin command can only be executed after a successful read.otu command." << "\n"; - cout << "The heatmap.bin command parameters are groups, sorted, scale, line and label. No parameters are required, but you may not use line and label at the same time." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap." 
<< "\n"; - cout << "The sorted parameter allows you to choose to see the file with the shared otus at the top or the otus in the order they appear in your input file. " << "\n"; - cout << "The scale parameter allows you to choose the range of color your bin information will be displayed with." << "\n"; - cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like a heatmap created for, and are also separated by dashes." << "\n"; - cout << "The heatmap.bin command should be in the following format: heatmap.bin(groups=yourGroups, sorted=yourSorted, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example heatmap.bin(groups=A-B-C, line=1-3-5, sorted=F, scale=log10)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile, and all lines in your inputfile will be used." << "\n"; - cout << "The default value for sorted is T meaning you want the shared otus on top, you may change it to F meaning the exact representation of your input file." << "\n"; - cout << "The default value for scale is log10; your other options are log2 and linear." << "\n"; - cout << "The heatmap.bin command outputs a .svg file for each line or label you specify." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "heatmap.sim") { - cout << "The heatmap.sim command can only be executed after a successful read.otu command." << "\n"; - cout << "The heatmap.sim command parameters are groups, calc, line and label. No parameters are required, but you may not use line and label at the same time." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap." << "\n"; - cout << "The group names are separated by dashes. 
The line and label allow you to select what distance levels you would like a heatmap created for, and are also separated by dashes." << "\n"; - cout << "The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example heatmap.sim(groups=A-B-C, line=1-3-5, calc=jabund)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile, and all lines in your inputfile will be used." << "\n"; - validCalcs->printCalc("heat", cout); - cout << "The default value for calc is jclass-thetayc." << "\n"; - cout << "The heatmap.sim command outputs a .svg file for each calculator you choose at each line or label you specify." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "venn") { - cout << "The venn command can only be executed after a successful read.otu command." << "\n"; - cout << "The venn command parameters are groups, calc, line and label. No parameters are required, but you may not use line and label at the same time." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups." << "\n"; - cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like a venn diagram created for, and are also separated by dashes." << "\n"; - cout << "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example venn(groups=A-B-C, line=1-3-5, calc=sharedsobs-sharedchao)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile up to 4, and all lines in your inputfile will be used." 
<< "\n"; - cout << "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups." << "\n"; - cout << "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a list and group file or a shared file." << "\n"; - cout << "The only estmiator available four 4 groups is sharedsobs." << "\n"; - cout << "The venn command outputs a .svg file for each calculator you specify at each distance you choose." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "tree.shared") { - cout << "The tree.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The tree.shared command parameters are groups, calc, line and label. The calc parameter is required, and you may not use line and label at the same time." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n"; - cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes." << "\n"; - cout << "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile." << "\n"; - cout << "There is no default value for calc." << "\n"; - validCalcs->printCalc("treegroup", cout); - cout << "The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose." 
<< "\n"; - cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "dist.shared") { - cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The dist.shared command parameters are groups, calc, line and label. The calc parameter is required, and you may not use line and label at the same time." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n"; - cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." << "\n"; - cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile." << "\n"; - cout << "The default value for calc is jclass and thetayc." << "\n"; - validCalcs->printCalc("matrix", cout); - cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "bootstrap.shared") { - cout << "The bootstrap.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The bootstrap.shared command parameters are groups, calc, iters, line and label. The calc parameter is required, and you may not use line and label at the same time." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." 
<< "\n"; - cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes." << "\n"; - cout << "The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, iters=yourIters)." << "\n"; - cout << "Example bootstrap.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund, iters=100)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile." << "\n"; - cout << "There is no default value for calc. The default for iters is 1000." << "\n"; - validCalcs->printCalc("boot", cout); - cout << "The bootstrap.shared command outputs a .tre file for each calculator you specify at each distance you choose containing iters number of trees." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "concensus") { - cout << "The concensus command can only be executed after a successful read.tree command." << "\n"; - cout << "The concensus command has no parameters." << "\n"; - cout << "The concensus command should be in the following format: concensus()." << "\n"; - cout << "The concensus command output two files: .concensus.tre and .concensuspairs." << "\n"; - cout << "The .concensus.tre file contains the concensus tree of the trees in your input file." << "\n"; - cout << "The branch lengths are the percentage of trees in your input file that had the given pair." << "\n"; - cout << "The .concensuspairs file contains a list of the internal nodes in your tree. For each node, the pair that was used in the concensus tree " << "\n"; - cout << "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages." 
<< "\n" << "\n"; - }else if (globaldata->helpRequest == "bin.seqs") { - cout << "The bin.seqs command can only be executed after a successful read.otu command of a list file." << "\n"; - cout << "The bin.seqs command parameters are fasta, name, line, label and group. The fasta parameter is required, and you may not use line and label at the same time." << "\n"; - cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; - cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; - cout << "The default value for line and label are all lines in your inputfile." << "\n"; - cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n"; - cout << "If you provide a groupfile, then it also appends the sequences group to the name." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "filter.seqs") { - cout << "The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file." << "\n"; - cout << "The filter.seqs command parameters are fasta, trump, soft, hard and vertical. " << "\n"; - cout << "The fasta parameter is required." << "\n"; - cout << "The trump parameter .... The default is ...." << "\n"; - cout << "The soft parameter .... The default is ...." << "\n"; - cout << "The hard parameter .... The default is ...." << "\n"; - cout << "The vertical parameter .... The default is ...." 
<< "\n"; - cout << "The filter.seqs command should be in the following format: " << "\n"; - cout << "filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) " << "\n"; - cout << "Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=...)." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n"; - }else if (globaldata->helpRequest == "get.oturep") { - cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n"; - cout << "The get.oturep command parameters are list, fasta, name, group, line and label. The fasta and list parameters are required, and you may not use line and label at the same time." << "\n"; - cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; - cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; - cout << "The default value for line and label are all lines in your inputfile." << "\n"; - cout << "The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin." << "\n"; - cout << "If you provide a groupfile, then it also appends the names of the groups present in that bin." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." 
<< "\n" << "\n"; - }else if (globaldata->helpRequest == "quit") { - cout << "The quit command will terminate mothur and should be in the following format: " << "\n"; - cout << "quit()" << "\n" << "\n"; - }else if (globaldata->helpRequest == "") { - validCommands->printCommands(cout); - cout << "For more information about a specific command type 'help(commandName)' i.e. 'help(read.dist)'" << endl; - }else { - cout << globaldata->helpRequest << " is not a valid command" << endl; - } + validCommands->printCommands(cout); + cout << "For more information about a specific command type 'commandName(help)' i.e. 'read.dist(help)'" << endl; + + delete validCommands; cout << endl << "For further assistance please refer to the Mothur manual on our wiki at http://schloss.micro.umass.edu/mothur/, or contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; return 0; diff --git a/helpcommand.h b/helpcommand.h index 58356d1..c303678 100644 --- a/helpcommand.h +++ b/helpcommand.h @@ -14,19 +14,19 @@ #include "command.hpp" #include "globaldata.hpp" #include "validcommands.h" -#include "validcalculator.h" class HelpCommand : public Command { public: - HelpCommand(); + HelpCommand(string); ~HelpCommand(); int execute(); + void help() {}; + private: GlobalData* globaldata; ValidCommands* validCommands; - ValidCalculators* validCalcs; private: diff --git a/libshuffcommand.cpp b/libshuffcommand.cpp index 49a8767..e7e1aa7 100644 --- a/libshuffcommand.cpp +++ b/libshuffcommand.cpp @@ -20,23 +20,77 @@ //********************************************************************************************************************** -LibShuffCommand::LibShuffCommand(){ +LibShuffCommand::LibShuffCommand(string option){ try { srand( (unsigned)time( NULL ) ); globaldata = GlobalData::getInstance(); - convert(globaldata->getCutOff(), cutOff); //get the cutoff - convert(globaldata->getIters(), iters); //get the number of iterations - convert(globaldata->getStep(), step); //get the step size for the 
discrete command - matrix = globaldata->gMatrix; //get the distance matrix - setGroups(); //set the groups to be analyzed + abort = false; + Groups.clear(); + + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"iters","groups","step","form","cutoff"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.dist command + if ((globaldata->gMatrix == NULL) || (globaldata->gGroupmap == NULL)) { + cout << "You must read in a matrix and groupfile using the read.dist command, before you use the libshuff command. " << endl; abort = true;; + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; savegroups = groups; } + else { + savegroups = groups; + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + string temp; + temp = validParameter->validFile(parameters, "iters", false); if (temp == "not found") { temp = "10000"; } + convert(temp, iters); + + temp = validParameter->validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "1.0"; } + convert(temp, cutOff); + + temp = validParameter->validFile(parameters, "step", false); if (temp == "not found") { temp = "0.01"; } + convert(temp, step); + + userform = validParameter->validFile(parameters, "form", false); if (userform == "not found") { userform = "integral"; } + + delete validParameter; + + if (abort == false) { + + matrix = globaldata->gMatrix; //get the distance matrix + setGroups(); //set the groups to be analyzed - if(globaldata->getForm() == "discrete"){ - form = new DLibshuff(matrix, iters, step, cutOff); - } - else{ - form = new SLibshuff(matrix, iters, cutOff); + if(userform == "discrete"){ + form = new DLibshuff(matrix, iters, step, cutOff); + } + else{ + form = new SLibshuff(matrix, iters, cutOff); + } + } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -48,12 +102,39 @@ LibShuffCommand::LibShuffCommand(){ } } +//********************************************************************************************************************** + +void LibShuffCommand::help(){ + try { + cout << "The libshuff command can only be executed after a successful read.dist command including a groupfile." << "\n"; + cout << "The libshuff command parameters are groups, iters, step, form and cutoff. No parameters are required." 
<< "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; + cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random matrices you would like compared to your matrix." << "\n"; + cout << "The step parameter allows you to specify change in distance you would like between each output if you are using the discrete form." << "\n"; + cout << "The form parameter allows you to specify if you would like to analyze your matrix using the discrete or integral form. Your options are integral or discrete." << "\n"; + cout << "The libshuff command should be in the following format: libshuff(groups=yourGroups, iters=yourIters, cutOff=yourCutOff, form=yourForm, step=yourStep)." << "\n"; + cout << "Example libshuff(groups=A-B-C, iters=500, form=discrete, step=0.01, cutOff=2.0)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile, iters is 10000, cutoff is 1.0, form is integral and step is 0.01." << "\n"; + cout << "The libshuff command output two files: .coverage and .slsummary their descriptions are in the manual." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e.yourIters)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the LibShuffCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //********************************************************************************************************************** int LibShuffCommand::execute(){ try { - + + if (abort == true) { return 0; } + savedDXYValues = form->evaluateAll(); savedMinValues = form->getSavedMins(); @@ -88,6 +169,9 @@ int LibShuffCommand::execute(){ globaldata->Groups.clear(); delete form; + //delete globaldata's copy of the gmatrix to free up memory + delete globaldata->gMatrix; globaldata->gMatrix = NULL; + return 0; } catch(exception& e) { @@ -247,13 +331,13 @@ void LibShuffCommand::setGroups() { globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]); } } else { - if (globaldata->getGroups() != "all") { + if (savegroups != "all") { //check that groups are valid for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) { cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." 
<< endl; // erase the invalid group from globaldata->Groups - globaldata->Groups.erase (globaldata->Groups.begin()+i); + globaldata->Groups.erase(globaldata->Groups.begin()+i); } } diff --git a/libshuffcommand.h b/libshuffcommand.h index 07c1ba5..150bdb4 100644 --- a/libshuffcommand.h +++ b/libshuffcommand.h @@ -20,9 +20,10 @@ class GlobalData; class LibShuffCommand : public Command { public: - LibShuffCommand(); + LibShuffCommand(string); ~LibShuffCommand(){}; - int execute(); + int execute(); + void help(); private: vector groupNames; @@ -40,6 +41,12 @@ class LibShuffCommand : public Command { vector > pValueCounts; vector > savedDXYValues; vector > > savedMinValues; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + string outputFile, groups, userform, savegroups; + vector Groups; //holds groups to be used }; #endif diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index 6602bf9..1e4de75 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -22,41 +22,116 @@ //********************************************************************************************************************** -MatrixOutputCommand::MatrixOutputCommand(){ +MatrixOutputCommand::MatrixOutputCommand(string option){ try { globaldata = GlobalData::getInstance(); - validCalculator = new ValidCalculators(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Groups.clear(); + Estimators.clear(); + + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"line","label","calc","groups"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != 
parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getSharedFile() == "") { + if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; } + else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; } + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "jclass-thetayc"; } + else { + if (calc == "default") { calc = "jclass-thetayc"; } + } + splitAtDash(calc, Estimators); + + delete validParameter; + + + if (abort == false) { - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("matrix", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "jabund") { - matrixCalculators.push_back(new JAbund()); - }else if (globaldata->Estimators[i] == "sorabund") { - matrixCalculators.push_back(new SorAbund()); - }else if (globaldata->Estimators[i] == "jclass") { - matrixCalculators.push_back(new Jclass()); - }else if (globaldata->Estimators[i] == "sorclass") { - matrixCalculators.push_back(new SorClass()); - }else if (globaldata->Estimators[i] == "jest") { - matrixCalculators.push_back(new Jest()); - }else if (globaldata->Estimators[i] == "sorest") { - matrixCalculators.push_back(new SorEst()); - }else if (globaldata->Estimators[i] == "thetayc") { - matrixCalculators.push_back(new ThetaYC()); - }else if (globaldata->Estimators[i] == "thetan") { - matrixCalculators.push_back(new ThetaN()); - }else if (globaldata->Estimators[i] == "morisitahorn") { - matrixCalculators.push_back(new MorHorn()); - }else if (globaldata->Estimators[i] == "braycurtis") { - matrixCalculators.push_back(new BrayCurtis()); + validCalculator = new ValidCalculators(); + + int i; + for (i=0; iisValidCalculator("matrix", Estimators[i]) == true) { + if (Estimators[i] == 
"jabund") { + matrixCalculators.push_back(new JAbund()); + }else if (Estimators[i] == "sorabund") { + matrixCalculators.push_back(new SorAbund()); + }else if (Estimators[i] == "jclass") { + matrixCalculators.push_back(new Jclass()); + }else if (Estimators[i] == "sorclass") { + matrixCalculators.push_back(new SorClass()); + }else if (Estimators[i] == "jest") { + matrixCalculators.push_back(new Jest()); + }else if (Estimators[i] == "sorest") { + matrixCalculators.push_back(new SorEst()); + }else if (Estimators[i] == "thetayc") { + matrixCalculators.push_back(new ThetaYC()); + }else if (Estimators[i] == "thetan") { + matrixCalculators.push_back(new ThetaN()); + }else if (Estimators[i] == "morisitahorn") { + matrixCalculators.push_back(new MorHorn()); + }else if (Estimators[i] == "braycurtis") { + matrixCalculators.push_back(new BrayCurtis()); + } + } } + } } - //reset calc for next command - globaldata->setCalc(""); - } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -67,17 +142,49 @@ MatrixOutputCommand::MatrixOutputCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void MatrixOutputCommand::help(){ + try { + cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n"; + cout << "The dist.shared command parameters are groups, calc, line and label. The calc parameter is required, and you may not use line and label at the same time." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n"; + cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." 
<< "\n"; + cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; + cout << "The default value for calc is jclass and thetayc." << "\n"; + validCalculator->printCalc("matrix", cout); + cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the MatrixOutputCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + + //********************************************************************************************************************** MatrixOutputCommand::~MatrixOutputCommand(){ delete input; delete read; + delete validCalculator; } //********************************************************************************************************************** int MatrixOutputCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -93,17 +200,17 @@ int MatrixOutputCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0;} - numGroups = globaldata->Groups.size(); + numGroups = lookup.size(); //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ cout << lookup[0]->getLabel() << '\t' << count << endl; process(lookup); @@ -152,7 +259,7 @@ int MatrixOutputCommand::execute(){ //reset groups parameter - globaldata->Groups.clear(); globaldata->setGroups(""); + globaldata->Groups.clear(); return 0; } diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h index 547b567..d609932 100644 --- a/matrixoutputcommand.h +++ b/matrixoutputcommand.h @@ -15,6 +15,8 @@ #include "readotu.h" #include "validcalculator.h" +// aka. dist.shared() + /* This command create a tree file for each similarity calculator at distance level, using various calculators to find the similiarity between groups. The user can select the lines or labels they wish to use as well as the groups they would like included. They can also use as many or as few calculators as they wish. 
*/ @@ -24,9 +26,10 @@ class GlobalData; class MatrixOutputCommand : public Command { public: - MatrixOutputCommand(); + MatrixOutputCommand(string); ~MatrixOutputCommand(); int execute(); + void help(); private: void printSims(ostream&); @@ -42,6 +45,14 @@ private: string exportFileName; int numGroups; ofstream out; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string outputFile, calc, groups, line, label; + vector Estimators, Groups; //holds estimators to be used }; diff --git a/nocommands.cpp b/nocommands.cpp index 974fce4..9ac565b 100644 --- a/nocommands.cpp +++ b/nocommands.cpp @@ -8,10 +8,11 @@ */ #include "nocommands.h" +#include "validcommands.h" //********************************************************************************************************************** -NoCommand::NoCommand(){} +NoCommand::NoCommand(string option){} //********************************************************************************************************************** @@ -22,7 +23,11 @@ NoCommand::~NoCommand(){} int NoCommand::execute(){ //Could choose to give more help here?fdsah cout << "Invalid command." << "\n"; - cout << "For more information on command parameters use the help() command." << "\n"; + + ValidCommands* valid = new ValidCommands(); + valid->printCommands(cout); + delete valid; + return 0; } diff --git a/nocommands.h b/nocommands.h index b529392..2da23d9 100644 --- a/nocommands.h +++ b/nocommands.h @@ -16,9 +16,10 @@ class NoCommand : public Command { public: - NoCommand(); + NoCommand(string); ~NoCommand(); int execute(); + void help() {} private: diff --git a/optionparser.cpp b/optionparser.cpp new file mode 100644 index 0000000..7ff6ae7 --- /dev/null +++ b/optionparser.cpp @@ -0,0 +1,40 @@ +/* + * optionparser.cpp + * Mothur + * + * Created by Sarah Westcott on 6/8/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
+ * + */ + +#include "optionparser.h" + +/***********************************************************************/ +void OptionParser::parse(string option, map& container) { + try { + + if (option != "") { + + string key, value; + //reads in parameters and values + while((option.find_first_of(',') != -1)) { //while there are parameters + splitAtComma(value, option); + splitAtEquals(key, value); + container[key] = value; + } + + //in case there is no comma and to get last parameter after comma + splitAtEquals(key, option); + container[key] = option; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the OptionParser class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the OptionParser class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/***********************************************************************/ \ No newline at end of file diff --git a/optionparser.h b/optionparser.h new file mode 100644 index 0000000..b4d0dcb --- /dev/null +++ b/optionparser.h @@ -0,0 +1,30 @@ +#ifndef OPTIONPARSER_H +#define OPTIONPARSER_H + +/* + * optionparser.h + * Mothur + * + * Created by Sarah Westcott on 6/8/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
+ * + */ + + + +#include "mothur.h" + + +/***********************************************************************/ + +class OptionParser { + public: + OptionParser() {} + ~OptionParser() {} + void parse(string, map&); //pass it an option string and a container + //fills the container key=parameter name, value=parameter value +}; + +/***********************************************************************/ + +#endif diff --git a/parselistcommand.h b/parselistcommand.h index 4aa4ccd..9f88bc6 100644 --- a/parselistcommand.h +++ b/parselistcommand.h @@ -31,7 +31,8 @@ class ParseListCommand : public Command { public: ParseListCommand(); ~ParseListCommand(); - int execute(); + int execute(); + void help() {} private: GlobalData* globaldata; diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 56305e2..e081279 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -10,37 +10,80 @@ #include "parsimonycommand.h" /***********************************************************/ -ParsimonyCommand::ParsimonyCommand() { +ParsimonyCommand::ParsimonyCommand(string option) { try { globaldata = GlobalData::getInstance(); + abort = false; + Groups.clear(); - //randomtree will tell us if user had their own treefile or if they just want the random distribution - randomtree = globaldata->getRandomTree(); + //allow user to run help + if(option == "help") { help(); abort = true; } - //user has entered their own tree - if (randomtree == "") { - T = globaldata->gTree; - tmap = globaldata->gTreemap; - output = new ColumnFile(globaldata->getTreeFile() + ".parsimony"); - sumFile = globaldata->getTreeFile() + ".psummary"; - openOutputFile(sumFile, outSum); - }else { //user wants random distribution - savetmap = globaldata->gTreemap; - getUserInput(); - output = new ColumnFile(randomtree); - } - - //set users groups to analyze - util = new SharedUtil(); - util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants 
to analyze - util->getCombos(groupComb, globaldata->Groups, numComp); - globaldata->setGroups(""); - - if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } + else { + //valid paramters for this command + string Array[] = {"random","groups","iters"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); - convert(globaldata->getIters(), iters); //how many random trees to generate - pars = new Parsimony(tmap); - counter = 0; + //check to make sure all parameters are valid for command + for (it4 = parameters.begin(); it4 != parameters.end(); it4++) { + if (validParameter->isValidParameter(it4->first, myArray, it4->second) != true) { abort = true; } + } + + randomtree = validParameter->validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; } + + //are you trying to use parsimony without reading a tree or saying you want random distribution + if (randomtree == "") { + if (globaldata->gTree.size() == 0) { + cout << "You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command." << endl; abort = true; } + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + itersString = validParameter->validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } + convert(itersString, iters); + + delete validParameter; + + if (abort == false) { + //randomtree will tell us if user had their own treefile or if they just want the random distribution + //user has entered their own tree + if (randomtree == "") { + T = globaldata->gTree; + tmap = globaldata->gTreemap; + output = new ColumnFile(globaldata->getTreeFile() + ".parsimony", itersString); + sumFile = globaldata->getTreeFile() + ".psummary"; + openOutputFile(sumFile, outSum); + }else { //user wants random distribution + savetmap = globaldata->gTreemap; + getUserInput(); + output = new ColumnFile(randomtree, itersString); + } + + //set users groups to analyze + util = new SharedUtil(); + util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze + util->getCombos(groupComb, globaldata->Groups, numComp); + + if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } + + pars = new Parsimony(tmap); + counter = 0; + + } + + } } catch(exception& e) { @@ -52,9 +95,38 @@ ParsimonyCommand::ParsimonyCommand() { exit(1); } } + +//********************************************************************************************************************** + +void ParsimonyCommand::help(){ + try { + cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n"; + cout << "The parsimony command parameters are random, groups and iters. No parameters are required." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. 
You must enter at least 1 valid group." << "\n"; + cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n"; + cout << "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters)." << "\n"; + cout << "Example parsimony(random=out, iters=500)." << "\n"; + cout << "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony)," << "\n"; + cout << "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ParsimonyCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + + /***********************************************************/ int ParsimonyCommand::execute() { try { + + if (abort == true) { return 0; } + Progress* reading; reading = new Progress("Comparing to random:", iters); @@ -195,8 +267,6 @@ int ParsimonyCommand::execute() { globaldata->gTreemap = savetmap; } - //reset randomTree parameter to "" - globaldata->setRandomTree(""); //reset groups parameter globaldata->Groups.clear(); @@ -264,11 +334,11 @@ void ParsimonyCommand::printUSummaryFile() { for (int i = 0; i< T.size(); i++) { for(int a = 0; a < numComp; a++) { if (UScoreSig[a][i] > (1/(float)iters)) { - outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl; - cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl; + outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl; }else { - outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << "<" << (1/float(iters)) << endl; - cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << "<" << (1/float(iters)) << endl; + outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' 
<< "<" << (1/float(iters)) << endl; } } } @@ -323,6 +393,7 @@ void ParsimonyCommand::getUserInput() { //memory leak prevention //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; } globaldata->gTreemap = tmap; + globaldata->Treenames = tmap->namesOfSeqs; } catch(exception& e) { diff --git a/parsimonycommand.h b/parsimonycommand.h index 603506f..bbcd6af 100644 --- a/parsimonycommand.h +++ b/parsimonycommand.h @@ -22,9 +22,10 @@ class GlobalData; class ParsimonyCommand : public Command { public: - ParsimonyCommand(); + ParsimonyCommand(string); ~ParsimonyCommand() { delete pars; delete util; delete output; } int execute(); + void help(); private: GlobalData* globaldata; @@ -55,6 +56,13 @@ class ParsimonyCommand : public Command { ofstream out, outSum; ifstream inFile; + OptionParser* parser; + map parameters; + map::iterator it4; + bool abort; + string groups, itersString; + vector Groups; //holds groups to be used + void printParsimonyFile(); void printUSummaryFile(); void getUserInput(); diff --git a/quitcommand.cpp b/quitcommand.cpp index 6b90f4b..87c2fa0 100644 --- a/quitcommand.cpp +++ b/quitcommand.cpp @@ -11,7 +11,31 @@ //********************************************************************************************************************** -QuitCommand::QuitCommand(){} +QuitCommand::QuitCommand(string option){ + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else if (option != "") { cout << "There are no valid parameters for the quit command." << endl; abort = true; } + +} +//********************************************************************************************************************** + +void QuitCommand::help(){ + try { + cout << "The quit command will terminate mothur and should be in the following format: " << "\n"; + cout << "quit() or quit" << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the QuitCommand class Function help. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the QuitCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -20,6 +44,7 @@ QuitCommand::~QuitCommand(){} //********************************************************************************************************************** int QuitCommand::execute(){ + if (abort == true) { return 0; } return 1; } diff --git a/quitcommand.h b/quitcommand.h index 7f7b7e2..8b89306 100644 --- a/quitcommand.h +++ b/quitcommand.h @@ -19,12 +19,13 @@ class QuitCommand : public Command { public: - QuitCommand(); + QuitCommand(string); ~QuitCommand(); int execute(); + void help(); private: - + bool abort; }; #endif diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 114c8d0..680f68c 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -22,48 +22,118 @@ //********************************************************************************************************************** -RareFactCommand::RareFactCommand(){ +RareFactCommand::RareFactCommand(string option){ try { globaldata = GlobalData::getInstance(); - string fileNameRoot; - fileNameRoot = getRootName(globaldata->inputFileName); - convert(globaldata->getFreq(), freq); - convert(globaldata->getIters(), nIters); - validCalculator = new ValidCalculators(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Estimators.clear(); - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("rarefaction", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "sobs") { - rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction"))); - }else if (globaldata->Estimators[i] == "chao") { - 
rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao"))); - }else if (globaldata->Estimators[i] == "ace") { - convert(globaldata->getAbund(), abund); - if(abund < 5) - abund = 10; - rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace"))); - }else if (globaldata->Estimators[i] == "jack") { - rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack"))); - }else if (globaldata->Estimators[i] == "shannon") { - rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon"))); - }else if (globaldata->Estimators[i] == "npshannon") { - rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon"))); - }else if (globaldata->Estimators[i] == "simpson") { - rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson"))); - }else if (globaldata->Estimators[i] == "bootstrap") { - rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap"))); - }else if (globaldata->Estimators[i] == "coverage") { - rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage"))); - }else if (globaldata->Estimators[i] == "nseqs") { - rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs"))); + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"iters","freq","line","label","calc","abund"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if 
(validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { cout << "You must read a list, sabund or rabund before you can use the rarefaction.single command." << endl; abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "sobs"; } + else { + if (calc == "default") { calc = "sobs"; } + } + splitAtDash(calc, Estimators); + + string temp; + temp = validParameter->validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } + convert(temp, freq); + + temp = validParameter->validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } + convert(temp, abund); + + temp = validParameter->validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, nIters); + + delete validParameter; + + if (abort == false) { + + string fileNameRoot = getRootName(globaldata->inputFileName); + int i; + validCalculator = new ValidCalculators(); + + + for (i=0; iisValidCalculator("rarefaction", Estimators[i]) == true) { + if (Estimators[i] == "sobs") { + rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction"))); + }else if (Estimators[i] == "chao") { + rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao"))); + }else if (Estimators[i] == "ace") { + if(abund < 5) + abund = 10; + rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace"))); + }else if (Estimators[i] == "jack") { + rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack"))); + }else if (Estimators[i] == "shannon") { + rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon"))); + }else if (Estimators[i] == "npshannon") { + rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon"))); + }else if 
(Estimators[i] == "simpson") { + rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson"))); + }else if (Estimators[i] == "bootstrap") { + rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap"))); + }else if (Estimators[i] == "coverage") { + rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage"))); + }else if (Estimators[i] == "nseqs") { + rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs"))); + } + } } } + } - //reset calc for next command - globaldata->setCalc(""); - } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the RareFactCommand class Function RareFactCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -75,6 +145,30 @@ RareFactCommand::RareFactCommand(){ } } +//********************************************************************************************************************** + +void RareFactCommand::help(){ + try { + cout << "The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n"; + cout << "The rarefaction.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; + cout << "The rarefaction.single command parameters are label, line, iters, freq, calc and abund. No parameters are required, but you may not use " << "\n"; + cout << "both the line and label parameters at the same time. The rarefaction.single command should be in the following format: " << "\n"; + cout << "rarefaction.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n"; + cout << "Example rarefaction.single(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson)." 
<< "\n"; + cout << "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness." << "\n"; + validCalculator->printCalc("rarefaction", cout); + cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the RareFactCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the RareFactCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -83,12 +177,16 @@ RareFactCommand::~RareFactCommand(){ delete input; delete rCurve; delete read; + delete validCalculator; } //********************************************************************************************************************** int RareFactCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -103,13 +201,13 @@ int RareFactCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; //as long as you are not at the end of the file or done wih the lines you want - while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){ rCurve = new Rarefact(order, rDisplays); rCurve->getCurve(freq, nIters); diff --git a/rarefactcommand.h b/rarefactcommand.h index 5ac970e..ea867f0 100644 --- a/rarefactcommand.h +++ b/rarefactcommand.h @@ -37,9 +37,10 @@ class GlobalData; class RareFactCommand : public Command { public: - RareFactCommand(); + RareFactCommand(string); ~RareFactCommand(); - int execute(); + int execute(); + void help(); private: GlobalData* globaldata; @@ -50,6 +51,15 @@ private: ValidCalculators* validCalculator; Rarefact* rCurve; int freq, nIters, abund; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string line, label, calc; + vector Estimators; + }; #endif diff --git a/rarefactsharedcommand.cpp b/rarefactsharedcommand.cpp index c930025..66ef6dd 100644 --- a/rarefactsharedcommand.cpp +++ b/rarefactsharedcommand.cpp @@ -13,30 +13,105 @@ //********************************************************************************************************************** -RareFactSharedCommand::RareFactSharedCommand(){ +RareFactSharedCommand::RareFactSharedCommand(string option){ try { globaldata = GlobalData::getInstance(); - string fileNameRoot; - fileNameRoot = getRootName(globaldata->inputFileName); - format = 
globaldata->getFormat(); - convert(globaldata->getFreq(), freq); - convert(globaldata->getIters(), nIters); - validCalculator = new ValidCalculators(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Estimators.clear(); + + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"iters","line","label","calc","groups"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getSharedFile() == "") { + if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared command." << endl; abort = true; } + else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared command." << endl; abort = true; } + } + + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("sharedrarefaction", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "sharedobserved") { - rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"shared.rarefaction", ""))); - }else if (globaldata->Estimators[i] == "sharednseqs") { - rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+"shared.r_nseqs", ""))); + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "sharedobserved"; } + else { + if (calc == "default") { calc = "sharedobserved"; } + } + splitAtDash(calc, Estimators); + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + string temp; + temp = validParameter->validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, nIters); + + delete validParameter; + + if (abort == false) { + + string fileNameRoot = 
getRootName(globaldata->inputFileName); + format = globaldata->getFormat(); + int i; + + validCalculator = new ValidCalculators(); + + for (i=0; iisValidCalculator("sharedrarefaction", Estimators[i]) == true) { + if (Estimators[i] == "sharedobserved") { + rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"shared.rarefaction", ""))); + }else if (Estimators[i] == "sharednseqs") { + rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+"shared.r_nseqs", ""))); + } + } } - } + } - - //reset calc for next command - globaldata->setCalc(""); } catch(exception& e) { @@ -52,16 +127,46 @@ RareFactSharedCommand::RareFactSharedCommand(){ //********************************************************************************************************************** +void RareFactSharedCommand::help(){ + try { + cout << "The rarefaction.shared command can only be executed after a successful read.otu command." << "\n"; + cout << "The rarefaction.shared command parameters are label, line, iters, groups and calc. No parameters are required, but you may not use " << "\n"; + cout << "both the line and label parameters at the same time. The rarefaction command should be in the following format: " << "\n"; + cout << "rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, calc=yourEstimators, groups=yourGroups)." << "\n"; + cout << "Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, groups=B-C, calc=sharedobserved)." << "\n"; + cout << "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness." << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; + validCalculator->printCalc("sharedrarefaction", cout); + cout << "The label and line parameters are used to analyze specific lines in your input." 
<< "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the RareFactSharedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the RareFactSharedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + RareFactSharedCommand::~RareFactSharedCommand(){ delete input; delete rCurve; delete read; + delete validCalculator; } //********************************************************************************************************************** int RareFactSharedCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -82,13 +187,13 @@ int RareFactSharedCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ rCurve = new Rarefact(lookup, rDisplays); rCurve->getSharedCurve(freq, nIters); @@ -145,7 +250,7 @@ int RareFactSharedCommand::execute(){ for(int i=0;iGroups.clear(); globaldata->setGroups(""); + globaldata->Groups.clear(); return 0; } diff --git a/rarefactsharedcommand.h b/rarefactsharedcommand.h index 1935398..d81ff6d 100644 --- a/rarefactsharedcommand.h +++ b/rarefactsharedcommand.h @@ -19,11 +19,11 @@ /* The rarefaction.shared() command: The rarefaction command generates a rarefaction curve from a given file representing several groups. The rarefaction.shared command can only be executed after a successful read.shared command. It outputs a file for each estimator you choose to use. - The rarefaction.shared command parameters are label, line, iters, jumble and sharedrarefaction. + The rarefaction.shared command parameters are label, line, iters and sharedrarefaction. No parameters are required, but you may not use both the line and label parameters at the same time. The rarefaction command should be in the following format: rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, - jumble= yourJumble, sharedrarefaction=yourEstimators). Example rarefaction.shared(label=unique-.01-.03, line=0,5,10, iters=10000, - jumble=1, sharedrarefaction =sharedobserved). 
The default values for jumble is 0 (meaning don’t jumble, if it’s set to 1 then it will jumble), + sharedrarefaction=yourEstimators). Example rarefaction.shared(label=unique-.01-.03, line=0,5,10, iters=10000, + sharedrarefaction =sharedobserved). The default values for iters is 1000 and sharedrarefaction is sharedobserved which calculates the shared rarefaction curve for the observed richness. The valid sharedrarefaction estimator is sharedobserved. The label and line parameters are used to analyze specific lines in your input. */ @@ -33,9 +33,10 @@ class GlobalData; class RareFactSharedCommand : public Command { public: - RareFactSharedCommand(); + RareFactSharedCommand(string); ~RareFactSharedCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -47,6 +48,15 @@ private: vector rDisplays; int freq, nIters; string format; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string line, label, calc, groups; + vector Estimators, Groups; + }; diff --git a/readdistcommand.cpp b/readdistcommand.cpp index 8de0c34..31d8308 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -12,42 +12,107 @@ #include "readcolumn.h" #include "readmatrix.hpp" -ReadDistCommand::ReadDistCommand(){ +ReadDistCommand::ReadDistCommand(string option){ try { globaldata = GlobalData::getInstance(); + abort = false; - filename = globaldata->inputFileName; - format = globaldata->getFormat(); + //allow user to run help + if(option == "help") { help(); abort = true; } - if (format == "column") { read = new ReadColumnMatrix(filename); } - else if (format == "phylip") { read = new ReadPhylipMatrix(filename); } - else if (format == "matrix") { - groupMap = new GroupMap(globaldata->getGroupFile()); - groupMap->readMap(); - //if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } - globaldata->gGroupmap = groupMap; - } + else { + //valid paramters for 
this command + string Array[] = {"phylip","column", "name","cutoff","precision", "group"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); - if (format != "matrix" ) { - if(globaldata->getPrecision() != ""){ - convert(globaldata->getPrecision(), precision); + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + globaldata->newRead(); + + //check for required parameters + phylipfile = validParameter->validFile(parameters, "phylip", true); + if (phylipfile == "not open") { abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; } + else { globaldata->setPhylipFile(phylipfile); globaldata->setFormat("phylip"); } + + columnfile = validParameter->validFile(parameters, "column", true); + if (columnfile == "not open") { abort = true; } + else if (columnfile == "not found") { columnfile = ""; } + else { globaldata->setColumnFile(columnfile); globaldata->setFormat("column"); } + + groupfile = validParameter->validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { + globaldata->setGroupFile(groupfile); + groupMap = new GroupMap(groupfile); + groupMap->readMap(); } + + namefile = validParameter->validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } + else { globaldata->setNameFile(namefile); } + + + //you are doing a list and group shared + if ((phylipfile != "") && (groupfile != "")) { globaldata->setFormat("matrix"); } + + if ((phylipfile == "") && (columnfile == "")) { cout << "When executing a read.dist command you must enter a phylip or a 
column." << endl; abort = true; } + else if ((phylipfile != "") && (columnfile != "")) { cout << "When executing a read.dist command you must enter ONLY ONE of the following: phylip or column." << endl; abort = true; } - if(globaldata->getCutOff() != ""){ - convert(globaldata->getCutOff(), cutoff); - cutoff += (5 / (precision * 10.0)); + if (columnfile != "") { + if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; } } - read->setCutoff(cutoff); + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + //get user cutoff and precision or use defaults + string temp; + temp = validParameter->validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } + convert(temp, precision); + + temp = validParameter->validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } + convert(temp, cutoff); + cutoff += (5 / (precision * 10.0)); + + delete validParameter; + + if (abort == false) { + filename = globaldata->inputFileName; + format = globaldata->getFormat(); + + if (format == "column") { read = new ReadColumnMatrix(filename); } + else if (format == "phylip") { read = new ReadPhylipMatrix(filename); } + else if (format == "matrix") { + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } + globaldata->gGroupmap = groupMap; + } + + if (format != "matrix" ) { + read->setCutoff(cutoff); - if(globaldata->getNameFile() != ""){ - nameMap = new NameAssignment(globaldata->getNameFile()); - nameMap->readMap(1,2); - } - else{ - nameMap = NULL; + if(namefile != ""){ + nameMap = new NameAssignment(namefile); + nameMap->readMap(1,2); + }else{ + nameMap = NULL; + } + } } + } - + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadDistCommand class Function ReadDistCommand. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -58,6 +123,30 @@ ReadDistCommand::ReadDistCommand(){ exit(1); } } +//********************************************************************************************************************** + +void ReadDistCommand::help(){ + try { + cout << "The read.dist command parameter options are phylip or column, group, name, cutoff and precision" << "\n"; + cout << "The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command" << "\n"; + cout << "For this use the read.dist command should be in the following format: " << "\n"; + cout << "read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) " << "\n"; + cout << "The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. " << "\n"; + cout << "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed." << "\n"; + cout << "The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command." << "\n"; + cout << "For this use the read.dist command should be in the following format: " << "\n"; + cout << "read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. " << "\n"; + cout << "Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadDistCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadDistCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //********************************************************************************************************************** ReadDistCommand::~ReadDistCommand(){ @@ -69,6 +158,8 @@ ReadDistCommand::~ReadDistCommand(){ int ReadDistCommand::execute(){ try { + if (abort == true) { return 0; } + if (format == "matrix") { ifstream in; openInputFile(filename, in); diff --git a/readdistcommand.h b/readdistcommand.h index 981afa3..b6e3ae4 100644 --- a/readdistcommand.h +++ b/readdistcommand.h @@ -26,9 +26,10 @@ class GlobalData; class ReadDistCommand : public Command { public: - ReadDistCommand(); + ReadDistCommand(string); ~ReadDistCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -38,7 +39,13 @@ private: FullMatrix* matrix; GroupMap* groupMap; string filename, format, method; + string phylipfile, columnfile, namefile, groupfile; NameAssignment* nameMap; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + }; #endif diff --git a/readotu.cpp b/readotu.cpp index 455641a..09a6c4c 100644 --- a/readotu.cpp +++ b/readotu.cpp @@ -20,6 +20,7 @@ ReadOTUFile::ReadOTUFile(string pf): philFile(pf){ //This function reads the list, rabund or sabund files to be used by collect and rarefact command. void ReadOTUFile::read(GlobalData* globaldata){ try { + if (globaldata->getOrderFile() == "") { //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file. //So when you run the collect or summary commands you miss a line. @@ -46,6 +47,7 @@ void ReadOTUFile::read(GlobalData* globaldata){ globaldata->rabund = rabund; //saving to be used by heatmap.bin command. list = inputList->getListVector(); globaldata->gListVector = list; + }else if (globaldata->getFormat() == "shared") { SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands. 
//memory leak prevention diff --git a/readotucommand.cpp b/readotucommand.cpp index c8ec791..1f1ad97 100644 --- a/readotucommand.cpp +++ b/readotucommand.cpp @@ -10,15 +10,108 @@ #include "readotucommand.h" //********************************************************************************************************************** -ReadOtuCommand::ReadOtuCommand(){ +ReadOtuCommand::ReadOtuCommand(string option){ try { globaldata = GlobalData::getInstance(); - filename = globaldata->inputFileName; - read = new ReadOTUFile(filename); - if (globaldata->getFormat() == "shared") { - //read in group map info. - groupMap = new GroupMap(globaldata->getGroupFile()); + abort = false; + allLines = 1; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"list","order","shared", "line", "label","group","sabund", "rabund"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + globaldata->newRead(); + + //check for required parameters + listfile = validParameter->validFile(parameters, "list", true); + if (listfile == "not open") { abort = true; } + else if (listfile == "not found") { listfile = ""; } + else { globaldata->setListFile(listfile); globaldata->setFormat("list"); } + + sabundfile = validParameter->validFile(parameters, "sabund", true); + if (sabundfile == "not open") { abort = true; } + else if (sabundfile == "not found") { sabundfile = ""; } + else { globaldata->setSabundFile(sabundfile); globaldata->setFormat("sabund"); } + + rabundfile = validParameter->validFile(parameters, "rabund", true); + if 
(rabundfile == "not open") { abort = true; } + else if (rabundfile == "not found") { rabundfile = ""; } + else { globaldata->setRabundFile(rabundfile); globaldata->setFormat("rabund");} + + sharedfile = validParameter->validFile(parameters, "shared", true); + if (sharedfile == "not open") { abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + else { globaldata->setSharedFile(sharedfile); globaldata->setFormat("sharedfile"); } + + groupfile = validParameter->validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { + globaldata->setGroupFile(groupfile); + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + } + + //you are doing a list and group shared + if ((listfile != "") && (groupfile != "")) { globaldata->setFormat("shared"); } + + //you have not given a file + if ((listfile == "") && (sharedfile == "") && (rabundfile == "") && (sabundfile == "")) { + cout << "You must enter either a listfile, rabundfile, sabundfile or a sharedfile with the read.otu command. " << endl; abort = true; + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + globaldata->lines = lines; + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + globaldata->labels = labels; + } + + globaldata->allLines = allLines; + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + + orderfile = validParameter->validFile(parameters, "order", true); + if (orderfile == "not open") { abort = true; } + else if (orderfile == "not found") { orderfile = ""; } + else { globaldata->setOrderFile(orderfile); } + + delete validParameter; + + if (abort == false) { + //gets whichever one of the above is set + filename = globaldata->inputFileName; + read = new ReadOTUFile(filename); + } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -29,17 +122,50 @@ ReadOtuCommand::ReadOtuCommand(){ exit(1); } } +//********************************************************************************************************************** + +void ReadOtuCommand::help(){ + try { + cout << "The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, " << "\n"; + cout << "collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command " << "\n"; + cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order, line and label." << "\n"; + cout << "The read.otu command can be used in two ways. The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single." << "\n"; + cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels)." << "\n"; + cout << "The list, rabund or sabund parameter is required, but you may only use one of them." << "\n"; + cout << "The line and label parameters are optional but you may not use both the line and label parameters at the same time." << "\n"; + cout << "The label and line parameters are used to read specific lines in your input." 
<< "\n"; + cout << "The second way to use the read.otu command is to read a list and a group, or a shared so you can use the collect.shared, rarefaction.shared or summary.shared commands." << "\n"; + cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile, line=yourLines) or read.otu(shared=yourSharedFile). " << "\n"; + cout << "The list parameter and group paramaters or the shared paremeter is required. When using the command the second way with a list and group file read.otu command parses the .list file" << "\n"; + cout << "and separates it into groups. It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. " << "\n"; + cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadOtuCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + + //********************************************************************************************************************** ReadOtuCommand::~ReadOtuCommand(){ - delete read; + if (abort == false) { delete read; } } //********************************************************************************************************************** int ReadOtuCommand::execute(){ try { + + if (abort == true) { return 0; } + read->read(&*globaldata); if (globaldata->getFormat() == "shared") { groupMap->readMap(); diff --git a/readotucommand.h b/readotucommand.h index a21167e..586e6ef 100644 --- a/readotucommand.h +++ b/readotucommand.h @@ -33,9 +33,10 @@ class GlobalData; class ReadOtuCommand : public Command { public: - ReadOtuCommand(); + ReadOtuCommand(string); ~ReadOtuCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -44,7 +45,14 @@ private: Command* shared; Command* parselist; GroupMap* groupMap; - string filename; + string filename, listfile, orderfile, sharedfile, line, label, groupfile, sabundfile, rabundfile, format; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + }; #endif diff --git a/readtreecommand.cpp b/readtreecommand.cpp index 19ab675..6e5742d 100644 --- a/readtreecommand.cpp +++ b/readtreecommand.cpp @@ -10,24 +10,55 @@ #include "readtreecommand.h" //********************************************************************************************************************** -ReadTreeCommand::ReadTreeCommand(){ +ReadTreeCommand::ReadTreeCommand(string option){ try { globaldata = GlobalData::getInstance(); - filename = globaldata->inputFileName; + abort = false; - //read in group map info. 
- treeMap = new TreeMap(globaldata->getGroupFile()); - treeMap->readMap(); + //allow user to run help + if(option == "help") { help(); abort = true; } - //memory leak prevention - //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; } - globaldata->gTreemap = treeMap; - - //get names in tree - globaldata->parseTreeFile(); - - read = new ReadNewickTree(filename); + else { + //valid paramters for this command + string Array[] = {"tree","group"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + globaldata->newRead(); + + //check for required parameters + treefile = validParameter->validFile(parameters, "tree", true); + if (treefile == "not open") { abort = true; } + else if (treefile == "not found") { treefile = ""; cout << "tree is a required parameter for the read.tree command." << endl; abort = true; } + else { globaldata->setTreeFile(treefile); globaldata->setFormat("tree"); } + + groupfile = validParameter->validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; cout << "group is a required parameter for the read.tree command." << endl; abort = true; } + else { + globaldata->setGroupFile(groupfile); + //read in group map info. + treeMap = new TreeMap(groupfile); + treeMap->readMap(); + globaldata->gTreemap = treeMap; + } + + if (abort == false) { + filename = treefile; + read = new ReadNewickTree(filename); + } + + delete validParameter; + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadTreeCommand class Function ReadTreeCommand. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -38,6 +69,25 @@ ReadTreeCommand::ReadTreeCommand(){ exit(1); } } +//********************************************************************************************************************** + +void ReadTreeCommand::help(){ + try { + cout << "The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. " << "\n"; + cout << "It also must be run before using the parsimony command, unless you are using the randomtree parameter." << "\n"; + cout << "The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile)." << "\n"; + cout << "The tree and group parameters are both required." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTreeCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTreeCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //********************************************************************************************************************** @@ -49,6 +99,9 @@ ReadTreeCommand::~ReadTreeCommand(){ int ReadTreeCommand::execute(){ try { + + if (abort == true) { return 0; } + int readOk; readOk = read->read(); diff --git a/readtreecommand.h b/readtreecommand.h index 2ee72a1..ca9f3b8 100644 --- a/readtreecommand.h +++ b/readtreecommand.h @@ -18,15 +18,21 @@ class GlobalData; class ReadTreeCommand : public Command { public: - ReadTreeCommand(); + ReadTreeCommand(string); ~ReadTreeCommand(); int execute(); + void help(); private: GlobalData* globaldata; ReadTree* read; TreeMap* treeMap; - string filename; + string filename, treefile, groupfile; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + }; diff --git a/reversecommand.cpp b/reversecommand.cpp index 15f63f6..b8ccc07 100644 --- a/reversecommand.cpp +++ b/reversecommand.cpp @@ -13,10 +13,37 @@ //*************************************************************************************************************** -ReverseSeqsCommand::ReverseSeqsCommand(){ +ReverseSeqsCommand::ReverseSeqsCommand(string option){ try { globaldata = GlobalData::getInstance(); - if(globaldata->getFastaFile() == "") { cout << "you need to at least enter a fasta file name" << endl; } + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"fasta"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required 
parameters + fasta = validParameter->validFile(parameters, "fasta", true); + if (fasta == "not open") { abort = true; } + else if (fasta == "not found") { fasta = ""; cout << "fasta is a required parameter for the reverse.seqs command." << endl; abort = true; } + else { globaldata->setFastaFile(fasta); globaldata->setFormat("fasta"); } + + delete validParameter; + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReverseSeqsCommand class Function ReverseSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -27,6 +54,24 @@ ReverseSeqsCommand::ReverseSeqsCommand(){ exit(1); } } +//********************************************************************************************************************** + +void ReverseSeqsCommand::help(){ + try { + cout << "The reverse.seqs command reads a fastafile and ...." << "\n"; + cout << "The reverse.seqs command parameter is fasta and it is required." << "\n"; + cout << "The reverse.seqs command should be in the following format: " << "\n"; + cout << "reverse.seqs(fasta=yourFastaFile) " << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReverseSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReverseSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //*************************************************************************************************************** @@ -38,11 +83,13 @@ ReverseSeqsCommand::~ReverseSeqsCommand(){ /* do nothing */ } int ReverseSeqsCommand::execute(){ try{ + if (abort == true) { return 0; } + ifstream inFASTA; - openInputFile(globaldata->getFastaFile(), inFASTA); + openInputFile(fasta, inFASTA); ofstream outFASTA; - string reverseFile = getRootName(globaldata->getFastaFile()) + "rc" + getExtension(globaldata->getFastaFile()); + string reverseFile = getRootName(fasta) + "rc" + getExtension(fasta); openOutputFile(reverseFile, outFASTA); while(!inFASTA.eof()){ diff --git a/reversecommand.h b/reversecommand.h index 46211a5..2a996e9 100644 --- a/reversecommand.h +++ b/reversecommand.h @@ -16,12 +16,19 @@ class ReverseSeqsCommand : public Command { public: - ReverseSeqsCommand(); + ReverseSeqsCommand(string); ~ReverseSeqsCommand(); int execute(); + void help(); private: GlobalData* globaldata; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + string fasta; + }; diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index ed71559..31b6bb7 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -12,10 +12,74 @@ //*************************************************************************************************************** -ScreenSeqsCommand::ScreenSeqsCommand(){ +ScreenSeqsCommand::ScreenSeqsCommand(string option){ try { globaldata = GlobalData::getInstance(); - if(globaldata->getFastaFile() == "") { cout << "you must provide a fasta formatted file" << endl; } + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", "name", "group"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + parser = new 
OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + fastafile = validParameter->validFile(parameters, "fasta", true); + if (fastafile == "not found") { cout << "fasta is a required parameter for the screen.seqs command." << endl; abort = true; } + else if (fastafile == "not open") { abort = true; } + else { globaldata->setFastaFile(fastafile); } + + groupfile = validParameter->validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { + globaldata->setGroupFile(groupfile); + } + + namefile = validParameter->validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } + else { + globaldata->setNameFile(namefile); + } + + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ string temp; + temp = validParameter->validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; } + convert(temp, startPos); + + temp = validParameter->validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; } + convert(temp, endPos); + + temp = validParameter->validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } + convert(temp, maxAmbig); + + temp = validParameter->validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; } + convert(temp, maxHomoP); + + temp = validParameter->validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; } + convert(temp, minLength); + + temp = validParameter->validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; } + convert(temp, maxLength); + + delete validParameter; + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -26,6 +90,36 @@ ScreenSeqsCommand::ScreenSeqsCommand(){ exit(1); } } +//********************************************************************************************************************** + +void ScreenSeqsCommand::help(){ + try { + cout << "The screen.seqs command reads a fastafile and creates ....." << "\n"; + cout << "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group." << "\n"; + cout << "The fasta parameter is required." << "\n"; + cout << "The start parameter .... The default is -1." << "\n"; + cout << "The end parameter .... The default is -1." << "\n"; + cout << "The maxambig parameter .... The default is -1." << "\n"; + cout << "The maxhomop parameter .... The default is -1." << "\n"; + cout << "The minlength parameter .... The default is -1." << "\n"; + cout << "The maxlength parameter .... The default is -1." 
<< "\n"; + cout << "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile." << "\n"; + cout << "The screen.seqs command should be in the following format: " << "\n"; + cout << "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, " << "\n"; + cout << "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) " << "\n"; + cout << "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...)." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ScreenSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //*************************************************************************************************************** @@ -35,21 +129,16 @@ ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ } int ScreenSeqsCommand::execute(){ try{ - int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength; - convert(globaldata->getStartPos(), startPos); - convert(globaldata->getEndPos(), endPos); - convert(globaldata->getMaxAmbig(), maxAmbig); - convert(globaldata->getMaxHomoPolymer(), maxHomoP); - convert(globaldata->getMinLength(), minLength); - convert(globaldata->getMaxLength(), maxLength); + if (abort == true) { return 0; } + ifstream inFASTA; - openInputFile(globaldata->getFastaFile(), inFASTA); + openInputFile(fastafile, inFASTA); set badSeqNames; - string goodSeqFile = getRootName(globaldata->getFastaFile()) + "good" + getExtension(globaldata->getFastaFile()); - string badSeqFile = getRootName(globaldata->getFastaFile()) + "bad" + getExtension(globaldata->getFastaFile()); + string goodSeqFile = getRootName(fastafile) + "good" + getExtension(fastafile); + string badSeqFile = getRootName(fastafile) + "bad" + getExtension(fastafile); ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut); ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut); @@ -73,10 +162,10 @@ int ScreenSeqsCommand::execute(){ } gobble(inFASTA); } - if(globaldata->getNameFile() != ""){ + if(namefile != ""){ screenNameGroupFile(badSeqNames); } - else if(globaldata->getGroupFile() != ""){ + else if(groupfile != ""){ screenGroupFile(badSeqNames); } diff --git a/screenseqscommand.h b/screenseqscommand.h index 9b98726..97b3615 100644 --- a/screenseqscommand.h +++ b/screenseqscommand.h @@ -16,14 +16,22 @@ class ScreenSeqsCommand : public Command { public: - ScreenSeqsCommand(); + ScreenSeqsCommand(string); ~ScreenSeqsCommand(); int execute(); + void help(); + private: void screenNameGroupFile(set); void screenGroupFile(set); GlobalData* globaldata; + OptionParser* 
parser; + map parameters; + map::iterator it; + bool abort; + string fastafile, namefile, groupfile; + int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength; }; #endif diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index 3787cef..c8582b9 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -12,10 +12,37 @@ //*************************************************************************************************************** -SeqSummaryCommand::SeqSummaryCommand(){ +SeqSummaryCommand::SeqSummaryCommand(string option){ try { globaldata = GlobalData::getInstance(); - if(globaldata->getFastaFile() == "") { cout << "you need to at least enter a fasta file name" << endl; } + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"fasta"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + fastafile = validParameter->validFile(parameters, "fasta", true); + if (fastafile == "not open") { abort = true; } + else if (fastafile == "not found") { fastafile = ""; cout << "fasta is a required parameter for the summary.seqs command." << endl; abort = true; } + else { globaldata->setFastaFile(fastafile); globaldata->setFormat("fasta"); } + + delete validParameter; + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; @@ -26,6 +53,25 @@ SeqSummaryCommand::SeqSummaryCommand(){ exit(1); } } +//********************************************************************************************************************** + +void SeqSummaryCommand::help(){ + try { + cout << "The summary.seqs command reads a fastafile and ...." << "\n"; + cout << "The summary.seqs command parameter is fasta and it is required." << "\n"; + cout << "The summary.seqs command should be in the following format: " << "\n"; + cout << "summary.seqs(fasta=yourFastaFile) " << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SeqSummaryCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} //*************************************************************************************************************** @@ -35,13 +81,15 @@ SeqSummaryCommand::~SeqSummaryCommand(){ /* do nothing */ } int SeqSummaryCommand::execute(){ try{ - + + if (abort == true) { return 0; } + ifstream inFASTA; - openInputFile(globaldata->getFastaFile(), inFASTA); + openInputFile(fastafile, inFASTA); int numSeqs = 0; ofstream outSummary; - string summaryFile = globaldata->getFastaFile() + ".summary"; + string summaryFile = fastafile + ".summary"; openOutputFile(summaryFile, outSummary); vector startPosition; diff --git a/seqsummarycommand.h b/seqsummarycommand.h index 01dd450..03ec3e0 100644 --- a/seqsummarycommand.h +++ b/seqsummarycommand.h @@ -18,12 +18,19 @@ class SeqSummaryCommand : public Command { public: - SeqSummaryCommand(); + SeqSummaryCommand(string); ~SeqSummaryCommand(); int execute(); + void help(); private: GlobalData* globaldata; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + string fastafile; + }; diff --git a/sharedcommand.h b/sharedcommand.h index b3506cd..08314b5 100644 --- a/sharedcommand.h +++ b/sharedcommand.h @@ -30,6 +30,7 @@ public: SharedCommand(); ~SharedCommand(); int execute(); + void help() {} private: void printSharedData(); diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 94a6519..135763e 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -135,7 +135,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), //put file pointer back since you are now at a new distance label f.seekg(pos, ios::beg); - if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; } + if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; } } catch(exception& e) { diff --git a/sharedutilities.cpp b/sharedutilities.cpp index 5776b36..703241b 100644 --- a/sharedutilities.cpp +++ b/sharedutilities.cpp @@ -212,6 +212,7 @@ void 
SharedUtil::setGroups(vector& userGroups, vector& allGroups try { numGroups = 0; label = ""; + //if the user has not entered specific groups to analyze then do them all if (userGroups.size() != 0) { if (userGroups[0] != "all") { diff --git a/shen.cpp b/shen.cpp index a2f6188..612a253 100644 --- a/shen.cpp +++ b/shen.cpp @@ -22,9 +22,6 @@ EstOutput Shen::getValues(SAbundVector* rank){ double n = (double)rank->getNumSeqs(); double f1 = (double)rank->get(1); - int abund; - convert(globaldata->getAbund(), abund); - Ace* calc = new Ace(abund); EstOutput ace = calc->getValues(rank); diff --git a/shen.h b/shen.h index a96abe9..59d6c3a 100644 --- a/shen.h +++ b/shen.h @@ -21,11 +21,12 @@ class Shen : public Calculator { public: - Shen(int size) : m(size), Calculator("shen", 1, false) {}; + Shen(int size, int n) : m(size), abund(n), Calculator("shen", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; private: int m; + int abund; GlobalData* globaldata; }; diff --git a/summarycommand.cpp b/summarycommand.cpp index 9bc0c0f..009c278 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -31,68 +31,133 @@ //********************************************************************************************************************** -SummaryCommand::SummaryCommand(){ +SummaryCommand::SummaryCommand(string option){ try { globaldata = GlobalData::getInstance(); - validCalculator = new ValidCalculators(); - int i; + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Estimators.clear(); - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("summary", globaldata->Estimators[i]) == true) { - if(globaldata->Estimators[i] == "sobs"){ - sumCalculators.push_back(new Sobs()); - }else if(globaldata->Estimators[i] == "chao"){ - sumCalculators.push_back(new Chao1()); - }else if(globaldata->Estimators[i] == "coverage"){ - sumCalculators.push_back(new Coverage()); - }else if(globaldata->Estimators[i] == 
"geometric"){ - sumCalculators.push_back(new Geom()); - }else if(globaldata->Estimators[i] == "logseries"){ - sumCalculators.push_back(new LogSD()); - }else if(globaldata->Estimators[i] == "qstat"){ - sumCalculators.push_back(new QStat()); - }else if(globaldata->Estimators[i] == "bergerparker"){ - sumCalculators.push_back(new BergerParker()); - }else if(globaldata->Estimators[i] == "bstick"){ - sumCalculators.push_back(new BStick()); - }else if(globaldata->Estimators[i] == "ace"){ - convert(globaldata->getAbund(), abund); - if(abund < 5) - abund = 10; - sumCalculators.push_back(new Ace(abund)); - }else if(globaldata->Estimators[i] == "jack"){ - sumCalculators.push_back(new Jackknife()); - }else if(globaldata->Estimators[i] == "shannon"){ - sumCalculators.push_back(new Shannon()); - }else if(globaldata->Estimators[i] == "npshannon"){ - sumCalculators.push_back(new NPShannon()); - }else if(globaldata->Estimators[i] == "simpson"){ - sumCalculators.push_back(new Simpson()); - }else if(globaldata->Estimators[i] == "bootstrap"){ - sumCalculators.push_back(new Bootstrap()); - }else if (globaldata->Estimators[i] == "nseqs") { - sumCalculators.push_back(new NSeqs()); - }else if (globaldata->Estimators[i] == "goodscoverage") { - sumCalculators.push_back(new GoodsCoverage()); - }else if (globaldata->Estimators[i] == "efron") { - convert(globaldata->getSize(), size); - sumCalculators.push_back(new Efron(size)); - }else if (globaldata->Estimators[i] == "boneh") { - convert(globaldata->getSize(), size); - sumCalculators.push_back(new Boneh(size)); - }else if (globaldata->Estimators[i] == "solow") { - convert(globaldata->getSize(), size); - sumCalculators.push_back(new Solow(size)); - }else if (globaldata->Estimators[i] == "shen") { - convert(globaldata->getSize(), size); - sumCalculators.push_back(new Shen(size)); + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + 
string Array[] = {"line","label","calc","abund","size"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { cout << "You must read a list, sabund or rabund before you can use the summary.single command." << endl; abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + else { + if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + } + splitAtDash(calc, Estimators); + + string temp; + temp = validParameter->validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } + convert(temp, abund); + + temp = validParameter->validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; } + convert(temp, size); + + delete validParameter; + + if (abort == false) { + + validCalculator = new ValidCalculators(); + int i; + + for (i=0; iisValidCalculator("summary", Estimators[i]) == true) { + if(Estimators[i] == "sobs"){ + sumCalculators.push_back(new Sobs()); + }else if(Estimators[i] == "chao"){ + sumCalculators.push_back(new Chao1()); + }else if(Estimators[i] == "coverage"){ + sumCalculators.push_back(new Coverage()); + }else if(Estimators[i] == "geometric"){ + sumCalculators.push_back(new Geom()); + }else if(Estimators[i] == "logseries"){ + sumCalculators.push_back(new LogSD()); + }else if(Estimators[i] == "qstat"){ + sumCalculators.push_back(new QStat()); + }else if(Estimators[i] == "bergerparker"){ + sumCalculators.push_back(new BergerParker()); + }else if(Estimators[i] == "bstick"){ + sumCalculators.push_back(new BStick()); + }else if(Estimators[i] == "ace"){ + if(abund < 5) + abund = 10; + sumCalculators.push_back(new Ace(abund)); + }else if(Estimators[i] == "jack"){ + sumCalculators.push_back(new Jackknife()); + }else if(Estimators[i] == "shannon"){ + sumCalculators.push_back(new Shannon()); + }else if(Estimators[i] == "npshannon"){ + sumCalculators.push_back(new NPShannon()); + 
}else if(Estimators[i] == "simpson"){ + sumCalculators.push_back(new Simpson()); + }else if(Estimators[i] == "bootstrap"){ + sumCalculators.push_back(new Bootstrap()); + }else if (Estimators[i] == "nseqs") { + sumCalculators.push_back(new NSeqs()); + }else if (Estimators[i] == "goodscoverage") { + sumCalculators.push_back(new GoodsCoverage()); + }else if (Estimators[i] == "efron") { + sumCalculators.push_back(new Efron(size)); + }else if (Estimators[i] == "boneh") { + sumCalculators.push_back(new Boneh(size)); + }else if (Estimators[i] == "solow") { + sumCalculators.push_back(new Solow(size)); + }else if (Estimators[i] == "shen") { + sumCalculators.push_back(new Shen(size, abund)); + } + } } } } - - //reset calc for next command - globaldata->setCalc(""); + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the SummaryCommand class Function SummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -105,16 +170,45 @@ SummaryCommand::SummaryCommand(){ } //********************************************************************************************************************** +void SummaryCommand::help(){ + try { + cout << "The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n"; + cout << "The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; + cout << "The summary.single command parameters are label, line, calc, abund. No parameters are required, but you may not use " << "\n"; + cout << "both the line and label parameters at the same time. The summary.single command should be in the following format: " << "\n"; + cout << "summary.single(label=yourLabel, line=yourLines, calc=yourEstimators)." << "\n"; + cout << "Example summary.single(label=unique-.01-.03, line=0,5,10, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson)." 
<< "\n"; + validCalculator->printCalc("summary", cout); + cout << "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson" << "\n"; + cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SummaryCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SummaryCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + SummaryCommand::~SummaryCommand(){ delete sabund; delete input; delete read; + delete validCalculator; } //********************************************************************************************************************** int SummaryCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -143,12 +237,12 @@ int SummaryCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; - while((sabund != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(sabund->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(sabund->getLabel()) == 1){ cout << sabund->getLabel() << '\t' << count << endl; processedLabels.insert(sabund->getLabel()); diff --git a/summarycommand.h b/summarycommand.h index 135c402..895f9d7 100644 --- a/summarycommand.h +++ b/summarycommand.h @@ -33,9 +33,10 @@ class GlobalData; class SummaryCommand : public Command { public: - SummaryCommand(); + SummaryCommand(string); ~SummaryCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -47,5 +48,14 @@ private: string outputFileName; ofstream outputFileHandle; int abund, size; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string line, label, calc; + vector Estimators; + }; #endif diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 6a09ada..363506c 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -35,65 +35,140 @@ //********************************************************************************************************************** -SummarySharedCommand::SummarySharedCommand(){ +SummarySharedCommand::SummarySharedCommand(string option){ try { globaldata = GlobalData::getInstance(); - outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary"); - openOutputFile(outputFileName, outputFileHandle); - format = globaldata->getFormat(); - validCalculator = new 
ValidCalculators(); - mult = false; + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Estimators.clear(); - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("sharedsummary", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "sharedsobs") { - sumCalculators.push_back(new SharedSobsCS()); - }else if (globaldata->Estimators[i] == "sharedchao") { - sumCalculators.push_back(new SharedChao1()); - }else if (globaldata->Estimators[i] == "sharedace") { - sumCalculators.push_back(new SharedAce()); - }else if (globaldata->Estimators[i] == "jabund") { - sumCalculators.push_back(new JAbund()); - }else if (globaldata->Estimators[i] == "sorabund") { - sumCalculators.push_back(new SorAbund()); - }else if (globaldata->Estimators[i] == "jclass") { - sumCalculators.push_back(new Jclass()); - }else if (globaldata->Estimators[i] == "sorclass") { - sumCalculators.push_back(new SorClass()); - }else if (globaldata->Estimators[i] == "jest") { - sumCalculators.push_back(new Jest()); - }else if (globaldata->Estimators[i] == "sorest") { - sumCalculators.push_back(new SorEst()); - }else if (globaldata->Estimators[i] == "thetayc") { - sumCalculators.push_back(new ThetaYC()); - }else if (globaldata->Estimators[i] == "thetan") { - sumCalculators.push_back(new ThetaN()); - }else if (globaldata->Estimators[i] == "kstest") { - sumCalculators.push_back(new KSTest()); - }else if (globaldata->Estimators[i] == "sharednseqs") { - sumCalculators.push_back(new SharedNSeqs()); - }else if (globaldata->Estimators[i] == "ochiai") { - sumCalculators.push_back(new Ochiai()); - }else if (globaldata->Estimators[i] == "anderberg") { - sumCalculators.push_back(new Anderberg()); - }else if (globaldata->Estimators[i] == "kulczynski") { - sumCalculators.push_back(new Kulczynski()); - }else if (globaldata->Estimators[i] == "kulczynskicody") { - sumCalculators.push_back(new KulczynskiCody()); - }else if (globaldata->Estimators[i] == 
"lennon") { - sumCalculators.push_back(new Lennon()); - }else if (globaldata->Estimators[i] == "morisitahorn") { - sumCalculators.push_back(new MorHorn()); - }else if (globaldata->Estimators[i] == "braycurtis") { - sumCalculators.push_back(new BrayCurtis()); - }else if (globaldata->Estimators[i] == "whittaker") { - sumCalculators.push_back(new Whittaker()); + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"line","label","calc","groups"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if (globaldata->getSharedFile() == "") { + if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the summary.shared command." << endl; abort = true; } + else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the summary.shared command." << endl; abort = true; } + } + + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... 
+ line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } + else { + if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } + } + splitAtDash(calc, Estimators); + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + delete validParameter; + + if (abort == false) { + + validCalculator = new ValidCalculators(); + int i; + + for (i=0; iisValidCalculator("sharedsummary", Estimators[i]) == true) { + if (Estimators[i] == "sharedsobs") { + sumCalculators.push_back(new SharedSobsCS()); + }else if (Estimators[i] == "sharedchao") { + sumCalculators.push_back(new SharedChao1()); + }else if (Estimators[i] == "sharedace") { + sumCalculators.push_back(new SharedAce()); + }else if (Estimators[i] == "jabund") { + sumCalculators.push_back(new JAbund()); + }else if (Estimators[i] == "sorabund") { + 
sumCalculators.push_back(new SorAbund()); + }else if (Estimators[i] == "jclass") { + sumCalculators.push_back(new Jclass()); + }else if (Estimators[i] == "sorclass") { + sumCalculators.push_back(new SorClass()); + }else if (Estimators[i] == "jest") { + sumCalculators.push_back(new Jest()); + }else if (Estimators[i] == "sorest") { + sumCalculators.push_back(new SorEst()); + }else if (Estimators[i] == "thetayc") { + sumCalculators.push_back(new ThetaYC()); + }else if (Estimators[i] == "thetan") { + sumCalculators.push_back(new ThetaN()); + }else if (Estimators[i] == "kstest") { + sumCalculators.push_back(new KSTest()); + }else if (Estimators[i] == "sharednseqs") { + sumCalculators.push_back(new SharedNSeqs()); + }else if (Estimators[i] == "ochiai") { + sumCalculators.push_back(new Ochiai()); + }else if (Estimators[i] == "anderberg") { + sumCalculators.push_back(new Anderberg()); + }else if (Estimators[i] == "kulczynski") { + sumCalculators.push_back(new Kulczynski()); + }else if (Estimators[i] == "kulczynskicody") { + sumCalculators.push_back(new KulczynskiCody()); + }else if (Estimators[i] == "lennon") { + sumCalculators.push_back(new Lennon()); + }else if (Estimators[i] == "morisitahorn") { + sumCalculators.push_back(new MorHorn()); + }else if (Estimators[i] == "braycurtis") { + sumCalculators.push_back(new BrayCurtis()); + }else if (Estimators[i] == "whittaker") { + sumCalculators.push_back(new Whittaker()); + } + } } + + outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary"); + openOutputFile(outputFileName, outputFileHandle); + format = globaldata->getFormat(); + mult = false; } } - //reset calc for next command - globaldata->setCalc(""); } catch(exception& e) { @@ -105,17 +180,48 @@ SummarySharedCommand::SummarySharedCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void SummarySharedCommand::help(){ + try { + cout << "The summary.shared 
command can only be executed after a successful read.otu command." << "\n"; + cout << "The summary.shared command parameters are label, line and calc. No parameters are required, but you may not use " << "\n"; + cout << "both the line and label parameters at the same time. The summary.shared command should be in the following format: " << "\n"; + cout << "summary.shared(label=yourLabel, line=yourLines, calc=yourEstimators, groups=yourGroups)." << "\n"; + cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; + validCalculator->printCalc("sharedsummary", cout); + cout << "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan" << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; + cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SummarySharedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + //********************************************************************************************************************** SummarySharedCommand::~SummarySharedCommand(){ delete input; delete read; + delete validCalculator; } //********************************************************************************************************************** int SummarySharedCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; //if the users entered no valid calculators don't execute command @@ -173,13 +279,13 @@ int SummarySharedCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ cout << lookup[0]->getLabel() << '\t' << count << endl; process(lookup); @@ -228,7 +334,7 @@ int SummarySharedCommand::execute(){ for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } //reset groups parameter - globaldata->Groups.clear(); globaldata->setGroups(""); + globaldata->Groups.clear(); //close files outputFileHandle.close(); diff --git a/summarysharedcommand.h b/summarysharedcommand.h index 0f2f753..515c776 100644 --- a/summarysharedcommand.h +++ b/summarysharedcommand.h @@ -21,13 +21,12 @@ /*The summary.shared() command The summary.shared command can only be executed after a successful read.shared 
command. It outputs a file for each estimator you choose to use. The summary.shared command parameters are label, - line, jumble and sharedsummary. No parameters are required, but you may not use both the line and label parameters at the same time. + line and sharedsummary. No parameters are required, but you may not use both the line and label parameters at the same time. The summary.shared command should be in the following format: summary.shared(label=yourLabel, - line=yourLines, jumble=yourJumble, sharedsummary=yourEstimators). - Example summary.shared(label=unique-.01-.03, line=0,5,10, jumble=1, sharedsummary=sharedChao-sharedAce-sharedJabund + line=yourLines, sharedsummary=yourEstimators). + Example summary.shared(label=unique-.01-.03, line=0,5,10, sharedsummary=sharedChao-sharedAce-sharedJabund -sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN). - The default value for jumble is 0 (meaning don’t jumble, if it’s set to 1 then it will jumble) and - sharedsummary is sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN. + The default value for sharedsummary is sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN. The valid sharedsummary estimators are: sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass -sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN. The label and line parameters are used to analyze specific lines in your input. 
*/ @@ -38,9 +37,10 @@ class GlobalData; class SummarySharedCommand : public Command { public: - SummarySharedCommand(); + SummarySharedCommand(string); ~SummarySharedCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -49,10 +49,17 @@ private: InputData* input; ValidCalculators* validCalculator; SharedListVector* SharedList; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines, mult; + set lines; //hold lines to be used + set labels; //holds labels to be used + string line, label, calc, groups; + vector Estimators, Groups; vector lookup; string outputFileName, format, outAllFileName; ofstream outputFileHandle, outAll; - bool mult; void process(vector); }; diff --git a/tree.cpp b/tree.cpp index 30df409..656d4cc 100644 --- a/tree.cpp +++ b/tree.cpp @@ -15,6 +15,8 @@ Tree::Tree() { try { globaldata = GlobalData::getInstance(); + if (globaldata->runParse == true) { parseTreeFile(); globaldata->runParse = false; } + numLeaves = globaldata->Treenames.size(); numNodes = 2*numLeaves - 1; @@ -595,4 +597,142 @@ void Tree::printTree() { /*****************************************************************/ +void Tree::parseTreeFile() { + + //only takes names from the first tree and assumes that all trees use the same names. 
+ try { + string filename = globaldata->getTreeFile(); + ifstream filehandle; + openInputFile(filename, filehandle); + int c, comment; + comment = 0; + + //ifyou are not a nexus file + if((c = filehandle.peek()) != '#') { + while((c = filehandle.peek()) != ';') { + while ((c = filehandle.peek()) != ';') { + // get past comments + if(c == '[') { + comment = 1; + } + if(c == ']'){ + comment = 0; + } + if((c == '(') && (comment != 1)){ break; } + filehandle.get(); + } + + readTreeString(filehandle); + } + //ifyou are a nexus file + }else if((c = filehandle.peek()) == '#') { + string holder = ""; + + // get past comments + while(holder != "translate" && holder != "Translate"){ + if(holder == "[" || holder == "[!"){ + comment = 1; + } + if(holder == "]"){ + comment = 0; + } + filehandle >> holder; + + //ifthere is no translate then you must read tree string otherwise use translate to get names + if(holder == "tree" && comment != 1){ + //pass over the "tree rep.6878900 = " + while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;} + + if(c == EOF) { break; } + filehandle.putback(c); //put back first ( of tree. + readTreeString(filehandle); + break; + } + } + + //use nexus translation rather than parsing tree to save time + if((holder == "translate") || (holder == "Translate")) { + + string number, name, h; + h = ""; // so it enters the loop the first time + while((h != ";") && (number != ";")) { + filehandle >> number; + filehandle >> name; + + //c = , until done with translation then c = ; + h = name.substr(name.length()-1, name.length()); + name.erase(name.end()-1); //erase the comma + globaldata->Treenames.push_back(number); + } + if(number == ";") { globaldata->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name + } + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function parseTreeFile. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Tree class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************/ + +/*******************************************************/ +void Tree::readTreeString(ifstream& filehandle) { + try { + int c; + string name; //k + + while((c = filehandle.peek()) != ';') { + //ifyou are a name + if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space + name = ""; + c = filehandle.get(); + // k = c; +//cout << k << endl; + while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { + name += c; + c = filehandle.get(); + // k = c; +//cout << " in name while " << k << endl; + } + +//cout << "name = " << name << endl; + globaldata->Treenames.push_back(name); + filehandle.putback(c); +//k = c; +//cout << " after putback" << k << endl; + } + + if(c == ':') { //read until you reach the end of the branch length + while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { + c = filehandle.get(); + // k = c; + //cout << " in branch while " << k << endl; + } + filehandle.putback(c); + } + c = filehandle.get(); + if(c == ';') { break; } + // k = c; +//cout << k << endl; + + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Tree class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + +/*******************************************************/ + +/*******************************************************/ diff --git a/tree.h b/tree.h index 4f77e50..b6d5ea8 100644 --- a/tree.h +++ b/tree.h @@ -20,7 +20,6 @@ public: Tree(); //to generate a tree from a file ~Tree(); - void getCopy(Tree*); //makes tree a copy of the one passed in. void assembleRandomTree(); void assembleRandomUnifracTree(vector); @@ -55,6 +54,11 @@ private: void randomLabels(vector); void randomLabels(string, string); void printBranch(int, ostream&, string); //recursively print out tree + void parseTreeFile(); //parses through tree file to find names of nodes and number of them + //this is required in case user has sequences in the names file that are + //not included in the tree. + //only takes names from the first tree in the tree file and assumes that all trees use the same names. + void readTreeString(ifstream&); }; #endif diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index 850df46..7c78a34 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -22,43 +22,144 @@ //********************************************************************************************************************** -TreeGroupCommand::TreeGroupCommand(){ +TreeGroupCommand::TreeGroupCommand(string option){ try { globaldata = GlobalData::getInstance(); - format = globaldata->getFormat(); - validCalculator = new ValidCalculators(); + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + Groups.clear(); + Estimators.clear(); - if (format == "sharedfile") { - int i; - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "jabund") { - treeCalculators.push_back(new JAbund()); - }else if (globaldata->Estimators[i] == "sorabund") { - treeCalculators.push_back(new SorAbund()); - }else if (globaldata->Estimators[i] == "jclass") { - 
treeCalculators.push_back(new Jclass()); - }else if (globaldata->Estimators[i] == "sorclass") { - treeCalculators.push_back(new SorClass()); - }else if (globaldata->Estimators[i] == "jest") { - treeCalculators.push_back(new Jest()); - }else if (globaldata->Estimators[i] == "sorest") { - treeCalculators.push_back(new SorEst()); - }else if (globaldata->Estimators[i] == "thetayc") { - treeCalculators.push_back(new ThetaYC()); - }else if (globaldata->Estimators[i] == "thetan") { - treeCalculators.push_back(new ThetaN()); - }else if (globaldata->Estimators[i] == "morisitahorn") { - treeCalculators.push_back(new MorHorn()); - }else if (globaldata->Estimators[i] == "braycurtis") { - treeCalculators.push_back(new BrayCurtis()); + //allow user to run help + if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"line","label","calc","groups", "phylip", "column", "name", "precision","cutoff"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //required parameters + phylipfile = validParameter->validFile(parameters, "phylip", true); + if (phylipfile == "not open") { abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; } + else { globaldata->setPhylipFile(phylipfile); globaldata->setFormat("phylip"); } + + columnfile = validParameter->validFile(parameters, "column", true); + if (columnfile == "not open") { abort = true; } + else if (columnfile == "not found") { columnfile = ""; } + else { globaldata->setColumnFile(columnfile); globaldata->setFormat("column"); } + + 
namefile = validParameter->validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } + else { globaldata->setNameFile(namefile); } + + format = globaldata->getFormat(); + + //error checking on files + if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == ""))) { cout << "You must run the read.otu command or provide a distance file before running the tree.shared command." << endl; abort = true; } + else if ((phylipfile != "") && (columnfile != "")) { cout << "When running the tree.shared command with a distance file you may not use both the column and the phylip parameters." << endl; abort = true; } + + if (columnfile != "") { + if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; } + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { calc = "jclass-thetayc"; } + else { + if (calc == "default") { calc = "jclass-thetayc"; } + } + splitAtDash(calc, Estimators); + + string temp; + temp = validParameter->validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } + convert(temp, precision); + + temp = validParameter->validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } + convert(temp, cutoff); + cutoff += (5 / (precision * 10.0)); + + + delete validParameter; + + if (abort == false) { + + validCalculator = new ValidCalculators(); + + if (format == "sharedfile") { + int i; + for (i=0; iisValidCalculator("treegroup", Estimators[i]) == true) { + if (Estimators[i] == "jabund") { + treeCalculators.push_back(new JAbund()); + }else if (Estimators[i] == "sorabund") { + treeCalculators.push_back(new SorAbund()); + }else if (Estimators[i] == "jclass") { + treeCalculators.push_back(new Jclass()); + }else if (Estimators[i] == "sorclass") { + treeCalculators.push_back(new SorClass()); + }else if (Estimators[i] == "jest") { + treeCalculators.push_back(new Jest()); + }else if (Estimators[i] == "sorest") { + treeCalculators.push_back(new SorEst()); + }else if (Estimators[i] == "thetayc") { + treeCalculators.push_back(new ThetaYC()); + }else if (Estimators[i] == "thetan") { + treeCalculators.push_back(new ThetaN()); + }else if (Estimators[i] == "morisitahorn") { + treeCalculators.push_back(new MorHorn()); + }else if (Estimators[i] == 
"braycurtis") { + treeCalculators.push_back(new BrayCurtis()); + } + } } } - } + } } - - //reset calc for next command - globaldata->setCalc(""); } catch(exception& e) { @@ -70,6 +171,39 @@ TreeGroupCommand::TreeGroupCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void TreeGroupCommand::help(){ + try { + cout << "The tree.shared command creates a .tre to represent the similiarity between groups or sequences." << "\n"; + cout << "The tree.shared command can only be executed after a successful read.otu command or by providing a distance file." << "\n"; + cout << "The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision, line and label. You may not use line and label at the same time." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n"; + cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes." << "\n"; + cout << "The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used. If you use a column file the name filename is required. " << "\n"; + cout << "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed." << "\n"; + cout << "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; + cout << "The default value for calc is jclass-thetayc." 
<< "\n"; + cout << "The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose." << "\n"; + validCalculator->printCalc("treegroup", cout); + cout << "Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile)." << "\n"; + cout << "Example tree.shared(phylip=abrecovery.dist)." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the TreeGroupCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + + //********************************************************************************************************************** TreeGroupCommand::~TreeGroupCommand(){ @@ -77,6 +211,7 @@ TreeGroupCommand::~TreeGroupCommand(){ if (format == "sharedfile") {delete read;} else { delete readMatrix; delete matrix; delete list; } delete tmap; + delete validCalculator; } @@ -84,6 +219,9 @@ TreeGroupCommand::~TreeGroupCommand(){ int TreeGroupCommand::execute(){ try { + + if (abort == true) { return 0; } + if (format == "sharedfile") { //if the users entered no valid calculators don't execute command if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." 
<< endl; return 0; } @@ -107,18 +245,10 @@ int TreeGroupCommand::execute(){ if (format == "column") { readMatrix = new ReadColumnMatrix(filename); } else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); } - if(globaldata->getPrecision() != ""){ - convert(globaldata->getPrecision(), precision); - } - - if(globaldata->getCutOff() != ""){ - convert(globaldata->getCutOff(), cutoff); - cutoff += (5 / (precision * 10.0)); - } readMatrix->setCutoff(cutoff); - if(globaldata->getNameFile() != ""){ - nameMap = new NameAssignment(globaldata->getNameFile()); + if(namefile != ""){ + nameMap = new NameAssignment(namefile); nameMap->readMap(1,2); } else{ @@ -148,10 +278,11 @@ int TreeGroupCommand::execute(){ outputFile = getRootName(globaldata->inputFileName) + "tre"; createTree(); + cout << "Tree complete. " << endl; } //reset groups parameter - globaldata->Groups.clear(); globaldata->setGroups(""); + globaldata->Groups.clear(); return 0; } @@ -328,13 +459,13 @@ void TreeGroupCommand::makeSimsShared() { globaldata->gTreemap = tmap; set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ cout << lookup[0]->getLabel() << '\t' << count << endl; process(lookup); @@ -397,7 +528,7 @@ void TreeGroupCommand::process(vector thisLookup) { try{ EstOutput data; vector subset; - numGroups = globaldata->Groups.size(); + numGroups = thisLookup.size(); //for each 
calculator for(int i = 0 ; i < treeCalculators.size(); i++) { diff --git a/treegroupscommand.h b/treegroupscommand.h index 989c699..76124f5 100644 --- a/treegroupscommand.h +++ b/treegroupscommand.h @@ -34,9 +34,10 @@ typedef list::iterator MatData; class TreeGroupCommand : public Command { public: - TreeGroupCommand(); + TreeGroupCommand(string); ~TreeGroupCommand(); int execute(); + void help(); private: void createTree(); @@ -63,8 +64,19 @@ private: int numGroups; ofstream out; float precision, cutoff; + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string phylipfile, columnfile, namefile, calc, groups, line, label; + vector Estimators, Groups; //holds estimators to be used + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. void process(vector); + + }; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 259adc1..77da3a6 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -11,33 +11,63 @@ //*************************************************************************************************************** -TrimSeqsCommand::TrimSeqsCommand(){ +TrimSeqsCommand::TrimSeqsCommand(string option){ try { globaldata = GlobalData::getInstance(); + abort = false; - oligos = 0; - - if(globaldata->getFastaFile() == ""){ - cout << "you need to at least enter a fasta file name" << endl; - } + //allow user to run help + if(option == "help") { help(); abort = true; } - if(isTrue(globaldata->getFlip())) { flip = 1; } - if(globaldata->getOligosFile() != "") { oligos = 1; } - - if(globaldata->getMaxAmbig() != "-1") { maxAmbig = atoi(globaldata->getMaxAmbig().c_str()); } - else { maxAmbig = -1; } + else { + //valid paramters for this command + string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength"}; + vector myArray (AlignArray, 
AlignArray+(sizeof(AlignArray)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); - if(globaldata->getMaxHomoPolymer() != "-1") { maxHomoP = atoi(globaldata->getMaxHomoPolymer().c_str()); } - else { maxHomoP = 0; } + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + fastafile = validParameter->validFile(parameters, "fasta", true); + if (fastafile == "not found") { cout << "fasta is a required parameter for the screen.seqs command." << endl; abort = true; } + else if (fastafile == "not open") { abort = true; } + else { globaldata->setFastaFile(fastafile); } - if(globaldata->getMinLength() != "-1") { minLength = atoi(globaldata->getMinLength().c_str()); } - else { minLength = 0; } - if(globaldata->getMaxLength() != "-1") { maxLength = atoi(globaldata->getMaxLength().c_str()); } - else { maxLength = 0; } + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + string temp; + temp = validParameter->validFile(parameters, "flip", false); if (temp == "not found") { temp = "0"; } + if(isTrue(temp)) { flip = 1; } - if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){ cout << "huh?" 
<< endl; } + temp = validParameter->validFile(parameters, "oligos", false); if (temp == "not found") { temp = ""; } + if(temp != "") { oligos = 1; } + else { oligos = 0; } + + temp = validParameter->validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } + convert(temp, maxAmbig); + + temp = validParameter->validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; } + convert(temp, maxHomoP); + + temp = validParameter->validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; } + convert(temp, minLength); + + temp = validParameter->validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; } + convert(temp, maxLength); + + if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){ cout << "huh?" << endl; } + + delete validParameter; + } } catch(exception& e) { @@ -49,6 +79,36 @@ TrimSeqsCommand::TrimSeqsCommand(){ exit(1); } } +//********************************************************************************************************************** + +void TrimSeqsCommand::help(){ + try { + cout << "The trim.seqs command reads a fastafile and creates ....." << "\n"; + cout << "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength." << "\n"; + cout << "The fasta parameter is required." << "\n"; + cout << "The flip parameter .... The default is 0." << "\n"; + cout << "The oligos parameter .... The default is ""." << "\n"; + cout << "The maxambig parameter .... The default is -1." << "\n"; + cout << "The maxhomop parameter .... The default is 0." << "\n"; + cout << "The minlength parameter .... The default is 0." << "\n"; + cout << "The maxlength parameter .... The default is 0." 
<< "\n"; + cout << "The trim.seqs command should be in the following format: " << "\n"; + cout << "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, " << "\n"; + cout << "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) " << "\n"; + cout << "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...)." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n"; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the TrimSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + //*************************************************************************************************************** @@ -58,21 +118,24 @@ TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ } int TrimSeqsCommand::execute(){ try{ + + if (abort == true) { return 0; } + getOligos(); ifstream inFASTA; - openInputFile(globaldata->getFastaFile(), inFASTA); + openInputFile(fastafile, inFASTA); ofstream outFASTA; - string trimSeqFile = getRootName(globaldata->getFastaFile()) + "trim.fasta"; + string trimSeqFile = getRootName(fastafile) + "trim.fasta"; openOutputFile(trimSeqFile, outFASTA); ofstream outGroups; - string groupFile = getRootName(globaldata->getFastaFile()) + "groups"; + string groupFile = getRootName(fastafile) + "groups"; openOutputFile(groupFile, outGroups); ofstream scrapFASTA; - string scrapSeqFile = getRootName(globaldata->getFastaFile()) + "scrap.fasta"; + string scrapSeqFile = getRootName(fastafile) + "scrap.fasta"; openOutputFile(scrapSeqFile, scrapFASTA); bool success; @@ -144,7 +207,7 @@ int 
TrimSeqsCommand::execute(){ void TrimSeqsCommand::getOligos(){ ifstream inOligos; - openInputFile(globaldata->getOligosFile(), inOligos); + //openInputFile(globaldata->getOligosFile(), inOligos); string type, oligo, group; diff --git a/trimseqscommand.h b/trimseqscommand.h index 800ec3d..03ee1f5 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -17,9 +17,10 @@ class TrimSeqsCommand : public Command { public: - TrimSeqsCommand(); + TrimSeqsCommand(string); ~TrimSeqsCommand(); int execute(); + void help(); private: void getOligos(); @@ -31,7 +32,11 @@ private: bool cullAmbigs(Sequence&); GlobalData* globaldata; - + OptionParser* parser; + map parameters; + map::iterator it; + bool abort; + string fastafile; bool oligos, flip; int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength; vector forPrimer, revPrimer; diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index fcff253..bc8eb47 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -10,25 +10,65 @@ #include "unifracunweightedcommand.h" /***********************************************************/ -UnifracUnweightedCommand::UnifracUnweightedCommand() { +UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { try { globaldata = GlobalData::getInstance(); + abort = false; + Groups.clear(); - T = globaldata->gTree; - tmap = globaldata->gTreemap; - sumFile = globaldata->getTreeFile() + ".uwsummary"; - openOutputFile(sumFile, outSum); - - util = new SharedUtil(); - util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze - util->getCombos(groupComb, globaldata->Groups, numComp); - globaldata->setGroups(""); + //allow user to run help + if(option == "help") { help(); abort = true; } - if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } + else { + //valid paramters for this command + string Array[] = {"groups","iters"}; + vector myArray (Array, 
Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it4 = parameters.begin(); it4 != parameters.end(); it4++) { + if (validParameter->isValidParameter(it4->first, myArray, it4->second) != true) { abort = true; } + } + + if (globaldata->gTree.size() == 0) {//no trees were read + cout << "You must execute the read.tree command, before you may execute the unifrac.unweighted command." << endl; abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } - convert(globaldata->getIters(), iters); //how many random trees to generate - unweighted = new Unweighted(tmap); - + itersString = validParameter->validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } + convert(itersString, iters); + + delete validParameter; + + if (abort == false) { + T = globaldata->gTree; + tmap = globaldata->gTreemap; + sumFile = globaldata->getTreeFile() + ".uwsummary"; + openOutputFile(sumFile, outSum); + + util = new SharedUtil(); + util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze + util->getCombos(groupComb, globaldata->Groups, numComp); + + if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } + + unweighted = new Unweighted(tmap); + + } + + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; @@ -39,10 +79,38 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { exit(1); } } + +//********************************************************************************************************************** + +void UnifracUnweightedCommand::help(){ + try { + cout << "The unifrac.unweighted command can only be executed after a successful read.tree command." << "\n"; + cout << "The unifrac.unweighted command parameters are groups and iters. No parameters are required." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group." << "\n"; + cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n"; + cout << "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters)." << "\n"; + cout << "Example unifrac.unweighted(groups=A-B-C, iters=500)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile, and iters is 1000." << "\n"; + cout << "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the UnifracUnweightedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + + /***********************************************************/ int UnifracUnweightedCommand::execute() { try { - + + if (abort == true) { return 0; } + userData.resize(numComp,0); //data[0] = unweightedscore randomData.resize(numComp,0); //data[0] = unweightedscore //create new tree with same num nodes and leaves as users @@ -54,7 +122,7 @@ int UnifracUnweightedCommand::execute() { for (int i = 0; i < T.size(); i++) { counter = 0; - output = new ColumnFile(globaldata->getTreeFile() + toString(i+1) + ".unweighted"); + output = new ColumnFile(globaldata->getTreeFile() + toString(i+1) + ".unweighted", itersString); //get unweighted for users tree rscoreFreq.resize(numComp); @@ -176,11 +244,11 @@ void UnifracUnweightedCommand::printUWSummaryFile(int i) { cout << i+1 << '\t'; if (UWScoreSig[a][0] > (1/(float)iters)) { - outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << UWScoreSig[a][0] << endl; - cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << UWScoreSig[a][0] << endl; + outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; + cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; }else { - outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; - cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; + outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; + cout << setprecision(6) << groupComb[a] 
<< '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; } } diff --git a/unifracunweightedcommand.h b/unifracunweightedcommand.h index 0a92538..3c031c6 100644 --- a/unifracunweightedcommand.h +++ b/unifracunweightedcommand.h @@ -22,9 +22,10 @@ class GlobalData; class UnifracUnweightedCommand : public Command { public: - UnifracUnweightedCommand(); + UnifracUnweightedCommand(string); ~UnifracUnweightedCommand() { delete unweighted; delete util; } - int execute(); + int execute(); + void help(); private: GlobalData* globaldata; @@ -46,6 +47,13 @@ class UnifracUnweightedCommand : public Command { map::iterator it2; map::iterator it; + OptionParser* parser; + map parameters; + map::iterator it4; + bool abort; + string groups, itersString; + vector Groups; //holds groups to be used + ofstream outSum, out; ifstream inFile; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 6dfbd71..a7a41f3 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -10,24 +10,64 @@ #include "unifracweightedcommand.h" /***********************************************************/ -UnifracWeightedCommand::UnifracWeightedCommand() { +UnifracWeightedCommand::UnifracWeightedCommand(string option) { try { globaldata = GlobalData::getInstance(); + abort = false; + Groups.clear(); - T = globaldata->gTree; - tmap = globaldata->gTreemap; - sumFile = globaldata->getTreeFile() + ".wsummary"; - openOutputFile(sumFile, outSum); + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"groups","iters"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters are valid for command + for (it4 = parameters.begin(); it4 != 
parameters.end(); it4++) { + if (validParameter->isValidParameter(it4->first, myArray, it4->second) != true) { abort = true; } + } + + if (globaldata->gTree.size() == 0) {//no trees were read + cout << "You must execute the read.tree command, before you may execute the unifrac.weighted command." << endl; abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } - util = new SharedUtil(); - string s; //to make work with setgroups - util->setGroups(globaldata->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze - util->getCombos(groupComb, globaldata->Groups, numComp); - globaldata->setGroups(""); + itersString = validParameter->validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } + convert(itersString, iters); + + delete validParameter; + + if (abort == false) { + T = globaldata->gTree; + tmap = globaldata->gTreemap; + sumFile = globaldata->getTreeFile() + ".wsummary"; + openOutputFile(sumFile, outSum); - convert(globaldata->getIters(), iters); //how many random trees to generate - weighted = new Weighted(tmap); - + util = new SharedUtil(); + string s; //to make work with setgroups + util->setGroups(globaldata->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze + util->getCombos(groupComb, globaldata->Groups, numComp); + + weighted = new Weighted(tmap); + + } + } + + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; @@ -38,9 +78,36 @@ UnifracWeightedCommand::UnifracWeightedCommand() { exit(1); } } +//********************************************************************************************************************** + +void UnifracWeightedCommand::help(){ + try { + cout << "The unifrac.weighted command can only be executed after a successful read.tree command." << "\n"; + cout << "The unifrac.weighted command parameters are groups and iters. No parameters are required." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; + cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n"; + cout << "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters)." << "\n"; + cout << "Example unifrac.weighted(groups=A-B-C, iters=500)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile, and iters is 1000." << "\n"; + cout << "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the UnifracWeightedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + /***********************************************************/ int UnifracWeightedCommand::execute() { try { + + if (abort == true) { return 0; } + Progress* reading; reading = new Progress("Comparing to random:", iters); @@ -57,7 +124,7 @@ int UnifracWeightedCommand::execute() { rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... - output = new ColumnFile(globaldata->getTreeFile() + toString(i+1) + ".weighted"); + output = new ColumnFile(globaldata->getTreeFile() + toString(i+1) + ".weighted", itersString); userData = weighted->getValues(T[i]); //userData[0] = weightedscore @@ -189,11 +256,11 @@ void UnifracWeightedCommand::printWSummaryFile() { for (int i = 0; i < T.size(); i++) { for (int j = 0; j < numComp; j++) { if (WScoreSig[count] > (1/(float)iters)) { - outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; - cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; + outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; }else{ - outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; - cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; + outSum << setprecision(6) << i+1 << 
'\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; } count++; } diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index 491d384..2d78d57 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -23,9 +23,10 @@ class GlobalData; class UnifracWeightedCommand : public Command { public: - UnifracWeightedCommand(); + UnifracWeightedCommand(string); ~UnifracWeightedCommand() { delete weighted; delete util; } int execute(); + void help(); private: GlobalData* globaldata; @@ -50,6 +51,14 @@ class UnifracWeightedCommand : public Command { map::iterator it2; map::iterator it; + OptionParser* parser; + map parameters; + map::iterator it4; + bool abort; + string groups, itersString; + vector Groups; //holds groups to be used + + ofstream outSum, out; ifstream inFile; diff --git a/validparameter.cpp b/validparameter.cpp index d6fde63..486316f 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -13,9 +13,7 @@ ValidParameters::ValidParameters() { try { - initCommandParameters(); initParameterRanges(); - } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ValidParameters class Function ValidParameters. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; @@ -32,10 +30,10 @@ ValidParameters::ValidParameters() { ValidParameters::~ValidParameters() {} /***********************************************************************/ -bool ValidParameters::isValidParameter(string parameter, string command, string value) { +bool ValidParameters::isValidParameter(string parameter, vector cParams, string value) { try { bool valid = false; - vector cParams = commandParameters[command]; + //vector cParams = commandParameters[command]; int numParams = cParams.size(); for(int i = 0; i < numParams; i++) { if(cParams.at(i).compare(parameter) == 0) { @@ -44,8 +42,8 @@ bool ValidParameters::isValidParameter(string parameter, string command, string } } if(!valid) { - cout << "'" << parameter << "' is not a valid parameter for the " << command << " command.\n"; - cout << "The valid paramters for the " << command << " command are: "; + cout << "'" << parameter << "' is not a valid parameter." << endl; + cout << "The valid parameters are: "; for(int i = 0; i < numParams-1; i++) cout << cParams.at(i) << ", "; cout << "and " << cParams.at(numParams-1) << ".\n"; @@ -202,132 +200,34 @@ bool ValidParameters::isValidParameter(string parameter, string command, string exit(1); } } +/*******************************************************/ -/***********************************************************************/ - -/***********************************************************************/ -void ValidParameters::initCommandParameters() { - try { - //{"parameter1","parameter2",...,"last parameter"}; - - string readdistArray[] = {"phylip","column", "name","cutoff","precision", "group"}; - commandParameters["read.dist"] = addParameters(readdistArray, sizeof(readdistArray)/sizeof(string)); - - string readotuArray[] = {"list","order","shared", "line", "label","group","sabund", "rabund"}; - commandParameters["read.otu"] = addParameters(readotuArray, sizeof(readotuArray)/sizeof(string)); - - string readtreeArray[] = {"tree","group"}; - 
commandParameters["read.tree"] = addParameters(readtreeArray, sizeof(readtreeArray)/sizeof(string)); - - string clusterArray[] = {"cutoff","precision","method"}; - commandParameters["cluster"] = addParameters(clusterArray, sizeof(clusterArray)/sizeof(string)); - - string deconvoluteArray[] = {"fasta"}; - commandParameters["unique.seqs"] = addParameters(deconvoluteArray, sizeof(deconvoluteArray)/sizeof(string)); - - string collectsingleArray[] = {"freq","line","label","calc","abund","size"}; - commandParameters["collect.single"] = addParameters(collectsingleArray, sizeof(collectsingleArray)/sizeof(string)); - - string collectsharedArray[] = {"freq","line","label","calc","groups"}; - commandParameters["collect.shared"] = addParameters(collectsharedArray, sizeof(collectsharedArray)/sizeof(string)); - - string getgroupArray[] = {}; - commandParameters["get.group"] = addParameters(getgroupArray, sizeof(getgroupArray)/sizeof(string)); - - string getlabelArray[] = {}; - commandParameters["get.label"] = addParameters(getlabelArray, sizeof(getlabelArray)/sizeof(string)); - - string getlineArray[] = {}; - commandParameters["get.line"] = addParameters(getlineArray, sizeof(getlineArray)/sizeof(string)); - - string getsabundArray[] = {"line", "label"}; - commandParameters["get.sabund"] = addParameters(getsabundArray, sizeof(getsabundArray)/sizeof(string)); - - string getrabundArray[] = {"line", "label"}; - commandParameters["get.rabund"] = addParameters(getrabundArray, sizeof(getrabundArray)/sizeof(string)); - - string rarefactionsingleArray[] = {"iters","freq","line","label","calc","abund"}; - commandParameters["rarefaction.single"] = addParameters(rarefactionsingleArray, sizeof(rarefactionsingleArray)/sizeof(string)); - - string rarefactionsharedArray[] = {"iters","jumble","line","label","calc","groups"}; - commandParameters["rarefaction.shared"] = addParameters(rarefactionsharedArray, sizeof(rarefactionsharedArray)/sizeof(string)); - - string libshuffArray[] = 
{"iters","groups","step","form","cutoff"}; - commandParameters["libshuff"] = addParameters(libshuffArray, sizeof(libshuffArray)/sizeof(string)); - - string summarysingleArray[] = {"line","label","calc","abund","size"}; - commandParameters["summary.single"] = addParameters(summarysingleArray, sizeof(summarysingleArray)/sizeof(string)); - - string summarysharedArray[] = {"line","label","calc","groups"}; - commandParameters["summary.shared"] = addParameters(summarysharedArray, sizeof(summarysharedArray)/sizeof(string)); - - string parsimonyArray[] = {"random","groups","iters"}; - commandParameters["parsimony"] = addParameters(parsimonyArray, sizeof(parsimonyArray)/sizeof(string)); +/******************************************************/ - string unifracWeightedArray[] = {"groups","iters"}; - commandParameters["unifrac.weighted"] = addParameters(unifracWeightedArray, sizeof(unifracWeightedArray)/sizeof(string)); - - string unifracUnweightedArray[] = {"groups","iters"}; - commandParameters["unifrac.unweighted"] = addParameters(unifracUnweightedArray, sizeof(unifracUnweightedArray)/sizeof(string)); - - string heatmapArray[] = {"groups","line","label","sorted","scale"}; - commandParameters["heatmap.bin"] = addParameters(heatmapArray, sizeof(heatmapArray)/sizeof(string)); - - string heatmapSimArray[] = {"groups","line","label"}; - commandParameters["heatmap.sim"] = addParameters(heatmapSimArray, sizeof(heatmapSimArray)/sizeof(string)); - - string filterseqsArray[] = {"fasta", "trump", "soft", "hard", "vertical"}; - commandParameters["filter.seqs"] = addParameters(filterseqsArray, sizeof(filterseqsArray)/sizeof(string)); - - string summaryseqsArray[] = {"fasta"}; - commandParameters["summary.seqs"] = addParameters(summaryseqsArray, sizeof(summaryseqsArray)/sizeof(string)); - - string screenseqsArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", "name", "group"}; - commandParameters["screen.seqs"] = addParameters(screenseqsArray, 
sizeof(screenseqsArray)/sizeof(string)); - - string reverseseqsArray[] = {"fasta"}; - commandParameters["reverse.seqs"] = addParameters(reverseseqsArray, sizeof(reverseseqsArray)/sizeof(string)); - - string trimseqsArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength"}; - commandParameters["trim.seqs"] = addParameters(trimseqsArray, sizeof(trimseqsArray)/sizeof(string)); - - string vennArray[] = {"groups","line","label","calc"}; - commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string)); - - string binseqsArray[] = {"fasta","line","label","name", "group"}; - commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string)); - - string distsharedArray[] = {"line","label","calc","groups"}; - commandParameters["dist.shared"] = addParameters(distsharedArray, sizeof(distsharedArray)/sizeof(string)); - - string getOTURepArray[] = {"fasta","list","line","label","name", "group"}; - commandParameters["get.oturep"] = addParameters(getOTURepArray, sizeof(getOTURepArray)/sizeof(string)); - - string treeGroupsArray[] = {"line","label","calc","groups", "phylip", "column", "name"}; - commandParameters["tree.shared"] = addParameters(treeGroupsArray, sizeof(treeGroupsArray)/sizeof(string)); - - string bootstrapArray[] = {"line","label","calc","groups","iters"}; - commandParameters["bootstrap.shared"] = addParameters(bootstrapArray, sizeof(bootstrapArray)/sizeof(string)); - - string concensusArray[] = {}; - commandParameters["concensus"] = addParameters(concensusArray, sizeof(concensusArray)/sizeof(string)); - - string distanceArray[] = {"fasta", "phylip", "calc", "countends", "cutoff", "processors"}; - commandParameters["dist.seqs"] = addParameters(distanceArray, sizeof(distanceArray)/sizeof(string)); +string ValidParameters::validFile(map container, string parameter, bool isFile) { + try { + int ableToOpen; + ifstream in; + map::iterator it; - string AlignArray[] = {"fasta", "candidate", 
"search", "ksize", "align", "match", "mismatch", "gapopen", "gapextend"}; - commandParameters["align.seqs"] = addParameters(AlignArray, sizeof(AlignArray)/sizeof(string)); + it = container.find(parameter); + if(it != container.end()){ //no parameter given + if(isFile == true) { + ableToOpen = openInputFile(it->second, in); + if (ableToOpen == 1) { return "not open"; } + in.close(); + } + }else { return "not found"; } - string quitArray[] = {}; - commandParameters["quit"] = addParameters(quitArray, sizeof(quitArray)/sizeof(string)); - + return it->second; + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ValidParameters class Function isValidParameter. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ValidParameters class Function validFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the ValidParameters class function isValidParameter. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ValidParameters class function validFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; exit(1); } } @@ -356,9 +256,6 @@ void ValidParameters::initParameterRanges() { string itersArray[] = {">=","10", "<","NA", "between"}; parameterRanges["iters"] = addParameters(itersArray, rangeSize); - string jumbleArray[] = {">","0", "<","1", "only"}; - parameterRanges["jumble"] = addParameters(jumbleArray, rangeSize); - string freqArray[] = {">=","1", "<","NA", "between"}; parameterRanges["freq"] = addParameters(freqArray, rangeSize); diff --git a/validparameter.h b/validparameter.h index ff886ef..5007948 100644 --- a/validparameter.h +++ b/validparameter.h @@ -22,15 +22,14 @@ class ValidParameters { public: ValidParameters(); ~ValidParameters(); - bool isValidParameter(string); - bool isValidParameter(string, string, string); + //bool isValidParameter(string, string, string) {return true;} + bool isValidParameter(string, vector, string); vector addParameters(string[], int); - void initCommandParameters(); void initParameterRanges(); + string validFile(map, string, bool); //container, parameter, isFile private: map::iterator it; - map > commandParameters; map > parameterRanges; }; diff --git a/venn.cpp b/venn.cpp index 112a78b..6402adf 100644 --- a/venn.cpp +++ b/venn.cpp @@ -165,18 +165,18 @@ void Venn::getPic(vector lookup, vector vCalcs outsvg << "getGroup().length() / 2)) + "\" y=\"175\">" + lookup[0]->getGroup() + "\n"; outsvg << "getGroup().length() / 2)) + "\" y=\"175\">" + lookup[1]->getGroup() + "\n"; outsvg << "" + toString(shared[0]) + "\n"; - outsvg << "The number of species in group " + globaldata->Groups[0] + " is " + toString(numA[0]); + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); if (numA.size() == 3) { outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "\n"; }else { outsvg << "\n"; } - outsvg << "The number of species in group " + globaldata->Groups[1] + " is " + toString(numB[0]); + outsvg << "The number of species in group " + 
lookup[1]->getGroup() + " is " + toString(numB[0]); if (numB.size() == 3) { outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "\n"; }else { outsvg << "\n"; } - outsvg << "The number of sepecies shared between groups " + globaldata->Groups[0] + " and " + globaldata->Groups[1] + " is " + toString(shared[0]) + "\n"; - outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[0] + " and " + globaldata->Groups[1] + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))) + "\n"; + outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(shared[0]) + "\n"; + outsvg << "Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))) + "\n"; outsvg << "The total richness for all groups is " + toString((float)(numA[0] + numB[0] - shared[0])) + "\n"; //close file @@ -320,23 +320,23 @@ void Venn::getPic(vector lookup, vector vCalcs outsvg << "" + toString(sharedBC[0] - sharedABC) + "\n"; outsvg << "" + toString(sharedABC) + "\n"; - outsvg << "The number of sepecies shared between groups " + globaldata->Groups[0] + " and " + globaldata->Groups[1] + " is " + toString(sharedAB[0]) + "\n"; - outsvg << "The number of sepecies shared between groups " + globaldata->Groups[0] + " and " + globaldata->Groups[2] + " is " + toString(sharedAC[0]) + "\n"; - outsvg << "The number of sepecies shared between groups " + globaldata->Groups[1] + " and " + globaldata->Groups[2] + " is " + toString(sharedBC[0]) + "\n"; - outsvg << "The number of sepecies shared between groups " + globaldata->Groups[0] + " and combined groups " + globaldata->Groups[1] + globaldata->Groups[2] + " is " + toString(sharedAwithBC[0]) + "\n"; - outsvg << "The number of sepecies shared between groups " + globaldata->Groups[1] + " and combined groups " + 
globaldata->Groups[0] + globaldata->Groups[2] + " is " + toString(sharedBwithAC[0]) + "\n"; - outsvg << "The number of sepecies shared between groups " + globaldata->Groups[2] + " and combined groups " + globaldata->Groups[0] + globaldata->Groups[1] + " is " + toString(sharedCwithAB[0]) + "\n"; - outsvg << "The number of species in group " + globaldata->Groups[0] + " is " + toString(numA[0]); + outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]) + "\n"; + outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]) + "\n"; + outsvg << "The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]) + "\n"; + outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "\n"; + outsvg << "The number of sepecies shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "\n"; + outsvg << "The number of sepecies shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "\n"; + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); if (numA.size() == 3) { outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "\n"; }else { outsvg << "\n"; } - outsvg << "The number of species in group " + globaldata->Groups[1] + " is " + toString(numB[0]); + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); if (numB.size() == 3) { outsvg << " the lci is " + toString(numB[1]) + 
" and the hci is " + toString(numB[2]) + "\n"; }else { outsvg << "\n"; } - outsvg << "The number of species in group " + globaldata->Groups[2] + " is " + toString(numC[0]); + outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); if (numC.size() == 3) { outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "\n"; }else { outsvg << "\n"; } @@ -498,17 +498,6 @@ void Venn::getPic(vector lookup, vector vCalcs outsvg << "" + toString(sharedABCD) + "\n"; outsvg << "The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[0] + " and " + globaldata->Groups[2] + " is " + toString(((sharedAC + sharedACD + sharedABC + sharedABCD) / (float)(numA + numC + sharedAB + sharedAC + sharedAD + sharedBC + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[0] + " and " + globaldata->Groups[3] + " is " + toString(((sharedAD + sharedACD + sharedABD + sharedABCD) / (float)(numA + numD + sharedAB + sharedAC + sharedAD + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[1] + " and " + globaldata->Groups[2] + " is " + toString(((sharedBC + sharedABC + sharedBCD + sharedABCD) / (float)(numB + numC + sharedAB + sharedAC + sharedCD + sharedBD + sharedBC + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[1] + " and " + globaldata->Groups[3] + " is " + toString(((sharedBD + sharedABD + sharedBCD + sharedABCD) / (float)(numB + 
numD + sharedAB + sharedAD + sharedCD + sharedBD + sharedBC + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[2] + " and " + globaldata->Groups[3] + " is " + toString(((sharedCD + sharedBCD + sharedACD + sharedABCD) / (float)(numC + numD + sharedAC + sharedAD + sharedCD + sharedBD + sharedBC + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[0] + ", " + globaldata->Groups[1] + " and " + globaldata->Groups[2] + " is " + toString(((sharedABC + sharedABCD) / (float)(numA + numB + numC + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[0] + ", " + globaldata->Groups[1] + " and " + globaldata->Groups[3] + " is " + toString(((sharedABD + sharedABCD) / (float)(numA + numB + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[0] + ", " + globaldata->Groups[2] + " and " + globaldata->Groups[3] + " is " + toString(((sharedACD + sharedABCD) / (float)(numA + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in groups " + globaldata->Groups[1] + ", " + globaldata->Groups[2] + " and " + globaldata->Groups[3] + " is " + toString(((sharedBCD + sharedABCD) / (float)(numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - //outsvg << "Percentage of species that are shared in 
groups " + globaldata->Groups[0] + ", " + globaldata->Groups[1] + ", " + globaldata->Groups[2] + " and " + globaldata->Groups[3] + " is " + toString((sharedABCD / (float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD))) + "\n"; - outsvg << "\n\n"; outsvg.close(); delete singleCalc; diff --git a/venncommand.cpp b/venncommand.cpp index bb3762c..0bed752 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -19,51 +19,128 @@ //********************************************************************************************************************** -VennCommand::VennCommand(){ +VennCommand::VennCommand(string option){ try { globaldata = GlobalData::getInstance(); - format = globaldata->getFormat(); - validCalculator = new ValidCalculators(); - - int i; - - if (format == "list") { - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("vennsingle", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "sobs") { - vennCalculators.push_back(new Sobs()); - }else if (globaldata->Estimators[i] == "chao") { - vennCalculators.push_back(new Chao1()); - }else if (globaldata->Estimators[i] == "ace") { - convert(globaldata->getAbund(), abund); - if(abund < 5) - abund = 10; - vennCalculators.push_back(new Ace(abund)); - //}else if (globaldata->Estimators[i] == "jack") { - //vennCalculators.push_back(new Jackknife()); - } + abort = false; + allLines = 1; + lines.clear(); + labels.clear(); + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"groups","line","label","calc", "abund"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + parser = new OptionParser(); + parser->parse(option, parameters); delete parser; + + ValidParameters* validParameter = new ValidParameters(); + + //check to make sure all parameters 
are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //make sure the user has already run the read.otu command + if ((globaldata->getListFile() == "") && (globaldata->getSharedFile() == "")) { + cout << "You must read a list, or a list and a group, or a shared before you can use the venn command." << endl; abort = true; + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + line = validParameter->validFile(parameters, "line", false); + if (line == "not found") { line = ""; } + else { + if(line != "all") { splitAtDash(line, lines); allLines = 0; } + else { allLines = 1; } + } + + label = validParameter->validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //make sure user did not use both the line and label parameters + if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. 
" << endl; abort = true; } + //if the user has not specified any line or labels use the ones from read.otu + else if ((line == "") && (label == "")) { + allLines = globaldata->allLines; + labels = globaldata->labels; + lines = globaldata->lines; + } + + groups = validParameter->validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + format = globaldata->getFormat(); + calc = validParameter->validFile(parameters, "calc", false); + if (calc == "not found") { + if(format == "list") { calc = "sobs"; } + else { calc = "sharedsobs"; } + } + else { + if (calc == "default") { + if(format == "list") { calc = "sobs"; } + else { calc = "sharedsobs"; } } } - }else { - for (i=0; iEstimators.size(); i++) { - if (validCalculator->isValidCalculator("vennshared", globaldata->Estimators[i]) == true) { - if (globaldata->Estimators[i] == "sharedsobs") { - vennCalculators.push_back(new SharedSobsCS()); - }else if (globaldata->Estimators[i] == "sharedchao") { - vennCalculators.push_back(new SharedChao1()); - }else if (globaldata->Estimators[i] == "sharedace") { - vennCalculators.push_back(new SharedAce()); + splitAtDash(calc, Estimators); + + string temp; + temp = validParameter->validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } + convert(temp, abund); + + delete validParameter; + + if (abort == false) { + validCalculator = new ValidCalculators(); + + int i; + + if (format == "list") { + for (i=0; iisValidCalculator("vennsingle", Estimators[i]) == true) { + if (Estimators[i] == "sobs") { + vennCalculators.push_back(new Sobs()); + }else if (Estimators[i] == "chao") { + vennCalculators.push_back(new Chao1()); + }else if (Estimators[i] == "ace") { + if(abund < 5) + abund = 10; + vennCalculators.push_back(new Ace(abund)); + } + } + } + }else { + for (i=0; iisValidCalculator("vennshared", Estimators[i]) == true) { + if (Estimators[i] == "sharedsobs") { + 
vennCalculators.push_back(new SharedSobsCS()); + }else if (Estimators[i] == "sharedchao") { + vennCalculators.push_back(new SharedChao1()); + }else if (Estimators[i] == "sharedace") { + vennCalculators.push_back(new SharedAce()); + } + } } } + + venn = new Venn(); } } - - venn = new Venn(); - - //reset calc for next command - globaldata->setCalc(""); + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the VennCommand class Function VennCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -74,6 +151,35 @@ VennCommand::VennCommand(){ exit(1); } } + +//********************************************************************************************************************** + +void VennCommand::help(){ + try { + cout << "The venn command can only be executed after a successful read.otu command." << "\n"; + cout << "The venn command parameters are groups, calc, abund, line and label. No parameters are required, but you may not use line and label at the same time." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups." << "\n"; + cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like a venn diagram created for, and are also separated by dashes." << "\n"; + cout << "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, abund=yourAbund)." << "\n"; + cout << "Example venn(groups=A-B-C, line=1-3-5, calc=sharedsobs-sharedchao, abund=20)." << "\n"; + cout << "The default value for groups is all the groups in your groupfile up to 4, and all lines in your inputfile will be used." << "\n"; + cout << "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups." 
<< "\n"; + cout << "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a list and group file or a shared file." << "\n"; + cout << "The only estmiator available four 4 groups is sharedsobs." << "\n"; + cout << "The venn command outputs a .svg file for each calculator you specify at each distance you choose." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the VennCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the VennCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + + //********************************************************************************************************************** VennCommand::~VennCommand(){ @@ -87,10 +193,13 @@ VennCommand::~VennCommand(){ int VennCommand::execute(){ try { + + if (abort == true) { return 0; } + int count = 1; SAbundVector* lastSAbund; vector lastLookup; - + //if the users entered no valid calculators don't execute command if (vennCalculators.size() == 0) { return 0; } @@ -114,15 +223,15 @@ int VennCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; - set userLabels = globaldata->labels; - set userLines = globaldata->lines; + set userLabels = labels; + set userLines = lines; if (format != "list") { //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + + if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ cout << lookup[0]->getLabel() << '\t' << count << endl; processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -186,9 +295,9 @@ int VennCommand::execute(){ }else{ - while((sabund != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(sabund->getLabel()) == 1){ + if(allLines == 1 || lines.count(count) == 1 || labels.count(sabund->getLabel()) == 1){ cout << sabund->getLabel() << '\t' << count << endl; venn->getPic(sabund, vennCalculators); @@ -235,8 +344,6 @@ int VennCommand::execute(){ delete lastSAbund; } - - globaldata->setGroups(""); return 0; } catch(exception& e) { diff --git a/venncommand.h b/venncommand.h index 663839c..3671350 100644 --- a/venncommand.h +++ b/venncommand.h @@ -23,9 +23,10 @@ class GlobalData; class VennCommand : public Command { public: - VennCommand(); + VennCommand(string); ~VennCommand(); int execute(); + void help(); private: GlobalData* globaldata; @@ -33,12 +34,21 @@ private: InputData* input; SharedListVector* SharedList; Venn* venn; - string format; vector 
vennCalculators; ValidCalculators* validCalculator; vector lookup; SAbundVector* sabund; int abund; + + OptionParser* parser; + map parameters; + map::iterator it; + bool abort, allLines; + set lines; //hold lines to be used + set labels; //holds labels to be used + string format, groups, calc, line, label; + vector Estimators, Groups; + }; -- 2.39.2