From b22853b5cfbf5c47949ad2a084f2fad88b2e4be4 Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 20 May 2009 16:34:26 +0000 Subject: [PATCH] added get.repseqs command, started matrix output command --- Mothur.xcodeproj/project.pbxproj | 22 ++- aligncommand.cpp | 3 - binsequencecommand.cpp | 11 +- boneh.cpp | 2 +- boneh.h | 2 +- commandfactory.cpp | 2 + efron.cpp | 2 +- efron.h | 2 +- errorchecking.cpp | 20 ++- getrepseqscommand.cpp | 263 +++++++++++++++++++++++++++++++ getrepseqscommand.h | 50 ++++++ globaldata.cpp | 17 +- globaldata.hpp | 2 - helpcommand.cpp | 13 +- matrixoutputcommand.cpp | 194 +++++++++++++++++++++++ matrixoutputcommand.h | 49 ++++++ noalign.cpp | 2 +- noalign.hpp | 2 +- rarefact.cpp | 1 + readseqs.cpp | 2 +- readseqs.h | 2 +- shen.cpp | 2 +- shen.h | 2 +- solow.cpp | 2 +- solow.h | 2 +- treegroupscommand.cpp | 29 ++-- treegroupscommand.h | 9 +- validcalculator.cpp | 36 +++++ validcalculator.h | 2 + validcommands.cpp | 1 + validparameter.cpp | 3 + 31 files changed, 685 insertions(+), 66 deletions(-) create mode 100644 getrepseqscommand.cpp create mode 100644 getrepseqscommand.h create mode 100644 matrixoutputcommand.cpp create mode 100644 matrixoutputcommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index bb172b8..8136cc2 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -7,9 +7,11 @@ objects = { /* Begin PBXBuildFile section */ + 211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */; }; 211C38320F961DD400FEE541 /* sharedutilities.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38300F961DD400FEE541 /* sharedutilities.cpp */; }; 211C38380F961E1F00FEE541 /* treegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38360F961E1F00FEE541 /* treegroupscommand.cpp */; }; 21DDC01B0F97A8FE0060691C /* bootstrapsharedcommand.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 21DDC01A0F97A8FE0060691C /* bootstrapsharedcommand.cpp */; }; + 21E859D80FC4632E005E1A48 /* matrixoutputcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */; }; 370B88070F8A4EE4005AB382 /* getoturepcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */; }; 372E12700F26365B0095CF7E /* readotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E126F0F26365B0095CF7E /* readotucommand.cpp */; }; 372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; }; @@ -27,7 +29,6 @@ 373C69180FC1C8AF00137ACD /* blastdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69160FC1C8AF00137ACD /* blastdb.cpp */; }; 373C691F0FC1C98600137ACD /* nast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C691D0FC1C98600137ACD /* nast.cpp */; }; 373C692B0FC1C9EB00137ACD /* nastreport.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69290FC1C9EB00137ACD /* nastreport.cpp */; }; - 373C69340FC1CA9E00137ACD /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69320FC1CA9E00137ACD /* distancedb.cpp */; }; 373C699A0FC1E63600137ACD /* boneh.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69940FC1E63600137ACD /* boneh.cpp */; }; 373C699B0FC1E63600137ACD /* efron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69960FC1E63600137ACD /* efron.cpp */; }; 373C699C0FC1E63600137ACD /* solow.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69980FC1E63600137ACD /* solow.cpp */; }; @@ -174,12 +175,16 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 211A24E90FC306BC00769A33 /* getrepseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getrepseqscommand.h; sourceTree = ""; }; + 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getrepseqscommand.cpp; sourceTree = ""; }; 211C38300F961DD400FEE541 /* sharedutilities.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedutilities.cpp; sourceTree = ""; }; 211C38310F961DD400FEE541 /* sharedutilities.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedutilities.h; sourceTree = ""; }; 211C38360F961E1F00FEE541 /* treegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treegroupscommand.cpp; sourceTree = ""; }; 211C38370F961E1F00FEE541 /* treegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treegroupscommand.h; sourceTree = ""; }; 21DDC0190F97A8FE0060691C /* bootstrapsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bootstrapsharedcommand.h; sourceTree = ""; }; 21DDC01A0F97A8FE0060691C /* bootstrapsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bootstrapsharedcommand.cpp; sourceTree = ""; }; + 21E859D60FC4632E005E1A48 /* matrixoutputcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = matrixoutputcommand.h; sourceTree = ""; }; + 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = matrixoutputcommand.cpp; sourceTree = ""; }; 370B88050F8A4EE4005AB382 /* getoturepcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getoturepcommand.h; sourceTree = ""; }; 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getoturepcommand.cpp; sourceTree = ""; }; 
372E126E0F26365B0095CF7E /* readotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readotucommand.h; sourceTree = ""; }; @@ -213,8 +218,6 @@ 373C691E0FC1C98600137ACD /* nast.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = nast.hpp; sourceTree = ""; }; 373C69290FC1C9EB00137ACD /* nastreport.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nastreport.cpp; sourceTree = ""; }; 373C692A0FC1C9EB00137ACD /* nastreport.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = nastreport.hpp; sourceTree = ""; }; - 373C69320FC1CA9E00137ACD /* distancedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = distancedb.cpp; sourceTree = ""; }; - 373C69330FC1CA9E00137ACD /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = ""; }; 373C69940FC1E63600137ACD /* boneh.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = boneh.cpp; sourceTree = ""; }; 373C69950FC1E63600137ACD /* boneh.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = boneh.h; sourceTree = ""; }; 373C69960FC1E63600137ACD /* efron.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = efron.cpp; sourceTree = ""; }; @@ -511,8 +514,6 @@ 08FB7794FE84155DC02AAC07 /* Mothur */ = { isa = PBXGroup; children = ( - EB72FE240FC1F5CA0051AC11 /* shen.cpp */, - EB72FE250FC1F5CA0051AC11 /* shen.h */, 08FB7795FE84155DC02AAC07 /* Source */, C6859E8C029090F304C91782 /* Documentation */, 1AB674ADFE9D54B511CA2CBB /* Products */, @@ -724,6 +725,8 @@ 37D9283C0F21331F001D4494 /* sharedthetan.cpp */, 37D9283F0F21331F001D4494 /* sharedthetayc.h */, 37D9283E0F21331F001D4494 /* sharedthetayc.cpp 
*/, + EB72FE250FC1F5CA0051AC11 /* shen.h */, + EB72FE240FC1F5CA0051AC11 /* shen.cpp */, 37D928410F21331F001D4494 /* simpson.h */, 37D928400F21331F001D4494 /* simpson.cpp */, 37D928430F21331F001D4494 /* sobs.h */, @@ -773,12 +776,16 @@ A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */, 370B88050F8A4EE4005AB382 /* getoturepcommand.h */, 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */, + 211A24E90FC306BC00769A33 /* getrepseqscommand.h */, + 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */, 375873F10F7D64800040F377 /* heatmapcommand.h */, 375873F00F7D64800040F377 /* heatmapcommand.cpp */, 37D927E40F21331F001D4494 /* helpcommand.h */, 37D927E30F21331F001D4494 /* helpcommand.cpp */, 375873F40F7D648F0040F377 /* libshuffcommand.h */, 375873F30F7D648F0040F377 /* libshuffcommand.cpp */, + 21E859D60FC4632E005E1A48 /* matrixoutputcommand.h */, + 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */, 375873F60F7D649C0040F377 /* nocommands.cpp */, 375873F70F7D649C0040F377 /* nocommands.h */, 37D927FA0F21331F001D4494 /* parselistcommand.h */, @@ -827,8 +834,6 @@ 37D927D40F21331F001D4494 /* database.hpp */, 37D927D30F21331F001D4494 /* database.cpp */, 37D927D50F21331F001D4494 /* datavector.hpp */, - 373C69330FC1CA9E00137ACD /* distancedb.hpp */, - 373C69320FC1CA9E00137ACD /* distancedb.cpp */, 37D927DC0F21331F001D4494 /* fastamap.h */, 37D927DB0F21331F001D4494 /* fastamap.cpp */, 375873EA0F7D64520040F377 /* fullmatrix.h */, @@ -1088,11 +1093,12 @@ 373C69180FC1C8AF00137ACD /* blastdb.cpp in Sources */, 373C691F0FC1C98600137ACD /* nast.cpp in Sources */, 373C692B0FC1C9EB00137ACD /* nastreport.cpp in Sources */, - 373C69340FC1CA9E00137ACD /* distancedb.cpp in Sources */, 373C699A0FC1E63600137ACD /* boneh.cpp in Sources */, 373C699B0FC1E63600137ACD /* efron.cpp in Sources */, 373C699C0FC1E63600137ACD /* solow.cpp in Sources */, EB72FE260FC1F5CA0051AC11 /* shen.cpp in Sources */, + 211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */, + 21E859D80FC4632E005E1A48 /* 
matrixoutputcommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/aligncommand.cpp b/aligncommand.cpp index 81f8e6a..f5eb48c 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -32,7 +32,6 @@ #include "kmerdb.hpp" #include "suffixdb.hpp" #include "blastdb.hpp" -#include "distancedb.hpp" #include "nast.hpp" #include "nastreport.hpp" @@ -50,7 +49,6 @@ AlignCommand::AlignCommand(){ convert(globaldata->getMismatch(), misMatch); convert(globaldata->getGapopen(), gapOpen); convert(globaldata->getGapextend(), gapExtend); - distanceFileName = "????"; } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function AlignCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -78,7 +76,6 @@ int AlignCommand::execute(){ if(globaldata->getSearch() == "kmer") { templateDB = new KmerDB(templateFileName, kmerSize); } else if(globaldata->getSearch() == "suffix") { templateDB = new SuffixDB(templateFileName); } else if(globaldata->getSearch() == "blast") { templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch); } - else if(globaldata->getSearch() == "distance") { templateDB = new DistanceDB(templateFileName, distanceFileName); } else { cout << globaldata->getSearch() << " is not a valid search option. I will run the command using suffix." 
<< endl; templateDB = new SuffixDB(templateFileName); } diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp index 375eec2..7810ea9 100644 --- a/binsequencecommand.cpp +++ b/binsequencecommand.cpp @@ -67,10 +67,6 @@ int BinSeqCommand::execute(){ if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ - //make new folder for bin info - //string foldername = "/" + getRootName(globaldata->getListFile()) + list->getLabel() + ".bins/"; - // mkdir(foldername.c_str()); - string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta"; openOutputFile(outputFileName, out); @@ -78,10 +74,6 @@ int BinSeqCommand::execute(){ //for each bin in the list vector for (int i = 0; i < list->size(); i++) { - - //create output file - //string outputFileName = foldername + getRootName(globaldata->getListFile()) + "bin" + toString(i+1) + ".fasta"; - //openOutputFile(outputFileName, out); binnames = list->get(i); while (binnames.find_first_of(',') != -1) { @@ -113,11 +105,12 @@ int BinSeqCommand::execute(){ remove(outputFileName.c_str()); return 0; } - //out.close(); + } out.close(); } + delete list; list = input->getListVector(); count++; } diff --git a/boneh.cpp b/boneh.cpp index 5174f75..7117e35 100644 --- a/boneh.cpp +++ b/boneh.cpp @@ -3,7 +3,7 @@ * Mothur * * Created by Thomas Ryabin on 5/13/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ diff --git a/boneh.h b/boneh.h index 919200c..7e328b1 100644 --- a/boneh.h +++ b/boneh.h @@ -6,7 +6,7 @@ * Mothur * * Created by Thomas Ryabin on 5/13/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. 
* */ diff --git a/commandfactory.cpp b/commandfactory.cpp index d41e4a8..e48bc6a 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -42,6 +42,7 @@ #include "concensuscommand.h" #include "distancecommand.h" #include "aligncommand.h" +#include "getrepseqscommand.h" /***********************************************************/ @@ -89,6 +90,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(); } else if(commandName == "venn") { command = new VennCommand(); } else if(commandName == "bin.seqs") { command = new BinSeqCommand(); } + else if(commandName == "get.repseqs") { command = new GetRepSeqsCommand(); } else if(commandName == "get.oturep") { command = new GetOTURepCommand(); } else if(commandName == "tree.shared") { command = new TreeGroupCommand(); } else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(); } diff --git a/efron.cpp b/efron.cpp index 0441f26..aeeb4c2 100644 --- a/efron.cpp +++ b/efron.cpp @@ -3,7 +3,7 @@ * Mothur * * Created by Thomas Ryabin on 5/13/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ diff --git a/efron.h b/efron.h index 652159d..782e72e 100644 --- a/efron.h +++ b/efron.h @@ -6,7 +6,7 @@ * Mothur * * Created by Thomas Ryabin on 5/13/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. 
* */ diff --git a/errorchecking.cpp b/errorchecking.cpp index b460987..19d1f8a 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -119,7 +119,6 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "ends" ) { ends = value; } if (parameter == "processors" ) { processors = value; } if (parameter == "size" ) { size = value; } - if (parameter == "template") { templatefile = value; } if (parameter == "search") { search = value; } if (parameter == "ksize") { ksize = value; } @@ -178,7 +177,7 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "mismatch") { mismatch = value; } if (parameter == "gapopen") { gapopen = value; } if (parameter == "gapextend" ) { gapextend = value; } - + } } @@ -278,6 +277,13 @@ bool ErrorCheck::checkInput(string input) { validateBinFiles(); } + if ((commandName == "get.repseqs")) { + if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the get.repseqs command." << endl; return false; } + else if (groupfile == "") { cout << "You must provide a groupfile before you can use the get.repseqs command." << endl; return false; } + validateBinFiles(); + } + + if ((commandName == "get.oturep")) { if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) { cout << "Before you use the get.oturep command, you first need to read in a distance matrix." << endl; @@ -609,7 +615,7 @@ void ErrorCheck::validateBinFiles() { int ableToOpen; if (fastafile == "") { - cout << "fasta is a required parameter for bin.seqs and get.oturep commands." << endl; errorFree = false; + cout << "fasta is a required parameter for bin.seqs, get.oturep and get.repseqs commands." 
<< endl; errorFree = false; }else if (fastafile != "") { //is it a valid filename' ableToOpen = openInputFile(fastafile, filehandle); @@ -636,9 +642,17 @@ void ErrorCheck::validateBinFiles() { filehandle.close(); //unable to open if (ableToOpen == 1) { errorFree = false; } + }else if (groupfile != "") { + //is it a valid filename' + ifstream filehandle; + int ableToOpen = openInputFile(groupfile, filehandle); + filehandle.close(); + //unable to open + if (ableToOpen == 1) { errorFree = false; } } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; diff --git a/getrepseqscommand.cpp b/getrepseqscommand.cpp new file mode 100644 index 0000000..ee9bf5a --- /dev/null +++ b/getrepseqscommand.cpp @@ -0,0 +1,263 @@ +/* + * getrepseqscommand.cpp + * Mothur + * + * Created by Sarah Westcott on 5/19/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "getrepseqscommand.h" + +//********************************************************************************************************************** +GetRepSeqsCommand::GetRepSeqsCommand(){ + try { + globaldata = GlobalData::getInstance(); + fastafile = globaldata->getFastaFile(); + namesfile = globaldata->getNameFile(); + openInputFile(fastafile, in); + + fasta = new FastaMap(); + + //read in group map info. + groupMap = new GroupMap(globaldata->getGroupFile()); + groupMap->readMap(); + + //fill filehandles with neccessary ofstreams + int i; + ofstream* temp; + //one for each group + for (i=0; igetNumGroups(); i++) { + temp = new ofstream; + filehandles[groupMap->namesOfGroups[i]] = temp; + } + + //one for shared + temp = new ofstream; + string s = "shared"; + filehandles[s] = temp; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function GetRepSeqsCommand. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetRepSeqsCommand class function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + +GetRepSeqsCommand::~GetRepSeqsCommand(){ + delete input; + delete read; + delete fasta; + delete list; +} + +//********************************************************************************************************************** + +int GetRepSeqsCommand::execute(){ + try { + int count = 1; + string binnames, name, sequence; + + //read fastafile + fasta->readFastaFile(in); + + //set format to list so input can get listvector + globaldata->setFormat("list"); + + //if user gave a namesfile then use it + if (namesfile != "") { + readNamesFile(); + } + + //read list file + read = new ReadOTUFile(globaldata->getListFile()); + read->read(&*globaldata); + + input = globaldata->ginput; + list = globaldata->gListVector; + + while(list != NULL){ + + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ + + cout << list->getLabel() << '\t' << count << endl; + + //open output list files + for (int i=0; igetNumGroups(); i++) {//opens an output file for each group + openOutputFile(fastafile + groupMap->namesOfGroups[i] + list->getLabel() + ".fasta", *(filehandles[groupMap->namesOfGroups[i]])); + used[groupMap->namesOfGroups[i]] = false; + } + string s = "shared"; + openOutputFile(fastafile + s + list->getLabel() + ".fasta", *(filehandles[s])); + used[s] = false; + + + //for each bin in the list vector + for (int i = 0; i < list->size(); i++) { + seq.clear(); + //uses this to determine if the bin is unique to one group or if it is shared + map groups; + + //determine if this otu is unique to one group or not 
+ binnames = list->get(i); + while (binnames.find_first_of(',') != -1) { + //parse out each name in bin + name = binnames.substr(0,binnames.find_first_of(',')); + binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); + + //do work for that name + sequence = fasta->getSequence(name); + if (sequence != "not found") { + string group = groupMap->getGroup(name); + if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin + else { + cout << "error sequence " << name << " is not assigned a group in your groupfile. Please correct." << endl; + removeFiles(list->getLabel()); + return 0; + } + name = ">" + name + "|" + toString(i+1); + seq[name] = sequence; + }else { + cout << name << " is missing from your fasta or name file. Please correct. " << endl; + removeFiles(list->getLabel()); + return 0; + } + + } + + //get last name + sequence = fasta->getSequence(binnames); + if (sequence != "not found") { + string group = groupMap->getGroup(binnames); + if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin + else { + cout << "error sequence " << binnames << " is not assigned a group in your groupfile. Please correct." << endl; + removeFiles(list->getLabel()); + return 0; + } + binnames = ">" + binnames + "|" + toString(i+1); //attach bin number to name + seq[binnames] = sequence; + }else { + cout << binnames << " is missing from your fasta or name file. Please correct. 
" << endl; + removeFiles(list->getLabel()); + return 0; + } + + //output each bin to files + //what file does this bin need to be outputted to + if (groups.size() == 1) { //this bin is unique to one group + it3 = groups.begin(); + string uniqueGroup = it3->first; + used[uniqueGroup] = true; + //print out sequences from that bin to shared file + for (it3 = seq.begin(); it3 != seq.end(); it3++){ + *(filehandles[uniqueGroup]) << it3->first << endl; + *(filehandles[uniqueGroup]) << it3->second << endl; + } + }else {//this bin has sequences from multiple groups in it + used[s] = true; + //print out sequences from that bin to shared file + for (it3 = seq.begin(); it3 != seq.end(); it3++){ + *(filehandles[s]) << it3->first << endl; + *(filehandles[s]) << it3->second << endl; + } + } + } + + //close ostreams and remove unused files + for (it = filehandles.begin(); it != filehandles.end(); it++) { + it->second->close(); + if (used[it->first] == false) { string filename = fastafile + it->first + list->getLabel() + ".fasta"; remove(filename.c_str()); } + } + + } + + delete list; + list = input->getListVector(); + count++; + } + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetRepSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + +//********************************************************************************************************************** +void GetRepSeqsCommand::readNamesFile() { + try { + vector dupNames; + openInputFile(namesfile, inNames); + + string name, names, sequence; + + while(inNames){ + inNames >> name; //read from first column A + inNames >> names; //read from second column A,B,C,D + + dupNames.clear(); + + //parse names into vector + splitAtComma(names, dupNames); + + //store names in fasta map + sequence = fasta->getSequence(name); + for (int i = 0; i < dupNames.size(); i++) { + fasta->push_back(dupNames[i], sequence); + } + + gobble(inNames); + } + inNames.close(); + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetRepSeqsCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** +void GetRepSeqsCommand::removeFiles(string label) { + try { + //close ostreams + for (it = filehandles.begin(); it != filehandles.end(); it++) { + it->second->close(); + } + + //remove output files because there was an error + for (int i=0; igetNumGroups(); i++) { + string outputFileName = fastafile + groupMap->namesOfGroups[i] + label + ".fasta"; + remove(outputFileName.c_str()); + } + string outputFileName = fastafile + "shared"+ label + ".fasta"; + remove(outputFileName.c_str()); + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) 
{ + cout << "An unknown error has occurred in the GetRepSeqsCommand class function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + diff --git a/getrepseqscommand.h b/getrepseqscommand.h new file mode 100644 index 0000000..970b65c --- /dev/null +++ b/getrepseqscommand.h @@ -0,0 +1,50 @@ +#ifndef GETREPSEQSCOMMAND_H +#define GETREPSEQSCOMMAND_H +/* + * getrepseqscommand.h + * Mothur + * + * Created by Sarah Westcott on 5/19/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + + +#include "command.hpp" +#include "inputdata.h" +#include "listvector.hpp" +#include "readotu.h" +#include "fastamap.h" +#include "groupmap.h" + + +class GlobalData; + +class GetRepSeqsCommand : public Command { + +public: + GetRepSeqsCommand(); + ~GetRepSeqsCommand(); + int execute(); + +private: + GlobalData* globaldata; + ListVector* list; + ReadOTUFile* read; + GroupMap* groupMap; + InputData* input; + FastaMap* fasta; + string filename, fastafile, namesfile; + map filehandles; + map::iterator it; + map used; //group, if it had any unique otus + map::iterator it2; + map seq; + map::iterator it3; + ifstream in, inNames; + + void readNamesFile(); + void removeFiles(string); +}; + +#endif diff --git a/globaldata.cpp b/globaldata.cpp index 8b8f9e3..1baf4ed 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -86,10 +86,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "ends" ) { ends = value; } if (key == "processors" ) { processors = value; } if (key == "size" ) { size = value; } - - - - if (key == "template") { templatefile = value; } if (key == "search") { search = value; } if (key == "ksize") { ksize = value; } @@ -161,7 +157,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "ends" ) { ends = value; } if 
(key == "processors" ) { processors = value; } if (key == "size" ) { size = value; } - if (key == "template") { templatefile = value; } if (key == "search") { search = value; } if (key == "ksize") { ksize = value; } @@ -308,12 +303,6 @@ string GlobalData::getScale() { return scale; } string GlobalData::getEnds() { return ends; } string GlobalData::getProcessors() { return processors; } string GlobalData::getSize() { return size; } - -void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} -void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} -void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} -void GlobalData::setPhylipFile(string file) { phylipfile = file; inputFileName = file;} -void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file;} string GlobalData::getTemplateFile() { return templatefile;} string GlobalData::getSearch() { return search; } string GlobalData::getKSize() { return ksize; } @@ -323,6 +312,12 @@ string GlobalData::getMismatch() { return mismatch; } string GlobalData::getGapopen() { return gapopen; } string GlobalData::getGapextend() { return gapextend; } + +void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} +void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} +void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} +void GlobalData::setPhylipFile(string file) { phylipfile = file; inputFileName = file;} +void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file;} void GlobalData::setGroupFile(string file) { groupfile = file; } void GlobalData::setSharedFile(string file) { sharedfile = file; inputFileName = file; fileroot = file;} void GlobalData::setNameFile(string file) { namefile = file; } diff --git a/globaldata.hpp b/globaldata.hpp index d427aba..c4bfe05 100644 --- a/globaldata.hpp +++ 
b/globaldata.hpp @@ -89,8 +89,6 @@ public: string getTrump(); string getSoft(); string getFilter(); - - string getScale(); diff --git a/helpcommand.cpp b/helpcommand.cpp index 9e433cd..a30caef 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -87,7 +87,7 @@ int HelpCommand::execute(){ cout << "The align.seqs command parameters are fasta, phylip, clustal, nexus, template, search, ksize, align, match, mismatch, gapopen and gapextend. " << "\n"; cout << "You must use one of the following parameters for your candidate filename: fasta, phylip, clustal or nexus. " << "\n"; cout << "The template parameter is also required." << "\n"; - cout << "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer, blast and distance. The default is suffix." << "\n"; + cout << "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is suffix." << "\n"; cout << "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is blast." << "\n"; cout << "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 7." << "\n"; cout << "The match parameter allows you to specify the bonus for having the same base. The default is 1.0." << "\n"; @@ -296,6 +296,17 @@ int HelpCommand::execute(){ cout << "The default value for line and label are all lines in your inputfile." << "\n"; cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n"; cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; + }else if (globaldata->helpRequest == "get.repseqs") { + cout << "The get.repseqs command can only be executed after a successful read.otu command of a list file." 
<< "\n"; + cout << "The get.repseqs command parameters are fasta, name, group, line and label. The fasta and group parameters are required, and you may not use line and label at the same time." << "\n"; + cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; + cout << "The get.repseqss command should be in the following format: get.repseqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupfile, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example get.repseqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n"; + cout << "The default value for line and label are all lines in your inputfile." << "\n"; + cout << "The get.repseqs command outputs several .fasta files for each distance you specify. " << "\n"; + cout << "If the distance level you choose has bins that contain only sequences unique to a specific group those sequences are outputted to a file for that group." << "\n"; + cout << "If the bin contains sequences from multiple groups then the bin is outputted to the shared fasta file." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; }else if (globaldata->helpRequest == "get.oturep") { cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n"; cout << "The get.oturep command parameters are list, fasta, name, line and label. The fasta and list parameters are required, and you may not use line and label at the same time." << "\n"; diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp new file mode 100644 index 0000000..7b9ea96 --- /dev/null +++ b/matrixoutputcommand.cpp @@ -0,0 +1,194 @@ +/* + * matrixoutputcommand.cpp + * Mothur + * + * Created by Sarah Westcott on 5/20/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
+ *
+ */
+
+#include "matrixoutputcommand.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+
+
+//**********************************************************************************************************************
+
+/* Builds the list of similarity calculators the user asked for.
+	Every name in globaldata->Estimators that ValidCalculators accepts for the "matrix" parameter
+	is mapped to its calculator object; names that are rejected are skipped.
+	The calc parameter is cleared afterwards so it does not carry over to the next command. */
+MatrixOutputCommand::MatrixOutputCommand(){
+	try {
+		//execute() can return before these are assigned and the destructor deletes both,
+		//so they have to start out as NULL
+		read = NULL;
+		input = NULL;
+
+		globaldata = GlobalData::getInstance();
+		validCalculator = new ValidCalculators();
+
+		int i;
+		for (i=0; i<globaldata->Estimators.size(); i++) {
+			if (validCalculator->isValidCalculator("matrix", globaldata->Estimators[i]) == true) {
+				if (globaldata->Estimators[i] == "jabund") {
+					matrixCalculators.push_back(new JAbund());
+				}else if (globaldata->Estimators[i] == "sorabund") {
+					matrixCalculators.push_back(new SorAbund());
+				}else if (globaldata->Estimators[i] == "jclass") {
+					matrixCalculators.push_back(new Jclass());
+				}else if (globaldata->Estimators[i] == "sorclass") {
+					matrixCalculators.push_back(new SorClass());
+				}else if (globaldata->Estimators[i] == "jest") {
+					matrixCalculators.push_back(new Jest());
+				}else if (globaldata->Estimators[i] == "sorest") {
+					matrixCalculators.push_back(new SorEst());
+				}else if (globaldata->Estimators[i] == "thetayc") {
+					matrixCalculators.push_back(new ThetaYC());
+				}else if (globaldata->Estimators[i] == "thetan") {
+					matrixCalculators.push_back(new ThetaN());
+				}else if (globaldata->Estimators[i] == "morisitahorn") {
+					matrixCalculators.push_back(new MorHorn());
+				}else if (globaldata->Estimators[i] == "braycurtis") {
+					matrixCalculators.push_back(new BrayCurtis());
+				}
+			}
+		}
+
+		//reset calc for next command
+		globaldata->setCalc("");
+
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+
+/* Releases everything this command allocated.
+	input and read are NULL when execute() never got far enough to set them; deleting NULL is a no-op. */
+MatrixOutputCommand::~MatrixOutputCommand(){
+	delete input;
+	delete read;
+	delete validCalculator;
+
+	//the calculators were created with new in the constructor and are owned by this command
+	//NOTE(review): assumes the calculator base class declares a virtual destructor - confirm
+	for (int i = 0; i < matrixCalculators.size(); i++) {  delete matrixCalculators[i];  }
+}
+
+//**********************************************************************************************************************
+
+/* Writes one similarity matrix file per calculator for every selected line/label of the input file.
+	For each selected distance level the similarity of every pair of groups is computed with each
+	calculator and saved to <root of inputFileName><calculator name>.<label>.matrix.
+	Returns 0, also when there are no valid calculators or fewer than 2 groups (a message is printed).
+	Any exception is reported on cout and ends the program with exit(1). */
+int MatrixOutputCommand::execute(){
+	try {
+		int count = 1;
+		EstOutput data;
+		//NOTE(review): the element type was lost from this text; restored as SharedRAbundVector*
+		//to match what getSharedRAbundVectors() is expected to return - confirm
+		vector<SharedRAbundVector*> subset;
+
+		//if the users entered no valid calculators don't execute command
+		if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
+
+		//you have groups
+		read = new ReadOTUFile(globaldata->inputFileName);
+		read->read(&*globaldata);
+
+		input = globaldata->ginput;
+		lookup = input->getSharedRAbundVectors();
+
+		if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0;}
+
+		//simMatrix is filled and printed using positions in lookup,
+		//so its dimension has to be the number of vectors in lookup
+		numGroups = lookup.size();
+
+		//lookup[0] may only be read when lookup is not empty
+		while(!lookup.empty() && lookup[0] != NULL){
+
+			if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){
+
+				cout << lookup[0]->getLabel() << '\t' << count << endl;
+
+				//for each calculator
+				for(int i = 0 ; i < matrixCalculators.size(); i++) {
+
+					//initialize simMatrix to numGroups x numGroups zeros
+					simMatrix.clear();
+					simMatrix.resize(numGroups);
+					for (int m = 0; m < simMatrix.size(); m++) {  simMatrix[m].assign(numGroups, 0.0);  }
+
+					//visit every unordered pair of groups once; the diagonal stays 0
+					//because we dont need the similarity of a group to itself
+					for (int k = 0; k < lookup.size(); k++) {
+						for (int l = k + 1; l < lookup.size(); l++) {
+							//get estimated similarity between 2 groups
+
+							subset.clear(); //clear out old pair of sharedrabunds
+							//add new pair of sharedrabunds
+							subset.push_back(lookup[k]); subset.push_back(lookup[l]);
+
+							data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
+							//save values in similarity matrix, mirrored so the matrix is symmetric
+							simMatrix[k][l] = data[0];
+							simMatrix[l][k] = data[0];
+						}
+					}
+
+					exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".matrix";
+					openOutputFile(exportFileName, out);
+					printSims(out);
+					out.close();
+
+				}
+			}
+
+			//prevent memory leak
+			for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+
+			//get next line to process
+			lookup = input->getSharedRAbundVectors();
+			count++;
+		}
+
+		//reset groups parameter
+		globaldata->Groups.clear();  globaldata->setGroups("");
+
+		return 0;
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+/***********************************************************/
+/* Prints simMatrix to out as a tab delimited table:
+	a header row with the group name of every vector in lookup, then one row per group
+	starting with that group's name followed by its similarity to each group. */
+void MatrixOutputCommand::printSims(ostream& out) {
+	try {
+
+		//output column headers
+		out << '\t';
+		for (int i = 0; i < lookup.size(); i++) {	out << lookup[i]->getGroup() << '\t';		}
+		out << endl;
+
+
+		for (int m = 0; m < simMatrix.size(); m++)	{
+			out << lookup[m]->getGroup() << '\t';
+			for (int n = 0; n < simMatrix.size(); n++)	{
+				out << simMatrix[m][n] << '\t';
+			}
+			out << endl;
+		}
+
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+/***********************************************************/
+
+
diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h
new file mode 100644
index 0000000..04d0ccb
--- /dev/null
+++ b/matrixoutputcommand.h
@@ -0,0 +1,49 @@
+#ifndef MATRIXOUTPUTCOMMAND_H
+#define MATRIXOUTPUTCOMMAND_H
+
+/*
+ *  matrixoutputcommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 5/20/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+#include "command.hpp"
+#include "inputdata.h"
+#include "groupmap.h"
+#include "readotu.h"
+#include "validcalculator.h"
+
+/* This command creates a matrix file for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
+	The user can select the lines or labels they wish to use as well as the groups they would like included.
+	They can also use as many or as few calculators as they wish.
*/ + +class GlobalData; + +class MatrixOutputCommand : public Command { + +public: + MatrixOutputCommand(); + ~MatrixOutputCommand(); + int execute(); + +private: + void printSims(ostream&); + + GlobalData* globaldata; + ReadOTUFile* read; + vector matrixCalculators; + vector< vector > simMatrix; + InputData* input; + ValidCalculators* validCalculator; + vector lookup; + string exportFileName; + int numGroups; + ofstream out; + +}; + + +#endif + diff --git a/noalign.cpp b/noalign.cpp index 14e34f6..e87b757 100644 --- a/noalign.cpp +++ b/noalign.cpp @@ -3,7 +3,7 @@ * * * Created by Pat Schloss on 2/19/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ diff --git a/noalign.hpp b/noalign.hpp index dd1e4ba..bd9817a 100644 --- a/noalign.hpp +++ b/noalign.hpp @@ -6,7 +6,7 @@ * * * Created by Pat Schloss on 2/19/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ using namespace std; diff --git a/rarefact.cpp b/rarefact.cpp index ba0c02e..6766249 100644 --- a/rarefact.cpp +++ b/rarefact.cpp @@ -118,6 +118,7 @@ try { displays[i]->reset(); } + delete merge; } for(int i=0;igetFormat(); validCalculator = new ValidCalculators(); - util = new SharedUtil(); - + int i; for (i=0; iEstimators.size(); i++) { if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) { @@ -74,7 +73,6 @@ TreeGroupCommand::TreeGroupCommand(){ TreeGroupCommand::~TreeGroupCommand(){ delete input; delete read; - delete util; } //********************************************************************************************************************** @@ -95,7 +93,7 @@ int TreeGroupCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); - if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." 
<< endl; } + if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0; } numGroups = globaldata->Groups.size(); groupNames = ""; @@ -136,7 +134,8 @@ int TreeGroupCommand::execute(){ //create a new filename outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre"; - + + for (int k = 0; k < lookup.size(); k++) { for (int l = k; l < lookup.size(); l++) { if (k != l) { //we dont need to similiarity of a groups to itself @@ -158,7 +157,10 @@ int TreeGroupCommand::execute(){ createTree(); } } - + + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + //get next line to process lookup = input->getSharedRAbundVectors(); count++; @@ -256,14 +258,21 @@ void TreeGroupCommand::createTree(){ } } /***********************************************************/ -void TreeGroupCommand::printSims() { +void TreeGroupCommand::printSims(ostream& out) { try { - cout << "simsMatrix" << endl; + + //output column headers + out << '\t'; + for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; } + out << endl; + + for (int m = 0; m < simMatrix.size(); m++) { + out << lookup[m]->getGroup() << '\t'; for (int n = 0; n < simMatrix.size(); n++) { - cout << simMatrix[m][n] << '\t'; + out << simMatrix[m][n] << '\t'; } - cout << endl; + out << endl; } } diff --git a/treegroupscommand.h b/treegroupscommand.h index 3c40eff..10a2e31 100644 --- a/treegroupscommand.h +++ b/treegroupscommand.h @@ -11,15 +11,12 @@ */ #include "command.hpp" -#include "sharedordervector.h" -#include "sharedlistvector.h" #include "inputdata.h" #include "groupmap.h" #include "readotu.h" #include "validcalculator.h" #include "tree.h" #include "treemap.h" -#include "sharedutilities.h" /* This command create a tree file for each similarity calculator at distance level, using various calculators to find the similiarity between groups. 
@@ -37,10 +34,9 @@ public: private: void createTree(); - void printSims(); + void printSims(ostream&); GlobalData* globaldata; - SharedUtil* util; ReadOTUFile* read; TreeMap* tmap; Tree* t; @@ -49,11 +45,10 @@ private: map index; //maps row in simMatrix to vector index in the tree InputData* input; ValidCalculators* validCalculator; - SharedListVector* SharedList; - SharedOrderVector* order; vector lookup; string format, outputFile, groupNames; int numGroups; + ofstream out; }; diff --git a/validcalculator.cpp b/validcalculator.cpp index edcf8c7..99cd47a 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -23,6 +23,7 @@ ValidCalculators::ValidCalculators() { initialTreeGroups(); initialBoot(); initialDistance(); + initialMatrix(); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ValidCalculator class Function ValidCalculator. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -146,6 +147,17 @@ bool ValidCalculators::isValidCalculator(string parameter, string calculator) { } cout << endl; return false; } + }else if (parameter == "matrix") { + //is it valid + if ((matrix.find(calculator)) != (matrix.end())) { + return true; + }else { + cout << calculator << " is not a valid estimator for the matrix.output command and will be disregarded. 
Valid estimators are "; + for (it = matrix.begin(); it != matrix.end(); it++) { + cout << it->first << ", "; + } + cout << endl; + return false; } }else if (parameter == "boot") { //is it valid if ((boot.find(calculator)) != (boot.end())) { @@ -430,6 +442,30 @@ void ValidCalculators::initialTreeGroups() { exit(1); } } +/********************************************************************/ +void ValidCalculators::initialMatrix() { + try { + matrix["jabund"] = "jabund"; + matrix["sorabund"] = "sorabund"; + matrix["jclass"] = "jclass"; + matrix["sorclass"] = "sorclass"; + matrix["jest"] = "jest"; + matrix["sorest"] = "sorest"; + matrix["thetayc"] = "thetayc"; + matrix["thetan"] = "thetan"; + matrix["morisitahorn"] = "morisitahorn"; + matrix["braycurtis"] = "braycurtis"; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ValidCalculator class Function initialMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ValidCalculator class function initialMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + /********************************************************************/ void ValidCalculators::initialBoot() { try { diff --git a/validcalculator.h b/validcalculator.h index 45381b8..15ff311 100644 --- a/validcalculator.h +++ b/validcalculator.h @@ -35,6 +35,7 @@ class ValidCalculators { map vennsingle; map vennshared; map treegroup; + map matrix; map boot; map distance; map::iterator it; @@ -48,6 +49,7 @@ class ValidCalculators { void initialVennSingle(); void initialVennShared(); void initialTreeGroups(); + void initialMatrix(); void initialBoot(); void initialDistance(); }; diff --git a/validcommands.cpp b/validcommands.cpp index a5f3651..c648585 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -19,6 +19,7 @@ ValidCommands::ValidCommands() { commands["read.tree"] = "read.tree"; commands["read.seqs"] = "read.seqs"; commands["bin.seqs"] = "bin.seqs"; + commands["get.repseqs"] = "get.repseqs"; commands["get.oturep"] = "get.oturep"; commands["cluster"] = "cluster"; commands["deconvolute"] = "deconvolute"; diff --git a/validparameter.cpp b/validparameter.cpp index 3e4d16b..5e93a5c 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -276,6 +276,9 @@ void ValidParameters::initCommandParameters() { string binseqsArray[] = {"fasta","line","label","name"}; commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string)); + string getrepseqsArray[] = {"fasta","line","label","name", "group"}; + commandParameters["get.repseqs"] = addParameters(getrepseqsArray, sizeof(getrepseqsArray)/sizeof(string)); + string getOTURepArray[] = {"fasta","list","line","label","name"}; commandParameters["get.oturep"] = addParameters(getOTURepArray, sizeof(getOTURepArray)/sizeof(string)); -- 2.39.2