From fd282e6b4be2560f5b1bd154a9e8d24b798eefaf Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 22 Jan 2009 16:49:25 +0000 Subject: [PATCH] finishing the container classes, combining read.otu and read.list commands. some cosmetic fixes. --- Mothur.xcodeproj/project.pbxproj | 12 +- clustercommand.h | 2 + collectsharedcommand.cpp | 33 +-- collectsharedcommand.h | 4 +- commandfactory.cpp | 2 - datavector.hpp | 8 + errorchecking.cpp | 11 +- fastamap.h | 7 +- globaldata.cpp | 70 +++--- globaldata.hpp | 2 + inputdata.cpp | 37 ++- inputdata.h | 3 + listvector.cpp | 26 --- listvector.hpp | 17 +- ordervector.hpp | 10 + rabundvector.hpp | 9 + rarefactsharedcommand.cpp | 12 +- rarefactsharedcommand.h | 4 +- readlistcommand.h | 56 ----- readmatrix.cpp | 4 +- readmatrix.hpp | 2 + readotucommand.cpp | 13 ++ readotucommand.h | 21 +- sabundvector.hpp | 11 + shared.h | 1 + sharedlistvector.cpp | 377 +++++++++++++++++++++++++++++++ sharedlistvector.h | 71 ++++++ sharedordervector.cpp | 50 +++- sharedordervector.h | 24 +- sharedrabundvector.cpp | 48 +++- sharedrabundvector.h | 21 +- sharedsabundvector.cpp | 23 +- sharedsabundvector.h | 17 +- summarysharedcommand.cpp | 21 +- summarysharedcommand.h | 3 +- validcalculator.cpp | 2 + validcommands.cpp | 3 +- 37 files changed, 833 insertions(+), 204 deletions(-) delete mode 100644 readlistcommand.h create mode 100644 sharedlistvector.cpp create mode 100644 sharedlistvector.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 601ab36..a3ed9b8 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -9,8 +9,8 @@ /* Begin PBXBuildFile section */ 372E12700F26365B0095CF7E /* readotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E126F0F26365B0095CF7E /* readotucommand.cpp */; }; 372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; }; - 372E12C10F2648250095CF7E /* 
readlistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12C00F2648250095CF7E /* readlistcommand.cpp */; }; 372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12EC0F264D320095CF7E /* commandfactory.cpp */; }; + 37AD4CE40F28AEA300AA2D49 /* sharedlistvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */; }; 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; }; 37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; }; 37D928560F21331F001D4494 /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BA0F21331F001D4494 /* averagelinkage.cpp */; }; @@ -101,9 +101,9 @@ 372E126F0F26365B0095CF7E /* readotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readotucommand.cpp; sourceTree = ""; }; 372E12940F263D5A0095CF7E /* readdistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readdistcommand.h; sourceTree = ""; }; 372E12950F263D5A0095CF7E /* readdistcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readdistcommand.cpp; sourceTree = ""; }; - 372E12BF0F2648250095CF7E /* readlistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readlistcommand.h; sourceTree = ""; }; - 372E12C00F2648250095CF7E /* readlistcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readlistcommand.cpp; sourceTree = ""; }; 372E12EC0F264D320095CF7E /* commandfactory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = commandfactory.cpp; sourceTree = ""; }; + 37AD4CE20F28AEA300AA2D49 /* 
sharedlistvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedlistvector.h; sourceTree = ""; }; + 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedlistvector.cpp; sourceTree = ""; }; 37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = ""; }; 37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = ""; }; 37D927B80F21331F001D4494 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; @@ -422,8 +422,6 @@ 372E12950F263D5A0095CF7E /* readdistcommand.cpp */, 372E126E0F26365B0095CF7E /* readotucommand.h */, 372E126F0F26365B0095CF7E /* readotucommand.cpp */, - 372E12BF0F2648250095CF7E /* readlistcommand.h */, - 372E12C00F2648250095CF7E /* readlistcommand.cpp */, 37D928270F21331F001D4494 /* sharedcommand.h */, 37D928260F21331F001D4494 /* sharedcommand.cpp */, 37D928470F21331F001D4494 /* summarycommand.h */, @@ -450,6 +448,8 @@ 37D927FF0F21331F001D4494 /* rabundvector.cpp */, 37D9281B0F21331F001D4494 /* sabundvector.hpp */, 37D9281A0F21331F001D4494 /* sabundvector.cpp */, + 37AD4CE20F28AEA300AA2D49 /* sharedlistvector.h */, + 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */, 37D9282F0F21331F001D4494 /* sharedordervector.h */, 37D9282E0F21331F001D4494 /* sharedordervector.cpp */, 37D928310F21331F001D4494 /* sharedrabundvector.h */, @@ -596,9 +596,9 @@ 37D9289F0F21331F001D4494 /* validparameter.cpp in Sources */, 372E12700F26365B0095CF7E /* readotucommand.cpp in Sources */, 372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */, - 372E12C10F2648250095CF7E /* readlistcommand.cpp in Sources 
*/, 372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */, 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */, + 37AD4CE40F28AEA300AA2D49 /* sharedlistvector.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/clustercommand.h b/clustercommand.h index 9dfd2a0..1487b3e 100644 --- a/clustercommand.h +++ b/clustercommand.h @@ -12,8 +12,10 @@ #include #include #include +#include "globaldata.hpp" #include "command.hpp" #include "rabundvector.hpp" +#include "sabundvector.hpp" #include "listvector.hpp" #include "cluster.hpp" #include "sparsematrix.hpp" diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index 63a267d..76f7abb 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -8,6 +8,7 @@ */ #include "collectsharedcommand.h" +#include "sharedsobs.h" #include "sharedchao1.h" #include "sharedace.h" #include "sharedjabund.h" @@ -32,25 +33,27 @@ CollectSharedCommand::CollectSharedCommand(){ int i; for (i=0; isharedEstimators.size(); i++) { if (globaldata->sharedEstimators[i] == "sharedChao") { - cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"sharedChao", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao", groupmap->namesOfGroups))); + }else if (globaldata->sharedEstimators[i] == "sharedSobs") { + cDisplays.push_back(new CollectDisplay(new SharedSobs(), new SharedOneColumnFile(fileNameRoot+"shared.sobs", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "sharedAce") { - cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"sharedAce", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "sharedJabund") { - cDisplays.push_back(new 
CollectDisplay(new SharedJAbund(), new SharedOneColumnFile(fileNameRoot+"SharedJabund", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedJAbund(), new SharedOneColumnFile(fileNameRoot+"shared.jabund", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "sharedSorensonAbund") { - cDisplays.push_back(new CollectDisplay(new SharedSorAbund(), new SharedOneColumnFile(fileNameRoot+"SharedSorensonAbund", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedSorAbund(), new SharedOneColumnFile(fileNameRoot+"shared.sorensonabund", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "sharedJclass") { - cDisplays.push_back(new CollectDisplay(new SharedJclass(), new SharedOneColumnFile(fileNameRoot+"SharedJclass", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedJclass(), new SharedOneColumnFile(fileNameRoot+"shared.jclass", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "sharedSorClass") { - cDisplays.push_back(new CollectDisplay(new SharedSorClass(), new SharedOneColumnFile(fileNameRoot+"SharedSorClass", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedSorClass(), new SharedOneColumnFile(fileNameRoot+"shared.sorclass", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "sharedJest") { - cDisplays.push_back(new CollectDisplay(new SharedJest(), new SharedOneColumnFile(fileNameRoot+"SharedJest", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedJest(), new SharedOneColumnFile(fileNameRoot+"shared.jest", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "sharedSorEst") { - cDisplays.push_back(new CollectDisplay(new SharedSorEst(), new SharedOneColumnFile(fileNameRoot+"SharedSorEst", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedSorEst(), new SharedOneColumnFile(fileNameRoot+"shared.sorest", 
groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "SharedThetaYC") { - cDisplays.push_back(new CollectDisplay(new SharedThetaYC(), new SharedOneColumnFile(fileNameRoot+"SharedThetaYC", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedThetaYC(), new SharedOneColumnFile(fileNameRoot+"shared.thetayc", groupmap->namesOfGroups))); }else if (globaldata->sharedEstimators[i] == "SharedThetaN") { - cDisplays.push_back(new CollectDisplay(new SharedThetaN(), new SharedOneColumnFile(fileNameRoot+"SharedThetaN", groupmap->namesOfGroups))); + cDisplays.push_back(new CollectDisplay(new SharedThetaN(), new SharedOneColumnFile(fileNameRoot+"shared.thetan", groupmap->namesOfGroups))); } } } @@ -83,8 +86,8 @@ int CollectSharedCommand::execute(){ read->read(&*globaldata); input = globaldata->ginput; - list = globaldata->glist; - order = list->getSharedOrderVector(); + SharedList = globaldata->gSharedList; + order = SharedList->getSharedOrderVector(); while(order != NULL){ @@ -99,9 +102,9 @@ int CollectSharedCommand::execute(){ cout << order->getLabel() << '\t' << count << endl; } - list = input->getListVector(); //get new list vector to process - if (list != NULL) { - order = list->getSharedOrderVector(); //gets new order vector with group info. + SharedList = input->getSharedListVector(); //get new list vector to process + if (SharedList != NULL) { + order = SharedList->getSharedOrderVector(); //gets new order vector with group info. 
count++; }else { break; diff --git a/collectsharedcommand.h b/collectsharedcommand.h index ee11a70..3737c11 100644 --- a/collectsharedcommand.h +++ b/collectsharedcommand.h @@ -15,7 +15,7 @@ #include #include "command.hpp" #include "sharedordervector.h" -#include "listvector.hpp" +#include "sharedlistvector.h" #include "inputdata.h" #include "groupmap.h" #include "collect.h" @@ -47,7 +47,7 @@ public: private: GlobalData* globaldata; GroupMap* groupmap; - ListVector* list; + SharedListVector* SharedList; ReadMatrix* read; SharedOrderVector* order; InputData* input; diff --git a/commandfactory.cpp b/commandfactory.cpp index 49d197c..6e62995 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -10,7 +10,6 @@ #include "command.hpp" #include "readdistcommand.h" #include "readotucommand.h" -#include "readlistcommand.h" #include "clustercommand.h" #include "parselistcommand.h" #include "collectcommand.h" @@ -51,7 +50,6 @@ Command* CommandFactory::getCommand(string commandName){ if(commandName == "read.dist") { command = new ReadDistCommand(); } else if(commandName == "read.otu") { command = new ReadOtuCommand(); } - else if(commandName == "read.list") { command = new ReadListFileCommand(); } else if(commandName == "cluster") { command = new ClusterCommand(); } else if(commandName == "deconvolute") { command = new DeconvoluteCommand(); } else if(commandName == "help") { command = new HelpCommand(); } diff --git a/datavector.hpp b/datavector.hpp index 4ddb1aa..426a757 100644 --- a/datavector.hpp +++ b/datavector.hpp @@ -8,9 +8,17 @@ using namespace std; #include #include +/* This class is parent to listvector, ordervector, rabundvector, sabundvector, sharedordervector, sharedrabundvector, sharedsabundvector. + The child classes all contain OTU information in different forms. 
*/ + + class RAbundVector; class SAbundVector; class OrderVector; +class SharedListVector; +class SharedOrderVector; +class SharedSAbundVector; +class SharedRAbundVector; class DataVector { diff --git a/errorchecking.cpp b/errorchecking.cpp index 8a56c15..41c1db9 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -210,10 +210,13 @@ bool ErrorCheck::checkInput(string input) { validateReadFiles(); validateReadDist(); }else if (commandName == "read.otu") { - validateReadFiles(); - validateReadPhil(); - }else if (commandName == "read.list") { - validateParseFiles(); //checks the listfile and groupfile parameters + //you want to do shared commands + if ((listfile != "") && (groupfile != "")) { + validateParseFiles(); //checks the listfile and groupfile parameters + }else { //you want to do single commands + validateReadFiles(); + validateReadPhil(); + } }else if (commandName == "deconvolute") { validateReadFiles(); } diff --git a/fastamap.h b/fastamap.h index fbda6ae..9eab853 100644 --- a/fastamap.h +++ b/fastamap.h @@ -19,6 +19,11 @@ using namespace std; #include "utilities.hpp" +/* This class represents the fasta file. It reads a fasta file and populates the internal data structure "data". +Data is a map where the key is the sequence and the value is a struct containing the sequence's groupname, +a list of the names of the sequences that share that sequence, and a count of how many such names there are. */ + + class FastaMap { public: @@ -32,7 +37,7 @@ public: void set(string, string, string); //sequencename, groupname, groupnumber, names. void clear(); int size(); //returns number of unique sequences - void print(ostream&); + void print(ostream&); //produces a 2 column file with the groupname in the first column and the names in the second column. 
void readFastaFile(ifstream&); private: diff --git a/globaldata.cpp b/globaldata.cpp index 6d67d0d..f09ea35 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -96,22 +96,22 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ splitAtComma(value, optionText); splitAtEquals(key, value); - if (key == "phylipfile" ) { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip";} - if (key == "columnfile" ) { columnfile = value; inputFileName = value; fileroot = value; format = "column";} - if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } - if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } - if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } - if (key == "fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } - if (key == "namefile" ) { namefile = value; } - if (key == "orderfile" ) { orderfile = value; } - if (key == "groupfile" ) { groupfile = value; } - if (key == "cutoff" ) { cutoff = value; } - if (key == "precision" ) { precision = value; } - if (key == "iters" ) { iters = value; } - if (key == "jumble" ) { jumble = value; } - if (key == "freq" ) { freq = value; } - if (key == "method" ) { method = value; } - if (key == "fileroot" ) { fileroot = value; } + if (key == "phylipfile" ) { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } + if (key == "columnfile" ) { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } + if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } + if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } + if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } + if (key == 
"fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "namefile" ) { namefile = value; } + if (key == "orderfile" ) { orderfile = value; } + if (key == "groupfile" ) { groupfile = value; } + if (key == "cutoff" ) { cutoff = value; } + if (key == "precision" ) { precision = value; } + if (key == "iters" ) { iters = value; } + if (key == "jumble" ) { jumble = value; } + if (key == "freq" ) { freq = value; } + if (key == "method" ) { method = value; } + if (key == "fileroot" ) { fileroot = value; } if (key == "single") {//stores estimators in a vector singleEstimators.clear(); //clears out old values @@ -160,22 +160,22 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ //saves the last parameter value = optionText; splitAtEquals(key, value); - if (key == "phylipfile" ) { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } - if (key == "columnfile" ) { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } - if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } - if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } - if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } - if (key == "fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } - if (key == "namefile" ) { namefile = value; } - if (key == "orderfile" ) { orderfile = value; } - if (key == "groupfile" ) { groupfile = value; } - if (key == "cutoff" ) { cutoff = value; } - if (key == "precision" ) { precision = value; } - if (key == "iters" ) { iters = value; } - if (key == "jumble" ) { jumble = value; } - if (key == "freq" ) { freq = value; } - if (key == "method" ) { method = value; } - if (key == "fileroot" ) { fileroot = value; } + if (key == "phylipfile" ) { phylipfile = 
value; inputFileName = value; fileroot = value; format = "phylip"; } + if (key == "columnfile" ) { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } + if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } + if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } + if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } + if (key == "fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "namefile" ) { namefile = value; } + if (key == "orderfile" ) { orderfile = value; } + if (key == "groupfile" ) { groupfile = value; } + if (key == "cutoff" ) { cutoff = value; } + if (key == "precision" ) { precision = value; } + if (key == "iters" ) { iters = value; } + if (key == "jumble" ) { jumble = value; } + if (key == "freq" ) { freq = value; } + if (key == "method" ) { method = value; } + if (key == "fileroot" ) { fileroot = value; } if (key == "single") {//stores estimators in a vector singleEstimators.clear(); //clears out old values @@ -222,6 +222,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ } } + //set format for shared + if ((listfile != "") && (groupfile != "")) { format = "shared"; } //input defaults if (commandString == "collect.single") { @@ -322,8 +324,8 @@ void GlobalData::clear() { fileroot = ""; single = "sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson-rarefraction"; rarefaction = "rarefaction"; - shared = "sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN"; - sharedsummary = "sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN"; + shared = 
"sharedSobs-sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN"; + sharedsummary = "sharedSobs-sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN"; summary = "summary-chao-ace-jack-bootstrap-shannon-npshannon-simpson"; sharedrarefaction = "sharedobserved"; } diff --git a/globaldata.hpp b/globaldata.hpp index a2b9173..5a1572c 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -9,6 +9,7 @@ using namespace std; class ListVector; +class SharedListVector; class SparseMatrix; class OrderVector; class InputData; @@ -23,6 +24,7 @@ public: InputData* ginput; OrderVector* gorder; ListVector* glist; + SharedListVector* gSharedList; SAbundVector* sabund; GroupMap* gGroupmap; string inputFileName, helpRequest, commandName; diff --git a/inputdata.cpp b/inputdata.cpp index 2ecebe3..e5513ab 100644 --- a/inputdata.cpp +++ b/inputdata.cpp @@ -63,7 +63,7 @@ InputData::InputData(string fName, string orderFileName, string f) : format(f){ ListVector* InputData::getListVector(){ try { if(fileHandle){ - if((format == "list") || (format == "shared")){ + if(format == "list") { list = new ListVector(fileHandle); } @@ -84,6 +84,31 @@ ListVector* InputData::getListVector(){ } } +/***********************************************************************/ + +SharedListVector* InputData::getSharedListVector(){ + try { + if(fileHandle){ + if (format == "shared"){ + SharedList = new SharedListVector(fileHandle); + } + + gobble(fileHandle); + return SharedList; + } + else{ + return 0; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the InputData class Function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the InputData class function getOrderVector. 
Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} /***********************************************************************/ @@ -91,9 +116,12 @@ ListVector* InputData::getListVector(){ OrderVector* InputData::getOrderVector(){ try { if(fileHandle){ - if((format == "list") || (format == "shared")){ + if(format == "list") { input = new ListVector(fileHandle); } + else if(format == "shared") { + input = new SharedListVector(fileHandle); + } else if(format == "rabund"){ input = new RAbundVector(fileHandle); } @@ -132,9 +160,12 @@ OrderVector* InputData::getOrderVector(){ SAbundVector* InputData::getSAbundVector(){ try { if(fileHandle){ - if((format == "list") || (format == "shared")){ + if (format == "list") { input = new ListVector(fileHandle); } + else if(format == "shared") { + input = new SharedListVector(fileHandle); + } else if(format == "rabund"){ input = new RAbundVector(fileHandle); } diff --git a/inputdata.h b/inputdata.h index 46d1655..6d814c5 100644 --- a/inputdata.h +++ b/inputdata.h @@ -6,6 +6,7 @@ #include #include #include "ordervector.hpp" +#include "sharedlistvector.h" #include "listvector.hpp" @@ -18,6 +19,7 @@ public: InputData(string, string, string); ~InputData(); ListVector* getListVector(); + SharedListVector* getSharedListVector(); OrderVector* getOrderVector(); SAbundVector* getSAbundVector(); @@ -26,6 +28,7 @@ private: ifstream fileHandle; DataVector* input; ListVector* list; + SharedListVector* SharedList; OrderVector* output; SAbundVector* sabund; map orderMap; diff --git a/listvector.cpp b/listvector.cpp index 3af7cfe..3018efe 100644 --- a/listvector.cpp +++ b/listvector.cpp @@ -232,32 +232,6 @@ SAbundVector ListVector::getSAbundVector(){ } } -/***********************************************************************/ -SharedOrderVector* ListVector::getSharedOrderVector(){ - globaldata = GlobalData::getInstance(); - string groupName, names, name; - groupmap = globaldata->gGroupmap; - SharedOrderVector* 
order; - order = new SharedOrderVector(); - order->setLabel(label); - - for(int i=0;igetGroup(name); - order->push_back(i, binSize, groupName); //i represents what bin you are in - } - //get last name - groupName = groupmap->getGroup(names); - order->push_back(i, binSize, groupName); - } - random_shuffle(order->begin(), order->end()); - return order; -} - /***********************************************************************/ OrderVector ListVector::getOrderVector(map* orderMap = NULL){ diff --git a/listvector.hpp b/listvector.hpp index dbd61cd..e195f4e 100644 --- a/listvector.hpp +++ b/listvector.hpp @@ -1,13 +1,19 @@ #ifndef LIST_H #define LIST_H +#include #include "datavector.hpp" -#include "groupmap.h" -#include "globaldata.hpp" -#include "sharedordervector.h" #include #include +/* This class is a child to datavector. It represents OTU information at a certain distance. + A list vector can be converted into and ordervector, rabundvector or sabundvector. + Each member of the internal container "data" represents an individual OTU. + So data[0] = "a,b,c,d,e,f". + example: listvector = a,b,c,d,e,f g,h,i j,k l m + rabundvector = 6 3 2 1 1 + sabundvector = 2 1 1 0 0 1 + ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ class ListVector : public DataVector { @@ -35,12 +41,9 @@ public: RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); - SharedOrderVector* getSharedOrderVector(); private: - vector data; - GlobalData* globaldata; - GroupMap* groupmap; + vector data; //data[i] is a list of names of sequences in the ith OTU. int maxRank; int numBins; int numSeqs; diff --git a/ordervector.hpp b/ordervector.hpp index 92259a8..d652bbf 100644 --- a/ordervector.hpp +++ b/ordervector.hpp @@ -5,6 +5,16 @@ #include "sabundvector.hpp" #include "rabundvector.hpp" +/* This class is a child to datavector. It represents OTU information at a certain distance. + A order vector can be converted into and listvector, rabundvector or sabundvector. 
+ Each member of the internal container "data" represents the OTU from which it came. + So in the example below, since there are 6 sequences in OTU 1 there are six 1's in the ordervector, + and since there are 2 sequences in OTU 3 there are two 3's in the ordervector. + example: listvector = a,b,c,d,e,f g,h,i j,k l m + rabundvector = 6 3 2 1 1 + sabundvector = 2 1 1 0 0 1 + ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ + class OrderVector : public DataVector { diff --git a/rabundvector.hpp b/rabundvector.hpp index a0807e5..327b29b 100644 --- a/rabundvector.hpp +++ b/rabundvector.hpp @@ -3,6 +3,15 @@ #include "datavector.hpp" +/* This class is a child to datavector. It represents OTU information at a certain distance. + A rabundvector can be converted into an ordervector, listvector or sabundvector. + Each member of the internal container "data" represents an individual OTU. + So data[0] = 6, because there are six members in that OTU. + example: listvector = a,b,c,d,e,f g,h,i j,k l m + rabundvector = 6 3 2 1 1 + sabundvector = 2 1 1 0 0 1 + ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ + class RAbundVector : public DataVector { public: diff --git a/rarefactsharedcommand.cpp b/rarefactsharedcommand.cpp index 4ed4795..189be88 100644 --- a/rarefactsharedcommand.cpp +++ b/rarefactsharedcommand.cpp @@ -28,7 +28,7 @@ RareFactSharedCommand::RareFactSharedCommand(){ int i; for (i=0; isharedRareEstimators.size(); i++) { if (globaldata->sharedRareEstimators[i] == "sharedobserved") { - rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"sharedObserved", groups))); + rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"r_shared.observed", groups))); } } } @@ -61,8 +61,8 @@ int RareFactSharedCommand::execute(){ read->read(&*globaldata); input = globaldata->ginput; - list = globaldata->glist; - order = list->getSharedOrderVector(); + SharedList = globaldata->gSharedList; + order = 
SharedList->getSharedOrderVector(); while(order != NULL){ @@ -78,9 +78,9 @@ int RareFactSharedCommand::execute(){ cout << order->getLabel() << '\t' << count << endl; } - list = input->getListVector(); //get new list vector to process - if (list != NULL) { - order = list->getSharedOrderVector(); //gets new order vector with group info. + SharedList = input->getSharedListVector(); //get new list vector to process + if (SharedList != NULL) { + order = SharedList->getSharedOrderVector(); //gets new order vector with group info. count++; }else { break; diff --git a/rarefactsharedcommand.h b/rarefactsharedcommand.h index cb2219f..e7d2abc 100644 --- a/rarefactsharedcommand.h +++ b/rarefactsharedcommand.h @@ -15,7 +15,7 @@ #include #include "command.hpp" #include "sharedordervector.h" -#include "listvector.hpp" +#include "sharedlistvector.h" #include "inputdata.h" #include "groupmap.h" #include "rarefact.h" @@ -46,7 +46,7 @@ public: private: GlobalData* globaldata; GroupMap* groupmap; - ListVector* list; + SharedListVector* SharedList; ReadMatrix* read; SharedOrderVector* order; InputData* input; diff --git a/readlistcommand.h b/readlistcommand.h deleted file mode 100644 index 01b8519..0000000 --- a/readlistcommand.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * readlistcommand.h - * Mothur - * - * Created by Sarah Westcott on 1/20/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#ifndef READLISTFILECOMMAND_H -#define READLISTFILECOMMAND_H -/* - * readlistcommand.h - * Mothur - * - * Created by Sarah Westcott on 1/20/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include -#include -#include -#include "command.hpp" -#include "readmatrix.hpp" -#include "inputdata.h" -#include "groupmap.h" -#include "sharedcommand.h" -#include "parselistcommand.h" - - -/* The read.list command parameter options are listfile and groupfile. 
-The read.list command should be in the following format: -read.shared(listfile=yourListFile, groupfile=yourGroupFile). -The listfile parameter and groupfile paramaters are required. */ - - -class GlobalData; - -class ReadListFileCommand : public Command { -public: - ReadListFileCommand(); - ~ReadListFileCommand(); - int execute(); - -private: - GlobalData* globaldata; - Command* shared; - Command* parselist; - GroupMap* groupMap; - ReadMatrix* read; - InputData* input; - string filename; -}; - -#endif \ No newline at end of file diff --git a/readmatrix.cpp b/readmatrix.cpp index 0b9e3ec..bd92f6c 100644 --- a/readmatrix.cpp +++ b/readmatrix.cpp @@ -358,8 +358,8 @@ void ReadPhilFile::read(GlobalData* globaldata){ sabund = inputSabund->getSAbundVector(); globaldata->sabund = sabund; //saving to be used by summary command. }else { - list = input->getListVector(); //you are reading for parselist command, or shared commands. - globaldata->glist = list; + SharedList = input->getSharedListVector(); //you are reading for parselist command, or shared commands. + globaldata->gSharedList = SharedList; } } catch(exception& e) { diff --git a/readmatrix.hpp b/readmatrix.hpp index 9c71732..44e7bf6 100644 --- a/readmatrix.hpp +++ b/readmatrix.hpp @@ -12,6 +12,7 @@ #include "rabundvector.hpp" #include "listvector.hpp" +#include "sharedlistvector.h" #include "sparsematrix.hpp" #include "nameassignment.hpp" #include "inputdata.h" @@ -82,6 +83,7 @@ private: InputData* input; InputData* inputSabund; ListVector* list; + SharedListVector* SharedList; OrderVector* order; SAbundVector* sabund; GlobalData* globaldata; diff --git a/readotucommand.cpp b/readotucommand.cpp index 130ecd0..c7e83e4 100644 --- a/readotucommand.cpp +++ b/readotucommand.cpp @@ -15,6 +15,10 @@ ReadOtuCommand::ReadOtuCommand(){ globaldata = GlobalData::getInstance(); filename = globaldata->inputFileName; read = new ReadPhilFile(filename); + if (globaldata->getFormat() == "shared") { + //read in group map info. 
+ groupMap = new GroupMap(globaldata->getGroupFile()); + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -37,6 +41,15 @@ ReadOtuCommand::~ReadOtuCommand(){ int ReadOtuCommand::execute(){ try { read->read(&*globaldata); + if (globaldata->getFormat() == "shared") { + groupMap->readMap(); + globaldata->gGroupmap = groupMap; + + shared = new SharedCommand(); + shared->execute(); + parselist = new ParseListCommand(); + parselist->execute(); + } return 0; } catch(exception& e) { diff --git a/readotucommand.h b/readotucommand.h index 972f30e..6974158 100644 --- a/readotucommand.h +++ b/readotucommand.h @@ -15,10 +15,22 @@ #include "command.hpp" #include "readmatrix.hpp" #include "inputdata.h" +#include "groupmap.h" +#include "sharedcommand.h" +#include "parselistcommand.h" -/* The read.otu command parameter options are listfile, sabundfile, rabundfile and orderfile. -The read.otu command should be in the following format: -read.otu(listfile=yourListFile, orderfile=yourOrderFile). The listfile, sabundfile or rabundfile are required, but only one may be used. */ +/* The read.otu must be run before you execute a collect.single, rarefaction.single, summary.single, +collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund +upon completion of the cluster command or you may use your own. The read.otu command parameter options are +listfile, rabundfile, sabundfile, groupfile and orderfile. The read.otu command can be used in two ways. +The first is to read a listfile, rabundfile or sabundfile and run the collect.single, rarefaction.single or summary.single. +For this use the read.otu command should be in the following format: read.otu(listfile=yourListFile, orderfile=yourOrderFile). 
+The listfile, rabundfile or sabundfile parameter is required, but you may only use one of them. +The second way to use the read.otu command is to read a listfile and a groupfile so you can use the collect.shared, +rarefaction.shared or summary.shared commands. In this case the read.otu command should be in the following format: +read.otu(listfile=yourListFile, groupfile=yourGroupFile). The listfile parameter and groupfile parameters are required. +When using the command the second way read.otu command parses the .list file and separates it into groups. +It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. */ class GlobalData; @@ -32,6 +44,9 @@ private: GlobalData* globaldata; ReadMatrix* read; InputData* input; + Command* shared; + Command* parselist; + GroupMap* groupMap; string filename; }; diff --git a/sabundvector.hpp b/sabundvector.hpp index f45ef60..3c68462 100644 --- a/sabundvector.hpp +++ b/sabundvector.hpp @@ -7,6 +7,17 @@ using namespace std; #include "rabundvector.hpp" #include "ordervector.hpp" + +/* This class is a child to datavector. It represents OTU information at a certain distance. + A sabundvector can be converted into an ordervector, listvector or rabundvector. + Each member of the internal container "data" represents the number of OTU's with that many members, but starting at 1. + So data[1] = 2, because there are two OTUs with 1 member. 
+ example: listvector = a,b,c,d,e,f g,h,i j,k l m + rabundvector = 6 3 2 1 1 + sabundvector = 2 1 1 0 0 1 + ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ + + class SAbundVector : public DataVector { public: diff --git a/shared.h b/shared.h index 42c9d65..d4086a5 100644 --- a/shared.h +++ b/shared.h @@ -22,6 +22,7 @@ using namespace std; #include #include "sharedrabundvector.h" #include "listvector.hpp" +#include "globaldata.hpp" class Shared { public: diff --git a/sharedlistvector.cpp b/sharedlistvector.cpp new file mode 100644 index 0000000..2bceace --- /dev/null +++ b/sharedlistvector.cpp @@ -0,0 +1,377 @@ +/* + * sharedSharedListVector.cpp + * Mothur + * + * Created by Sarah Westcott on 1/22/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + + +using namespace std; + +#include +#include +#include "sabundvector.hpp" +#include "rabundvector.hpp" +#include "ordervector.hpp" +#include "datavector.hpp" +#include "utilities.hpp" +#include "sharedlistvector.h" +#include "sharedordervector.h" + + +/***********************************************************************/ + +SharedListVector::SharedListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){}; + +/***********************************************************************/ +SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { + try { + globaldata = GlobalData::getInstance(); + + //set up groupmap for later. + groupmap = new GroupMap(globaldata->getGroupFile()); + groupmap->readMap(); + + int hold; + f >> label >> hold; + + data.assign(hold, ""); + string inputData = ""; + + for(int i=0;i> inputData; + set(i, inputData); + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function SharedListVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) 
{ + cout << "An unknown error has occurred in the SharedListVector class function SharedListVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ +void SharedListVector::set(int binNumber, string seqNames){ + try { + int nNames_old = getNumNames(data[binNumber]); + data[binNumber] = seqNames; + int nNames_new = getNumNames(seqNames); + + if(nNames_old == 0) { numBins++; } + if(nNames_new == 0) { numBins--; } + if(nNames_new > maxRank) { maxRank = nNames_new; } + + numSeqs += (nNames_new - nNames_old); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function set. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedListVector class function set. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + +string SharedListVector::get(int index){ + return data[index]; +} + +/***********************************************************************/ + +void SharedListVector::push_back(string seqNames){ + try { + data.push_back(seqNames); + int nNames = getNumNames(seqNames); + + numBins++; + + if(nNames > maxRank) { maxRank = nNames; } + + numSeqs += nNames; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedListVector class function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} + +/***********************************************************************/ + +void SharedListVector::resize(int size){ + data.resize(size); +} + +/***********************************************************************/ + +int SharedListVector::size(){ + return data.size(); +} +/***********************************************************************/ + +void SharedListVector::clear(){ + numBins = 0; + maxRank = 0; + numSeqs = 0; + return data.clear(); + +} + +/***********************************************************************/ + +void SharedListVector::print(ostream& output){ + try { + output << label << '\t' << numBins << '\t'; + + for(int i=0;i=0;i--){ + // if(rav.get(i) == 0){ rav.pop_back(); } + // else{ + // break; + // } + // } + rav.setLabel(label); + + return rav; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedListVector class function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + +SAbundVector SharedListVector::getSAbundVector(){ + try { + SAbundVector sav(maxRank+1); + + for(int i=0;isetLabel(label); + + for(int i=0;igetGroup(name); + order->push_back(i, binSize, groupName); //i represents what bin you are in + } + //get last name + groupName = groupmap->getGroup(names); + order->push_back(i, binSize, groupName); + } + random_shuffle(order->begin(), order->end()); + return order; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function getSharedOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) 
{ + cout << "An unknown error has occurred in the SharedListVector class function getSharedOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + +} +/***********************************************************************/ +SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) { + try { + SharedRAbundVector rav(data.size()); + string group, names, name; + + for(int i=0;igetGroup(name); + if (group == groupName) { //this name is in the group you want the vector for. + rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in + } + } + + //get last name + groupName = groupmap->getGroup(names); + if (group == groupName) { //this name is in the group you want the vector for. + rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in + } + } + + rav.setLabel(label); + rav.setGroup(groupName); + return rav; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedListVector class function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ +SharedSAbundVector SharedListVector::getSharedSAbundVector(string groupName) { + try { + SharedSAbundVector sav; + SharedRAbundVector rav; + + rav = this->getSharedRAbundVector(groupName); + sav = rav.getSharedSAbundVector(); + + return sav; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function getSharedSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) 
{ + cout << "An unknown error has occurred in the SharedListVector class function getSharedSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/***********************************************************************/ + +OrderVector SharedListVector::getOrderVector(map* orderMap = NULL){ + + try { + if(orderMap == NULL){ + OrderVector ov; + + for(int i=0;icount(seqName) == 0){ + cerr << seqName << " not found, check *.names file\n"; + exit(1); + } + + ov.set((*orderMap)[seqName], i); + seqName = ""; + } + } + + if(orderMap->count(seqName) == 0){ + cerr << seqName << " not found, check *.names file\n"; + exit(1); + } + ov.set((*orderMap)[seqName], i); + } + + ov.setLabel(label); + ov.getNumBins(); + + return ov; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedListVector class Function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedListVector class function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + diff --git a/sharedlistvector.h b/sharedlistvector.h new file mode 100644 index 0000000..228ccf1 --- /dev/null +++ b/sharedlistvector.h @@ -0,0 +1,71 @@ +#ifndef SHAREDLIST_H +#define SHAREDLIST_H + +/* + * sharedlistvector.h + * Mothur + * + * Created by Sarah Westcott on 1/22/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include +#include "datavector.hpp" +#include "groupmap.h" +#include "globaldata.hpp" +#include "sharedrabundvector.h" +#include "sharedsabundvector.h" +#include +#include + +/* This class is a child to datavector. It represents OTU information at a certain distance. 
+ A sharedlistvector can be converted into a sharedordervector, sharedrabundvector or sharedsabundvectorand + as well as an ordervector, rabundvector or sabundvector. + Each member of the internal container "data" represents an individual OTU. + Each individual in the OTU belongs to a group. + So data[0] = "a,b,c,d,e,f". + example: listvector = a,b,c,d,e,f g,h,i j,k l m + rabundvector = 6 3 2 1 1 + sabundvector = 2 1 1 0 0 1 + ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ + +class SharedListVector : public DataVector { + +public: + SharedListVector(int); + SharedListVector(ifstream&); + SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){}; + ~SharedListVector(){}; + + int getNumBins() { return numBins; } + int getNumSeqs() { return numSeqs; } + int getMaxRank() { return maxRank; } + + void set(int, string); + string get(int); + void push_back(string); + void resize(int); + void clear(); + int size(); + void print(ostream&); + + RAbundVector getRAbundVector(); + SAbundVector getSAbundVector(); + OrderVector getOrderVector(map*); + SharedOrderVector* getSharedOrderVector(); + SharedRAbundVector getSharedRAbundVector(string); //get sharedrabundvector for a certain group + SharedSAbundVector getSharedSAbundVector(string); //get sharedsabundvector for a certain group + +private: + vector data; //data[i] is a list of names of sequences in the ith OTU. + GlobalData* globaldata; + GroupMap* groupmap; + int maxRank; + int numBins; + int numSeqs; + +}; + +#endif + diff --git a/sharedordervector.cpp b/sharedordervector.cpp index fc81feb..575ae18 100644 --- a/sharedordervector.cpp +++ b/sharedordervector.cpp @@ -3,7 +3,7 @@ * Dotur * * Created by Sarah Westcott on 12/9/08. - * Copyright 2008 __MyCompanyName__. All rights reserved. + * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. 
* */ @@ -11,7 +11,6 @@ using namespace std; #include "sharedordervector.h" -#include "datavector.hpp" #include "utilities.hpp" #include @@ -29,6 +28,8 @@ SharedOrderVector::SharedOrderVector(string id, vector ov) : /*********************************************************************** +//does not work since we don't have a shared order file format yet. + SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() { try { int hold; @@ -42,7 +43,7 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() { for(int i=0;i> inputData; - set(i, inputData); + set(i, inputData, inputData, group); } updateStats(); @@ -221,6 +222,49 @@ SAbundVector SharedOrderVector::getSAbundVector(){ RAbundVector rav(this->getRAbundVector()); return rav.getSAbundVector(); +} +/***********************************************************************/ +SharedRAbundVector SharedOrderVector::getSharedRAbundVector(string group) { + try { + SharedRAbundVector sharedRav(data.size()); + + sharedRav.setLabel(label); + sharedRav.setGroup(group); + + for (int i = 0; i < data.size(); i++) { + if (data[i].group == group) { + sharedRav.set(data[i].abundance, sharedRav.getAbundance(data[i].abundance) + 1, data[i].group); + } + } + return sharedRav; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedOrderVector class function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } + +} +/***********************************************************************/ +SharedSAbundVector SharedOrderVector::getSharedSAbundVector(string group) { + try { + + SharedRAbundVector sharedRav(this->getSharedRAbundVector(group)); + return sharedRav.getSharedSAbundVector(); + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedOrderVector class function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + } /***********************************************************************/ diff --git a/sharedordervector.h b/sharedordervector.h index ef9d6a7..f925964 100644 --- a/sharedordervector.h +++ b/sharedordervector.h @@ -5,14 +5,19 @@ * Dotur * * Created by Sarah Westcott on 12/9/08. - * Copyright 2008 __MyCompanyName__. All rights reserved. + * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. * */ + + /* This class is a child to datavector. It represents OTU information at a certain distance. + It is similiar to an order vector except each member of data knows which group it belongs to. + Each member of the internal container "data" represents is an individual which knows the OTU from which it came, + the group it is in and the abundance is equal to the OTU number. 
*/ + + +using namespace std; -#include #include "datavector.hpp" -#include "sabundvector.hpp" -#include "rabundvector.hpp" struct individual { string group; @@ -20,6 +25,11 @@ struct individual { int abundance; }; +#include +#include "sabundvector.hpp" +#include "rabundvector.hpp" +#include "sharedrabundvector.h" +#include "sharedsabundvector.h" class SharedOrderVector : public DataVector { @@ -32,9 +42,9 @@ public: // SharedOrderVector(ifstream&); ~SharedOrderVector(){}; - void set(int, int, int, string); + void set(int, int, int, string); //index, OTU, abundance, group individual get(int); - void push_back(int, int, string); + void push_back(int, int, string); //OTU, abundance, group void resize(int); int size(); void print(ostream&); @@ -50,6 +60,8 @@ public: SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); SharedOrderVector getSharedOrderVector(); + SharedRAbundVector getSharedRAbundVector(string); //get the sharedRabundvector for a sepecific group + SharedSAbundVector getSharedSAbundVector(string); //get the sharedSabundvector for a sepecific group private: vector data; diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 6bdeed0..ba8f94d 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -224,7 +224,6 @@ void SharedRAbundVector::setGroup(string groupName){ group = groupName; } - /***********************************************************************/ int SharedRAbundVector::getNumBins(){ return numBins; @@ -247,7 +246,54 @@ int SharedRAbundVector::getMaxRank(){ SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){ return *this; } +/***********************************************************************/ + +RAbundVector SharedRAbundVector::getRAbundVector() { + try { + RAbundVector rav(data.size()); + + for (int i = 0; i < data.size(); i++) { + rav.set(i, data[i].abundance); + } + + return rav; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the 
SharedRAbundVector class Function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedRAbundVector class function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ +SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){ + try { + SharedSAbundVector sav(maxRank+1); + + for(int i=0;i #include "datavector.hpp" #include "sharedordervector.h" +#include "sharedsabundvector.h" +#include "rabundvector.hpp" +/* This class is a child to datavector. It represents OTU information at a certain distance. + It is similiar to an rabundvector except each member of data knows which group it belongs to. + Each member of the internal container "data" is a struct of type individual. + An individual which knows the OTU from which it came, + the group it is in and its abundance. 
*/ @@ -33,10 +40,10 @@ public: string getGroup(); void setGroup(string); - void set(int, int, string); + void set(int, int, string); //OTU, abundance, groupname individual get(int); int getAbundance(int); - void push_back(int, int, string); + void push_back(int, int, string); //abundance, OTU, groupname void pop_back(); void resize(int); int size(); @@ -45,10 +52,12 @@ public: void print(ostream&); - SharedRAbundVector getSharedRAbundVector(); + RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); SharedOrderVector getSharedOrderVector(); + SharedSAbundVector getSharedSAbundVector(); + SharedRAbundVector getSharedRAbundVector(); private: vector data; diff --git a/sharedsabundvector.cpp b/sharedsabundvector.cpp index 5175f5e..b262484 100644 --- a/sharedsabundvector.cpp +++ b/sharedsabundvector.cpp @@ -264,7 +264,7 @@ bool compareMembers (individual member, individual member2){ } /***********************************************************************/ -SharedRAbundVector SharedSAbundVector::getSharedVector(){ +SharedRAbundVector SharedSAbundVector::getSharedRAbundVector(){ try { SharedRAbundVector rav; @@ -300,7 +300,26 @@ SharedSAbundVector SharedSAbundVector::getSharedSAbundVector(){ } /***********************************************************************/ - +SharedOrderVector SharedSAbundVector::getSharedOrderVector() { + try { + SharedRAbundVector rav; + SharedOrderVector ov; + + rav = this->getSharedRAbundVector(); + ov = rav.getSharedOrderVector(); + + return ov; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedSAbundVector class Function getSharedOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedSAbundVector class function getSharedOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." 
<< "\n"; + exit(1); + } +} +/***********************************************************************/ OrderVector SharedSAbundVector::getOrderVector(map* hold = NULL){ try { OrderVector ov; diff --git a/sharedsabundvector.h b/sharedsabundvector.h index 7010968..60b9364 100644 --- a/sharedsabundvector.h +++ b/sharedsabundvector.h @@ -7,7 +7,7 @@ * Dotur * * Created by Sarah Westcott on 12/10/08. - * Copyright 2008 __MyCompanyName__. All rights reserved. + * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. * */ @@ -19,6 +19,12 @@ #include "sharedordervector.h" #include "sharedrabundvector.h" +/* This class is a child to datavector. It represents OTU information at a certain distance. + It is similiar to an sabundvector except each member of data knows which group it belongs to. + Each member of the internal container "data" is a struct of type individual. + An individual which knows the OTU from which it came, + the group it is in and its abundance. */ + using namespace std; class SharedSAbundVector : public DataVector { @@ -35,10 +41,10 @@ public: string getGroup(); void setGroup(string); - void set(int, int, string); + void set(int, int, string); //OTU, abundance, group individual get(int); int getAbundance(int); - void push_back(int, int, string); + void push_back(int, int, string); //abundance, OTU, group void pop_back(); void resize(int); int size(); @@ -47,9 +53,10 @@ public: RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); - SharedSAbundVector getSharedSAbundVector(); - SharedRAbundVector getSharedVector(); OrderVector getOrderVector(map*); + SharedSAbundVector getSharedSAbundVector(); + SharedRAbundVector getSharedRAbundVector(); + SharedOrderVector getSharedOrderVector(); private: vector data; diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index b9fd08d..347ed04 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -8,6 +8,7 @@ */ #include "summarysharedcommand.h" +#include "sharedsobs.h" 
#include "sharedchao1.h" #include "sharedace.h" #include "sharedjabund.h" @@ -27,7 +28,9 @@ SummarySharedCommand::SummarySharedCommand(){ int i; for (i=0; isharedSummaryEstimators.size(); i++) { - if (globaldata->sharedSummaryEstimators[i] == "sharedChao") { + if (globaldata->sharedSummaryEstimators[i] == "sharedSobs") { + sumCalculators.push_back(new SharedSobs()); + }else if (globaldata->sharedEstimators[i] == "sharedChao") { sumCalculators.push_back(new SharedChao1()); }else if (globaldata->sharedSummaryEstimators[i] == "sharedAce") { sumCalculators.push_back(new SharedAce()); @@ -70,21 +73,21 @@ SummarySharedCommand::~SummarySharedCommand(){ int SummarySharedCommand::execute(){ try { - outputFileName = ((getRootName(globaldata->inputFileName)) + "sharedSummary"); + outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary"); openOutputFile(outputFileName, outputFileHandle); read = new ReadPhilFile(globaldata->inputFileName); read->read(&*globaldata); - outputFileHandle << '\t' << '\t' << '\t' << '\t'; //pads file for labels and groupnames + outputFileHandle << "label" <<'\t' << "comparison" << '\t'; for(int i=0;igetName(); } outputFileHandle << endl; - list = globaldata->glist; + SharedList = globaldata->gSharedList; input = globaldata->ginput; - order = list->getSharedOrderVector(); + order = SharedList->getSharedOrderVector(); getGroupComb(); int count = 1; @@ -101,7 +104,7 @@ int SummarySharedCommand::execute(){ int n = 1; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { - outputFileHandle << order->getLabel() << '\t' << groupComb[n-1] << '\t'; //print out label and group + outputFileHandle << order->getLabel() << '\t' << groupComb[n-1] << '\t' << '\t'; //print out label and group for(int i=0;igetValues(lookup[k], lookup[l]); //saves the calculator outputs outputFileHandle << '\t'; @@ -113,9 +116,9 @@ int SummarySharedCommand::execute(){ } } - list = 
input->getListVector(); //get new list vector to process - if (list != NULL) { - order = list->getSharedOrderVector(); //gets new order vector with group info. + SharedList = input->getSharedListVector(); //get new list vector to process + if (SharedList != NULL) { + order = SharedList->getSharedOrderVector(); //gets new order vector with group info. count++; }else { break; diff --git a/summarysharedcommand.h b/summarysharedcommand.h index 6188b96..24ce04b 100644 --- a/summarysharedcommand.h +++ b/summarysharedcommand.h @@ -18,6 +18,7 @@ #include "inputdata.h" #include "calculator.h" #include "readmatrix.hpp" +#include "sharedlistvector.h" /*The summary.shared() command The summary.shared command can only be executed after a successful read.shared command. @@ -51,7 +52,7 @@ private: ReadMatrix* read; vector sumCalculators; InputData* input; - ListVector* list; + SharedListVector* SharedList; SharedOrderVector* order; vector lookup; SharedRAbundVector* shared1, shared2; diff --git a/validcalculator.cpp b/validcalculator.cpp index 8659be3..1e25fe6 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -114,6 +114,7 @@ void ValidCalculators::initialSingle() { /********************************************************************/ void ValidCalculators::initialShared() { try { + shared["sharedSobs"] = "sharedSobs"; shared["sharedChao"] = "sharedChao"; shared["sharedAce"] = "sharedAce"; shared["sharedJabund"] = "sharedJabund"; @@ -186,6 +187,7 @@ void ValidCalculators::initialSummary() { /********************************************************************/ void ValidCalculators::initialSharedSummary() { try { + sharedsummary["sharedSobs"] = "sharedSobs"; sharedsummary["sharedChao"] = "sharedChao"; sharedsummary["sharedAce"] = "sharedAce"; sharedsummary["sharedJabund"] = "sharedJabund"; diff --git a/validcommands.cpp b/validcommands.cpp index 70a6502..bd9d80e 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -16,7 +16,6 @@ ValidCommands::ValidCommands() { 
commands["read.dist"] = "read.dist"; commands["read.otu"] = "read.otu"; - commands["read.list"] = "read.list"; commands["cluster"] = "cluster"; commands["deconvolute"] = "deconvolute"; commands["help"] = "help"; @@ -51,7 +50,7 @@ bool ValidCommands::isValidCommand(string command) { if ((commands.find(command)) != (commands.end())) { return true; }else{ - cout << command << " is not a valid command in Mothur. Valid commands are read.dist(), read.list(), cluster(), collect.single(), collect.shared(), rarefaction.single(), rarefaction.shared(), summary.single(), summary.shared(), quit(), help()." << endl; + cout << command << " is not a valid command in Mothur. Valid commands are read.dist(), read.otu(), cluster(), collect.single(), collect.shared(), rarefaction.single(), rarefaction.shared(), summary.single(), summary.shared(), quit(), help()." << endl; return false; } -- 2.39.2