From f55cf350ca6643f8eb070d8336e1957699a3f109 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Fri, 13 Apr 2012 10:43:24 -0400 Subject: [PATCH] added tree reader class to handle reading trees. Reworked the tree map to tree class relationship to make less dependant. --- Mothur.xcodeproj/project.pbxproj | 12 +-- classifytreecommand.cpp | 81 ++------------- classifytreecommand.h | 2 - consensus.cpp | 11 +- consensus.h | 2 +- deuniquetreecommand.cpp | 114 ++------------------- deuniquetreecommand.h | 2 - engine.cpp | 3 - indicatorcommand.cpp | 13 ++- mothurout.cpp | 3 +- mothurout.h | 2 +- parsimony.cpp | 13 +-- parsimony.h | 8 +- parsimonycommand.cpp | 161 +++++------------------------ parsimonycommand.h | 3 - phylodiversity.cpp | 81 --------------- phylodiversity.h | 38 ------- phylodiversitycommand.cpp | 121 ++-------------------- phylodiversitycommand.h | 5 +- readtree.cpp | 4 +- readtree.h | 2 +- removegroupscommand.cpp | 1 - tree.cpp | 171 ++++++++++++++++++------------- tree.h | 9 +- treegroupscommand.cpp | 71 +------------ treemap.cpp | 51 +++++++-- treemap.h | 8 +- treereader.cpp | 158 ++++++++++++++++++++++++++++ treereader.h | 44 ++++++++ unifracunweightedcommand.cpp | 140 ++++--------------------- unifracunweightedcommand.h | 6 -- unifracweightedcommand.cpp | 157 +++++----------------------- unifracweightedcommand.h | 9 +- unweighted.cpp | 34 +++--- unweighted.h | 11 +- weighted.cpp | 18 ++-- weighted.h | 7 +- 37 files changed, 531 insertions(+), 1045 deletions(-) delete mode 100644 phylodiversity.cpp delete mode 100644 phylodiversity.h create mode 100644 treereader.cpp create mode 100644 treereader.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 0e170bc..7169f25 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -63,6 +63,7 @@ A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; }; A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; }; A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; }; + A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; }; A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; }; A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; }; @@ -207,7 +208,6 @@ A7E9B91312D37EC400DA6239 /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78312D37EC400DA6239 /* parsimony.cpp */; }; A7E9B91412D37EC400DA6239 /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */; }; A7E9B91512D37EC400DA6239 /* pcoacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */; }; - A7E9B91612D37EC400DA6239 /* phylodiversity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78912D37EC400DA6239 /* phylodiversity.cpp */; }; A7E9B91712D37EC400DA6239 /* phylodiversitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */; }; A7E9B91812D37EC400DA6239 /* phylosummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */; }; A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78F12D37EC400DA6239 /* phylotree.cpp */; }; @@ -498,6 +498,8 @@ A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cooccurrencecommand.h; sourceTree = ""; }; A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trialSwap2.cpp; sourceTree = ""; }; A7C3DC0E14FE469500FE1924 /* trialswap2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trialswap2.h; sourceTree = ""; }; + A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = ""; }; + A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = ""; }; A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = ""; }; A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = ""; }; @@ -803,8 +805,6 @@ A7E9B78612D37EC400DA6239 /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimonycommand.h; sourceTree = ""; }; A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pcoacommand.cpp; sourceTree = ""; }; A7E9B78812D37EC400DA6239 /* pcoacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcoacommand.h; sourceTree = ""; }; - A7E9B78912D37EC400DA6239 /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = ""; }; - A7E9B78A12D37EC400DA6239 /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = ""; }; A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = ""; }; A7E9B78C12D37EC400DA6239 /* phylodiversitycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversitycommand.h; sourceTree = ""; }; A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylosummary.cpp; sourceTree = ""; }; @@ -1826,8 +1826,6 @@ A7E9B68F12D37EC400DA6239 /* classify.h */, A7E9B73812D37EC400DA6239 /* knn.h */, A7E9B73712D37EC400DA6239 /* knn.cpp */, - A7E9B78912D37EC400DA6239 /* phylodiversity.cpp */, - A7E9B78A12D37EC400DA6239 /* phylodiversity.h */, A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */, A7E9B78E12D37EC400DA6239 /* phylosummary.h */, A7E9B78F12D37EC400DA6239 /* phylotree.cpp */, @@ -1879,6 +1877,8 @@ A713EBAB12DC7613000092AC /* readphylipvector.cpp */, A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */, A7E9B84412D37EC400DA6239 /* splitmatrix.h */, + A7D755D71535F665009BF21A /* treereader.h */, + A7D755D91535F679009BF21A /* treereader.cpp */, ); name = read; sourceTree = ""; @@ -2104,7 +2104,6 @@ A7E9B91312D37EC400DA6239 /* parsimony.cpp in Sources */, A7E9B91412D37EC400DA6239 /* parsimonycommand.cpp in Sources */, A7E9B91512D37EC400DA6239 /* pcoacommand.cpp in Sources */, - A7E9B91612D37EC400DA6239 /* phylodiversity.cpp in Sources */, A7E9B91712D37EC400DA6239 /* phylodiversitycommand.cpp in Sources */, A7E9B91812D37EC400DA6239 /* phylosummary.cpp in Sources */, A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */, @@ -2302,6 +2301,7 @@ A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */, A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */, A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */, + A7D755DA1535F679009BF21A /* treereader.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/classifytreecommand.cpp b/classifytreecommand.cpp index 9ec4e6f..bcf2769 100644 --- a/classifytreecommand.cpp +++ b/classifytreecommand.cpp @@ -8,6 +8,7 @@ #include "classifytreecommand.h" #include "phylotree.h" +#include "treereader.h" //********************************************************************************************************************** vector ClassifyTreeCommand::setParameters(){ @@ -86,12 +87,6 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - vector tempOutNames; outputTypes["tree"] = tempOutNames; outputTypes["summary"] = tempOutNames; @@ -195,74 +190,19 @@ int ClassifyTreeCommand::execute(){ // reading tree info // /***************************************************/ m->setTreeFile(treefile); - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - vector T = read->getTrees(); - Tree* outputTree = T[0]; - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } + + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + vector T = reader->getTrees(); + TreeMap* tmap = T[0]->getTreeMap(); + Tree* outputTree = T[0]; + delete reader; + + if (namefile != "") { readNamesFile(); } - if (m->control_pressed) { delete outputTree; delete tmap; return 0; } + if (m->control_pressed) { delete tmap; delete outputTree; return 0; } readTaxonomyFile(); - /***************************************************/ // get concensus taxonomies // /***************************************************/ @@ -484,6 +424,7 @@ map > ClassifyTreeCommand::getDescendantList(Tree*& T, int i int lc = T->tree[i].getLChild(); int rc = T->tree[i].getRChild(); + TreeMap* tmap = T->getTreeMap(); if (lc == -1) { //you are a leaf your only descendant is yourself string group = tmap->getGroup(T->tree[i].getName()); diff --git a/classifytreecommand.h b/classifytreecommand.h index 026e4ba..30957af 100644 --- a/classifytreecommand.h +++ b/classifytreecommand.h @@ -30,8 +30,6 @@ public: void help() { m->mothurOut(getHelpString()); } private: - ReadTree* read; - TreeMap* tmap; string treefile, taxonomyfile, groupfile, namefile, outputDir; bool abort; vector outputNames; diff --git a/consensus.cpp b/consensus.cpp index 04671f8..1be052f 100644 --- a/consensus.cpp +++ b/consensus.cpp @@ -10,7 +10,7 @@ #include "consensus.h" //********************************************************************************************************************** -Tree* Consensus::getTree(vector& t, TreeMap* tmap){ +Tree* Consensus::getTree(vector& t){ try { numNodes = t[0]->getNumNodes(); numLeaves = t[0]->getNumLeaves(); @@ -21,7 +21,7 @@ Tree* Consensus::getTree(vector& t, TreeMap* tmap){ if (m->control_pressed) { return 0; } - consensusTree = new Tree(tmap); + consensusTree = new Tree(t[0]->getTreeMap()); it2 = nodePairs.find(treeSet); @@ -35,11 +35,12 @@ Tree* Consensus::getTree(vector& t, TreeMap* tmap){ buildConsensusTree(treeSet); - if (m->control_pressed) { delete consensusTree; return 0; } + if (m->control_pressed) { delete consensusTree; return 0; } - consensusTree->assembleTree(); + map empty; + consensusTree->assembleTree(empty); - if (m->control_pressed) { delete consensusTree; return 0; } + if (m->control_pressed) { delete consensusTree; return 0; } return consensusTree; diff --git a/consensus.h b/consensus.h index 630ee8d..faa4e42 100644 --- a/consensus.h +++ b/consensus.h @@ -25,7 +25,7 @@ public: Consensus() { m = MothurOut::getInstance(); } ~Consensus() {} - Tree* getTree(vector&, TreeMap*); + Tree* getTree(vector&); private: MothurOut* m; diff --git a/deuniquetreecommand.cpp b/deuniquetreecommand.cpp index 64ea9b7..c33c8e4 100644 --- a/deuniquetreecommand.cpp +++ b/deuniquetreecommand.cpp @@ -8,6 +8,7 @@ */ #include "deuniquetreecommand.h" +#include "treereader.h" //********************************************************************************************************************** vector DeuniqueTreeCommand::setParameters(){ @@ -103,13 +104,7 @@ DeuniqueTreeCommand::DeuniqueTreeCommand(string option) { } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - - //check for required parameters + //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); if (treefile == "not open") { abort = true; } else if (treefile == "not found") { //if there is a current design file, use it @@ -144,72 +139,21 @@ int DeuniqueTreeCommand::execute() { m->setTreeFile(treefile); - //extracts names from tree to make faked out groupmap - Tree* tree = new Tree(treefile); delete tree; - tmap = new TreeMap(); - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - - if (m->control_pressed) { delete tmap; return 0; } - - readNamesFile(); - - if (m->control_pressed) { delete tmap; return 0; } - - ReadTree* read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - vector T = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - + TreeReader* reader = new TreeReader(treefile, "", namefile); + vector T = reader->getTrees(); + map nameMap = reader->getNameMap(); + delete reader; //print new Tree string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + "deunique.tre"; outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream out; m->openOutputFile(outputFile, out); - T[0]->print(out, "deunique"); + T[0]->print(out, nameMap); out.close(); - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } + delete (T[0]->getTreeMap()); + for (int i = 0; i < T.size(); i++) { delete T[i]; } //set phylip file as new current phylipfile string current = ""; @@ -231,46 +175,6 @@ int DeuniqueTreeCommand::execute() { exit(1); } } -/*****************************************************************/ -int DeuniqueTreeCommand::readNamesFile() { - try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = dupNames[i]; - if (i != 0) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, aborting."); m->mothurOutEndLine(); in.close(); m->names.clear(); m->control_pressed = true; return 1; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "DeuniqueTreeCommand", "readNamesFile"); - exit(1); - } -} /***********************************************************/ diff --git a/deuniquetreecommand.h b/deuniquetreecommand.h index 6d72253..18a6e5e 100644 --- a/deuniquetreecommand.h +++ b/deuniquetreecommand.h @@ -12,7 +12,6 @@ #include "command.hpp" -#include "treemap.h" #include "sharedutilities.h" #include "readtree.h" @@ -35,7 +34,6 @@ public: private: - TreeMap* tmap; int numUniquesInName; bool abort; diff --git a/engine.cpp b/engine.cpp index ffbe324..24adcb5 100644 --- a/engine.cpp +++ b/engine.cpp @@ -183,7 +183,6 @@ bool InteractEngine::getInput(){ mout->clearGroups(); mout->clearAllGroups(); mout->Treenames.clear(); - mout->names.clear(); mout->saveNextLabel = ""; mout->printedHeaders = false; mout->commandInputsConvertError = false; @@ -369,7 +368,6 @@ bool BatchEngine::getInput(){ mout->clearGroups(); mout->clearAllGroups(); mout->Treenames.clear(); - mout->names.clear(); mout->saveNextLabel = ""; mout->printedHeaders = false; mout->commandInputsConvertError = false; @@ -538,7 +536,6 @@ bool ScriptEngine::getInput(){ mout->clearGroups(); mout->clearAllGroups(); mout->Treenames.clear(); - mout->names.clear(); mout->saveNextLabel = ""; mout->printedHeaders = false; mout->commandInputsConvertError = false; diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index fe99c46..94d5c84 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -99,7 +99,6 @@ IndicatorCommand::IndicatorCommand(string option) { m->clearGroups(); m->clearAllGroups(); m->Treenames.clear(); - m->names.clear(); vector tempOutNames; outputTypes["tree"] = tempOutNames; @@ -236,11 +235,10 @@ int IndicatorCommand::execute(){ designMap->readDesignMap(); //fill Groups - checks for "all" and for any typo groups - SharedUtil* util = new SharedUtil(); + SharedUtil util; vector nameGroups = designMap->getNamesOfGroups(); - util->setGroups(Groups, nameGroups); + util.setGroups(Groups, nameGroups); designMap->setNamesOfGroups(nameGroups); - delete util; //loop through the Groups and fill Globaldata's Groups with the design file info vector namesSeqs = designMap->getNamesSeqs(Groups); @@ -320,8 +318,9 @@ int IndicatorCommand::execute(){ else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0; } - - T[0]->assembleTree(); + + map nameMap; + T[0]->assembleTree(nameMap); /***************************************************/ // create ouptut tree - respecting pickedGroups // @@ -329,7 +328,7 @@ int IndicatorCommand::execute(){ Tree* outputTree = new Tree(m->getNumGroups(), treeMap); outputTree->getSubTree(T[0], m->getGroups()); - outputTree->assembleTree(); + outputTree->assembleTree(nameMap); //no longer need original tree, we have output tree to use and label for (int i = 0; i < T.size(); i++) { delete T[i]; } diff --git a/mothurout.cpp b/mothurout.cpp index d5f1534..ee809fa 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1014,7 +1014,8 @@ int MothurOut::appendFiles(string temp, string filename) { int numLines = 0; if (ableToOpen == 0) { //you opened it - while(char c = input.get()){ + while(!input.eof()){ + char c = input.get(); if(input.eof()) { break; } else { output << c; if (c == '\n') {numLines++;} } } diff --git a/mothurout.h b/mothurout.h index 0659e4e..2a6ba2d 100644 --- a/mothurout.h +++ b/mothurout.h @@ -65,7 +65,7 @@ class MothurOut { vector getAllGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); return namesOfGroups; } vector Treenames; - map names; + //map names; vector binLabelsInFile; vector currentBinLabels; string saveNextLabel, argv, sharedHeaderMode; diff --git a/parsimony.cpp b/parsimony.cpp index d26bc27..bbb929d 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -15,6 +15,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) { try { processors = p; outputDir = o; + TreeMap* tmap = t->getTreeMap(); //if the users enters no groups then give them the score of all groups vector mGroups = m->getGroups(); @@ -56,7 +57,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap); }else{ lines.clear(); int numPairs = namesOfGroupCombos.size(); @@ -73,7 +74,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) { lines.push_back(linePair(startPos, numPairsPerProcessor)); } - data = createProcesses(t, namesOfGroupCombos); + data = createProcesses(t, namesOfGroupCombos, tmap); } #else data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); @@ -89,7 +90,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) { } /**************************************************************************************************/ -EstOutput Parsimony::createProcesses(Tree* t, vector< vector > namesOfGroupCombos) { +EstOutput Parsimony::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, TreeMap* tmap) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; @@ -106,7 +107,7 @@ EstOutput Parsimony::createProcesses(Tree* t, vector< vector > namesOfGr process++; }else if (pid == 0){ EstOutput myresults; - myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num); + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap); if (m->control_pressed) { exit(0); } @@ -126,7 +127,7 @@ EstOutput Parsimony::createProcesses(Tree* t, vector< vector > namesOfGr } } - results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap); //force parent to wait until all the processes are done for (int i=0;i > namesOfGr } } /**************************************************************************************************/ -EstOutput Parsimony::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num) { +EstOutput Parsimony::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, TreeMap* tmap) { try { EstOutput results; results.resize(num); diff --git a/parsimony.h b/parsimony.h index b116aa2..7316d50 100644 --- a/parsimony.h +++ b/parsimony.h @@ -19,10 +19,9 @@ class Parsimony : public TreeCalculator { public: - Parsimony(TreeMap* t) : tmap(t) {}; + Parsimony() {}; ~Parsimony() {}; EstOutput getValues(Tree*, int, string); - //EstOutput getValues(Tree*, string, string) { return data; } private: struct linePair { @@ -33,12 +32,11 @@ class Parsimony : public TreeCalculator { vector lines; EstOutput data; - TreeMap* tmap; int processors; string outputDir; - EstOutput driver(Tree*, vector< vector >, int, int); - EstOutput createProcesses(Tree*, vector< vector >); + EstOutput driver(Tree*, vector< vector >, int, int, TreeMap*); + EstOutput createProcesses(Tree*, vector< vector >, TreeMap*); }; /***********************************************************************/ diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 2d46efc..50e1bfa 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -8,6 +8,7 @@ */ #include "parsimonycommand.h" +#include "treereader.h" //********************************************************************************************************************** vector ParsimonyCommand::setParameters(){ @@ -125,12 +126,6 @@ ParsimonyCommand::ParsimonyCommand(string option) { } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; } @@ -203,68 +198,11 @@ int ParsimonyCommand::execute() { m->setTreeFile(treefile); - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - T = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + T = reader->getTrees(); + tmap = T[0]->getTreeMap(); + delete reader; + if(outputDir == "") { outputDir += m->hasPath(treefile); } output = new ColumnFile(outputDir + m->getSimpleName(treefile) + ".parsimony", itersString); outputNames.push_back(outputDir + m->getSimpleName(treefile) + ".parsimony"); @@ -284,24 +222,23 @@ int ParsimonyCommand::execute() { } //set users groups to analyze - util = new SharedUtil(); + SharedUtil util; vector mGroups = m->getGroups(); vector tGroups = tmap->getNamesOfGroups(); - util->setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze - util->getCombos(groupComb, mGroups, numComp); + util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze + util.getCombos(groupComb, mGroups, numComp); m->setGroups(mGroups); - delete util; if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } - pars = new Parsimony(tmap); + Parsimony pars; counter = 0; Progress* reading; reading = new Progress("Comparing to random:", iters); if (m->control_pressed) { - delete reading; delete pars; delete output; + delete reading; delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); @@ -323,10 +260,10 @@ int ParsimonyCommand::execute() { if (randomtree == "") { //get pscores for users trees for (int i = 0; i < T.size(); i++) { - userData = pars->getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC. + userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC. if (m->control_pressed) { - delete reading; delete pars; delete output; + delete reading; delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); @@ -362,10 +299,10 @@ int ParsimonyCommand::execute() { randT->assembleRandomTree(); //get pscore of random tree - randomData = pars->getValues(randT, processors, outputDir); + randomData = pars.getValues(randT, processors, outputDir); if (m->control_pressed) { - delete reading; delete pars; delete output; delete randT; + delete reading; delete output; delete randT; if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } @@ -403,23 +340,17 @@ int ParsimonyCommand::execute() { randT->assembleRandomTree(); if (m->control_pressed) { - delete reading; delete pars; delete output; delete randT; - delete tmap; - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; + delete reading; delete output; delete randT; delete tmap; + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //get pscore of random tree - randomData = pars->getValues(randT, processors, outputDir); + randomData = pars.getValues(randT, processors, outputDir); if (m->control_pressed) { - delete reading; delete pars; delete output; delete randT; - delete tmap; - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; + delete reading; delete output; delete randT; delete tmap; + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } for(int r = 0; r < numComp; r++) { @@ -471,27 +402,21 @@ int ParsimonyCommand::execute() { } if (m->control_pressed) { - delete reading; delete pars; delete output; + delete reading; delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); return 0; } //finish progress bar reading->finish(); delete reading; - printParsimonyFile(); if (randomtree == "") { printUSummaryFile(); } - - //reset groups parameter - m->clearGroups(); - - delete pars; delete output; - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } + + delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} @@ -623,46 +548,6 @@ void ParsimonyCommand::getUserInput() { exit(1); } } -/*****************************************************************/ -int ParsimonyCommand::readNamesFile() { - try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = dupNames[i]; - if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ParsimonyCommand", "readNamesFile"); - exit(1); - } -} /***********************************************************/ diff --git a/parsimonycommand.h b/parsimonycommand.h index 81fa99b..9172556 100644 --- a/parsimonycommand.h +++ b/parsimonycommand.h @@ -36,15 +36,12 @@ public: void help() { m->mothurOut(getHelpString()); } private: - ReadTree* read; - SharedUtil* util; FileOutput* output; vector T; //user trees Tree* randT; //random tree Tree* copyUserTree; TreeMap* tmap; TreeMap* savetmap; - Parsimony* pars; vector groupComb; // AB. AC, BC... string sumFile, randomtree, allGroups, outputDir, treefile, groupfile, namefile; int iters, numGroups, numComp, counter, processors, numUniquesInName; diff --git a/phylodiversity.cpp b/phylodiversity.cpp deleted file mode 100644 index d5ccaf9..0000000 --- a/phylodiversity.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * phylodiversity.cpp - * Mothur - * - * Created by westcott on 4/30/10. - * Copyright 2010 Schloss Lab. All rights reserved. - * - */ - -#include "phylodiversity.h" - -/************************************************************************************************** -EstOutput PhyloDiversity::getValues(Tree* t, vector treeNodes, vector< vector >& data) { - try { - - map DScore; - float totalLength = 0.0; - data.clear(); - - //initialize Dscore - for (int i=0; iGroups.size(); i++) { DScore[globaldata->Groups[i]] = 0.0; } - - ******************************************************** - //calculate a D value for each group - for(int v=0;vcontrol_pressed) { return data; } - - //calc the branch length - //while you aren't at root - float sum = 0.0; - int index = treeNodes[v]; - - while(t->tree[index].getParent() != -1){ - - //if you have a BL - if(t->tree[index].getBranchLength() != -1){ - sum += abs(t->tree[index].getBranchLength()); - } - index = t->tree[index].getParent(); - } - - //get last breanch length added - if(t->tree[index].getBranchLength() != -1){ - sum += abs(t->tree[index].getBranchLength()); - } - - //for each group in the groups update the total branch length accounting for the names file - vector groups = t->tree[treeNodes[v]].getGroup(); - for (int j = 0; j < groups.size(); j++) { - int numSeqsInGroupJ = 0; - map::iterator it; - it = t->tree[treeNodes[v]].pcount.find(groups[j]); - if (it != t->tree[treeNodes[v]].pcount.end()) { //this leaf node contains seqs from group j - numSeqsInGroupJ = it->second; - } - - //add branch length to total for group - DScore[groups[j]] += (numSeqsInGroupJ * sum); - } - - } - - - for (int i=0; iGroups.size(); i++) { - float percent = DScore[globaldata->Groups[i]]; - data.push_back(percent); - - } - - return data; - } - catch(exception& e) { - m->errorOut(e, "PhyloDiversity", "getValues"); - exit(1); - } -} -**************************************************************************************************/ - - - diff --git a/phylodiversity.h b/phylodiversity.h deleted file mode 100644 index a789efa..0000000 --- a/phylodiversity.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef PHYLODIVERSITY_H -#define PHYLODIVERSITY_H - - -/* - * phylodiversity.h - * Mothur - * - * Created by westcott on 4/30/10. - * Copyright 2010 Schloss Lab. All rights reserved. - * - */ - -#include "treemap.h" -#include "mothurout.h" - - -/***********************************************************************/ - -class PhyloDiversity { - - public: - PhyloDiversity(TreeMap* t) : tmap(t) { m = MothurOut::getInstance(); } - ~PhyloDiversity() {}; - - //int getValues(Tree*, vector, vector< vector< float> >&); - - - private: - MothurOut* m; - TreeMap* tmap; -}; - -/***********************************************************************/ - - -#endif - diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index abf9591..3db101a 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -8,6 +8,7 @@ */ #include "phylodiversitycommand.h" +#include "treereader.h" //********************************************************************************************************************** vector PhyloDiversityCommand::setParameters(){ @@ -136,12 +137,6 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option) { } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); if (treefile == "not open") { treefile = ""; abort = true; } @@ -218,74 +213,15 @@ int PhyloDiversityCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } m->setTreeFile(treefile); - - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - vector trees = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - - SharedUtil* util = new SharedUtil(); + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + vector trees = reader->getTrees(); + tmap = trees[0]->getTreeMap(); + delete reader; + + SharedUtil util; vector mGroups = m->getGroups(); vector tGroups = tmap->getNamesOfGroups(); - util->setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze - delete util; + util.setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i); break; } } @@ -711,47 +647,6 @@ vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st exit(1); } } -/*****************************************************************/ -int PhyloDiversityCommand::readNamesFile() { - try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = dupNames[i]; - if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "PhyloDiversityCommand", "readNamesFile"); - exit(1); - } -} - //********************************************************************************************************************** diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h index b44e6ca..5d0cccf 100644 --- a/phylodiversitycommand.h +++ b/phylodiversitycommand.h @@ -12,9 +12,8 @@ #include "command.hpp" #include "treemap.h" -#include "readtree.h" #include "sharedutilities.h" - +#include "tree.h" class PhyloDiversityCommand : public Command { @@ -33,14 +32,12 @@ class PhyloDiversityCommand : public Command { int execute(); void help() { m->mothurOut(getHelpString()); } private: - ReadTree* read; TreeMap* tmap; float freq; int iters, processors, numUniquesInName; bool abort, rarefy, summary, collect, scale; string groups, outputDir, treefile, groupfile, namefile; vector Groups, outputNames; //holds groups to be used, and outputFile names - map nameMap; int readNamesFile(); void printData(set&, map< string, vector >&, ofstream&, int); diff --git a/readtree.cpp b/readtree.cpp index 74b4268..6fa4c3d 100644 --- a/readtree.cpp +++ b/readtree.cpp @@ -20,12 +20,12 @@ ReadTree::ReadTree() { } } /***********************************************************************/ -int ReadTree::AssembleTrees() { +int ReadTree::AssembleTrees(map nameMap) { try { //assemble users trees for (int i = 0; i < Trees.size(); i++) { if (m->control_pressed) { return 0; } - Trees[i]->assembleTree(); + Trees[i]->assembleTree(nameMap); } return 0; } diff --git a/readtree.h b/readtree.h index b5b26ed..6b074de 100644 --- a/readtree.h +++ b/readtree.h @@ -30,7 +30,7 @@ class ReadTree { float readBranchLength(istream& f); vector getTrees() { return Trees; } - int AssembleTrees(); + int AssembleTrees(map); protected: vector Trees; diff --git a/removegroupscommand.cpp b/removegroupscommand.cpp index 49674f5..801029c 100644 --- a/removegroupscommand.cpp +++ b/removegroupscommand.cpp @@ -465,7 +465,6 @@ int RemoveGroupsCommand::readShared(){ delete tempInput; m->setGroups(groupsToKeep); m->clearAllGroups(); - m->names.clear(); m->saveNextLabel = ""; m->printedHeaders = false; m->currentBinLabels.clear(); diff --git a/tree.cpp b/tree.cpp index 432811e..7c9671b 100644 --- a/tree.cpp +++ b/tree.cpp @@ -16,7 +16,7 @@ Tree::Tree(int num, TreeMap* t) : tmap(t) { numLeaves = num; numNodes = 2*numLeaves - 1; - + tree.resize(numNodes); } catch(exception& e) { @@ -28,9 +28,6 @@ Tree::Tree(int num, TreeMap* t) : tmap(t) { Tree::Tree(string g) { //do not use tree generated by this its just to extract the treenames, its a chicken before the egg thing that needs to be revisited. try { m = MothurOut::getInstance(); - - tmap = NULL; - parseTreeFile(); m->runParse = false; } catch(exception& e) { @@ -196,7 +193,8 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { //adjust tree to make sure root to tip length is .5 int root = findRoot(); tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves())); - } + + } catch(exception& e) { m->errorOut(e, "Tree", "Tree"); exit(1); @@ -329,12 +327,13 @@ void Tree::setIndex(string searchName, int index) { } } /*****************************************************************/ -int Tree::assembleTree() { +int Tree::assembleTree(map nameMap) { try { - //float A = clock(); + //save for later + names = nameMap; //if user has given a names file we want to include that info in the pgroups and pcount info. - if(m->names.size() != 0) { addNamesToCounts(m->names); } + if(nameMap.size() != 0) { addNamesToCounts(nameMap); } //build the pGroups in non leaf nodes to be used in the parsimony calcs. for (int i = numLeaves; i < numNodes; i++) { @@ -343,8 +342,7 @@ int Tree::assembleTree() { tree[i].pGroups = (mergeGroups(i)); tree[i].pcount = (mergeGcounts(i)); } - //float B = clock(); - //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; + return 0; } catch(exception& e) { @@ -352,7 +350,7 @@ int Tree::assembleTree() { exit(1); } } -/*****************************************************************/ +/***************************************************************** int Tree::assembleTree(string n) { try { @@ -380,7 +378,8 @@ void Tree::getSubTree(Tree* Ctree, vector Groups) { //copy Tree since we are going to destroy it Tree* copy = new Tree(tmap); copy->getCopy(Ctree); - copy->assembleTree("nonames"); + map empty; + copy->assembleTree(empty); //we want to select some of the leaf nodes to create the output tree //go through the input Tree starting at parents of leaves @@ -840,21 +839,23 @@ void Tree::randomBlengths() { /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(vector g) { randomLabels(g); - assembleTree("noNameCounts"); + map empty; + assembleTree(empty); } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(string groupA, string groupB) { - vector temp; temp.push_back(groupA); temp.push_back(groupB); randomLabels(temp); - assembleTree("noNameCounts"); + map empty; + assembleTree(empty); } /*************************************************************************************************/ //for now it's just random topology but may become random labels as well later that why this is such a simple function now... void Tree::assembleRandomTree() { randomTopology(); - assembleTree(); + map empty; + assembleTree(empty); } /**************************************************************************************************/ @@ -907,6 +908,18 @@ void Tree::print(ostream& out) { } } /*****************************************************************/ +void Tree::print(ostream& out, map nameMap) { + try { + int root = findRoot(); + printBranch(root, out, nameMap); + out << ";" << endl; + } + catch(exception& e) { + m->errorOut(e, "Tree", "print"); + exit(1); + } +} +/*****************************************************************/ void Tree::print(ostream& out, string mode) { try { int root = findRoot(); @@ -959,10 +972,82 @@ int Tree::findRoot() { } } /*****************************************************************/ -void Tree::printBranch(int node, ostream& out, string mode) { +void Tree::printBranch(int node, ostream& out, map names) { try { // you are not a leaf + if (tree[node].getLChild() != -1) { + out << "("; + printBranch(tree[node].getLChild(), out, names); + out << ","; + printBranch(tree[node].getRChild(), out, names); + out << ")"; + + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + + }else { //you are a leaf + map::iterator itNames = names.find(tree[node].getName()); + + string outputString = ""; + if (itNames != names.end()) { + + vector dupNames; + m->splitAtComma((itNames->second), dupNames); + + if (dupNames.size() == 1) { + outputString += tree[node].getName(); + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + }else { + outputString += "("; + + for (int u = 0; u < dupNames.size()-1; u++) { + outputString += dupNames[u]; + + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + outputString += ","; + } + + outputString += dupNames[dupNames.size()-1]; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + + outputString += ")"; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + } + }else { + outputString = tree[node].getName(); + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + + m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); + } + + out << outputString; + } + + } + catch(exception& e) { + m->errorOut(e, "Tree", "printBranch"); + exit(1); + } +} +/*****************************************************************/ +void Tree::printBranch(int node, ostream& out, string mode) { + try { + + // you are not a leaf if (tree[node].getLChild() != -1) { out << "("; printBranch(tree[node].getLChild(), out, mode); @@ -987,11 +1072,6 @@ try { if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } - }else if (mode == "deunique") { - //if there is a branch length then print it - if (tree[node].getBranchLength() != -1) { - out << ":" << tree[node].getBranchLength(); - } } }else { //you are a leaf string leafGroup = tmap->getGroup(tree[node].getName()); @@ -1017,53 +1097,6 @@ try { if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } - }else if (mode == "deunique") { - map::iterator itNames = m->names.find(tree[node].getName()); - - string outputString = ""; - if (itNames != m->names.end()) { - - vector dupNames; - m->splitAtComma((itNames->second), dupNames); - - if (dupNames.size() == 1) { - outputString += tree[node].getName(); - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(tree[node].getBranchLength()); - } - }else { - outputString += "("; - - for (int u = 0; u < dupNames.size()-1; u++) { - outputString += dupNames[u]; - - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(0.0); - } - outputString += ","; - } - - outputString += dupNames[dupNames.size()-1]; - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(0.0); - } - - outputString += ")"; - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(tree[node].getBranchLength()); - } - } - }else { - outputString = tree[node].getName(); - //if there is a branch length then print it - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(tree[node].getBranchLength()); - } - - m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); - } - - out << outputString; } } diff --git a/tree.h b/tree.h index 0b61c6e..0660e8a 100644 --- a/tree.h +++ b/tree.h @@ -22,6 +22,7 @@ public: Tree(TreeMap*, vector< vector >&); //create tree from sim matrix ~Tree(); + TreeMap* getTreeMap() { return tmap; } void getCopy(Tree*); //makes tree a copy of the one passed in. void getSubTree(Tree*, vector); //makes tree a that contains only the names passed in. int getSubTree(Tree* originalToCopy, vector seqToInclude, map nameMap); //used with (int, TreeMap) constructor. SeqsToInclude contains subsample wanted - assumes these are unique seqs and size of vector=numLeaves passed into constructor. nameMap is unique -> redundantList can be empty if no namesfile was provided. @@ -39,11 +40,11 @@ public: void printTree(); void print(ostream&); void print(ostream&, string); + void print(ostream&, map); int findRoot(); //return index of root node //this function takes the leaf info and populates the non leaf nodes - int assembleTree(); - int assembleTree(string); + int assembleTree(map); vector tree; //the first n nodes are the leaves, where n is the number of sequences. map< string, vector > groupNodeInfo; //maps group to indexes of leaf nodes with that group, different groups may contain same node because of names file. @@ -54,6 +55,7 @@ private: ofstream out; string filename; + map names; map::iterator it, it2; map mergeGroups(int); //returns a map with a groupname and the number of times that group was seen in the children map mergeGcounts(int); @@ -63,7 +65,8 @@ private: void randomBlengths(); void randomLabels(vector); //void randomLabels(string, string); - void printBranch(int, ostream&, string); //recursively print out tree + void printBranch(int, ostream&, map); //recursively print out tree + void printBranch(int, ostream&, string); void parseTreeFile(); //parses through tree file to find names of nodes and number of them //this is required in case user has sequences in the names file that are //not included in the tree. diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index 4a77211..3eeeae2 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -484,74 +484,13 @@ Tree* TreeGroupCommand::createTree(vector< vector >& simMatrix){ //create tree t = new Tree(tmap, simMatrix); - /* //initialize index - map index; //maps row in simMatrix to vector index in the tree - for (int g = 0; g < numGroups; g++) { index[g] = g; } + if (m->control_pressed) { delete t; t = NULL; return t; } - //do merges and create tree structure by setting parents and children - //there are numGroups - 1 merges to do - for (int i = 0; i < (numGroups - 1); i++) { - float largest = -1000.0; - - if (m->control_pressed) { delete t; t = NULL; return t; } - - int row, column; - //find largest value in sims matrix by searching lower triangle - for (int j = 1; j < simMatrix.size(); j++) { - for (int k = 0; k < j; k++) { - if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; } - } - } + //assemble tree + map empty; + t->assembleTree(empty); - //set non-leaf node info and update leaves to know their parents - //non-leaf - t->tree[numGroups + i].setChildren(index[row], index[column]); - - //parents - t->tree[index[row]].setParent(numGroups + i); - t->tree[index[column]].setParent(numGroups + i); - - //blength = distance / 2; - float blength = ((1.0 - largest) / 2); - - //branchlengths - t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves()); - t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves()); - - //set your length to leaves to your childs length plus branchlength - t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength()); - - - //update index - index[row] = numGroups+i; - index[column] = numGroups+i; - - //remove highest value that caused the merge. - simMatrix[row][column] = -1000.0; - simMatrix[column][row] = -1000.0; - - //merge values in simsMatrix - for (int n = 0; n < simMatrix.size(); n++) { - //row becomes merge of 2 groups - simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2; - simMatrix[n][row] = simMatrix[row][n]; - //delete column - simMatrix[column][n] = -1000.0; - simMatrix[n][column] = -1000.0; - } - } - - //adjust tree to make sure root to tip length is .5 - int root = t->findRoot(); - t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves())); - */ - //assemble tree - t->assembleTree(); - - if (m->control_pressed) { delete t; t = NULL; return t; } - return t; - } catch(exception& e) { m->errorOut(e, "TreeGroupCommand", "createTree"); @@ -1006,7 +945,7 @@ int TreeGroupCommand::process(vector thisLookup) { Consensus consensus; //clear old tree names if any m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups - Tree* conTree = consensus.getTree(trees, tmap); + Tree* conTree = consensus.getTree(trees); //create a new filename string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons.tre"; diff --git a/treemap.cpp b/treemap.cpp index 450d8fb..c228162 100644 --- a/treemap.cpp +++ b/treemap.cpp @@ -19,6 +19,43 @@ /************************************************************/ TreeMap::~TreeMap(){} +/************************************************************/ +int TreeMap::readMap(string gf) { + + groupFileName = gf; + m->openInputFile(gf, fileHandle); + + string seqName, seqGroup; + int error = 0; + + while(fileHandle){ + fileHandle >> seqName; m->gobble(fileHandle); //read from first column + fileHandle >> seqGroup; //read from second column + + if (m->control_pressed) { fileHandle.close(); return 1; } + + setNamesOfGroups(seqGroup); + + map::iterator itCheck = treemap.find(seqName); + if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + namesOfSeqs.push_back(seqName); + treemap[seqName].groupname = seqGroup; //store data in map + + it2 = seqsPerGroup.find(seqGroup); + if (it2 == seqsPerGroup.end()) { //if it's a new group + seqsPerGroup[seqGroup] = 1; + }else {//it's a group we already have + seqsPerGroup[seqGroup]++; + } + } + + m->gobble(fileHandle); + } + fileHandle.close(); + + return error; +} /************************************************************/ int TreeMap::readMap() { @@ -26,7 +63,7 @@ int TreeMap::readMap() { int error = 0; while(fileHandle){ - fileHandle >> seqName; //read from first column + fileHandle >> seqName; m->gobble(fileHandle); //read from first column fileHandle >> seqGroup; //read from second column if (m->control_pressed) { fileHandle.close(); return 1; } @@ -230,14 +267,14 @@ void TreeMap::makeSim(ListVector* list) { } } /************************************************************/ -int TreeMap::getCopy(TreeMap* copy){ +int TreeMap::getCopy(TreeMap& copy){ try { - namesOfGroups = copy->getNamesOfGroups(); - numGroups = copy->getNumGroups(); - namesOfSeqs = copy->namesOfSeqs; - seqsPerGroup = copy->seqsPerGroup; - treemap = copy->treemap; + namesOfGroups = copy.getNamesOfGroups(); + numGroups = copy.getNumGroups(); + namesOfSeqs = copy.namesOfSeqs; + seqsPerGroup = copy.seqsPerGroup; + treemap = copy.treemap; return 0; } diff --git a/treemap.h b/treemap.h index fc9c369..57822e0 100644 --- a/treemap.h +++ b/treemap.h @@ -10,7 +10,6 @@ */ #include "mothur.h" -#include "groupmap.h" #include "listvector.hpp" /* This class is used by the read.tree command to build the tree container. */ @@ -20,15 +19,14 @@ struct GroupIndex { int vectorIndex; }; -class GroupMap; -class ListVector; - class TreeMap { public: TreeMap() { m = MothurOut::getInstance(); } TreeMap(string); ~TreeMap(); + int readMap(); + int readMap(string); int getNumGroups(); int getNumSeqs(); void setIndex(string, int); //sequencename, index @@ -48,7 +46,7 @@ public: void makeSim(ListVector*); //takes listvector info and fills treemap for use by tree.shared command. vector getNamesSeqs(); vector getNamesSeqs(vector); //get names of seqs belonging to a group or set of groups - int getCopy(TreeMap*); + int getCopy(TreeMap&); vector namesOfSeqs; map seqsPerGroup; //groupname, number of seqs in that group. diff --git a/treereader.cpp b/treereader.cpp new file mode 100644 index 0000000..b385d21 --- /dev/null +++ b/treereader.cpp @@ -0,0 +1,158 @@ +// +// treereader.cpp +// Mothur +// +// Created by Sarah Westcott on 4/11/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "treereader.h" +#include "readtree.h" + +/***********************************************************************/ + +TreeReader::TreeReader(string tf) : treefile(tf) { + try { + m = MothurOut::getInstance(); + namefile = ""; + groupfile = ""; + readTrees(); + } + catch(exception& e) { + m->errorOut(e, "TreeReader", "TreeReader"); + exit(1); + } +} +/***********************************************************************/ + +TreeReader::TreeReader(string tf, string gf) : treefile(tf), groupfile(gf) { + try { + m = MothurOut::getInstance(); + namefile = ""; + readTrees(); + } + catch(exception& e) { + m->errorOut(e, "TreeReader", "TreeReader"); + exit(1); + } +} +/***********************************************************************/ +TreeReader::TreeReader(string tf, string gf, string nf) : treefile(tf), groupfile(gf), namefile(nf) { + try { + m = MothurOut::getInstance(); + readTrees(); + } + catch(exception& e) { + m->errorOut(e, "TreeReader", "TreeReader"); + exit(1); + } +} +/***********************************************************************/ +bool TreeReader::readTrees() { + try { + + tmap = new TreeMap(); + if (groupfile != "") { tmap->readMap(groupfile); } + else{ //fake out by putting everyone in one group + Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap + for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } + } + + int numUniquesInName = 0; + if (namefile != "") { numUniquesInName = readNamesFile(); } + + ReadTree* read = new ReadNewickTree(treefile); + int readOk = read->read(tmap); + + if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete read; m->control_pressed=true; return 0; } + + read->AssembleTrees(names); + trees = read->getTrees(); + delete read; + + //make sure all files match + //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. + int numNamesInTree; + if (namefile != "") { + if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } + else { numNamesInTree = m->Treenames.size(); } + }else { numNamesInTree = m->Treenames.size(); } + + + //output any names that are in group file but not in tree + if (numNamesInTree < tmap->getNumSeqs()) { + for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { + //is that name in the tree? + int count = 0; + for (int j = 0; j < m->Treenames.size(); j++) { + if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it + count++; + } + + if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; } return 0; } + + //then you did not find it so report it + if (count == m->Treenames.size()) { + //if it is in your namefile then don't remove + map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); + + if (it == nameMap.end()) { + m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); + tmap->removeSeq(tmap->namesOfSeqs[i]); + i--; //need this because removeSeq removes name from namesOfSeqs + } + } + } + } + + return true; + } + catch(exception& e) { + m->errorOut(e, "TreeReader", "readTrees"); + exit(1); + } +} +/*****************************************************************/ +int TreeReader::readNamesFile() { + try { + nameMap.clear(); + names.clear(); + int numUniquesInName = 0; + + ifstream in; + m->openInputFile(namefile, in); + + string first, second; + map::iterator itNames; + + while(!in.eof()) { + in >> first >> second; m->gobble(in); + + numUniquesInName++; + + itNames = nameMap.find(first); + if (itNames == nameMap.end()) { + names[first] = second; + + //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them + vector dupNames; + m->splitAtComma(second, dupNames); + + for (int i = 0; i < dupNames.size(); i++) { + nameMap[dupNames[i]] = first; + if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } + } + }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); nameMap.clear(); names.clear(); namefile = ""; return 1; } + } + in.close(); + + return numUniquesInName; + } + catch(exception& e) { + m->errorOut(e, "TreeReader", "readNamesFile"); + exit(1); + } +} +/***********************************************************************/ + + diff --git a/treereader.h b/treereader.h new file mode 100644 index 0000000..fb9c791 --- /dev/null +++ b/treereader.h @@ -0,0 +1,44 @@ +#ifndef Mothur_treereader_h +#define Mothur_treereader_h + +// +// treereader.h +// Mothur +// +// Created by Sarah Westcott on 4/11/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "mothurout.h" +#include "tree.h" + +class TreeReader { + +public: + + TreeReader(string tf); + TreeReader(string tf, string gf); + TreeReader(string tf, string gf, string nf); + ~TreeReader() {} + + vector getTrees() { return trees; } + map getNames() { return nameMap; } //dups -> unique + map getNameMap() { return names; } //unique -> dups list + + +private: + MothurOut* m; + vector trees; + TreeMap* tmap; + map nameMap; //dupName -> uniqueName + map names; + + string treefile, groupfile, namefile; + + bool readTrees(); + int readNamesFile(); +}; + + + +#endif diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index a404f79..de8cb56 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -8,6 +8,7 @@ */ #include "unifracunweightedcommand.h" +#include "treereader.h" //********************************************************************************************************************** vector UnifracUnweightedCommand::setParameters(){ @@ -133,13 +134,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - - //check for required parameters + //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); if (treefile == "not open") { abort = true; } else if (treefile == "not found") { //if there is a current design file, use it @@ -220,83 +215,26 @@ int UnifracUnweightedCommand::execute() { m->setTreeFile(treefile); - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - T = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + T = reader->getTrees(); + tmap = T[0]->getTreeMap(); + map nameMap = reader->getNameMap(); + delete reader; + sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary"; outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile); m->openOutputFile(sumFile, outSum); - util = new SharedUtil(); + SharedUtil util; Groups = m->getGroups(); vector namesGroups = tmap->getNamesOfGroups(); - util->setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze - util->getCombos(groupComb, Groups, numComp); + util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze + util.getCombos(groupComb, Groups, numComp); m->setGroups(Groups); - delete util; if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } - unweighted = new Unweighted(tmap, includeRoot); + Unweighted unweighted(includeRoot); int start = time(NULL); @@ -314,7 +252,7 @@ int UnifracUnweightedCommand::execute() { //get pscores for users trees for (int i = 0; i < T.size(); i++) { if (m->control_pressed) { - delete tmap; delete unweighted; + delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } @@ -336,9 +274,9 @@ int UnifracUnweightedCommand::execute() { utreeScores.resize(numComp); UWScoreSig.resize(numComp); - userData = unweighted->getValues(T[i], processors, outputDir); //userData[0] = unweightedscore + userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore - if (m->control_pressed) { delete tmap; delete unweighted; + if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; } //output scores for each combination @@ -354,9 +292,9 @@ int UnifracUnweightedCommand::execute() { for (int j = 0; j < iters; j++) { //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison - randomData = unweighted->getValues(T[i], "", "", processors, outputDir); + randomData = unweighted.getValues(T[i], "", "", processors, outputDir); - if (m->control_pressed) { delete tmap; delete unweighted; + if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } for(int k = 0; k < numComp; k++) { @@ -394,7 +332,7 @@ int UnifracUnweightedCommand::execute() { } - if (m->control_pressed) { delete tmap; delete unweighted; + if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //print output files @@ -411,8 +349,7 @@ int UnifracUnweightedCommand::execute() { outSum.close(); - m->clearGroups(); - delete tmap; delete unweighted; + delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -581,45 +518,6 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile"); exit(1); } -}/*****************************************************************/ -int UnifracUnweightedCommand::readNamesFile() { - try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = dupNames[i]; - if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "UnifracUnweightedCommand", "readNamesFile"); - exit(1); - } } /***********************************************************/ diff --git a/unifracunweightedcommand.h b/unifracunweightedcommand.h index cd8d51d..8b24396 100644 --- a/unifracunweightedcommand.h +++ b/unifracunweightedcommand.h @@ -36,12 +36,9 @@ class UnifracUnweightedCommand : public Command { private: - ReadTree* read; - SharedUtil* util; FileOutput* output; vector T; //user trees TreeMap* tmap; - Unweighted* unweighted; string sumFile, allGroups; vector groupComb; // AB. AC, BC... int iters, numGroups, numComp, counter, processors, numUniquesInName; @@ -59,13 +56,10 @@ class UnifracUnweightedCommand : public Command { ofstream outSum, out; ifstream inFile; - map nameMap; void printUWSummaryFile(int); void printUnweightedFile(); void createPhylipFile(int); - int readNamesFile(); - }; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index e596db2..a4a1bc3 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -10,6 +10,7 @@ #include "unifracweightedcommand.h" #include "consensus.h" #include "subsample.h" +#include "treereader.h" //********************************************************************************************************************** vector UnifracWeightedCommand::setParameters(){ @@ -141,12 +142,6 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); if (treefile == "not open") { treefile = ""; abort = true; } @@ -238,7 +233,13 @@ int UnifracWeightedCommand::execute() { m->setTreeFile(treefile); - readTrees(); if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + T = reader->getTrees(); + tmap = T[0]->getTreeMap(); + map nameMap = reader->getNames(); + delete reader; + + if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary"; m->openOutputFile(sumFile, outSum); @@ -253,7 +254,7 @@ int UnifracWeightedCommand::execute() { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } - Weighted weighted(tmap, includeRoot); + Weighted weighted(includeRoot); int start = time(NULL); @@ -333,16 +334,16 @@ int UnifracWeightedCommand::execute() { if (m->control_pressed) { break; } - //copy to preserve old one - would do this in subsample but tree needs it and memory cleanup becomes messy. + //copy to preserve old one - would do this in subsample but memory cleanup becomes messy. TreeMap* newTmap = new TreeMap(); - newTmap->getCopy(tmap); + newTmap->getCopy(*tmap); SubSample sample; Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize); //call new weighted function vector iterData; iterData.resize(numComp,0); - Weighted thisWeighted(newTmap, includeRoot); + Weighted thisWeighted(includeRoot); iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore //save data to make ave dist, std dist @@ -350,6 +351,8 @@ int UnifracWeightedCommand::execute() { delete newTmap; delete subSampleTree; + + if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); } } if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -525,8 +528,8 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, i m->runParse = false; //create treemap class from groupmap for tree class to use - TreeMap* newTmap = new TreeMap(); - newTmap->makeSim(m->getGroups()); + TreeMap newTmap; + newTmap.makeSim(m->getGroups()); //clear old tree names if any m->Treenames.clear(); @@ -536,10 +539,10 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, i vector newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created - if (m->control_pressed) { delete newTmap; return 0; } + if (m->control_pressed) { return 0; } Consensus con; - Tree* conTree = con.getTree(newTrees, newTmap); + Tree* conTree = con.getTree(newTrees); //create a new filename string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre"; @@ -549,7 +552,6 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, i if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; } outTree.close(); - delete newTmap; return 0; @@ -561,7 +563,7 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, i } /**************************************************************************************************/ -vector UnifracWeightedCommand::buildTrees(vector< vector >& dists, int treeNum, TreeMap* mytmap) { +vector UnifracWeightedCommand::buildTrees(vector< vector >& dists, int treeNum, TreeMap& mytmap) { try { vector trees; @@ -595,8 +597,9 @@ vector UnifracWeightedCommand::buildTrees(vector< vector >& dists } //create tree - Tree* tempTree = new Tree(mytmap, sims); - tempTree->assembleTree(); + Tree* tempTree = new Tree(&mytmap, sims); + map empty; + tempTree->assembleTree(empty); trees.push_back(tempTree); @@ -617,80 +620,6 @@ vector UnifracWeightedCommand::buildTrees(vector< vector >& dists } /**************************************************************************************************/ -int UnifracWeightedCommand::readTrees() { - try { - - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - T = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - - return 0; - } - catch(exception& e) { - m->errorOut(e, "UnifracWeightedCommand", "readTrees"); - exit(1); - } -} -/**************************************************************************************************/ - int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector usersScores) { try { @@ -839,7 +768,7 @@ int UnifracWeightedCommand::driver(Tree* t, vector< vector > namesOfGrou try { Tree* randT = new Tree(tmap); - Weighted weighted(tmap, includeRoot); + Weighted weighted(includeRoot); for (int h = start; h < (start+num); h++) { @@ -1079,46 +1008,6 @@ void UnifracWeightedCommand::calculateFreqsCumuls() { exit(1); } } -/*****************************************************************/ -int UnifracWeightedCommand::readNamesFile() { - try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = first; - if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "UnifracWeightedCommand", "readNamesFile"); - exit(1); - } -} /***********************************************************/ diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index e1ebd16..ddbdc85 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -42,15 +42,12 @@ class UnifracWeightedCommand : public Command { linePair(int i, int j) : start(i), num(j) {} }; vector lines; - - ReadTree* read; - SharedUtil* util; + TreeMap* tmap; FileOutput* output; vector T; //user trees vector utreeScores; //user tree unweighted scores vector WScoreSig; //tree weighted score signifigance when compared to random trees - percentage of random trees with that score or lower. vector groupComb; // AB. AC, BC... - TreeMap* tmap; string sumFile, outputDir; int iters, numGroups, numComp, counter; vector< vector > rScores; //vector each group comb has an entry @@ -74,10 +71,8 @@ class UnifracWeightedCommand : public Command { void calculateFreqsCumuls(); int createProcesses(Tree*, vector< vector >, vector< vector >&); int driver(Tree*, vector< vector >, int, int, vector< vector >&); - int readNamesFile(); int runRandomCalcs(Tree*, vector); - int readTrees(); - vector buildTrees(vector< vector >&, int, TreeMap*); + vector buildTrees(vector< vector >&, int, TreeMap&); int getConsensusTrees(vector< vector >&, int); int getAverageSTDMatrices(vector< vector >&, int); diff --git a/unweighted.cpp b/unweighted.cpp index d4fd327..864a9f8 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -15,7 +15,9 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { try { processors = p; outputDir = o; - + + TreeMap* tmap = t->getTreeMap(); + //if the users enters no groups then give them the score of all groups int numGroups = m->getNumGroups(); @@ -50,7 +52,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap); }else{ int numPairs = namesOfGroupCombos.size(); @@ -65,11 +67,11 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { lines.push_back(linePair(startPos, numPairsPerProcessor)); } - data = createProcesses(t, namesOfGroupCombos); + data = createProcesses(t, namesOfGroupCombos, tmap); lines.clear(); } #else - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap); #endif return data; @@ -81,7 +83,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { } /**************************************************************************************************/ -EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos) { +EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, TreeMap* tmap) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; @@ -98,7 +100,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG process++; }else if (pid == 0){ EstOutput myresults; - myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num); + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap); if (m->control_pressed) { exit(0); } @@ -120,7 +122,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG } } - results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap); //force parent to wait until all the processes are done for (int i=0;i<(processors-1);i++) { @@ -165,7 +167,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG } } /**************************************************************************************************/ -EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num) { +EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, TreeMap* tmap) { try { @@ -259,6 +261,8 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st processors = p; outputDir = o; + TreeMap* tmap = t->getTreeMap(); + //if the users enters no groups then give them the score of all groups int numGroups = m->getNumGroups(); @@ -293,7 +297,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap); }else{ int numPairs = namesOfGroupCombos.size(); @@ -307,12 +311,12 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st lines.push_back(linePair(startPos, numPairsPerProcessor)); } - data = createProcesses(t, namesOfGroupCombos, true); + data = createProcesses(t, namesOfGroupCombos, true, tmap); lines.clear(); } #else - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap); #endif return data; @@ -324,7 +328,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st } /**************************************************************************************************/ -EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, bool usingGroups) { +EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, bool usingGroups, TreeMap* tmap) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; @@ -341,7 +345,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG process++; }else if (pid == 0){ EstOutput myresults; - myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups); + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, tmap); if (m->control_pressed) { exit(0); } @@ -361,7 +365,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG } } - results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, tmap); //force parent to wait until all the processes are done for (int i=0;i<(processors-1);i++) { @@ -405,7 +409,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG } } /**************************************************************************************************/ -EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, bool usingGroups) { +EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, bool usingGroups, TreeMap* tmap) { try { EstOutput results; results.resize(num); diff --git a/unweighted.h b/unweighted.h index e751d2e..c6c13bb 100644 --- a/unweighted.h +++ b/unweighted.h @@ -19,7 +19,7 @@ class Unweighted : public TreeCalculator { public: - Unweighted(TreeMap* t, bool r) : tmap(t), includeRoot(r) {}; + Unweighted(bool r) : includeRoot(r) {}; ~Unweighted() {}; EstOutput getValues(Tree*, int, string); EstOutput getValues(Tree*, string, string, int, string); @@ -33,16 +33,15 @@ class Unweighted : public TreeCalculator { vector lines; EstOutput data; - TreeMap* tmap; int processors; string outputDir; map< vector, set > rootForGrouping; //maps a grouping combo to the roots for that combo bool includeRoot; - EstOutput driver(Tree*, vector< vector >, int, int); - EstOutput createProcesses(Tree*, vector< vector >); - EstOutput driver(Tree*, vector< vector >, int, int, bool); - EstOutput createProcesses(Tree*, vector< vector >, bool); + EstOutput driver(Tree*, vector< vector >, int, int, TreeMap*); + EstOutput createProcesses(Tree*, vector< vector >, TreeMap*); + EstOutput driver(Tree*, vector< vector >, int, int, bool, TreeMap*); + EstOutput createProcesses(Tree*, vector< vector >, bool, TreeMap*); int getRoot(Tree*, int, vector); }; diff --git a/weighted.cpp b/weighted.cpp index b0a642b..85eed52 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -18,6 +18,8 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { vector D; processors = p; outputDir = o; + + TreeMap* tmap = t->getTreeMap(); numGroups = m->getNumGroups(); @@ -36,7 +38,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap); }else{ int numPairs = namesOfGroupCombos.size(); @@ -50,12 +52,12 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { lines.push_back(linePair(startPos, numPairsPerProcessor)); } - data = createProcesses(t, namesOfGroupCombos); + data = createProcesses(t, namesOfGroupCombos, tmap); lines.clear(); } #else - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap); #endif return data; @@ -67,7 +69,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { } /**************************************************************************************************/ -EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos) { +EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, TreeMap* tmap) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; @@ -85,7 +87,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro }else if (pid == 0){ EstOutput Myresults; - Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num); + Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap); //m->mothurOut("Merging results."); m->mothurOutEndLine(); @@ -108,7 +110,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro } } - results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap); //force parent to wait until all the processes are done for (int i=0;i<(processors-1);i++) { @@ -153,7 +155,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro } } /**************************************************************************************************/ -EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num) { +EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, TreeMap* tmap) { try { EstOutput results; vector D; @@ -267,6 +269,8 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { try { data.clear(); //clear out old values + + TreeMap* tmap = t->getTreeMap(); if (m->control_pressed) { return data; } diff --git a/weighted.h b/weighted.h index c23a115..180409c 100644 --- a/weighted.h +++ b/weighted.h @@ -19,7 +19,7 @@ class Weighted : public TreeCalculator { public: - Weighted(TreeMap* t, bool r) : tmap(t), includeRoot(r) {}; + Weighted( bool r) : includeRoot(r) {}; ~Weighted() {}; EstOutput getValues(Tree*, string, string); @@ -34,7 +34,6 @@ class Weighted : public TreeCalculator { vector lines; EstOutput data; - TreeMap* tmap; map::iterator it; map WScore; //a score for each group combination i.e. AB, AC, BC. int processors; @@ -42,8 +41,8 @@ class Weighted : public TreeCalculator { map< vector, set > rootForGrouping; //maps a grouping combo to the root for that combo bool includeRoot; - EstOutput driver(Tree*, vector< vector >, int, int); - EstOutput createProcesses(Tree*, vector< vector >); + EstOutput driver(Tree*, vector< vector >, int, int, TreeMap*); + EstOutput createProcesses(Tree*, vector< vector >, TreeMap*); double getLengthToRoot(Tree*, int, string, string); }; -- 2.39.2