A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
+ A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
A7E9B91312D37EC400DA6239 /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78312D37EC400DA6239 /* parsimony.cpp */; };
A7E9B91412D37EC400DA6239 /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */; };
A7E9B91512D37EC400DA6239 /* pcoacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */; };
- A7E9B91612D37EC400DA6239 /* phylodiversity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78912D37EC400DA6239 /* phylodiversity.cpp */; };
A7E9B91712D37EC400DA6239 /* phylodiversitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */; };
A7E9B91812D37EC400DA6239 /* phylosummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */; };
A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78F12D37EC400DA6239 /* phylotree.cpp */; };
A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cooccurrencecommand.h; sourceTree = "<group>"; };
A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trialSwap2.cpp; sourceTree = "<group>"; };
A7C3DC0E14FE469500FE1924 /* trialswap2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trialswap2.h; sourceTree = "<group>"; };
+ A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
+ A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
A7E9B78612D37EC400DA6239 /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimonycommand.h; sourceTree = "<group>"; };
A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pcoacommand.cpp; sourceTree = "<group>"; };
A7E9B78812D37EC400DA6239 /* pcoacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcoacommand.h; sourceTree = "<group>"; };
- A7E9B78912D37EC400DA6239 /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = "<group>"; };
- A7E9B78A12D37EC400DA6239 /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = "<group>"; };
A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = "<group>"; };
A7E9B78C12D37EC400DA6239 /* phylodiversitycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversitycommand.h; sourceTree = "<group>"; };
A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylosummary.cpp; sourceTree = "<group>"; };
A7E9B68F12D37EC400DA6239 /* classify.h */,
A7E9B73812D37EC400DA6239 /* knn.h */,
A7E9B73712D37EC400DA6239 /* knn.cpp */,
- A7E9B78912D37EC400DA6239 /* phylodiversity.cpp */,
- A7E9B78A12D37EC400DA6239 /* phylodiversity.h */,
A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */,
A7E9B78E12D37EC400DA6239 /* phylosummary.h */,
A7E9B78F12D37EC400DA6239 /* phylotree.cpp */,
A713EBAB12DC7613000092AC /* readphylipvector.cpp */,
A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */,
A7E9B84412D37EC400DA6239 /* splitmatrix.h */,
+ A7D755D71535F665009BF21A /* treereader.h */,
+ A7D755D91535F679009BF21A /* treereader.cpp */,
);
name = read;
sourceTree = "<group>";
A7E9B91312D37EC400DA6239 /* parsimony.cpp in Sources */,
A7E9B91412D37EC400DA6239 /* parsimonycommand.cpp in Sources */,
A7E9B91512D37EC400DA6239 /* pcoacommand.cpp in Sources */,
- A7E9B91612D37EC400DA6239 /* phylodiversity.cpp in Sources */,
A7E9B91712D37EC400DA6239 /* phylodiversitycommand.cpp in Sources */,
A7E9B91812D37EC400DA6239 /* phylosummary.cpp in Sources */,
A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */,
A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */,
A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */,
A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */,
+ A7D755DA1535F679009BF21A /* treereader.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "classifytreecommand.h"
#include "phylotree.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> ClassifyTreeCommand::setParameters(){
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
vector<string> tempOutNames;
outputTypes["tree"] = tempOutNames;
outputTypes["summary"] = tempOutNames;
// reading tree info //
/***************************************************/
m->setTreeFile(treefile);
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- vector<Tree*> T = read->getTrees();
- Tree* outputTree = T[0];
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
+
+ TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ vector<Tree*> T = reader->getTrees();
+ TreeMap* tmap = T[0]->getTreeMap();
+ Tree* outputTree = T[0];
+ delete reader;
+
+ if (namefile != "") { readNamesFile(); }
- if (m->control_pressed) { delete outputTree; delete tmap; return 0; }
+ if (m->control_pressed) { delete tmap; delete outputTree; return 0; }
readTaxonomyFile();
-
/***************************************************/
// get concensus taxonomies //
/***************************************************/
int lc = T->tree[i].getLChild();
int rc = T->tree[i].getRChild();
+ TreeMap* tmap = T->getTreeMap();
if (lc == -1) { //you are a leaf your only descendant is yourself
string group = tmap->getGroup(T->tree[i].getName());
void help() { m->mothurOut(getHelpString()); }
private:
- ReadTree* read;
- TreeMap* tmap;
string treefile, taxonomyfile, groupfile, namefile, outputDir;
bool abort;
vector<string> outputNames;
#include "consensus.h"
//**********************************************************************************************************************
-Tree* Consensus::getTree(vector<Tree*>& t, TreeMap* tmap){
+Tree* Consensus::getTree(vector<Tree*>& t){
try {
numNodes = t[0]->getNumNodes();
numLeaves = t[0]->getNumLeaves();
if (m->control_pressed) { return 0; }
- consensusTree = new Tree(tmap);
+ consensusTree = new Tree(t[0]->getTreeMap());
it2 = nodePairs.find(treeSet);
buildConsensusTree(treeSet);
- if (m->control_pressed) { delete consensusTree; return 0; }
+ if (m->control_pressed) { delete consensusTree; return 0; }
- consensusTree->assembleTree();
+ map<string, string> empty;
+ consensusTree->assembleTree(empty);
- if (m->control_pressed) { delete consensusTree; return 0; }
+ if (m->control_pressed) { delete consensusTree; return 0; }
return consensusTree;
Consensus() { m = MothurOut::getInstance(); }
~Consensus() {}
- Tree* getTree(vector<Tree*>&, TreeMap*);
+ Tree* getTree(vector<Tree*>&);
private:
MothurOut* m;
*/
#include "deuniquetreecommand.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> DeuniqueTreeCommand::setParameters(){
}
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
- //check for required parameters
+ //check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
if (treefile == "not open") { abort = true; }
else if (treefile == "not found") { //if there is a current design file, use it
m->setTreeFile(treefile);
- //extracts names from tree to make faked out groupmap
- Tree* tree = new Tree(treefile); delete tree;
- tmap = new TreeMap();
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
-
- if (m->control_pressed) { delete tmap; return 0; }
-
- readNamesFile();
-
- if (m->control_pressed) { delete tmap; return 0; }
-
- ReadTree* read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- vector<Tree*> T = read->getTrees();
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
-
+ TreeReader* reader = new TreeReader(treefile, "", namefile);
+ vector<Tree*> T = reader->getTrees();
+ map<string, string> nameMap = reader->getNameMap();
+ delete reader;
//print new Tree
string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + "deunique.tre";
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
ofstream out;
m->openOutputFile(outputFile, out);
- T[0]->print(out, "deunique");
+ T[0]->print(out, nameMap);
out.close();
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+ delete (T[0]->getTreeMap());
+ for (int i = 0; i < T.size(); i++) { delete T[i]; }
//set phylip file as new current phylipfile
string current = "";
exit(1);
}
}
-/*****************************************************************/
-int DeuniqueTreeCommand::readNamesFile() {
- try {
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == m->names.end()) {
- m->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = dupNames[i];
- if (i != 0) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, aborting."); m->mothurOutEndLine(); in.close(); m->names.clear(); m->control_pressed = true; return 1; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "DeuniqueTreeCommand", "readNamesFile");
- exit(1);
- }
-}
/***********************************************************/
#include "command.hpp"
-#include "treemap.h"
#include "sharedutilities.h"
#include "readtree.h"
private:
- TreeMap* tmap;
int numUniquesInName;
bool abort;
mout->clearGroups();
mout->clearAllGroups();
mout->Treenames.clear();
- mout->names.clear();
mout->saveNextLabel = "";
mout->printedHeaders = false;
mout->commandInputsConvertError = false;
mout->clearGroups();
mout->clearAllGroups();
mout->Treenames.clear();
- mout->names.clear();
mout->saveNextLabel = "";
mout->printedHeaders = false;
mout->commandInputsConvertError = false;
mout->clearGroups();
mout->clearAllGroups();
mout->Treenames.clear();
- mout->names.clear();
mout->saveNextLabel = "";
mout->printedHeaders = false;
mout->commandInputsConvertError = false;
m->clearGroups();
m->clearAllGroups();
m->Treenames.clear();
- m->names.clear();
vector<string> tempOutNames;
outputTypes["tree"] = tempOutNames;
designMap->readDesignMap();
//fill Groups - checks for "all" and for any typo groups
- SharedUtil* util = new SharedUtil();
+ SharedUtil util;
vector<string> nameGroups = designMap->getNamesOfGroups();
- util->setGroups(Groups, nameGroups);
+ util.setGroups(Groups, nameGroups);
designMap->setNamesOfGroups(nameGroups);
- delete util;
//loop through the Groups and fill Globaldata's Groups with the design file info
vector<string> namesSeqs = designMap->getNamesSeqs(Groups);
else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0;
}
-
- T[0]->assembleTree();
+
+ map<string, string> nameMap;
+ T[0]->assembleTree(nameMap);
/***************************************************/
// create ouptut tree - respecting pickedGroups //
Tree* outputTree = new Tree(m->getNumGroups(), treeMap);
outputTree->getSubTree(T[0], m->getGroups());
- outputTree->assembleTree();
+ outputTree->assembleTree(nameMap);
//no longer need original tree, we have output tree to use and label
for (int i = 0; i < T.size(); i++) { delete T[i]; }
int numLines = 0;
if (ableToOpen == 0) { //you opened it
- while(char c = input.get()){
+ while(!input.eof()){
+ char c = input.get();
if(input.eof()) { break; }
else { output << c; if (c == '\n') {numLines++;} }
}
vector<string> getAllGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); return namesOfGroups; }
vector<string> Treenames;
- map<string, string> names;
+ //map<string, string> names;
vector<string> binLabelsInFile;
vector<string> currentBinLabels;
string saveNextLabel, argv, sharedHeaderMode;
try {
processors = p;
outputDir = o;
+ TreeMap* tmap = t->getTreeMap();
//if the users enters no groups then give them the score of all groups
vector<string> mGroups = m->getGroups();
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
}else{
lines.clear();
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos);
+ data = createProcesses(t, namesOfGroupCombos, tmap);
}
#else
data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
}
/**************************************************************************************************/
-EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
+EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
process++;
}else if (pid == 0){
EstOutput myresults;
- myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
+ myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
if (m->control_pressed) { exit(0); }
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
}
}
/**************************************************************************************************/
-EstOutput Parsimony::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) {
+EstOutput Parsimony::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) {
try {
EstOutput results; results.resize(num);
class Parsimony : public TreeCalculator {
public:
- Parsimony(TreeMap* t) : tmap(t) {};
+ Parsimony() {};
~Parsimony() {};
EstOutput getValues(Tree*, int, string);
- //EstOutput getValues(Tree*, string, string) { return data; }
private:
struct linePair {
vector<linePair> lines;
EstOutput data;
- TreeMap* tmap;
int processors;
string outputDir;
- EstOutput driver(Tree*, vector< vector<string> >, int, int);
- EstOutput createProcesses(Tree*, vector< vector<string> >);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
};
/***********************************************************************/
*/
#include "parsimonycommand.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> ParsimonyCommand::setParameters(){
}
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
m->setTreeFile(treefile);
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- T = read->getTrees();
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
-
+ TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ T = reader->getTrees();
+ tmap = T[0]->getTreeMap();
+ delete reader;
+
if(outputDir == "") { outputDir += m->hasPath(treefile); }
output = new ColumnFile(outputDir + m->getSimpleName(treefile) + ".parsimony", itersString);
outputNames.push_back(outputDir + m->getSimpleName(treefile) + ".parsimony");
}
//set users groups to analyze
- util = new SharedUtil();
+ SharedUtil util;
vector<string> mGroups = m->getGroups();
vector<string> tGroups = tmap->getNamesOfGroups();
- util->setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
- util->getCombos(groupComb, mGroups, numComp);
+ util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
+ util.getCombos(groupComb, mGroups, numComp);
m->setGroups(mGroups);
- delete util;
if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
- pars = new Parsimony(tmap);
+ Parsimony pars;
counter = 0;
Progress* reading;
reading = new Progress("Comparing to random:", iters);
if (m->control_pressed) {
- delete reading; delete pars; delete output;
+ delete reading; delete output;
delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
if (randomtree == "") {
//get pscores for users trees
for (int i = 0; i < T.size(); i++) {
- userData = pars->getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
+ userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
if (m->control_pressed) {
- delete reading; delete pars; delete output;
+ delete reading; delete output;
delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
randT->assembleRandomTree();
//get pscore of random tree
- randomData = pars->getValues(randT, processors, outputDir);
+ randomData = pars.getValues(randT, processors, outputDir);
if (m->control_pressed) {
- delete reading; delete pars; delete output; delete randT;
+ delete reading; delete output; delete randT;
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
randT->assembleRandomTree();
if (m->control_pressed) {
- delete reading; delete pars; delete output; delete randT;
- delete tmap;
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
+ delete reading; delete output; delete randT; delete tmap;
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
}
//get pscore of random tree
- randomData = pars->getValues(randT, processors, outputDir);
+ randomData = pars.getValues(randT, processors, outputDir);
if (m->control_pressed) {
- delete reading; delete pars; delete output; delete randT;
- delete tmap;
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
+ delete reading; delete output; delete randT; delete tmap;
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
}
for(int r = 0; r < numComp; r++) {
}
if (m->control_pressed) {
- delete reading; delete pars; delete output;
+ delete reading; delete output;
delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
return 0;
}
//finish progress bar
reading->finish();
delete reading;
-
printParsimonyFile();
if (randomtree == "") { printUSummaryFile(); }
-
- //reset groups parameter
- m->clearGroups();
-
- delete pars; delete output;
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+
+ delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
exit(1);
}
}
-/*****************************************************************/
-int ParsimonyCommand::readNamesFile() {
- try {
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == m->names.end()) {
- m->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = dupNames[i];
- if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ParsimonyCommand", "readNamesFile");
- exit(1);
- }
-}
/***********************************************************/
void help() { m->mothurOut(getHelpString()); }
private:
- ReadTree* read;
- SharedUtil* util;
FileOutput* output;
vector<Tree*> T; //user trees
Tree* randT; //random tree
Tree* copyUserTree;
TreeMap* tmap;
TreeMap* savetmap;
- Parsimony* pars;
vector<string> groupComb; // AB. AC, BC...
string sumFile, randomtree, allGroups, outputDir, treefile, groupfile, namefile;
int iters, numGroups, numComp, counter, processors, numUniquesInName;
+++ /dev/null
-/*
- * phylodiversity.cpp
- * Mothur
- *
- * Created by westcott on 4/30/10.
- * Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-#include "phylodiversity.h"
-
-/**************************************************************************************************
-EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes, vector< vector<float> >& data) {
- try {
-
- map<string, float> DScore;
- float totalLength = 0.0;
- data.clear();
-
- //initialize Dscore
- for (int i=0; i<globaldata->Groups.size(); i++) { DScore[globaldata->Groups[i]] = 0.0; }
-
- ********************************************************
- //calculate a D value for each group
- for(int v=0;v<treeNodes.size();v++){
-
- if (m->control_pressed) { return data; }
-
- //calc the branch length
- //while you aren't at root
- float sum = 0.0;
- int index = treeNodes[v];
-
- while(t->tree[index].getParent() != -1){
-
- //if you have a BL
- if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
- }
- index = t->tree[index].getParent();
- }
-
- //get last breanch length added
- if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
- }
-
- //for each group in the groups update the total branch length accounting for the names file
- vector<string> groups = t->tree[treeNodes[v]].getGroup();
- for (int j = 0; j < groups.size(); j++) {
- int numSeqsInGroupJ = 0;
- map<string, int>::iterator it;
- it = t->tree[treeNodes[v]].pcount.find(groups[j]);
- if (it != t->tree[treeNodes[v]].pcount.end()) { //this leaf node contains seqs from group j
- numSeqsInGroupJ = it->second;
- }
-
- //add branch length to total for group
- DScore[groups[j]] += (numSeqsInGroupJ * sum);
- }
-
- }
-
-
- for (int i=0; i<globaldata->Groups.size(); i++) {
- float percent = DScore[globaldata->Groups[i]];
- data.push_back(percent);
-
- }
-
- return data;
- }
- catch(exception& e) {
- m->errorOut(e, "PhyloDiversity", "getValues");
- exit(1);
- }
-}
-**************************************************************************************************/
-
-
-
+++ /dev/null
-#ifndef PHYLODIVERSITY_H
-#define PHYLODIVERSITY_H
-
-
-/*
- * phylodiversity.h
- * Mothur
- *
- * Created by westcott on 4/30/10.
- * Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-#include "treemap.h"
-#include "mothurout.h"
-
-
-/***********************************************************************/
-
-class PhyloDiversity {
-
- public:
- PhyloDiversity(TreeMap* t) : tmap(t) { m = MothurOut::getInstance(); }
- ~PhyloDiversity() {};
-
- //int getValues(Tree*, vector<int>, vector< vector< float> >&);
-
-
- private:
- MothurOut* m;
- TreeMap* tmap;
-};
-
-/***********************************************************************/
-
-
-#endif
-
*/
#include "phylodiversitycommand.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> PhyloDiversityCommand::setParameters(){
}
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
//check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
if (treefile == "not open") { treefile = ""; abort = true; }
if (abort == true) { if (calledHelp) { return 0; } return 2; }
m->setTreeFile(treefile);
-
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- vector<Tree*> trees = read->getTrees();
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
-
- SharedUtil* util = new SharedUtil();
+ TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ vector<Tree*> trees = reader->getTrees();
+ tmap = trees[0]->getTreeMap();
+ delete reader;
+
+ SharedUtil util;
vector<string> mGroups = m->getGroups();
vector<string> tGroups = tmap->getNamesOfGroups();
- util->setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze
- delete util;
+ util.setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze
//incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i); break; } }
exit(1);
}
}
-/*****************************************************************/
-int PhyloDiversityCommand::readNamesFile() {
- try {
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == m->names.end()) {
- m->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = dupNames[i];
- if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "PhyloDiversityCommand", "readNamesFile");
- exit(1);
- }
-}
-
//**********************************************************************************************************************
#include "command.hpp"
#include "treemap.h"
-#include "readtree.h"
#include "sharedutilities.h"
-
+#include "tree.h"
class PhyloDiversityCommand : public Command {
int execute();
void help() { m->mothurOut(getHelpString()); }
private:
- ReadTree* read;
TreeMap* tmap;
float freq;
int iters, processors, numUniquesInName;
bool abort, rarefy, summary, collect, scale;
string groups, outputDir, treefile, groupfile, namefile;
vector<string> Groups, outputNames; //holds groups to be used, and outputFile names
- map<string, string> nameMap;
int readNamesFile();
void printData(set<int>&, map< string, vector<float> >&, ofstream&, int);
}
}
/***********************************************************************/
-int ReadTree::AssembleTrees() {
+int ReadTree::AssembleTrees(map<string, string> nameMap) {
try {
//assemble users trees
for (int i = 0; i < Trees.size(); i++) {
if (m->control_pressed) { return 0; }
- Trees[i]->assembleTree();
+ Trees[i]->assembleTree(nameMap);
}
return 0;
}
float readBranchLength(istream& f);
vector<Tree*> getTrees() { return Trees; }
- int AssembleTrees();
+ int AssembleTrees(map<string, string>);
protected:
vector<Tree*> Trees;
delete tempInput;
m->setGroups(groupsToKeep);
m->clearAllGroups();
- m->names.clear();
m->saveNextLabel = "";
m->printedHeaders = false;
m->currentBinLabels.clear();
numLeaves = num;
numNodes = 2*numLeaves - 1;
-
+
tree.resize(numNodes);
}
catch(exception& e) {
Tree::Tree(string g) { //do not use tree generated by this its just to extract the treenames, its a chicken before the egg thing that needs to be revisited.
try {
m = MothurOut::getInstance();
-
- tmap = NULL;
-
parseTreeFile(); m->runParse = false;
}
catch(exception& e) {
//adjust tree to make sure root to tip length is .5
int root = findRoot();
tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves()));
- }
+
+ }
catch(exception& e) {
m->errorOut(e, "Tree", "Tree");
exit(1);
}
}
/*****************************************************************/
-int Tree::assembleTree() {
+int Tree::assembleTree(map<string, string> nameMap) {
try {
- //float A = clock();
+ //save for later
+ names = nameMap;
//if user has given a names file we want to include that info in the pgroups and pcount info.
- if(m->names.size() != 0) { addNamesToCounts(m->names); }
+ if(nameMap.size() != 0) { addNamesToCounts(nameMap); }
//build the pGroups in non leaf nodes to be used in the parsimony calcs.
for (int i = numLeaves; i < numNodes; i++) {
tree[i].pGroups = (mergeGroups(i));
tree[i].pcount = (mergeGcounts(i));
}
- //float B = clock();
- //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+
return 0;
}
catch(exception& e) {
exit(1);
}
}
-/*****************************************************************/
+/*****************************************************************
int Tree::assembleTree(string n) {
try {
//copy Tree since we are going to destroy it
Tree* copy = new Tree(tmap);
copy->getCopy(Ctree);
- copy->assembleTree("nonames");
+ map<string, string> empty;
+ copy->assembleTree(empty);
//we want to select some of the leaf nodes to create the output tree
//go through the input Tree starting at parents of leaves
/*************************************************************************************************/
void Tree::assembleRandomUnifracTree(vector<string> g) {
randomLabels(g);
- assembleTree("noNameCounts");
+ map<string, string> empty;
+ assembleTree(empty);
}
/*************************************************************************************************/
void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
-
vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
randomLabels(temp);
- assembleTree("noNameCounts");
+ map<string, string> empty;
+ assembleTree(empty);
}
/*************************************************************************************************/
//for now it's just random topology but may become random labels as well later that why this is such a simple function now...
void Tree::assembleRandomTree() {
randomTopology();
- assembleTree();
+ map<string, string> empty;
+ assembleTree(empty);
}
/**************************************************************************************************/
}
}
/*****************************************************************/
+void Tree::print(ostream& out, map<string, string> nameMap) {
+ try {
+ int root = findRoot();
+ printBranch(root, out, nameMap);
+ out << ";" << endl;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Tree", "print");
+ exit(1);
+ }
+}
+/*****************************************************************/
void Tree::print(ostream& out, string mode) {
try {
int root = findRoot();
}
}
/*****************************************************************/
-void Tree::printBranch(int node, ostream& out, string mode) {
+void Tree::printBranch(int node, ostream& out, map<string, string> names) {
try {
// you are not a leaf
+ if (tree[node].getLChild() != -1) {
+ out << "(";
+ printBranch(tree[node].getLChild(), out, names);
+ out << ",";
+ printBranch(tree[node].getRChild(), out, names);
+ out << ")";
+
+ //if there is a branch length then print it
+ if (tree[node].getBranchLength() != -1) {
+ out << ":" << tree[node].getBranchLength();
+ }
+
+ }else { //you are a leaf
+ map<string, string>::iterator itNames = names.find(tree[node].getName());
+
+ string outputString = "";
+ if (itNames != names.end()) {
+
+ vector<string> dupNames;
+ m->splitAtComma((itNames->second), dupNames);
+
+ if (dupNames.size() == 1) {
+ outputString += tree[node].getName();
+ if (tree[node].getBranchLength() != -1) {
+ outputString += ":" + toString(tree[node].getBranchLength());
+ }
+ }else {
+ outputString += "(";
+
+ for (int u = 0; u < dupNames.size()-1; u++) {
+ outputString += dupNames[u];
+
+ if (tree[node].getBranchLength() != -1) {
+ outputString += ":" + toString(0.0);
+ }
+ outputString += ",";
+ }
+
+ outputString += dupNames[dupNames.size()-1];
+ if (tree[node].getBranchLength() != -1) {
+ outputString += ":" + toString(0.0);
+ }
+
+ outputString += ")";
+ if (tree[node].getBranchLength() != -1) {
+ outputString += ":" + toString(tree[node].getBranchLength());
+ }
+ }
+ }else {
+ outputString = tree[node].getName();
+ //if there is a branch length then print it
+ if (tree[node].getBranchLength() != -1) {
+ outputString += ":" + toString(tree[node].getBranchLength());
+ }
+
+ m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine();
+ }
+
+ out << outputString;
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Tree", "printBranch");
+ exit(1);
+ }
+}
+/*****************************************************************/
+void Tree::printBranch(int node, ostream& out, string mode) {
+ try {
+
+ // you are not a leaf
if (tree[node].getLChild() != -1) {
out << "(";
printBranch(tree[node].getLChild(), out, mode);
if (tree[node].getBranchLength() != -1) {
out << ":" << tree[node].getBranchLength();
}
- }else if (mode == "deunique") {
- //if there is a branch length then print it
- if (tree[node].getBranchLength() != -1) {
- out << ":" << tree[node].getBranchLength();
- }
}
}else { //you are a leaf
string leafGroup = tmap->getGroup(tree[node].getName());
if (tree[node].getBranchLength() != -1) {
out << ":" << tree[node].getBranchLength();
}
- }else if (mode == "deunique") {
- map<string, string>::iterator itNames = m->names.find(tree[node].getName());
-
- string outputString = "";
- if (itNames != m->names.end()) {
-
- vector<string> dupNames;
- m->splitAtComma((itNames->second), dupNames);
-
- if (dupNames.size() == 1) {
- outputString += tree[node].getName();
- if (tree[node].getBranchLength() != -1) {
- outputString += ":" + toString(tree[node].getBranchLength());
- }
- }else {
- outputString += "(";
-
- for (int u = 0; u < dupNames.size()-1; u++) {
- outputString += dupNames[u];
-
- if (tree[node].getBranchLength() != -1) {
- outputString += ":" + toString(0.0);
- }
- outputString += ",";
- }
-
- outputString += dupNames[dupNames.size()-1];
- if (tree[node].getBranchLength() != -1) {
- outputString += ":" + toString(0.0);
- }
-
- outputString += ")";
- if (tree[node].getBranchLength() != -1) {
- outputString += ":" + toString(tree[node].getBranchLength());
- }
- }
- }else {
- outputString = tree[node].getName();
- //if there is a branch length then print it
- if (tree[node].getBranchLength() != -1) {
- outputString += ":" + toString(tree[node].getBranchLength());
- }
-
- m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine();
- }
-
- out << outputString;
}
}
Tree(TreeMap*, vector< vector<double> >&); //create tree from sim matrix
~Tree();
+ TreeMap* getTreeMap() { return tmap; }
void getCopy(Tree*); //makes tree a copy of the one passed in.
void getSubTree(Tree*, vector<string>); //makes tree a that contains only the names passed in.
int getSubTree(Tree* originalToCopy, vector<string> seqToInclude, map<string, string> nameMap); //used with (int, TreeMap) constructor. SeqsToInclude contains subsample wanted - assumes these are unique seqs and size of vector=numLeaves passed into constructor. nameMap is unique -> redundantList can be empty if no namesfile was provided.
void printTree();
void print(ostream&);
void print(ostream&, string);
+ void print(ostream&, map<string, string>);
int findRoot(); //return index of root node
//this function takes the leaf info and populates the non leaf nodes
- int assembleTree();
- int assembleTree(string);
+ int assembleTree(map<string, string>);
vector<Node> tree; //the first n nodes are the leaves, where n is the number of sequences.
map< string, vector<int> > groupNodeInfo; //maps group to indexes of leaf nodes with that group, different groups may contain same node because of names file.
ofstream out;
string filename;
+ map<string, string> names;
map<string, int>::iterator it, it2;
map<string, int> mergeGroups(int); //returns a map with a groupname and the number of times that group was seen in the children
map<string,int> mergeGcounts(int);
void randomBlengths();
void randomLabels(vector<string>);
//void randomLabels(string, string);
- void printBranch(int, ostream&, string); //recursively print out tree
+ void printBranch(int, ostream&, map<string, string>); //recursively print out tree
+ void printBranch(int, ostream&, string);
void parseTreeFile(); //parses through tree file to find names of nodes and number of them
//this is required in case user has sequences in the names file that are
//not included in the tree.
//create tree
t = new Tree(tmap, simMatrix);
- /* //initialize index
- map<int, int> index; //maps row in simMatrix to vector index in the tree
- for (int g = 0; g < numGroups; g++) { index[g] = g; }
+ if (m->control_pressed) { delete t; t = NULL; return t; }
- //do merges and create tree structure by setting parents and children
- //there are numGroups - 1 merges to do
- for (int i = 0; i < (numGroups - 1); i++) {
- float largest = -1000.0;
-
- if (m->control_pressed) { delete t; t = NULL; return t; }
-
- int row, column;
- //find largest value in sims matrix by searching lower triangle
- for (int j = 1; j < simMatrix.size(); j++) {
- for (int k = 0; k < j; k++) {
- if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
- }
- }
+ //assemble tree
+ map<string, string> empty;
+ t->assembleTree(empty);
- //set non-leaf node info and update leaves to know their parents
- //non-leaf
- t->tree[numGroups + i].setChildren(index[row], index[column]);
-
- //parents
- t->tree[index[row]].setParent(numGroups + i);
- t->tree[index[column]].setParent(numGroups + i);
-
- //blength = distance / 2;
- float blength = ((1.0 - largest) / 2);
-
- //branchlengths
- t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
- t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
-
- //set your length to leaves to your childs length plus branchlength
- t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
-
-
- //update index
- index[row] = numGroups+i;
- index[column] = numGroups+i;
-
- //remove highest value that caused the merge.
- simMatrix[row][column] = -1000.0;
- simMatrix[column][row] = -1000.0;
-
- //merge values in simsMatrix
- for (int n = 0; n < simMatrix.size(); n++) {
- //row becomes merge of 2 groups
- simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
- simMatrix[n][row] = simMatrix[row][n];
- //delete column
- simMatrix[column][n] = -1000.0;
- simMatrix[n][column] = -1000.0;
- }
- }
-
- //adjust tree to make sure root to tip length is .5
- int root = t->findRoot();
- t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
- */
- //assemble tree
- t->assembleTree();
-
- if (m->control_pressed) { delete t; t = NULL; return t; }
-
return t;
-
}
catch(exception& e) {
m->errorOut(e, "TreeGroupCommand", "createTree");
Consensus consensus;
//clear old tree names if any
m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
- Tree* conTree = consensus.getTree(trees, tmap);
+ Tree* conTree = consensus.getTree(trees);
//create a new filename
string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons.tre";
/************************************************************/
TreeMap::~TreeMap(){}
+/************************************************************/
+int TreeMap::readMap(string gf) {
+
+ groupFileName = gf;
+ m->openInputFile(gf, fileHandle);
+
+ string seqName, seqGroup;
+ int error = 0;
+
+ while(fileHandle){
+ fileHandle >> seqName; m->gobble(fileHandle); //read from first column
+ fileHandle >> seqGroup; //read from second column
+
+ if (m->control_pressed) { fileHandle.close(); return 1; }
+
+ setNamesOfGroups(seqGroup);
+
+ map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
+ if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ namesOfSeqs.push_back(seqName);
+ treemap[seqName].groupname = seqGroup; //store data in map
+
+ it2 = seqsPerGroup.find(seqGroup);
+ if (it2 == seqsPerGroup.end()) { //if it's a new group
+ seqsPerGroup[seqGroup] = 1;
+ }else {//it's a group we already have
+ seqsPerGroup[seqGroup]++;
+ }
+ }
+
+ m->gobble(fileHandle);
+ }
+ fileHandle.close();
+
+ return error;
+}
/************************************************************/
int TreeMap::readMap() {
int error = 0;
while(fileHandle){
- fileHandle >> seqName; //read from first column
+ fileHandle >> seqName; m->gobble(fileHandle); //read from first column
fileHandle >> seqGroup; //read from second column
if (m->control_pressed) { fileHandle.close(); return 1; }
}
}
/************************************************************/
-int TreeMap::getCopy(TreeMap* copy){
+int TreeMap::getCopy(TreeMap& copy){
try {
- namesOfGroups = copy->getNamesOfGroups();
- numGroups = copy->getNumGroups();
- namesOfSeqs = copy->namesOfSeqs;
- seqsPerGroup = copy->seqsPerGroup;
- treemap = copy->treemap;
+ namesOfGroups = copy.getNamesOfGroups();
+ numGroups = copy.getNumGroups();
+ namesOfSeqs = copy.namesOfSeqs;
+ seqsPerGroup = copy.seqsPerGroup;
+ treemap = copy.treemap;
return 0;
}
*/
#include "mothur.h"
-#include "groupmap.h"
#include "listvector.hpp"
/* This class is used by the read.tree command to build the tree container. */
int vectorIndex;
};
-class GroupMap;
-class ListVector;
-
class TreeMap {
public:
TreeMap() { m = MothurOut::getInstance(); }
TreeMap(string);
~TreeMap();
+
int readMap();
+ int readMap(string);
int getNumGroups();
int getNumSeqs();
void setIndex(string, int); //sequencename, index
void makeSim(ListVector*); //takes listvector info and fills treemap for use by tree.shared command.
vector<string> getNamesSeqs();
vector<string> getNamesSeqs(vector<string>); //get names of seqs belonging to a group or set of groups
- int getCopy(TreeMap*);
+ int getCopy(TreeMap&);
vector<string> namesOfSeqs;
map<string,int> seqsPerGroup; //groupname, number of seqs in that group.
--- /dev/null
+//
+// treereader.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 4/11/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "treereader.h"
+#include "readtree.h"
+
+/***********************************************************************/
+
+TreeReader::TreeReader(string tf) : treefile(tf) {
+ try {
+ m = MothurOut::getInstance();
+ namefile = "";
+ groupfile = "";
+ readTrees();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeReader", "TreeReader");
+ exit(1);
+ }
+}
+/***********************************************************************/
+
+TreeReader::TreeReader(string tf, string gf) : treefile(tf), groupfile(gf) {
+ try {
+ m = MothurOut::getInstance();
+ namefile = "";
+ readTrees();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeReader", "TreeReader");
+ exit(1);
+ }
+}
+/***********************************************************************/
+TreeReader::TreeReader(string tf, string gf, string nf) : treefile(tf), groupfile(gf), namefile(nf) {
+ try {
+ m = MothurOut::getInstance();
+ readTrees();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeReader", "TreeReader");
+ exit(1);
+ }
+}
+/***********************************************************************/
+bool TreeReader::readTrees() {
+ try {
+
+ tmap = new TreeMap();
+ if (groupfile != "") { tmap->readMap(groupfile); }
+ else{ //fake out by putting everyone in one group
+ Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
+ for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
+ }
+
+ int numUniquesInName = 0;
+ if (namefile != "") { numUniquesInName = readNamesFile(); }
+
+ ReadTree* read = new ReadNewickTree(treefile);
+ int readOk = read->read(tmap);
+
+ if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete read; m->control_pressed=true; return 0; }
+
+ read->AssembleTrees(names);
+ trees = read->getTrees();
+ delete read;
+
+ //make sure all files match
+ //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+ int numNamesInTree;
+ if (namefile != "") {
+ if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
+ else { numNamesInTree = m->Treenames.size(); }
+ }else { numNamesInTree = m->Treenames.size(); }
+
+
+ //output any names that are in group file but not in tree
+ if (numNamesInTree < tmap->getNumSeqs()) {
+ for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
+ //is that name in the tree?
+ int count = 0;
+ for (int j = 0; j < m->Treenames.size(); j++) {
+ if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
+ count++;
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; } return 0; }
+
+ //then you did not find it so report it
+ if (count == m->Treenames.size()) {
+ //if it is in your namefile then don't remove
+ map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
+
+ if (it == nameMap.end()) {
+ m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+ tmap->removeSeq(tmap->namesOfSeqs[i]);
+ i--; //need this because removeSeq removes name from namesOfSeqs
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeReader", "readTrees");
+ exit(1);
+ }
+}
+/*****************************************************************/
+int TreeReader::readNamesFile() {
+ try {
+ nameMap.clear();
+ names.clear();
+ int numUniquesInName = 0;
+
+ ifstream in;
+ m->openInputFile(namefile, in);
+
+ string first, second;
+ map<string, string>::iterator itNames;
+
+ while(!in.eof()) {
+ in >> first >> second; m->gobble(in);
+
+ numUniquesInName++;
+
+ itNames = nameMap.find(first);
+ if (itNames == nameMap.end()) {
+ names[first] = second;
+
+ //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
+ vector<string> dupNames;
+ m->splitAtComma(second, dupNames);
+
+ for (int i = 0; i < dupNames.size(); i++) {
+ nameMap[dupNames[i]] = first;
+ if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
+ }
+ }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); nameMap.clear(); names.clear(); namefile = ""; return 1; }
+ }
+ in.close();
+
+ return numUniquesInName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeReader", "readNamesFile");
+ exit(1);
+ }
+}
+/***********************************************************************/
+
+
--- /dev/null
+#ifndef Mothur_treereader_h
+#define Mothur_treereader_h
+
+//
+// treereader.h
+// Mothur
+//
+// Created by Sarah Westcott on 4/11/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "mothurout.h"
+#include "tree.h"
+
+class TreeReader {
+
+public:
+
+ TreeReader(string tf);
+ TreeReader(string tf, string gf);
+ TreeReader(string tf, string gf, string nf);
+ ~TreeReader() {}
+
+ vector<Tree*> getTrees() { return trees; }
+ map<string, string> getNames() { return nameMap; } //dups -> unique
+ map<string, string> getNameMap() { return names; } //unique -> dups list
+
+
+private:
+ MothurOut* m;
+ vector<Tree*> trees;
+ TreeMap* tmap;
+ map<string, string> nameMap; //dupName -> uniqueName
+ map<string, string> names;
+
+ string treefile, groupfile, namefile;
+
+ bool readTrees();
+ int readNamesFile();
+};
+
+
+
+#endif
*/
#include "unifracunweightedcommand.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> UnifracUnweightedCommand::setParameters(){
}
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
- //check for required parameters
+ //check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
if (treefile == "not open") { abort = true; }
else if (treefile == "not found") { //if there is a current design file, use it
m->setTreeFile(treefile);
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- T = read->getTrees();
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
-
+ TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ T = reader->getTrees();
+ tmap = T[0]->getTreeMap();
+ map<string, string> nameMap = reader->getNameMap();
+ delete reader;
+
sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
m->openOutputFile(sumFile, outSum);
- util = new SharedUtil();
+ SharedUtil util;
Groups = m->getGroups();
vector<string> namesGroups = tmap->getNamesOfGroups();
- util->setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
- util->getCombos(groupComb, Groups, numComp);
+ util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
+ util.getCombos(groupComb, Groups, numComp);
m->setGroups(Groups);
- delete util;
if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
- unweighted = new Unweighted(tmap, includeRoot);
+ Unweighted unweighted(includeRoot);
int start = time(NULL);
//get pscores for users trees
for (int i = 0; i < T.size(); i++) {
if (m->control_pressed) {
- delete tmap; delete unweighted;
+ delete tmap;
for (int i = 0; i < T.size(); i++) { delete T[i]; }
outSum.close();
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
utreeScores.resize(numComp);
UWScoreSig.resize(numComp);
- userData = unweighted->getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
+ userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
- if (m->control_pressed) { delete tmap; delete unweighted;
+ if (m->control_pressed) { delete tmap;
for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
//output scores for each combination
for (int j = 0; j < iters; j++) {
//we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
- randomData = unweighted->getValues(T[i], "", "", processors, outputDir);
+ randomData = unweighted.getValues(T[i], "", "", processors, outputDir);
- if (m->control_pressed) { delete tmap; delete unweighted;
+ if (m->control_pressed) { delete tmap;
for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
for(int k = 0; k < numComp; k++) {
}
- if (m->control_pressed) { delete tmap; delete unweighted;
+ if (m->control_pressed) { delete tmap;
for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//print output files
outSum.close();
- m->clearGroups();
- delete tmap; delete unweighted;
+ delete tmap;
for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
exit(1);
}
-}/*****************************************************************/
-int UnifracUnweightedCommand::readNamesFile() {
- try {
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == m->names.end()) {
- m->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = dupNames[i];
- if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "UnifracUnweightedCommand", "readNamesFile");
- exit(1);
- }
}
/***********************************************************/
private:
- ReadTree* read;
- SharedUtil* util;
FileOutput* output;
vector<Tree*> T; //user trees
TreeMap* tmap;
- Unweighted* unweighted;
string sumFile, allGroups;
vector<string> groupComb; // AB. AC, BC...
int iters, numGroups, numComp, counter, processors, numUniquesInName;
ofstream outSum, out;
ifstream inFile;
- map<string, string> nameMap;
void printUWSummaryFile(int);
void printUnweightedFile();
void createPhylipFile(int);
- int readNamesFile();
-
};
#include "unifracweightedcommand.h"
#include "consensus.h"
#include "subsample.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> UnifracWeightedCommand::setParameters(){
}
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
//check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
if (treefile == "not open") { treefile = ""; abort = true; }
m->setTreeFile(treefile);
- readTrees(); if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+ TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ T = reader->getTrees();
+ tmap = T[0]->getTreeMap();
+ map<string, string> nameMap = reader->getNames();
+ delete reader;
+
+ if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
m->openOutputFile(sumFile, outSum);
if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
- Weighted weighted(tmap, includeRoot);
+ Weighted weighted(includeRoot);
int start = time(NULL);
if (m->control_pressed) { break; }
- //copy to preserve old one - would do this in subsample but tree needs it and memory cleanup becomes messy.
+ //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
TreeMap* newTmap = new TreeMap();
- newTmap->getCopy(tmap);
+ newTmap->getCopy(*tmap);
SubSample sample;
Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
//call new weighted function
vector<double> iterData; iterData.resize(numComp,0);
- Weighted thisWeighted(newTmap, includeRoot);
+ Weighted thisWeighted(includeRoot);
iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
//save data to make ave dist, std dist
delete newTmap;
delete subSampleTree;
+
+ if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
}
if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
m->runParse = false;
//create treemap class from groupmap for tree class to use
- TreeMap* newTmap = new TreeMap();
- newTmap->makeSim(m->getGroups());
+ TreeMap newTmap;
+ newTmap.makeSim(m->getGroups());
//clear old tree names if any
m->Treenames.clear();
vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
- if (m->control_pressed) { delete newTmap; return 0; }
+ if (m->control_pressed) { return 0; }
Consensus con;
- Tree* conTree = con.getTree(newTrees, newTmap);
+ Tree* conTree = con.getTree(newTrees);
//create a new filename
string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";
if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
outTree.close();
- delete newTmap;
return 0;
}
/**************************************************************************************************/
-vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap* mytmap) {
+vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
try {
vector<Tree*> trees;
}
//create tree
- Tree* tempTree = new Tree(mytmap, sims);
- tempTree->assembleTree();
+ Tree* tempTree = new Tree(&mytmap, sims);
+ map<string, string> empty;
+ tempTree->assembleTree(empty);
trees.push_back(tempTree);
}
/**************************************************************************************************/
-int UnifracWeightedCommand::readTrees() {
- try {
-
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- T = read->getTrees();
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "UnifracWeightedCommand", "readTrees");
- exit(1);
- }
-}
-/**************************************************************************************************/
-
int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
try {
try {
Tree* randT = new Tree(tmap);
- Weighted weighted(tmap, includeRoot);
+ Weighted weighted(includeRoot);
for (int h = start; h < (start+num); h++) {
exit(1);
}
}
-/*****************************************************************/
-int UnifracWeightedCommand::readNamesFile() {
- try {
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == m->names.end()) {
- m->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = first;
- if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
- exit(1);
- }
-}
/***********************************************************/
linePair(int i, int j) : start(i), num(j) {}
};
vector<linePair> lines;
-
- ReadTree* read;
- SharedUtil* util;
+ TreeMap* tmap;
FileOutput* output;
vector<Tree*> T; //user trees
vector<double> utreeScores; //user tree unweighted scores
vector<double> WScoreSig; //tree weighted score signifigance when compared to random trees - percentage of random trees with that score or lower.
vector<string> groupComb; // AB. AC, BC...
- TreeMap* tmap;
string sumFile, outputDir;
int iters, numGroups, numComp, counter;
vector< vector<double> > rScores; //vector<weighted scores for random trees.> each group comb has an entry
void calculateFreqsCumuls();
int createProcesses(Tree*, vector< vector<string> >, vector< vector<double> >&);
int driver(Tree*, vector< vector<string> >, int, int, vector< vector<double> >&);
- int readNamesFile();
int runRandomCalcs(Tree*, vector<double>);
- int readTrees();
- vector<Tree*> buildTrees(vector< vector<double> >&, int, TreeMap*);
+ vector<Tree*> buildTrees(vector< vector<double> >&, int, TreeMap&);
int getConsensusTrees(vector< vector<double> >&, int);
int getAverageSTDMatrices(vector< vector<double> >&, int);
try {
processors = p;
outputDir = o;
-
+
+ TreeMap* tmap = t->getTreeMap();
+
//if the users enters no groups then give them the score of all groups
int numGroups = m->getNumGroups();
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
}else{
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos);
+ data = createProcesses(t, namesOfGroupCombos, tmap);
lines.clear();
}
#else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
#endif
return data;
}
/**************************************************************************************************/
-EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
+EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
process++;
}else if (pid == 0){
EstOutput myresults;
- myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
+ myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
if (m->control_pressed) { exit(0); }
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
//force parent to wait until all the processes are done
for (int i=0;i<(processors-1);i++) {
}
}
/**************************************************************************************************/
-EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) {
+EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) {
try {
processors = p;
outputDir = o;
+ TreeMap* tmap = t->getTreeMap();
+
//if the users enters no groups then give them the score of all groups
int numGroups = m->getNumGroups();
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap);
}else{
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos, true);
+ data = createProcesses(t, namesOfGroupCombos, true, tmap);
lines.clear();
}
#else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap);
#endif
return data;
}
/**************************************************************************************************/
-EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
+EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, TreeMap* tmap) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
process++;
}else if (pid == 0){
EstOutput myresults;
- myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
+ myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, tmap);
if (m->control_pressed) { exit(0); }
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, tmap);
//force parent to wait until all the processes are done
for (int i=0;i<(processors-1);i++) {
}
}
/**************************************************************************************************/
-EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) {
+EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, TreeMap* tmap) {
try {
EstOutput results; results.resize(num);
class Unweighted : public TreeCalculator {
public:
- Unweighted(TreeMap* t, bool r) : tmap(t), includeRoot(r) {};
+ Unweighted(bool r) : includeRoot(r) {};
~Unweighted() {};
EstOutput getValues(Tree*, int, string);
EstOutput getValues(Tree*, string, string, int, string);
vector<linePair> lines;
EstOutput data;
- TreeMap* tmap;
int processors;
string outputDir;
map< vector<string>, set<int> > rootForGrouping; //maps a grouping combo to the roots for that combo
bool includeRoot;
- EstOutput driver(Tree*, vector< vector<string> >, int, int);
- EstOutput createProcesses(Tree*, vector< vector<string> >);
- EstOutput driver(Tree*, vector< vector<string> >, int, int, bool);
- EstOutput createProcesses(Tree*, vector< vector<string> >, bool);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, bool, TreeMap*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, bool, TreeMap*);
int getRoot(Tree*, int, vector<string>);
};
vector<double> D;
processors = p;
outputDir = o;
+
+ TreeMap* tmap = t->getTreeMap();
numGroups = m->getNumGroups();
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
}else{
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos);
+ data = createProcesses(t, namesOfGroupCombos, tmap);
lines.clear();
}
#else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
#endif
return data;
}
/**************************************************************************************************/
-EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
+EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
}else if (pid == 0){
EstOutput Myresults;
- Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
+ Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
//m->mothurOut("Merging results."); m->mothurOutEndLine();
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
//force parent to wait until all the processes are done
for (int i=0;i<(processors-1);i++) {
}
}
/**************************************************************************************************/
-EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) {
+EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) {
try {
EstOutput results;
vector<double> D;
try {
data.clear(); //clear out old values
+
+ TreeMap* tmap = t->getTreeMap();
if (m->control_pressed) { return data; }
class Weighted : public TreeCalculator {
public:
- Weighted(TreeMap* t, bool r) : tmap(t), includeRoot(r) {};
+ Weighted( bool r) : includeRoot(r) {};
~Weighted() {};
EstOutput getValues(Tree*, string, string);
vector<linePair> lines;
EstOutput data;
- TreeMap* tmap;
map<string, int>::iterator it;
map<string, double> WScore; //a score for each group combination i.e. AB, AC, BC.
int processors;
map< vector<string>, set<int> > rootForGrouping; //maps a grouping combo to the root for that combo
bool includeRoot;
- EstOutput driver(Tree*, vector< vector<string> >, int, int);
- EstOutput createProcesses(Tree*, vector< vector<string> >);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
double getLengthToRoot(Tree*, int, string, string);
};