#include "classifytreecommand.h"
#include "phylotree.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> ClassifyTreeCommand::setParameters(){
string ClassifyTreeCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The classify.tree command reads a tree and taxonomy file and output the concensus taxonomy for each node on the tree. \n";
+ helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n";
helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n";
helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the file-name suffix this command appends for the requested output type:
+//   "tree"    -> "taxonomy.tre"
+//   "summary" -> "taxonomy.summary"
+// A type that is not a key of outputTypes is reported through m->mothurOut and yields "".
+// A type that is a key of outputTypes but has no suffix defined here is also reported, yields "",
+// and additionally sets m->control_pressed.
+// inputName is not used by this implementation.
+string ClassifyTreeCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        // Guard: is this a type this command creates at all?
+        const bool knownType = (outputTypes.count(type) != 0);
+        if (!knownType) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "tree") {
+            tag = "taxonomy.tre";
+        }
+        else if (type == "summary") {
+            tag = "taxonomy.summary";
+        }
+        else {
+            // Registered output type without a suffix definition: report it and flag the run.
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
ClassifyTreeCommand::ClassifyTreeCommand(){
try {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
vector<string> tempOutNames;
outputTypes["tree"] = tempOutNames;
outputTypes["summary"] = tempOutNames;
// reading tree info //
/***************************************************/
m->setTreeFile(treefile);
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- vector<Tree*> T = read->getTrees();
- Tree* outputTree = T[0];
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
+
+ TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ vector<Tree*> T = reader->getTrees();
+ TreeMap* tmap = T[0]->getTreeMap();
+ Tree* outputTree = T[0];
+ delete reader;
+
+ if (namefile != "") { m->readNames(namefile, nameMap, nameCount); }
- if (m->control_pressed) { delete outputTree; delete tmap; return 0; }
+ if (m->control_pressed) { delete tmap; delete outputTree; return 0; }
- readTaxonomyFile();
-
+ m->readTax(taxonomyfile, taxMap);
/***************************************************/
// get concensus taxonomies //
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(treefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.summary";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("summary");
outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
ofstream out;
string treeOutputDir = outputDir;
if (outputDir == "") { treeOutputDir += m->hasPath(treefile); }
- string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.tre";
+ string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree");
//create a map from tree node index to names of descendants, save time later
map<int, map<string, set<string> > > nodeToDescendants; //node# -> (groupName -> groupMembers)
int lc = T->tree[i].getLChild();
int rc = T->tree[i].getRChild();
+ TreeMap* tmap = T->getTreeMap();
if (lc == -1) { //you are a leaf your only descendant is yourself
string group = tmap->getGroup(T->tree[i].getName());
exit(1);
}
}
-//**********************************************************************************************************************
-int ClassifyTreeCommand::readTaxonomyFile() {
- try {
-
- ifstream in;
- m->openInputFile(taxonomyfile, in);
-
- string name, tax;
-
- while(!in.eof()){
- in >> name >> tax;
- m->gobble(in);
-
- //are there confidence scores, if so remove them
- if (tax.find_first_of('(') != -1) { m->removeConfidences(tax); }
-
- taxMap[name] = tax;
-
- if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile");
- exit(1);
- }
-}
-
-/*****************************************************************/
-int ClassifyTreeCommand::readNamesFile() {
- try {
- ifstream inNames;
- m->openInputFile(namefile, inNames);
-
- string name, names;
-
- while(!inNames.eof()){
- inNames >> name; //read from first column A
- inNames >> names; //read from second column A,B,C,D
- m->gobble(inNames);
-
- //parse names into vector
- vector<string> theseNames;
- m->splitAtComma(names, theseNames);
-
- for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; }
- nameCount[name] = theseNames.size();
-
- if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
- }
- inNames.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifyTreeCommand", "readNamesFile");
- exit(1);
- }
-}
-
/*****************************************************************/