X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifytreecommand.cpp;h=7861a01bab3420f1598e351b30765dbd0ae5f124;hb=529ec122f7cac4af987e121d150b878d7c7a0d5d;hp=e1ef6d304227f7b523ebb9540373a58cdd2339a0;hpb=d6c0a11d1cecfac18b323285e7ffadb7f58e848f;p=mothur.git diff --git a/classifytreecommand.cpp b/classifytreecommand.cpp index e1ef6d3..7861a01 100644 --- a/classifytreecommand.cpp +++ b/classifytreecommand.cpp @@ -8,6 +8,7 @@ #include "classifytreecommand.h" #include "phylotree.h" +#include "treereader.h" //********************************************************************************************************************** vector ClassifyTreeCommand::setParameters(){ @@ -33,7 +34,7 @@ vector ClassifyTreeCommand::setParameters(){ string ClassifyTreeCommand::getHelpString(){ try { string helpString = ""; - helpString += "The classify.tree command reads a tree and taxonomy file and output the concensus taxonomy for each node on the tree. \n"; + helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n"; helpString += "If you provide a group file, the concensus for each group will also be provided. \n"; helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n"; @@ -48,7 +49,27 @@ string ClassifyTreeCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string ClassifyTreeCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "tree") { outputFileName = "taxonomy.tre"; } + else if (type == "summary") { outputFileName = "taxonomy.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ClassifyTreeCommand", "getOutputFileNameTag"); + exit(1); + } +} //********************************************************************************************************************** ClassifyTreeCommand::ClassifyTreeCommand(){ try { @@ -86,12 +107,6 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - vector tempOutNames; outputTypes["tree"] = tempOutNames; outputTypes["summary"] = tempOutNames; @@ -195,73 +210,18 @@ int ClassifyTreeCommand::execute(){ // reading tree info // /***************************************************/ m->setTreeFile(treefile); - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - vector T = read->getTrees(); - Tree* outputTree = T[0]; - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } + + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + vector T = reader->getTrees(); + TreeMap* tmap = T[0]->getTreeMap(); + Tree* outputTree = T[0]; + delete reader; + + if (namefile != "") { m->readNames(namefile, nameMap, nameCount); } - if (m->control_pressed) { delete outputTree; delete tmap; return 0; } + if (m->control_pressed) { delete tmap; delete outputTree; return 0; } - readTaxonomyFile(); - + m->readTax(taxonomyfile, taxMap); /***************************************************/ // get concensus taxonomies // @@ -302,7 +262,7 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){ string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(treefile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.summary"; + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("summary"); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; @@ -316,7 +276,7 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){ string treeOutputDir = outputDir; if (outputDir == "") { treeOutputDir += m->hasPath(treefile); } - string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.tre"; + string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree"); //create a map from tree node index to names of descendants, save time later map > > nodeToDescendants; //node# -> (groupName -> groupMembers) @@ -484,6 +444,7 @@ map > ClassifyTreeCommand::getDescendantList(Tree*& T, int i int lc = T->tree[i].getLChild(); int rc = T->tree[i].getRChild(); + TreeMap* tmap = T->getTreeMap(); if (lc == -1) { //you are a leaf your only descendant is yourself string group = tmap->getGroup(T->tree[i].getName()); @@ -512,68 +473,6 @@ map > ClassifyTreeCommand::getDescendantList(Tree*& T, int i exit(1); } } -//********************************************************************************************************************** -int ClassifyTreeCommand::readTaxonomyFile() { - try { - - ifstream in; - m->openInputFile(taxonomyfile, in); - - string name, tax; - - while(!in.eof()){ - in >> name >> tax; - m->gobble(in); - - //are there confidence scores, if so remove them - if (tax.find_first_of('(') != -1) { m->removeConfidences(tax); } - - taxMap[name] = tax; - - if (m->control_pressed) { in.close(); taxMap.clear(); return 0; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile"); - exit(1); - } -} - -/*****************************************************************/ -int ClassifyTreeCommand::readNamesFile() { - try { - ifstream inNames; - m->openInputFile(namefile, inNames); - - string name, names; - - while(!inNames.eof()){ - inNames >> name; //read from first column A - inNames >> names; //read from second column A,B,C,D - m->gobble(inNames); - - //parse names into vector - vector theseNames; - m->splitAtComma(names, theseNames); - - for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; } - nameCount[name] = theseNames.size(); - - if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; } - } - inNames.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyTreeCommand", "readNamesFile"); - exit(1); - } -} - /*****************************************************************/