X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=classifytreecommand.cpp;h=79a82454d14304d997a2a46efbc62ffbeac63f60;hp=bcf27698ce2bf68bf3dc6a3909228a9d4ccd5cb2;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=f55cf350ca6643f8eb070d8336e1957699a3f109 diff --git a/classifytreecommand.cpp b/classifytreecommand.cpp index bcf2769..79a8245 100644 --- a/classifytreecommand.cpp +++ b/classifytreecommand.cpp @@ -13,13 +13,14 @@ //********************************************************************************************************************** vector ClassifyTreeCommand::setParameters(){ try { - CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptree); - CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptaxonomy); - CommandParameter pname("name", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pgroup); - CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none","tree-summary",false,true,true); parameters.push_back(ptree); + CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none","",false,true,true); parameters.push_back(ptaxonomy); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "","",false,true); parameters.push_back(pcutoff); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -37,8 +38,9 @@ string ClassifyTreeCommand::getHelpString(){ helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n"; helpString += "If you provide a group file, the concensus for each group will also be provided. \n"; helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; + helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n"; helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n"; - helpString += "The classify.tree command parameters are tree, group, name and taxonomy. The tree and taxonomy files are required.\n"; + helpString += "The classify.tree command parameters are tree, group, name, count and taxonomy. The tree and taxonomy files are required.\n"; helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n"; helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n"; helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; @@ -49,7 +51,22 @@ string ClassifyTreeCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string ClassifyTreeCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "summary") { pattern = "[filename],taxonomy.summary"; } //makes file like: amazon.0.03.fasta + else if (type == "tree") { pattern = "[filename],taxonomy.tre"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "ClassifyTreeCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** ClassifyTreeCommand::ClassifyTreeCommand(){ try { @@ -127,6 +144,14 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["taxonomy"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -158,16 +183,30 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option) { else if (groupfile == "not found") { groupfile = ""; } else { m->setGroupFile(groupfile); } + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { countfile = ""; abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((namefile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true; + } + + if ((groupfile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; + } + string temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "51"; } m->mothurConvert(temp, cutoff); if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; } - if (namefile == "") { - vector files; files.push_back(treefile); - parser.getNameFile(files); + if (countfile == "") { + if (namefile == "") { + vector files; files.push_back(treefile); + parser.getNameFile(files); + } } - } } catch(exception& e) { @@ -193,15 +232,15 @@ int ClassifyTreeCommand::execute(){ TreeReader* reader = new TreeReader(treefile, groupfile, namefile); vector T = reader->getTrees(); - TreeMap* tmap = T[0]->getTreeMap(); + CountTable* tmap = T[0]->getCountTable(); Tree* outputTree = T[0]; delete reader; - if (namefile != "") { readNamesFile(); } + if (namefile != "") { m->readNames(namefile, nameMap, nameCount); } if (m->control_pressed) { delete tmap; delete outputTree; return 0; } - readTaxonomyFile(); + m->readTax(taxonomyfile, taxMap); /***************************************************/ // get concensus taxonomies // @@ -242,7 +281,9 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){ string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(treefile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.summary"; + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(treefile)); + string outputFileName = getOutputFileName("summary", variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; @@ -256,12 +297,13 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){ string treeOutputDir = outputDir; if (outputDir == "") { treeOutputDir += m->hasPath(treefile); } - string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.tre"; + variables["[filename]"] = treeOutputDir + m->getRootName(m->getSimpleName(treefile)); + string outputTreeFileName = getOutputFileName("tree", variables); //create a map from tree node index to names of descendants, save time later map > > nodeToDescendants; //node# -> (groupName -> groupMembers) for (int i = 0; i < T->getNumNodes(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return 0; } nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants); } @@ -285,7 +327,7 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){ tax = getTaxonomy(nodeToDescendants[i][group], size); out << (i+1) << '\t' << size << '\t' << tax << endl; } - + T->tree[i].setLabel((i+1)); } out.close(); @@ -314,7 +356,6 @@ string ClassifyTreeCommand::getTaxonomy(set names, int& size) { for (set::iterator it = names.begin(); it != names.end(); it++) { - //if namesfile include the names if (namefile != "") { @@ -335,7 +376,7 @@ string ClassifyTreeCommand::getTaxonomy(set names, int& size) { }else{ //add seq to tree int num = nameCount[(*it)]; // we know its there since we found it in nameMap - for (int i = 0; i < num; i++) { phylo->addSeqToTree((*it)+toString(i), it2->second); } + for (int i = 0; i < num; i++) { phylo->addSeqToTree((*it)+toString(i), itTax->second); } size += num; } } @@ -347,10 +388,15 @@ string ClassifyTreeCommand::getTaxonomy(set names, int& size) { if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it m->mothurOut((*it) + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }else{ - //add seq to tree - phylo->addSeqToTree((*it), itTax->second); - size++; - } + if (countfile != "") { + int numDups = ct->getNumSeqs((*it)); + for (int j = 0; j < numDups; j++) { phylo->addSeqToTree((*it), itTax->second); } + size += numDups; + }else{ + //add seq to tree + phylo->addSeqToTree((*it), itTax->second); + size++; + } } } if (m->control_pressed) { delete phylo; return conTax; } @@ -424,12 +470,12 @@ map > ClassifyTreeCommand::getDescendantList(Tree*& T, int i int lc = T->tree[i].getLChild(); int rc = T->tree[i].getRChild(); - TreeMap* tmap = T->getTreeMap(); + // TreeMap* tmap = T->getTreeMap(); if (lc == -1) { //you are a leaf your only descendant is yourself - string group = tmap->getGroup(T->tree[i].getName()); + vector groups = T->tree[i].getGroup(); set mynames; mynames.insert(T->tree[i].getName()); - names[group] = mynames; //mygroup -> me + for (int j = 0; j < groups.size(); j++) { names[groups[j]] = mynames; } //mygroup -> me names["AllGroups"] = mynames; }else{ //your descedants are the combination of your childrens descendants names = descendants[lc]; @@ -453,68 +499,6 @@ map > ClassifyTreeCommand::getDescendantList(Tree*& T, int i exit(1); } } -//********************************************************************************************************************** -int ClassifyTreeCommand::readTaxonomyFile() { - try { - - ifstream in; - m->openInputFile(taxonomyfile, in); - - string name, tax; - - while(!in.eof()){ - in >> name >> tax; - m->gobble(in); - - //are there confidence scores, if so remove them - if (tax.find_first_of('(') != -1) { m->removeConfidences(tax); } - - taxMap[name] = tax; - - if (m->control_pressed) { in.close(); taxMap.clear(); return 0; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile"); - exit(1); - } -} - -/*****************************************************************/ -int ClassifyTreeCommand::readNamesFile() { - try { - ifstream inNames; - m->openInputFile(namefile, inNames); - - string name, names; - - while(!inNames.eof()){ - inNames >> name; //read from first column A - inNames >> names; //read from second column A,B,C,D - m->gobble(inNames); - - //parse names into vector - vector theseNames; - m->splitAtComma(names, theseNames); - - for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; } - nameCount[name] = theseNames.size(); - - if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; } - } - inNames.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyTreeCommand", "readNamesFile"); - exit(1); - } -} - /*****************************************************************/